Esempio n. 1
0
 def run_agadir_on_multifastas(self, path_fastafile: str, path_dst: str):
     """
     Run agadir on every sequence of a file that holds multiple fasta sequences.

     The file is streamed line by line. Each record (a line containing '>' plus the lines that follow it, up to
     the next such line) is written to its own fasta file, passed to agadir.compute(), and the file is then
     deleted again so that temporary files do not pile up when running large numbers of sequences.

     The last record of the file is processed as well: it has no following header line to trigger it, so it is
     handled explicitly once the end of the file is reached. Lines that appear before the first header are
     ignored. A file without any header line results in no agadir run at all.

     :param path_fastafile: Abs path to fasta file.
     :param path_dst: Abs path to output root dir.
     """

     def _compute_and_discard(record_name: str, record_text: str):
         # Write one record to <dir returned by GUM.os_makedirs(path_dst, record_name)>/<record_name><FSTAEXT>,
         # run agadir on that file, then remove the file.
         path_dst_mutant_filename = GUM.os_makedirs(path_dst, record_name)
         path_dst_mutant_file = os.path.join(
             path_dst_mutant_filename, record_name + Str.FSTAEXT.value)
         with open(path_dst_mutant_file, 'w') as g:
             g.write(record_text)
         agadir = Agadir(Cond.INCELL_MAML.value)
         agadir.compute(path_dst_mutant_file)
         GUM.linux_remove_file(path_dst_mutant_file)

     # None until the first header line has been seen.
     mutantfastafilename = None
     fasta_str = ''
     with open(path_fastafile) as f:
         # Iterate the file object directly instead of readlines() so the whole file is never held in memory.
         for line in f:
             if '>' in line:
                 if mutantfastafilename is not None:
                     # A new header closes the previous record.
                     _compute_and_discard(mutantfastafilename, fasta_str)
                 fasta_str = line
                 # Record name is the text after the last '>' up to the end of the line.
                 mutantfastafilename = line.split('>')[-1].split('\n')[0]
             else:
                 fasta_str += line
     if mutantfastafilename is not None:
         # The final record is not followed by another header, so flush it here.
         _compute_and_discard(mutantfastafilename, fasta_str)
    res = regex.search(filename)
    regex2 = re.compile(r"RepairPDB_[0-9]+_[A-Y][A-Y][0-9]+[A-Z]")
    res2 = regex2.search(filename)
    fxmutname = filename[res.end():res2.end()]
    return fxmutname


"""
3. Read csv files and write to a single csvfile for mysql dump. 
"""
for path_output_csvfile in path_output_csvfiles:
    # The pdb name and the mutant name are both parsed out of the csv file's base name.
    csvfilename = os.path.basename(path_output_csvfile)
    pdbname = _extract_pdbname(csvfilename)
    fxmutantname = _extract_fxmutantname(csvfilename)

    if not using_cluster:
        # Local run: merge this per-mutant csv file directly in the current process.
        GUM.write_1_csvfile_from_csv_per_mutants(
            path_output_csvfile, path_output_ac_or_bm_dir, pdbname)
        continue

    # Cluster run: write a job script that calls the stand-alone merge script, then submit it.
    path_jobq_dir = GUM.os_makedirs(
        Paths.CONFIG_BM_JOBQ, pdbname, fxmutantname)
    # Script file name followed by its three arguments, separated by Str.SPCE.value.
    script_and_args = Str.SPCE.value.join([
        'write_1csvfile_from_csvPerMutantfiles_zeus.py',
        path_output_csvfile,
        path_output_ac_or_bm_dir,
        pdbname,
    ])
    Cluster.write_job_q_bash(
        jobname=Paths.PREFIX_FX_RM.value + fxmutantname,
        path_job_q_dir=path_jobq_dir,
        python_script_with_paths=os.path.join(
            Paths.SE_SRC_CLSTR_PYSCRPTS.value, script_and_args))
    Cluster.run_job_q(path_job_q_dir=path_jobq_dir)

# pydevd.stoptrace()
Esempio n. 3
0
# Paths taken verbatim from positional command-line arguments 2-4.
path_output = sys.argv[2]
path_config_job = sys.argv[3]
path_output_blastp = sys.argv[4]
# Boolean switches from arguments 5-8: each is True only when the argument is exactly the string 'True';
# any other text (e.g. 'true', '1') yields False.
write_idmaps_for_mysldb = sys.argv[5] == 'True'
write_csv = sys.argv[6] == 'True'
write_xml = sys.argv[7] == 'True'
write_json = sys.argv[8] == 'True'


for path_fastafile in path_input_fastafile_list:
    with open(path_fastafile) as fastafile_opened:
        fastafile_name = path_fastafile.split('/')[-1].split('.')[0]
        jobname = 'BLSTP_' + fastafile_name
        Cluster.write_job_q_bash(jobname=jobname, path_job_q_dir=path_config_job, queue='all.q', memory_limit_GB='3',
                                 cluster_node='hodor1.vib')
        path_output_blastp_fastaname = GUM.os_makedirs(path_output_blastp, fastafile_name)
        os.chdir(path_output_blastp_fastaname)
        Cluster.run_job_q(path_job_q_dir=path_config_job)
        Cluster.wait_for_grid_engine_job_to_complete(grid_engine_job_prefix_or_full_name=jobname)
        path_raw_blstp_xml = IdProt._write_raw_blast_xml(path_output, fastafile_name,
                                                         blastp_result=NCBIWWW.qblast(
                                                             program=Biopy.BlastParam.BLST_P.value,
                                                             database=Biopy.BlastParam.SWSPRT.value,
                                                             sequence=fastafile_opened.read(),
                                                             entrez_query=Biopy.BlastParam.HOMSAP_ORG.value,
                                                             alignments=Biopy.BlastParam.MAX_ALIGN_20.value,
                                                             hitlist_size=Biopy.BlastParam.MAX_HIT_20.value))
        blastp_dict = Biopy.parse_filter_blastp_xml_to_dict(path_raw_blstp_xml, fastafile_name, path_fastafile)
        # blastp_dict_list.append(blastp_dict)
        if write_idmaps_for_mysldb:
            IdProt._write_idmaps_for_mysqldb(path_output, blastp_dict, write_csv=write_csv, write_xml=write_xml,
Esempio n. 4
0
    def _write_mutants(self,
                       title_titleSeq_w_mutants: dict,
                       write_1_fasta_only: bool,
                       write_fasta_per_mut: bool,
                       path_output_3dots: str,
                       write_csv=False,
                       write_txt=False):
        """
        Writes the mutants out to fastafiles and/or csv files and/or txt files. These can be written in one file
        containing all mutants or one file per mutant.
        The fastafiles are written to /output_data/<fastafilename>/mutants/.
        The csvfiles and txtfiles are written to /output_data/<fastafilename/sequences/.
        The reason for fastafile mutants being written to an input folder is that these sequences are generated as
        direct inputs for the mutation operation.
        :param title_titleSeq_w_mutants: Title of wild-type associated to every mutant title:sequence.
        :param write_1_fasta_only: True to write one fastafile containing all mutants, separated by \n.
        :param write_fasta_per_mut: True to write one fastafile per mutant.
        :param path_output_3dots: Absolute path of output_data dir (where fasta, txt, csv written), includes to
        subdirs: /fastas/xxxx...yyyy/ e.g. 1001...2000
        :param write_csv: True to write 1 csv file for wt & mutants. False by default.
        :param write_txt: True to write 1 txt file for wt & mutants. False by default.
        """
        path_1_fastafile = None
        path_1_fastafile_open = None
        path_fastafilepermut = None
        path_seqscsv = None
        path_seqscsv_open = None
        path_seqstxt = None
        path_seqstxt_open = None

        for wt_title, title_seq in title_titleSeq_w_mutants.items():
            for mut_title, mut_seq in title_seq.items():
                if write_1_fasta_only and path_1_fastafile is None:
                    path_1_fastafile = GUM.os_makedirs(path_output_3dots,
                                                       wt_title,
                                                       Paths.DIR_MUTANTS.value)
                    path_1_fastafile = os.path.join(
                        path_1_fastafile, wt_title + '_mutants.fasta')
                    path_1_fastafile_open = open(path_1_fastafile, 'w')
                    path_1_fastafile_open.write('>' + wt_title + '\n' +
                                                title_seq[wt_title] + '\n')
                if write_fasta_per_mut and path_fastafilepermut is None:
                    path_fastafilepermut = GUM.os_makedirs(
                        path_output_3dots, Paths.DIR_FASTAS.value, wt_title)
                    path_fastafilepermut = os.path.join(
                        path_fastafilepermut, wt_title + Str.FSTAEXT.value)
                    path_fastafilepermut_open = open(path_fastafilepermut, 'w')
                    path_fastafilepermut_open.write('>' + wt_title + '\n' +
                                                    title_seq[wt_title] + '\n')
                    path_fastafilepermut_open.close()
                if write_csv and path_seqscsv is None:
                    path_seqscsv = GUM.os_makedirs(
                        path_output_3dots, Paths.DIR_SEQS_TXT_CSV.value,
                        wt_title)
                    path_seqscsv = os.path.join(path_seqscsv,
                                                wt_title + '_mutants.csv')
                    path_seqscsv_open = open(path_seqscsv, 'w')
                    path_seqscsv_open.write(wt_title + ':' +
                                            title_seq[wt_title] + ',')
                if write_txt and path_seqstxt is None:
                    path_seqstxt = GUM.os_makedirs(
                        path_output_3dots, Paths.DIR_SEQS_TXT_CSV.value,
                        wt_title)
                    path_seqstxt = os.path.join(path_seqstxt,
                                                wt_title + '_mutants.txt')
                    path_seqstxt_open = open(path_seqstxt, 'w')
                    path_seqstxt_open.write(wt_title + ':' +
                                            title_seq[wt_title] + '\n')
                elif mut_title is not wt_title:
                    if write_1_fasta_only and path_1_fastafile_open is not None:
                        path_1_fastafile_open.write('>' + mut_title + '\n' +
                                                    mut_seq + '\n')
                    if write_fasta_per_mut and path_fastafilepermut is not None:
                        path_fastafilepermut = os.path.join(
                            path_output_3dots, Paths.DIR_FASTAS.value,
                            wt_title, Paths.DIR_MUTANTS.value,
                            mut_title + Str.FSTAEXT.value)
                        path_fastafilepermut_open = open(
                            path_fastafilepermut, 'w')
                        path_fastafilepermut_open.write('>' + mut_title +
                                                        '\n' + mut_seq + '\n')
                        path_fastafilepermut_open.close()
                    if write_csv and path_seqscsv_open is not None:
                        path_seqscsv_open.write(mut_title + ':' + mut_seq +
                                                ',')
                    if write_txt and path_seqstxt_open is not None:
                        path_seqstxt_open.write(mut_title + ':' + mut_seq +
                                                '\n')

        if path_1_fastafile_open is not None:
            path_1_fastafile_open.close()
        if path_seqstxt_open is not None:
            path_seqstxt_open.close()
        if path_seqscsv_open is not None:
            path_seqscsv_open.close()
Esempio n. 5
0
:sys.argv[1] path_fastafile: Abs path to fasta file.
:sys.argv[2] path_dst: Abs path to output root dir.
"""

# Banner printed at the start of this script's output.
print('run_agadir_on_multifastas_zeus.py ###################################')
# Replace path_dst with the path returned by the GUM helper.
# NOTE(review): presumably this builds/creates the per-file agadir output directory - confirm against GUM.
path_dst = GUM.make_path_agadir_3dots_filename_mutants_dirs(
    path_dst, path_fastafile, add_filename_subdir=True)
with open(path_fastafile) as f:
    # is_first_line stays True until the first header line has been seen.
    is_first_line = True
    # Text of the record currently being accumulated (header line plus the lines that follow it).
    fasta_str = ''
    mutantfastafilename = ''
    mutantfastafile = ''
    for line in f.readlines():
        # Any line containing '>' is treated as the header of a new record.
        if '>' in line:
            if not is_first_line:
                # A new header closes the previous record: write it to its own file, run agadir on that
                # file, then remove the file again.
                path_dst_mutant_filename = GUM.os_makedirs(
                    path_dst, mutantfastafilename)
                path_dst_mutant_file = os.path.join(path_dst_mutant_filename,
                                                    mutantfastafile)
                with open(path_dst_mutant_file, 'w') as g:
                    g.write(fasta_str)
                agadir = Agadir(Cond.INCELL_MAML.value)
                agadir.compute(path_dst_mutant_file)
                GUM.linux_remove_file(path_dst_mutant_file)
            # Start accumulating the new record with its header line.
            fasta_str = line
            is_first_line = False
            # Record name is the text after the last '>' up to the end of the line.
            mutantfastafilename = line.split('>')[-1].split('\n')[0]
            mutantfastafile = mutantfastafilename + Str.FSTAEXT.value
        else:
            fasta_str += line
    # NOTE(review): within the lines shown here, the record accumulated after the last header is never
    # written or passed to agadir - confirm that the final sequence is handled after this loop.