#Module-level imports needed by the functions below. Project-internal helpers
#(select_genomes_by_ids, find_cogs_in_sequence_records, get_most_recent_gene_name,
#parse_options, the archive/directory utilities, run_phipack, codeml helpers, etc.)
#are assumed to be imported from elsewhere in this package, and log is assumed to
#be the module-level logger used throughout.
import logging
import os
import shutil
import tempfile
from operator import itemgetter

from Bio import AlignIO, SeqIO
from Bio.Align import MultipleSeqAlignment

log = logging.getLogger(__name__)


def create_crosstable(sico_files, target_crosstable):
    """Create crosstable with the orthologs vertically, the genomes horizontally, and gene IDs at the intersections."""
    with open(target_crosstable, mode='w') as write_handle:
        #Create dictionaries mapping genomes to gene IDs per sico file
        row_data = [(sico_file,
                     dict(itemgetter(0, 2)(fasta_record.id.split('|'))
                          for fasta_record in SeqIO.parse(sico_file, 'fasta')))
                    for sico_file in sico_files]

        #Retrieve unique genomes across all sico files, just to be safe
        genomes = sorted(set(key for row in row_data for key in row[1].keys()))
        genome_dicts = select_genomes_by_ids(genomes).values()

        #Write out values to file
        write_handle.write('\t' + '\t'.join(genomes))
        write_handle.write('\tCOGs\tProduct\n')
        for sico_file, row in row_data:
            ortholog = os.path.split(sico_file)[1].split('.')[0]
            write_handle.write(ortholog + '\t')
            write_handle.write('\t'.join(row.get(genome, '') for genome in genomes))

            #Parse sequence records again, but now to retrieve COGs and products
            seq_records = list(SeqIO.parse(sico_file, 'fasta'))
            #COGs
            cogs = find_cogs_in_sequence_records(seq_records)
            write_handle.write('\t' + ','.join(cogs))
            #Product
            product = get_most_recent_gene_name(genome_dicts, seq_records)
            write_handle.write('\t' + product)

            #New line
            write_handle.write('\n')
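
#--- Illustrative sketch, not part of the original module ---------------------
#The dict built in create_crosstable relies on the pipeline's record ID
#convention of pipe-separated fields, where field 0 is the genome ID and
#field 2 is the gene ID. A minimal demonstration with a made-up record ID:
def _demo_crosstable_id_parsing():
    record_id = '12345|contig01|gene99'  #hypothetical ID in the assumed format
    genome_id, gene_id = itemgetter(0, 2)(record_id.split('|'))
    assert (genome_id, gene_id) == ('12345', 'gene99')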
def main(args):
    """Main function called when run from command line or as part of pipeline."""
    usage = """
Usage: concatenate_orthologs.py
--orthologs-zip=FILE    archive of orthologous genes in FASTA format
--coding-regions=FILE   destination file path for archive of trimmed orthologous coding regions per genome
--concatemer=FILE       destination file path for super-concatemer of all genomes
--taxon-a=FILE          destination file path for genome IDs for taxon A
--taxon-b=FILE          destination file path for genome IDs for taxon B
--tree=FILE             destination file path for tree visualization
"""
    options = ['orthologs-zip', 'coding-regions', 'concatemer', 'taxon-a', 'taxon-b', 'tree']
    orthologs_zip, target_coding_regions, target_concat_file, target_taxon_a, target_taxon_b, target_tree = \
        parse_options(usage, options, args)

    #Run filtering in a temporary folder, to prevent interference from simultaneous runs
    run_dir = tempfile.mkdtemp(prefix='concatemer_tree_')

    #Extract files from zip archive
    temp_dir = create_directory('orthologs', inside_dir=run_dir)
    ortholog_files = extract_archive_of_files(orthologs_zip, temp_dir)

    #Separate out orthologs per genome to create trimmed coding region files per genome
    genome_coding_regions_files = coding_regions_per_genome(run_dir, ortholog_files)
    create_archive_of_files(target_coding_regions, genome_coding_regions_files)

    #Concatenate coding region files per genome
    concatemer_files = concatemer_per_genome(run_dir, genome_coding_regions_files)

    #Create super concatemer
    create_super_concatemer(concatemer_files, target_concat_file)

    #Determine the taxa present in the super concatemer tree by building a phylogenetic tree from the genome
    #concatemer and reading the genome IDs in the two largest clades.
    super_distance_file = _run_dna_dist(run_dir, target_concat_file)
    super_tree_file = _run_neighbor(run_dir, super_distance_file)
    genome_ids_a, genome_ids_b = _read_taxa_from_tree(super_tree_file)

    #Map Project IDs to Organism names
    id_to_name_map = dict((gid, genome['Organism/Name']) for gid, genome in
                          select_genomes_by_ids(genome_ids_a + genome_ids_b).iteritems())

    #Write Project IDs and Organism Names to files, with a fallback to genome_id for external genomes
    with open(target_taxon_a, mode='w') as write_handle:
        for genome_id in genome_ids_a:
            write_handle.write('{id}\t{name}\n'.format(id=genome_id, name=id_to_name_map.get(genome_id, genome_id)))
    with open(target_taxon_b, mode='w') as write_handle:
        for genome_id in genome_ids_b:
            write_handle.write('{id}\t{name}\n'.format(id=genome_id, name=id_to_name_map.get(genome_id, genome_id)))

    #Visualize tree
    visualize_tree(super_tree_file, id_to_name_map, target_tree)

    #Remove unused files to free disk space
    shutil.rmtree(run_dir)

    #Exit after a comforting log message
    log.info('Produced: \n%s\n%s\n%s\n%s\n%s',
             target_coding_regions, target_concat_file, target_taxon_a, target_taxon_b, target_tree)
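
#--- Illustrative sketch, not part of the original module ---------------------
#_read_taxa_from_tree is defined elsewhere in the pipeline; per the comment
#above, it reads the genome IDs of the two largest clades. A minimal sketch of
#that idea using Bio.Phylo (the real implementation may differ):
def _read_taxa_from_tree_sketch(tree_file):
    from Bio import Phylo
    tree = Phylo.read(tree_file, 'newick')
    #Order the root's immediate sub-clades by how many leaves each contains
    clades = sorted(tree.root.clades, key=lambda clade: clade.count_terminals(), reverse=True)
    clade_a, clade_b = clades[0], clades[1]
    return ([leaf.name for leaf in clade_a.get_terminals()],
            [leaf.name for leaf in clade_b.get_terminals()])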
def main(args):
    """Main function called when run from command line or as part of pipeline."""
    usage = """
Usage: translate.py
--genomes=FILE        file with selected genome IDs followed by Organism Name on each line
--external-zip=FILE   optional archive of user-provided external genomes containing formatted nucleotide FASTA files
--dna-zip=FILE        destination file path for zip archive of extracted DNA files
--protein-zip=FILE    destination file path for zip archive of translated protein files
"""
    options = ['genomes', 'external-zip=?', 'dna-zip', 'protein-zip']
    genome_ids_file, external_zip, dna_zipfile, protein_zipfile = parse_options(usage, options, args)

    dna_files = []
    protein_files = []

    #Read GenBank Project IDs from genomes_file, each on their own line
    with open(genome_ids_file) as read_handle:
        genome_ids = [line.split()[0] for line in read_handle
                      if not line.startswith('#') and 'external genome' not in line]

    if len(genome_ids):
        #Retrieve associated genome dictionaries from complete genomes table
        genomes = select_genomes_by_ids(genome_ids).values()
        genomes = sorted(genomes, key=itemgetter('Organism/Name'))

        #Actually translate the genomes to produce a set of files for both DNA files & protein files
        dna_files, protein_files = translate_genomes(genomes)

    #Also translate the external genomes
    if external_zip:
        #Extract external genomes archive
        external_dir = tempfile.mkdtemp(prefix='external_genomes_')
        external_dna_files = extract_archive_of_files(external_zip, external_dir)

        #Append IDs of external fasta files to genome IDs file
        _append_external_genomes(external_dna_files, genome_ids_file)

        #Translate individual files
        external_protein_files = [translate_fasta_coding_regions(dna_file) for dna_file in external_dna_files]

        #Add the files to the appropriate collections
        dna_files.extend(external_dna_files)
        protein_files.extend(external_protein_files)

    #Write the produced files to command line argument filenames
    create_archive_of_files(dna_zipfile, dna_files)
    create_archive_of_files(protein_zipfile, protein_files)

    #Do not clean up extracted DNA files or protein translations: keep them as cache
    #But do clean up external_dir now that the compressed archives are created
    if external_zip:
        shutil.rmtree(external_dir)

    #Exit after a comforting log message
    log.info("Produced: \n%s &\n%s", dna_zipfile, protein_zipfile)
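
#--- Illustrative sketch, not part of the original module ---------------------
#_append_external_genomes is defined elsewhere. The reader above skips lines
#containing 'external genome', which suggests appended entries carry that
#marker; the exact line format below is an assumption, not taken from the
#original:
def _append_external_genomes_sketch(external_dna_files, genome_ids_file):
    with open(genome_ids_file, mode='a') as append_handle:
        for dna_file in external_dna_files:
            #Derive an identifier from the file name and mark the line so later
            #reads can tell external entries apart from project genome IDs
            genome_id = os.path.splitext(os.path.basename(dna_file))[0]
            append_handle.write('{0}\texternal genome\n'.format(genome_id))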
def _table_calculations(genome_ids_a, genome_ids_b, sico_files, phipack_values):
    '''Perform calculations for comparison of genome_ids_a with genome_ids_b.'''
    # retrieve genome dictionaries for clade A once, rather than per ortholog
    genomes_a = select_genomes_by_ids(genome_ids_a).values()

    # list to hold the clade_calcs instances created per file
    calculations = []

    # loop over orthologs
    for sico_file in sico_files:
        # parse alignment
        alignment = AlignIO.read(sico_file, 'fasta')

        # split alignment per clade
        alignment_a = MultipleSeqAlignment(seqr for seqr in alignment if seqr.id.split('|')[0] in genome_ids_a)
        alignment_b = MultipleSeqAlignment(seqr for seqr in alignment if seqr.id.split('|')[0] in genome_ids_b)

        # calculate codeml values
        codeml_values = _get_codeml_values(alignment_a, alignment_b)

        # create gathering instance of clade_calcs
        instance = clade_calcs(alignment_a, genomes_a)

        # store ortholog name retrieved from filename
        ortholog = os.path.basename(sico_file).split('.')[0]
        instance.values[ORTHOLOG] = ortholog

        # add codeml_values to clade_calcs instance values
        instance.values.update(codeml_values)

        # add phipack values for this file
        instance.values.update(phipack_values[sico_file])

        # add COG digits and letters
        _extract_cog_digits_and_letters(instance)

        # add SFS related values
        _codon_site_freq_spec(instance)

        # add additional deduced calculations
        _add_combined_calculations(instance)

        # store the clade_calcs values
        calculations.append(instance)

    # calculate sums and means
    max_nton = len(genome_ids_a) // 2
    sum_stats, mean_stats = _calculcate_mean_and_averages(calculations, max_nton)

    # neutrality index calculation and bootstrapping
    ni_stats, ni_lower_stats, ni_upper_stats = _neutrality_indices(calculations)

    # finally append statistics to calculations so they show up in the file
    calculations.extend((sum_stats, mean_stats, ni_stats, ni_lower_stats, ni_upper_stats))

    return calculations
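
#--- Illustrative sketch, not part of the original module ---------------------
#_calculcate_mean_and_averages [sic] is defined elsewhere; conceptually it
#reduces the per-ortholog value dictionaries to a sum row and a mean row. A
#minimal sketch of that reduction over plain dicts, under the assumption that
#only numeric values are aggregated:
def _sum_and_mean_rows_sketch(value_dicts):
    sums = {}
    counts = {}
    for values in value_dicts:
        for key, value in values.items():
            if isinstance(value, (int, float)):
                sums[key] = sums.get(key, 0) + value
                counts[key] = counts.get(key, 0) + 1
    means = dict((key, sums[key] / float(counts[key])) for key in sums)
    return sums, means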
def _occurences_and_cogs(genome_ids, ortholog_files):
    """Generator that yields, per ortholog, how many sequences exist for each genome (in genome_ids order), plus the
    COGs and gene product that occur."""
    genomes = select_genomes_by_ids(genome_ids).values()
    for fasta_file in ortholog_files:
        records = tuple(SeqIO.parse(fasta_file, 'fasta'))
        ids = [record.id.split('|')[0] for record in records]
        count_per_id = [ids.count(genome_id) for genome_id in genome_ids]
        cogs = sorted(find_cogs_in_sequence_records(records))
        ortholog_nr = os.path.splitext(os.path.split(fasta_file)[1])[0]
        for record in records:
            #SeqIO mucks up IDs containing spaces, so we have to assign the description as value for id
            record.id = record.description
        product = get_most_recent_gene_name(genomes, records)
        yield count_per_id, ortholog_nr, cogs, product
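
#--- Illustrative sketch, not part of the original module ---------------------
#How a caller might consume the generator above to write one stats row per
#ortholog; the file layout is an assumption, not taken from the original:
def _write_occurrence_stats_sketch(genome_ids, ortholog_files, stats_path):
    with open(stats_path, mode='w') as write_handle:
        write_handle.write('Ortholog\t' + '\t'.join(genome_ids) + '\tCOGs\tProduct\n')
        for count_per_id, ortholog_nr, cogs, product in _occurences_and_cogs(genome_ids, ortholog_files):
            counts = '\t'.join(str(count) for count in count_per_id)
            write_handle.write('{0}\t{1}\t{2}\t{3}\n'.format(ortholog_nr, counts, ','.join(cogs), product))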
def _phipack_for_all_orthologs(run_dir, aligned_files, stats_file):
    """Run PhiPack for each aligned FASTA file to find evidence of recombination, and write the PhiPack values, COGs
    and product per ortholog to stats_file."""
    log.info('Running PhiPack for %i orthologs to find recombination', len(aligned_files))

    #Create separate directory for PhiPack related files
    phipack_dir = create_directory('phipack', inside_dir=run_dir)

    with open(stats_file, mode='w') as write_handle:
        write_handle.write('\t'.join(['Ortholog', 'Informative sites', 'Phi', 'Max Chi^2', 'NSS', 'COGs', 'Product']) + '\n')

        #Retrieve unique genomes from the first ortholog file
        genome_ids = set(fasta_record.id.split('|')[0] for fasta_record in SeqIO.parse(aligned_files[0], 'fasta'))
        genome_dicts = select_genomes_by_ids(genome_ids).values()

        #Run PhiPack for each ortholog file and append its values to the stats file
        for ortholog_file in aligned_files:
            orth_name = os.path.split(ortholog_file)[1].split('.')[0]

            #Run PhiPack to detect signs of recombination in this ortholog
            phipack_values = run_phipack(phipack_dir, ortholog_file)

            #Write PhiPack values to line
            write_handle.write('{0}\t{1[PhiPack sites]}\t{1[Phi]}\t{1[Max Chi^2]}\t{1[NSS]}'.format(orth_name,
                                                                                                    phipack_values))

            #Parse sequence records again, but now to retrieve COGs and products
            seq_records = list(SeqIO.parse(ortholog_file, 'fasta'))
            #COGs
            cogs = find_cogs_in_sequence_records(seq_records)
            write_handle.write('\t' + ','.join(cogs))
            #Product
            product = get_most_recent_gene_name(genome_dicts, seq_records)
            write_handle.write('\t' + product)

            #End line
            write_handle.write('\n')
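
#--- Illustrative sketch, not part of the original module ---------------------
#run_phipack is defined elsewhere; the format string above implies it returns a
#dict with at least the keys 'PhiPack sites', 'Phi', 'Max Chi^2' and 'NSS'. A
#small demonstration of the '{1[key]}' dict-lookup format syntax used above,
#with made-up values:
def _demo_phipack_stats_line():
    phipack_values = {'PhiPack sites': 42, 'Phi': 0.5, 'Max Chi^2': 0.3, 'NSS': 0.7}  #hypothetical values
    line = '{0}\t{1[PhiPack sites]}\t{1[Phi]}\t{1[Max Chi^2]}\t{1[NSS]}'.format('ortholog_001', phipack_values)
    assert line == 'ortholog_001\t42\t0.5\t0.3\t0.7'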
def calculate_tables(genome_ids_a, genome_ids_b, sico_files, oddeven=False):
    """Compute a spreadsheet of data points for each of A and B based on the SICO files, without duplicating
    computations."""
    #Convert file names into identifiers while preserving filenames, as filenames are used for both BioPython & PhiPack
    orth_files = [(os.path.split(sico_file)[1].split('.')[0], sico_file) for sico_file in sico_files]

    #Find PhiPack values for each sico file
    orth_phipack_values = _phipack_values_for_sicos(orth_files)

    #Convert list of sico files into ortholog names mapped to BioPython alignment objects
    sico_alignments = [(ortholog, AlignIO.read(sico_file, 'fasta'))
                       for ortholog, sico_file in orth_files]

    #Only retrieve genomes once, which we'll use to link gene names to orthologs
    all_genome_ids = list(genome_ids_a)
    all_genome_ids.extend(genome_ids_b)
    genomes = select_genomes_by_ids(all_genome_ids).values()

    #For each ortholog, determine the newest gene name across taxa, so unannotated taxa also get gene names
    ortholog_gene_names = dict((ortholog, get_most_recent_gene_name(genomes, alignment))
                               for ortholog, alignment in sico_alignments)

    #Split individual sico alignments into separate alignments for each of the clades per ortholog
    #These split alignments can later be reversed and/or subselections can be made to calculate for alternate alignments
    split_alignments = [(ortholog,
                         MultipleSeqAlignment(seqr for seqr in alignment if seqr.id.split('|')[0] in genome_ids_a),
                         MultipleSeqAlignment(seqr for seqr in alignment if seqr.id.split('|')[0] in genome_ids_b))
                        for ortholog, alignment in sico_alignments]

    #Calculate tables for normal sico alignments
    log.info('Starting calculations for full alignments')
    table_a, table_b = _tables_for_split_alignments(split_alignments, ortholog_gene_names, orth_phipack_values)

    if not oddeven:
        return table_a, table_b

    #As an alternate method of calculating the number of substitutions for the independent X-axis of the eventual
    #graph: split each alignment for A and B into two further alignments of odd and even codons
    odd_even_split_orth_alignments = [(orthologname,
                                       _every_other_codon_alignments(alignment_x),
                                       _every_other_codon_alignments(alignment_y))
                                      for orthologname, alignment_x, alignment_y in split_alignments]

    #Recover odd alignments as the first of each pair of alignments
    odd_split_alignments = [(orthologname, odd_even_x[0], odd_even_y[0])
                            for orthologname, odd_even_x, odd_even_y in odd_even_split_orth_alignments]

    #Create files for all the odd codon alignments, so we can run PhiPack on them
    odd_alignments_dir = tempfile.mkdtemp(prefix='odd_codon_alignments_')
    odd_files = dict((ortholog, os.path.join(odd_alignments_dir, ortholog + '.ffn'))
                     for ortholog, odd_x, odd_y in odd_split_alignments)
    for ortholog, odd_x, odd_y in odd_split_alignments:
        AlignIO.write([odd_x, odd_y], odd_files[ortholog], 'fasta')
    odd_phipack_vals = _phipack_values_for_sicos(odd_files.items())
    shutil.rmtree(odd_alignments_dir)

    #Calculate tables for odd codon sico alignments
    log.info('Starting calculations for odd alignments')
    table_a_odd, table_b_odd = _tables_for_split_alignments(odd_split_alignments, ortholog_gene_names,
                                                            odd_phipack_vals)

    #Recover even alignments as the second of each pair of alignments
    even_split_alignments = [(orthologname, odd_even_x[1], odd_even_y[1])
                             for orthologname, odd_even_x, odd_even_y in odd_even_split_orth_alignments]

    #Create files for all the even codon alignments, so we can run PhiPack on them
    even_alignments_dir = tempfile.mkdtemp(prefix='even_codon_alignments_')
    even_files = dict((ortholog, os.path.join(even_alignments_dir, ortholog + '.ffn'))
                      for ortholog, even_x, even_y in even_split_alignments)
    for ortholog, even_x, even_y in even_split_alignments:
        AlignIO.write([even_x, even_y], even_files[ortholog], 'fasta')
    even_phipack_vals = _phipack_values_for_sicos(even_files.items())
    shutil.rmtree(even_alignments_dir)

    #Calculate tables for even codon sico alignments
    log.info('Starting calculations for even alignments')
    table_a_even, table_b_even = _tables_for_split_alignments(even_split_alignments, ortholog_gene_names,
                                                              even_phipack_vals)

    #Concatenate tables and return their file paths
    table_a_full = tempfile.mkstemp(suffix='.tsv', prefix='table_a_full_')[1]
    table_b_full = tempfile.mkstemp(suffix='.tsv', prefix='table_b_full_')[1]
    concatenate(table_a_full, [table_a, table_a_odd, table_a_even])
    concatenate(table_b_full, [table_b, table_b_odd, table_b_even])
    return table_a_full, table_b_full
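
#--- Illustrative sketch, not part of the original module ---------------------
#_every_other_codon_alignments is defined elsewhere; conceptually it splits a
#codon alignment into one alignment of the odd codons and one of the even
#codons. A minimal sketch using Biopython's column slicing and column-wise
#alignment concatenation (the real implementation may differ); it assumes the
#alignment length is a multiple of three and holds at least two codons:
def _every_other_codon_alignments_sketch(alignment):
    #Cut the alignment into consecutive three-column codon slices
    codons = [alignment[:, offset:offset + 3]
              for offset in range(0, alignment.get_alignment_length(), 3)]

    def _concat(slices):
        combined = slices[0]
        for part in slices[1:]:
            combined += part  #column-wise concatenation of alignments with matching rows
        return combined

    #Codons 1, 3, 5, ... are the odd codons; codons 2, 4, 6, ... the even codons
    return _concat(codons[0::2]), _concat(codons[1::2])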