def test_codeml_outfile(self):
    # Ask CODEML-based reconstruction to write to an explicit path and
    # check that exactly that path is created and handed back.
    expected = os.path.join(
        PATH, 'tests', 'data', 'asr', 'codeml.ancestors.tsv')
    produced = asr(CODEML, self.msa, self.tree, 'JTT', outfile=expected)

    exists = os.path.isfile(produced)
    self.assertTrue(exists)
    self.assertEqual(expected, produced)

    # Remember the generated file on the instance (presumably it is
    # removed during test teardown, which is not visible here).
    self.rm = expected
def test_raxml_default(self):
    # Run RAxML-based reconstruction without naming an output file and
    # check that the path handed back points at an existing file.
    produced = asr(RAXML, self.msa, self.tree, 'JTT')

    exists = os.path.isfile(produced)
    self.assertTrue(exists)

    # Remember the generated file on the instance (presumably it is
    # removed during test teardown, which is not visible here).
    self.rm = produced
def test_codeml_dfault(self):
    # Run CODEML-based reconstruction without naming an output file and
    # check that the path handed back points at an existing file.
    produced = asr(CODEML, self.msa, self.tree, 'JTT')

    exists = os.path.isfile(produced)
    self.assertTrue(exists)

    # Remember the generated file on the instance (presumably it is
    # removed during test teardown, which is not visible here).
    self.rm = produced
def _trimmed_alignment(alignment):
    """
    Return the trimmed counterpart of an alignment, creating it if needed.

    :param alignment: str, path to an alignment file.
    :return: str, path to the trimmed alignment file.
    """

    trimmed = ''.join([utilities.basename(alignment), '.trimmed.fasta'])
    if os.path.isfile(trimmed):
        info('Using pre-existed trimmed alignment file.')
    else:
        _, trimmed = utilities.trim(alignment, outfile=trimmed)
    return trimmed


def _sequencing(sequence, tree, aligner, ancestor, wd, asr_model, verbose):
    """
    Prepare the sequence data file and load it.

    Without a tree, ``sequence`` is loaded as is. With a tree, ``sequence``
    must be a FASTA file; it is aligned and trimmed when necessary, fed to
    the ancestral states reconstruction program, and the resulting file is
    loaded instead. Alignment, trimmed alignment and ancestral states files
    that already exist on disk are reused rather than regenerated.

    :param sequence: str, path to a sequence data file.
    :param tree: str, path to a NEWICK tree file; a false value skips
        alignment and reconstruction entirely.
    :param aligner: str, alignment program, only needed when the FASTA
        records differ in length.
    :param ancestor: str, ancestral states reconstruction program,
        required whenever a tree is given.
    :param wd: not used by this function.
    :param asr_model: model handed over to the reconstruction program.
    :param verbose: verbosity flag handed over to the alignment and
        reconstruction programs.
    :return: tuple, the five values returned by ``_load`` (tree, rate,
        records, aps, size) followed by the path of the file that was
        actually loaded.

    The process exits with status 1 if a tree is given and the sequence
    file does not start with a FASTA header, if alignment is required but
    no aligner was given, if no usable alignment was obtained, or if no
    reconstruction program was given.
    """

    if tree:
        utilities.Tree(tree, leave=True)
        residues, lengths, proteins = set(AMINO_ACIDS), [], []

        with open(sequence) as handle:
            if not handle.readline().strip().startswith('>'):
                error('NEWICK format tree was provided, but the sequence file '
                      'was not in the FASTA format.')
                sys.exit(1)
            handle.seek(0)
            for record in SeqIO.parse(handle, 'fasta'):
                lengths.append(len(record.seq))
                proteins.append(set(record.seq).issubset(residues))

        if len(set(lengths)) == 1:
            # All records share one length: the file is treated as an
            # alignment. Trimming is only skipped when every record consists
            # solely of characters found in AMINO_ACIDS.
            if all(proteins):
                trimmed = sequence
            else:
                trimmed = _trimmed_alignment(sequence)
        elif aligner:
            aler, _ = msa._guess(aligner)
            outfile = ''.join(
                [utilities.basename(sequence), '.{}.fasta'.format(aler)])
            if os.path.isfile(outfile):
                info('Using pre-existed alignment file')
                trimmed = _trimmed_alignment(outfile)
            else:
                trimmed = msa.msa(aligner, sequence, verbose=verbose,
                                  outfile=outfile, trimming=True)
        else:
            error('FASTA format sequence file was provided, but no '
                  'alignment program was provided.')
            sys.exit(1)

        if not trimmed:
            # No message is logged here; presumably the alignment or trimming
            # step has already reported the failure (not visible here).
            sys.exit(1)
        if not ancestor:
            error('No ancestral reconstruction program was provided.')
            sys.exit(1)

        # Strip only the trailing marker. str.replace would also remove the
        # marker from anywhere else in the path (e.g. a directory name).
        marker = '.trimmed.fasta'
        if trimmed.endswith(marker):
            name = trimmed[:-len(marker)]
        else:
            name = trimmed

        aser, _ = asr._guess(ancestor)
        outfile = '{}.{}.tsv'.format(utilities.basename(name), aser)
        if os.path.isfile(outfile):
            info('Using pre-existed ancestral states sequence file.')
            sequence = outfile
        else:
            sequence = asr.asr(ancestor, trimmed, tree, asr_model,
                               verbose=verbose, outfile=outfile)

    tree, rate, records, aps, size = _load(sequence)
    return tree, rate, records, aps, size, sequence