Exemple #1
0
 def test_codeml_outfile(self):
     """CODEML run with an explicit ``outfile`` returns that same path."""
     path_parts = ('tests', 'data', 'asr', 'codeml.ancestors.tsv')
     expected = os.path.join(PATH, *path_parts)
     produced = asr(CODEML, self.msa, self.tree, 'JTT', outfile=expected)
     # The reported file must exist on disk and match the requested path.
     self.assertTrue(os.path.isfile(produced))
     self.assertEqual(expected, produced)
     # Remember the file on the instance (presumably removed by cleanup
     # code outside this method -- confirm against the test class).
     self.rm = expected
Exemple #2
0
 def test_raxml_default(self):
     """RAXML run without an explicit ``outfile`` yields an existing file."""
     produced = asr(RAXML, self.msa, self.tree, 'JTT')
     file_exists = os.path.isfile(produced)
     self.assertTrue(file_exists)
     # Remember the file on the instance (presumably removed by cleanup
     # code outside this method -- confirm against the test class).
     self.rm = produced
Exemple #3
0
 def test_codeml_dfault(self):
     """CODEML run without an explicit ``outfile`` yields an existing file."""
     produced = asr(CODEML, self.msa, self.tree, 'JTT')
     file_exists = os.path.isfile(produced)
     self.assertTrue(file_exists)
     # Remember the file on the instance (presumably removed by cleanup
     # code outside this method -- confirm against the test class).
     self.rm = produced
Exemple #4
0
def _reuse_or_trim(alignment):
    """
    Return the trimmed counterpart of an alignment file.

    The candidate name is ``utilities.basename(alignment)`` followed by
    ``.trimmed.fasta``. If that file already exists it is reused; otherwise
    ``utilities.trim`` is called and the second item of its result is used.

    :param alignment: str, path to an alignment file.
    :return: path to the trimmed alignment file.
    """
    trimmed = ''.join([utilities.basename(alignment), '.trimmed.fasta'])
    if os.path.isfile(trimmed):
        info('Using pre-existed trimmed alignment file.')
    else:
        _, trimmed = utilities.trim(alignment, outfile=trimmed)
    return trimmed


def _sequencing(sequence, tree, aligner, ancestor, wd, asr_model, verbose):
    """
    Resolve the sequence file to load and load it.

    Without a tree, ``sequence`` is handed to ``_load`` unchanged. With a
    tree, ``sequence`` must be a FASTA file: it is aligned (unless all
    records already share one length), trimmed (unless every record only
    contains characters from ``AMINO_ACIDS``), and passed through ancestral
    states reconstruction; the resulting file is what gets loaded. Existing
    intermediate files are reused instead of being regenerated.

    :param sequence: str, path to a sequence data file.
    :param tree: str, path to a NEWICK tree file; a falsy value skips the
        alignment / reconstruction steps.
    :param aligner: alignment program, forwarded to ``msa._guess`` and
        ``msa.msa``; required when the FASTA records differ in length.
    :param ancestor: ancestral reconstruction program, forwarded to
        ``asr._guess`` and ``asr.asr``; required whenever a tree is given.
    :param wd: not used by this function.
    :param asr_model: model argument forwarded to ``asr.asr``.
    :param verbose: verbosity flag forwarded to ``msa.msa`` and ``asr.asr``.
    :return: tuple, the five values returned by ``_load`` (tree, rate,
        records, aps, size) followed by the path of the file that was loaded.
    :raises SystemExit: with status 1 if the sequence file is not FASTA, a
        required program was not provided, or no trimmed alignment could be
        obtained.
    """

    if tree:
        # The Tree object is built but not kept; presumably this validates
        # or normalises the tree file -- confirm against utilities.Tree.
        utilities.Tree(tree, leave=True)
        amino_acids, lengths, pure = set(AMINO_ACIDS), [], []

        with open(sequence) as handle:
            if not handle.readline().strip().startswith('>'):
                error('NEWICK format tree was provided, but the sequence file '
                      'was not in the FASTA format.')
                sys.exit(1)
            # Rewind so the parser also sees the header line just consumed.
            handle.seek(0)
            for record in SeqIO.parse(handle, 'fasta'):
                lengths.append(len(record.seq))
                pure.append(set(record.seq).issubset(amino_acids))

        if len(set(lengths)) == 1:
            # A single shared length: treat the input as already aligned.
            trimmed = sequence if all(pure) else _reuse_or_trim(sequence)
        elif not aligner:
            error('FASTA format sequence file was provided, but no '
                  'alignment program was provided.')
            sys.exit(1)
        else:
            aler, _ = msa._guess(aligner)
            outfile = ''.join(
                [utilities.basename(sequence), '.{}.fasta'.format(aler)])
            if os.path.isfile(outfile):
                info('Using pre-existed alignment file')
                trimmed = _reuse_or_trim(outfile)
            else:
                trimmed = msa.msa(aligner,
                                  sequence,
                                  verbose=verbose,
                                  outfile=outfile,
                                  trimming=True)

        if not trimmed:
            # Previously this path exited without any explanation.
            error('No trimmed alignment file was produced.')
            sys.exit(1)
        if not ancestor:
            error('No ancestral reconstruction program was provided.')
            sys.exit(1)

        # Strip only the trailing suffix; str.replace would also remove any
        # earlier occurrence elsewhere in the path.
        suffix = '.trimmed.fasta'
        name = trimmed[:-len(suffix)] if trimmed.endswith(suffix) else trimmed

        aser, _ = asr._guess(ancestor)
        outfile = '{}.{}.tsv'.format(utilities.basename(name), aser)
        if os.path.isfile(outfile):
            info('Using pre-existed ancestral states sequence file.')
            sequence = outfile
        else:
            sequence = asr.asr(ancestor,
                               trimmed,
                               tree,
                               asr_model,
                               verbose=verbose,
                               outfile=outfile)

    tree, rate, records, aps, size = _load(sequence)
    return tree, rate, records, aps, size, sequence