Ejemplo n.º 1
0
def test_sequentual_phy__different_length_names_1():
    msa = MSA([FASTA("A_short_name", None, "ACGTTGATAACCAGG"),
               FASTA("Another_really_long_sequence_name_that_is_too_long", None, "TGCAGAGTACGACGT")])
    expected = \
"""2 15

A_short_name                        ACGTTGATAA  CCAGG
Another_really_long_sequence_n      TGCAGAGTAC  GACGT"""
    print interleaved_phy(msa), expected
    assert_equal(interleaved_phy(msa), expected)
Ejemplo n.º 2
0
def test_sequentual_phy__different_length_names_2():
    msa = MSA([FASTA("Burchelli_4", None, "ACGTTGATAACCAGG"),
               FASTA("Donkey",      None, "TGCAGAGTACGACGT")])
    expected = \
"""2 15

Burchelli_4             ACGTTGATAA  CCAGG
Donkey                  TGCAGAGTAC  GACGT"""
    print interleaved_phy(msa), expected
    assert_equal(interleaved_phy(msa), expected)
Ejemplo n.º 3
0
def test_sequentual_phy__different_length_names_2():
    msa = MSA([
        FASTA("Burchelli_4", None, "ACGTTGATAACCAGG"),
        FASTA("Donkey", None, "TGCAGAGTACGACGT")
    ])
    expected = \
"""2 15

Burchelli_4             ACGTTGATAA  CCAGG
Donkey                  TGCAGAGTAC  GACGT"""
    print interleaved_phy(msa), expected
    assert_equal(interleaved_phy(msa), expected)
Ejemplo n.º 4
0
def test_sequentual_phy__different_length_names_1():
    msa = MSA([
        FASTA("A_short_name", None, "ACGTTGATAACCAGG"),
        FASTA("Another_really_long_sequence_name_that_is_too_long", None,
              "TGCAGAGTACGACGT")
    ])
    expected = \
"""2 15

A_short_name                        ACGTTGATAA  CCAGG
Another_really_long_sequence_n      TGCAGAGTAC  GACGT"""
    print interleaved_phy(msa), expected
    assert_equal(interleaved_phy(msa), expected)
Ejemplo n.º 5
0
def test_interleaved_phy__short_sequences():
    """Check the output for the _MSA_MEDIUM_SEQUENCES fixture.

    The expected 44 columns fit in a single block of 10-character groups.
    """
    expected = "\n".join((
        "2 44",
        "",
        "seq1        ACGTTGATAA  CCAGGAGGGA  TTCGCGATTG  GTGGTAACGT  AGCC",
        "seq2        TGCAGAGTAC  GACGTCTCCT  AGATCCTGGA  CAATTTAAAC  CGAA",
    ))
    actual = interleaved_phy(_MSA_MEDIUM_SEQUENCES)
    assert_equal(actual, expected)
Ejemplo n.º 6
0
def test_interleaved_phy__with_flag():
    """Check that add_flag=True appends " I" to the header line."""
    expected = "\n".join((
        "2 15 I",
        "",
        "seq1        ACGTTGATAA  CCAGG",
        "seq2        TGCAGAGTAC  GACGT",
    ))
    actual = interleaved_phy(_MSA_SHORT_SEQUENCES, add_flag=True)
    assert_equal(actual, expected)
Ejemplo n.º 7
0
def test_interleaved_phy__medium_names():
    """Check the output for the _MSA_MEDIUM_NAMES fixture.

    The expected rows show both 22-character names in full, followed by two
    spaces and the sequence columns.
    """
    expected = "\n".join((
        "2 15",
        "",
        "A_really_long_sequence  ACGTTGATAA  CCAGG",
        "Another_real_long_one!  TGCAGAGTAC  GACGT",
    ))
    actual = interleaved_phy(_MSA_MEDIUM_NAMES)
    assert_equal(actual, expected)
Ejemplo n.º 8
0
def test_interleaved_phy__long_names():
    """Check the output for the _MSA_LONG_NAMES fixture.

    Each expected row starts with a 30-character name followed by six
    spaces and the sequence columns.
    """
    expected = "\n".join((
        "2 15",
        "",
        "A_really_long_sequence_name_th      ACGTTGATAA  CCAGG",
        "Another_really_long_sequence_n      TGCAGAGTAC  GACGT",
    ))
    actual = interleaved_phy(_MSA_LONG_NAMES)
    assert_equal(actual, expected)
Ejemplo n.º 9
0
def test_interleaved_phy__medium_names():
    """Verify the rendering of _MSA_MEDIUM_NAMES with full 22-character names."""
    # "<sequence count> <alignment length>", an empty line, then the rows.
    expected_rows = [
        "2 15",
        "",
        "A_really_long_sequence  ACGTTGATAA  CCAGG",
        "Another_real_long_one!  TGCAGAGTAC  GACGT",
    ]
    expected = "\n".join(expected_rows)
    assert_equal(interleaved_phy(_MSA_MEDIUM_NAMES), expected)
Ejemplo n.º 10
0
def test_interleaved_phy__short_sequences():
    """Verify that the 44-column _MSA_MEDIUM_SEQUENCES fits in one block."""
    # "<sequence count> <alignment length>", an empty line, then the rows.
    expected_rows = [
        "2 44",
        "",
        "seq1        ACGTTGATAA  CCAGGAGGGA  TTCGCGATTG  GTGGTAACGT  AGCC",
        "seq2        TGCAGAGTAC  GACGTCTCCT  AGATCCTGGA  CAATTTAAAC  CGAA",
    ]
    expected = "\n".join(expected_rows)
    assert_equal(interleaved_phy(_MSA_MEDIUM_SEQUENCES), expected)
Ejemplo n.º 11
0
def test_interleaved_phy__long_names():
    """Verify the rendering of _MSA_LONG_NAMES with 30-character names."""
    # "<sequence count> <alignment length>", an empty line, then the rows.
    expected_rows = [
        "2 15",
        "",
        "A_really_long_sequence_name_th      ACGTTGATAA  CCAGG",
        "Another_really_long_sequence_n      TGCAGAGTAC  GACGT",
    ]
    expected = "\n".join(expected_rows)
    assert_equal(interleaved_phy(_MSA_LONG_NAMES), expected)
Ejemplo n.º 12
0
def test_interleaved_phy__with_flag():
    """Verify that passing add_flag=True yields the "2 15 I" header."""
    # Header carrying the flag, an empty line, then one row per sequence.
    expected_rows = [
        "2 15 I",
        "",
        "seq1        ACGTTGATAA  CCAGG",
        "seq2        TGCAGAGTAC  GACGT",
    ]
    expected = "\n".join(expected_rows)
    flagged = interleaved_phy(_MSA_SHORT_SEQUENCES, add_flag=True)
    assert_equal(flagged, expected)
Ejemplo n.º 13
0
def test_interleaved_phy__multi_line_sequences():
    """Check the output for the 140-column _MSA_LONG_SEQUENCES fixture.

    The expected text has three blocks separated by empty lines. Only the
    first block carries the sequence names (five groups per row); the
    following blocks hold bare sequence groups (six and three per row).
    """
    expected = "\n".join((
        "2 140",
        "",
        "seq1        CGGATCTGCT  CCTCCACTGG  CCACGTTTAC  TGTCCCCCAA  CCGTTCGTCC",
        "seq2        AGTTGAAGAG  GCGGAACGTT  TGTAAACCGC  GCTAACGTAG  TTCTACAACC",
        "",
        "CGACCTAGTT  ATACTTCTTA  GCAAGGTGTA  AAACCAGAGA  TTGAGGTTAT  AACGTTCCTA",
        "AGCCACCCGG  TTCGAAGGAA  CAACTGGTCG  CCATAATTAG  GCGAAACGAT  AGTGCACTAA",
        "",
        "ATCAGTTATT  AAATTACCGC  GCCCCGACAG",
        "GGTCAGGTGC  GCCCCTGTAA  ATAATTAGAT",
    ))
    actual = interleaved_phy(_MSA_LONG_SEQUENCES)
    assert_equal(actual, expected)
Ejemplo n.º 14
0
def test_interleaved_phy__multi_line_sequences():
    """Verify that a 140-column alignment is wrapped into several blocks.

    Blocks are separated by empty lines; sequence names appear only in the
    first block of the expected text.
    """
    header = ["2 140"]
    named_block = [
        "seq1        CGGATCTGCT  CCTCCACTGG  CCACGTTTAC  TGTCCCCCAA  CCGTTCGTCC",
        "seq2        AGTTGAAGAG  GCGGAACGTT  TGTAAACCGC  GCTAACGTAG  TTCTACAACC",
    ]
    middle_block = [
        "CGACCTAGTT  ATACTTCTTA  GCAAGGTGTA  AAACCAGAGA  TTGAGGTTAT  AACGTTCCTA",
        "AGCCACCCGG  TTCGAAGGAA  CAACTGGTCG  CCATAATTAG  GCGAAACGAT  AGTGCACTAA",
    ]
    final_block = [
        "ATCAGTTATT  AAATTACCGC  GCCCCGACAG",
        "GGTCAGGTGC  GCCCCTGTAA  ATAATTAGAT",
    ]
    # An empty entry between sections produces the blank separator lines.
    expected = "\n".join(
        header + [""] + named_block + [""] + middle_block + [""] + final_block)
    assert_equal(interleaved_phy(_MSA_LONG_SEQUENCES), expected)
Ejemplo n.º 15
0
    def _run(self, _config, temp):
        """Write the joined alignment and its partition table below `temp`.

        Every input file is read and split according to its "partition_by"
        setting (falling back to self._part_by), and the groups listed in
        self._excluded are removed from each resulting part. The parts are
        then joined and written to "<out_prefix>.phy", while
        "<out_prefix>.partitions" receives one
        "DNA, <name>_<key> = <first>-<last>" line per part, using 1-based,
        inclusive column ranges.

        `_config` is not used by this method.
        """
        named_parts = []
        for filename in sorted(self._infiles):
            file_options = self._infiles[filename]
            split_by = file_options.get("partition_by", self._part_by)
            parts = split_msa(read_msa(filename), split_by)
            for (key, part) in sorted(parts.items()):
                for excluded_group in self._excluded:
                    part.pop(excluded_group)
                part_name = "%s_%s" % (file_options["name"], key)
                named_parts.append((part_name, part))

        joined = join_msa(*(part for (_, part) in named_parts))
        phy_path = reroot_path(temp, self._out_prefix + ".phy")
        with open(phy_path, "w") as output:
            output.write(interleaved_phy(joined, add_flag=self._add_flag))

        partitions_path = reroot_path(temp, self._out_prefix + ".partitions")
        with open(partitions_path, "w") as output:
            first = 1
            for (name, part) in named_parts:
                # The length of an arbitrary value is used as the part length.
                length = len(part.itervalues().next())
                last = first + length - 1
                output.write("DNA, %s = %i-%i\n" % (name, first, last))
                first = last + 1
Ejemplo n.º 16
0
    def _run(self, _config, temp):
        """Merge the input alignments per partition and write them below `temp`.

        For every entry in self._infiles, each listed file is loaded,
        optionally stripped of the self._excluded groups, and split by the
        entry's "partitions" value. Parts sharing a key are joined (and
        reduced if self._reduce is set); parts keyed "X" are dropped. The
        merged parts are joined into "<out_prefix>.phy", and
        "<out_prefix>.partitions" receives one
        "DNA, <name>_<key> = <first>-<last>" line per merged part, using
        1-based, inclusive column ranges.

        `_config` is not used by this method.
        """
        named_msas = []
        for (name, files_dd) in sorted(self._infiles.iteritems()):
            partitions = files_dd["partitions"]
            parts_by_key = dict((key, []) for key in partitions)
            for filename in files_dd["filenames"]:
                alignment = MSA.from_file(filename)
                if self._excluded:
                    alignment = alignment.exclude(self._excluded)

                split_parts = alignment.split(partitions)
                for (key, part) in split_parts.iteritems():
                    parts_by_key[key].append(part)

            parts_by_key.pop("X", None)
            for (key, parts) in sorted(parts_by_key.iteritems()):
                combined = MSA.join(*parts)
                if self._reduce:
                    combined = combined.reduce()

                # A None result (presumably possible after reduce()) is
                # skipped.
                if combined is None:
                    continue
                named_msas.append(("%s_%s" % (name, key), combined))

        phy_path = reroot_path(temp, self._out_prefix + ".phy")
        with open(phy_path, "w") as phy_handle:
            everything = MSA.join(*(msa for (_, msa) in named_msas))
            phy_handle.write(interleaved_phy(everything))

        first = 1
        partitions_path = reroot_path(temp, self._out_prefix + ".partitions")
        with open(partitions_path, "w") as partitions_handle:
            for (name, msa) in named_msas:
                last = first + msa.seqlen() - 1
                partitions_handle.write("DNA, %s = %i-%i\n"
                                        % (name, first, last))
                first = last + 1
Ejemplo n.º 17
0
    def _run(self, _config, temp):
        """Build the combined alignment and partition table below `temp`.

        Each entry of self._infiles provides "filenames" and "partitions".
        Every file is loaded, has the self._excluded groups removed when any
        are set, and is split by the partitions value; parts with the same
        key are joined and, if self._reduce is set, reduced. The key "X" is
        discarded. All merged parts are joined and written to
        "<out_prefix>.phy"; "<out_prefix>.partitions" lists each part as
        "DNA, <name>_<key> = <first>-<last>" (1-based, inclusive).

        `_config` is not used by this method.
        """
        labelled = []
        for (name, files_dd) in sorted(self._infiles.iteritems()):
            partitions = files_dd["partitions"]
            grouped = dict((key, []) for key in partitions)
            for filename in files_dd["filenames"]:
                loaded = MSA.from_file(filename)
                if self._excluded:
                    loaded = loaded.exclude(self._excluded)

                for (key, piece) in loaded.split(partitions).iteritems():
                    grouped[key].append(piece)

            grouped.pop("X", None)
            for (key, pieces) in sorted(grouped.iteritems()):
                merged = MSA.join(*pieces)
                if self._reduce:
                    merged = merged.reduce()

                # Only keep parts that still exist after the optional
                # reduction step.
                if merged is not None:
                    label = "%s_%s" % (name, key)
                    labelled.append((label, merged))

        phy_filename = reroot_path(temp, self._out_prefix + ".phy")
        with open(phy_filename, "w") as phy_file:
            joined = MSA.join(*(msa for (_, msa) in labelled))
            phy_file.write(interleaved_phy(joined))

        offset = 0
        parts_filename = reroot_path(temp, self._out_prefix + ".partitions")
        with open(parts_filename, "w") as parts_file:
            for (label, msa) in labelled:
                width = msa.seqlen()
                line = "DNA, %s = %i-%i\n" % (label, offset + 1, offset + width)
                parts_file.write(line)
                offset += width
Ejemplo n.º 18
0
def test_interleaved_phy__different_lengths():
    """Check that interleaved_phy calls MSA.validate at least once."""
    validate_call = flexmock(MSA).should_receive('validate')
    # Attribute access alone applies the "at least once" modifier.
    _expectation = validate_call.at_least.once
    interleaved_phy(_MSA_MEDIUM_NAMES)
Ejemplo n.º 19
0
    def _run(self, _config, temp):
        """Join all input alignments and write the result below `temp`.

        The files in self.input_files are loaded in sorted order, joined
        into one alignment, and written to self._out_phy (re-rooted into
        `temp`) with self._add_flag passed through as add_flag.

        `_config` is not used by this method.
        """
        alignments = (MSA.from_file(filename)
                      for filename in sorted(self.input_files))
        joined = MSA.join(*alignments)

        destination = reroot_path(temp, self._out_phy)
        with open(destination, "w") as handle:
            handle.write(interleaved_phy(joined, add_flag=self._add_flag))
Ejemplo n.º 20
0
    def _run(self, _config, temp):
        """Write the join of every input alignment to the output file.

        Input files are read in sorted order. The joined alignment is
        written to self._out_phy, re-rooted into `temp`, with
        self._add_flag passed through as add_flag.

        `_config` is not used by this method.
        """
        loaded = (MSA.from_file(filename) for filename in sorted(self.input_files))
        combined = MSA.join(*loaded)

        out_path = reroot_path(temp, self._out_phy)
        with open(out_path, "w") as out_file:
            rendered = interleaved_phy(combined, add_flag=self._add_flag)
            out_file.write(rendered)
Ejemplo n.º 21
0
def test_interleaved_phy__different_lengths():
    """Verify that rendering an alignment triggers MSA.validate."""
    mocked_msa = flexmock(MSA)
    # The expectation is registered on the mock; the local name is unused.
    _registered = mocked_msa.should_receive('validate').at_least.once
    interleaved_phy(_MSA_MEDIUM_NAMES)