Esempio n. 1
0
def test_write_msa(temp_folder):
    """Write a two-sequence MSA to disk and check that it reads back equal.

    Both the write and the read happen inside a ``RequiredCall`` context
    that is given the MSA as its expected argument list.
    NOTE(review): presumably ``RequiredCall`` checks that the function at
    ``_VALIDATION_PATH`` gets called with those arguments -- confirm
    against the helper's definition.
    """
    alignment = {
        "seq1": "ACGTA",
        "seq2": "CGTAC",
    }
    destination = os.path.join(temp_folder, "out.afa")

    with RequiredCall(_VALIDATION_PATH, args=[alignment]):
        write_msa(alignment, destination)

    with RequiredCall(_VALIDATION_PATH, args=[alignment]):
        roundtripped = read_msa(destination)
        assert_equal(roundtripped, alignment)
Esempio n. 2
0
def test_write_msa(temp_folder):
    """Round-trip a small MSA through ``write_msa`` and ``read_msa``.

    The MSA is written to ``out.afa`` inside ``temp_folder`` and then read
    back; the result must equal the original mapping. Each step runs inside
    its own ``RequiredCall`` context.
    NOTE(review): ``RequiredCall`` presumably verifies that the callable at
    ``_VALIDATION_PATH`` was invoked with the given args -- confirm.
    """
    records = {"seq1": "ACGTA", "seq2": "CGTAC"}
    out_path = os.path.join(temp_folder, "out.afa")
    expected_args = [records]

    # Step 1: writing.
    with RequiredCall(_VALIDATION_PATH, args=expected_args):
        write_msa(records, out_path)

    # Step 2: reading the file back and comparing with the input.
    with RequiredCall(_VALIDATION_PATH, args=expected_args):
        loaded = read_msa(out_path)
        assert_equal(loaded, records)
Esempio n. 3
0
def test_read_msa__compressed_bz2():
    """Check that ``read_msa`` returns the expected records for the
    ``.fasta.bz2`` fixture file.

    The read runs inside a ``RequiredCall`` context that is handed the
    expected mapping as its argument list.
    NOTE(review): presumably this asserts that the validation function at
    ``_VALIDATION_PATH`` is called with the parsed MSA -- confirm.
    """
    fixture_path = "tests/data/fasta_file.fasta.bz2"
    expected = {}
    expected["This_is_BZ_FASTA!"] = "CGTNA"
    expected["This_is_ALSO_BZ_FASTA!"] = "ACGTN"

    with RequiredCall(_VALIDATION_PATH, args=[expected]):
        observed = read_msa(fixture_path)
        assert_equal(observed, expected)
Esempio n. 4
0
    def _run(self, _config, temp):
        """Convert every input MSA to a sequential-phy block and write them,
        separated by blank lines, to ``self._out_phy`` re-rooted under
        ``temp``.

        Input files are processed in sorted filename order. ``_config`` is
        not used.
        """
        alignments = []
        for path in sorted(self.input_files):
            alignments.append(read_msa(path))

        # The joined result is discarded; the call is made only so that
        # join_msa can check that the MSAs share groups.
        join_msa(*alignments)

        phy_blocks = [sequential_phy(alignment, add_flag=self._add_flag)
                      for alignment in alignments]

        destination = reroot_path(temp, self._out_phy)
        with open(destination, "w") as handle:
            handle.write("\n\n".join(phy_blocks))
Esempio n. 5
0
def _read_sequences(filenames):
    """Lazily read MSA files, checking that they all contain the same groups.

    Files are read in sorted filename order. The set of group names found
    in the first file becomes the reference; every later file must contain
    exactly that set.

    Args:
        filenames: iterable of paths accepted by ``read_msa``.

    Yields:
        ``(filename, msa)`` tuples, one per input file.

    Raises:
        NodeError: if a file's group names differ from those of the first
            file. Because this is a generator, the error is raised while
            iterating, not when the function is called.
    """
    expected_groups = None
    for filename in sorted(filenames):
        msa = read_msa(filename)
        groups = set(msa)

        # Compare against None explicitly: an empty set is a valid (if
        # unusual) reference and must not be mistaken for "not yet set".
        if expected_groups is None:
            expected_groups = groups
        elif groups != expected_groups:
            # Sorted so that the error message is deterministic.
            difference = sorted(expected_groups.symmetric_difference(groups))
            raise NodeError("Unexpected/missing groups for sequence (%s): %s"
                            % (filename, ", ".join(difference)))

        yield (filename, msa)
Esempio n. 6
0
    def _run(self, _config, temp):
        """Write the input MSA as a plain-text alignment file under ``temp``.

        Layout of the output, as built below:
          * a header line with the number of sequences and the length of
            one of them (all are presumably the same length -- the code
            does not check this);
          * for each sequence, in sorted name order: an empty line, the
            name, then the upper-cased sequence passed through
            ``fragment(60, ...)`` with each resulting piece passed through
            ``fragment(3, ...)`` and joined by single spaces.

        Groups listed in ``self._excluded`` are removed first; a missing
        group raises ``KeyError`` from ``pop``. ``_config`` is not used.
        """
        msa = read_msa(self._input_file)
        for excluded_group in self._excluded:
            msa.pop(excluded_group)

        # next(iter(...values())) and items() behave the same on Python 2
        # and 3; the Python 2-only itervalues()/iteritems() do not exist
        # on Python 3.
        first_sequence = next(iter(msa.values()))
        lines = ["  %i %i" % (len(msa), len(first_sequence))]
        for (name, seq) in sorted(msa.items()):
            lines.append("")
            lines.append(name)

            for line in fragment(60, seq.upper()):
                lines.append(" ".join(fragment(3, line)))

        with open(fileutils.reroot_path(temp, self._output_file), "w") as output:
            output.write("\n".join(lines))
Esempio n. 7
0
    def _run(self, _config, temp):
        """Mask positions in selected sequences and write the filtered MSA.

        ``self._filter_by`` maps the name of a sequence to filter onto a
        collection of group names. For every alignment column, the filtered
        sequence's character is replaced by ``'n'`` when it is not already
        one of ``N``, ``n`` or ``-`` and it occurs exactly once among the
        listed groups' characters in that column.
        NOTE(review): presumably ``groups`` includes the filtered sequence
        itself, so that a count of one means no other group shares the
        character -- confirm against the caller.

        The result is written to a temporary path under ``temp`` and then
        moved to ``self._output_file``. ``_config`` is not used.
        """
        alignment = msa.read_msa(self._input_file)

        # items() works on both Python 2 and 3; iteritems() is Python 2-only.
        for (to_filter, groups) in self._filter_by.items():
            sequences = [alignment[group] for group in groups]
            sequence = list(alignment[to_filter])
            # zip(*sequences) yields one tuple of characters per column.
            for (index, nts) in enumerate(zip(*sequences)):
                nt = sequence[index]
                if (nt not in "Nn-") and (nts.count(nt) == 1):
                    sequence[index] = 'n'

            # Stored back immediately, so later filters see the masked
            # version of this sequence.
            alignment[to_filter] = "".join(sequence)

        temp_filename = fileutils.reroot_path(temp, self._output_file)
        msa.write_msa(alignment, temp_filename)
        fileutils.move_file(temp_filename, self._output_file)
Esempio n. 8
0
    def _run(self, _config, temp):
        """Replace singleton characters with ``'n'`` and save the alignment.

        For each ``(to_filter, groups)`` pair in ``self._filter_by``, every
        column of the alignment is inspected: if the character of the
        ``to_filter`` sequence is not ``N``, ``n`` or ``-`` and appears
        exactly once among the characters of the sequences named in
        ``groups`` at that column, it is replaced by ``'n'``.
        NOTE(review): this looks like it expects ``groups`` to contain
        ``to_filter`` itself -- verify against the code that builds
        ``self._filter_by``.

        The filtered alignment is first written under ``temp`` and then
        moved to ``self._output_file``. ``_config`` is not used.
        """
        alignment = msa.read_msa(self._input_file)

        # iteritems() only exists on Python 2 dicts; items() is portable.
        for (to_filter, groups) in self._filter_by.items():
            sequences = [alignment[group] for group in groups]
            sequence = list(alignment[to_filter])
            # Each 'nts' tuple holds the characters of one column.
            for (index, nts) in enumerate(zip(*sequences)):
                nt = sequence[index]
                if (nt not in "Nn-") and (nts.count(nt) == 1):
                    sequence[index] = 'n'

            # The masked sequence replaces the original right away, so it
            # is what any subsequent filter pass will read.
            alignment[to_filter] = "".join(sequence)

        temp_filename = fileutils.reroot_path(temp, self._output_file)
        msa.write_msa(alignment, temp_filename)
        fileutils.move_file(temp_filename, self._output_file)
Esempio n. 9
0
    def _run(self, _config, temp):
        """Dump the input MSA to a text file located under ``temp``.

        The output starts with a header line holding the sequence count and
        the length of one sequence (equal lengths are presumably guaranteed
        elsewhere; nothing here checks it). Each sequence then follows in
        sorted name order as: an empty line, the name, and the upper-cased
        sequence split via ``fragment(60, ...)`` into lines whose
        ``fragment(3, ...)`` pieces are separated by single spaces.

        Groups named in ``self._excluded`` are dropped beforehand; ``pop``
        raises ``KeyError`` if one is absent. ``_config`` is not used.
        """
        msa = read_msa(self._input_file)
        for excluded_group in self._excluded:
            msa.pop(excluded_group)

        # Portable across Python 2 and 3, unlike itervalues().next() and
        # iteritems(), which are Python 2-only.
        first_sequence = next(iter(msa.values()))
        lines = []
        lines.append("  %i %i" % (len(msa), len(first_sequence)))
        for (name, seq) in sorted(msa.items()):
            lines.append("")
            lines.append(name)

            for line in fragment(60, seq.upper()):
                lines.append(" ".join(fragment(3, line)))

        with open(fileutils.reroot_path(temp, self._output_file),
                  "w") as output:
            output.write("\n".join(lines))
Esempio n. 10
0
    def _run(self, _config, temp):
        """Write a combined interleaved-phy file plus a partitions file.

        Every input file (in sorted filename order) is read and split with
        ``split_msa`` using its own ``"partition_by"`` setting, falling back
        to ``self._part_by``. Groups in ``self._excluded`` are removed from
        each part (``KeyError`` if absent), and each part is labelled
        ``<name>_<key>``.

        Two files are written under ``temp``:
          * ``<out_prefix>.phy``: the parts joined with ``join_msa`` and
            rendered by ``interleaved_phy``;
          * ``<out_prefix>.partitions``: one ``DNA, <label> = <start>-<end>``
            line per part, with 1-based inclusive, consecutive ranges.

        ``_config`` is not used.
        """
        msas = []
        for filename in sorted(self._infiles):
            settings = self._infiles[filename]
            split_by = settings.get("partition_by", self._part_by)
            parts = split_msa(read_msa(filename), split_by)
            for (key, msa) in sorted(parts.items()):
                for excluded_group in self._excluded:
                    msa.pop(excluded_group)
                msas.append(("%s_%s" % (self._infiles[filename]["name"], key), msa))

        joined = join_msa(*(msa for (_, msa) in msas))
        with open(reroot_path(temp, self._out_prefix + ".phy"), "w") as output:
            output.write(interleaved_phy(joined, add_flag=self._add_flag))

        with open(reroot_path(temp, self._out_prefix + ".partitions"), "w") as output:
            end = 0
            for (name, part) in msas:
                # Length of one sequence in this part; works on Python 2
                # and 3, unlike the Python 2-only itervalues().next().
                length = len(next(iter(part.values())))
                output.write("DNA, %s = %i-%i\n" % (name, end + 1, end + length))
                end += length
Esempio n. 11
0
def test_read_msa__compressed_bz2():
    """Read the ``.fasta.bz2`` fixture and compare it with the known records.

    The read is performed inside a ``RequiredCall`` context that receives
    the expected mapping as its argument list.
    NOTE(review): ``RequiredCall`` presumably asserts that the function at
    ``_VALIDATION_PATH`` was called with those args -- confirm.
    """
    source = "tests/data/fasta_file.fasta.bz2"
    expected = {
        "This_is_BZ_FASTA!": "CGTNA",
        "This_is_ALSO_BZ_FASTA!": "ACGTN",
    }

    with RequiredCall(_VALIDATION_PATH, args=[expected]):
        parsed = read_msa(source)
        assert_equal(parsed, expected)
Esempio n. 12
0
    def _run(self, _config, temp):
        """Join all input MSAs and write them as one interleaved-phy file.

        Input files are read in sorted filename order and combined with
        ``join_msa``; the rendered result is written to ``self._out_phy``
        re-rooted under ``temp``. ``_config`` is not used.
        """
        alignments = [read_msa(path) for path in sorted(self.input_files)]
        combined = join_msa(*alignments)

        destination = reroot_path(temp, self._out_phy)
        with open(destination, "w") as handle:
            # Rendering happens after the file is opened, as before.
            handle.write(interleaved_phy(combined, add_flag=self._add_flag))