Esempio n. 1
0
 def _teardown(self, config, temp):
     # Validate output from MAFFT
     output_file = reroot_path(temp, self._output_file)
     try:
         MSA.from_file(output_file)
     except MSAError, error:
         raise NodeError("Invalid MSA produced by MAFFT:\n%s" % (error,))
Esempio n. 2
0
def _read_sequences(filenames):
    results = {}
    for filename in filenames:
        results[filename] = MSA.from_file(filename)
    MSA.validate(*results.values())

    return results.iteritems()
Esempio n. 3
0
def _read_sequences(filenames):
    results = {}
    for filename in filenames:
        results[filename] = MSA.from_file(filename)
    MSA.validate(*results.values())

    return results.iteritems()
Esempio n. 4
0
    def _run(self, _config, temp):
        alignment = MSA.from_file(self._input_file)
        for (to_filter, groups) in self._filter_by.iteritems():
            alignment = alignment.filter_singletons(to_filter, groups)

        temp_filename = fileutils.reroot_path(temp, self._output_file)
        with open(temp_filename, "w") as handle:
            alignment.to_file(handle)
        fileutils.move_file(temp_filename, self._output_file)
Esempio n. 5
0
    def _run(self, _config, temp):
        alignment = MSA.from_file(self._input_file)
        for (to_filter, groups) in self._filter_by.iteritems():
            alignment = alignment.filter_singletons(to_filter, groups)

        temp_filename = fileutils.reroot_path(temp, self._output_file)
        with open(temp_filename, "w") as handle:
            alignment.to_file(handle)
        fileutils.move_file(temp_filename, self._output_file)
Esempio n. 6
0
    def _run(self, _config, temp):
        # Read and check that MSAs share groups
        msas = [MSA.from_file(filename) for filename in sorted(self.input_files)]
        MSA.validate(*msas)

        blocks = []
        for msa in msas:
            blocks.append(sequential_phy(msa, add_flag = self._add_flag))

        with open(reroot_path(temp, self._out_phy), "w") as output:
            output.write("\n\n".join(blocks))
def _is_sufficently_covered(filepath, min_coverage):
    msa = MSA.from_file(filepath)
    if msa.seqlen() % 3:
        return False

    total_bases_not_covered = 0
    for fasta_record in msa:
        total_bases_not_covered += fasta_record.sequence.upper().count("N")
        total_bases_not_covered += fasta_record.sequence.count("-")

    total_bases = float(len(msa) * msa.seqlen())
    frac_covered = 1.0 - total_bases_not_covered / total_bases
    return frac_covered >= min_coverage
Esempio n. 8
0
    def _run(self, _config, temp):
        # Read and check that MSAs share groups
        msas = [
            MSA.from_file(filename) for filename in sorted(self.input_files)
        ]
        MSA.validate(*msas)

        blocks = []
        for msa in msas:
            blocks.append(sequential_phy(msa, add_flag=self._add_flag))

        with open(reroot_path(temp, self._out_phy), "w") as output:
            output.write("\n\n".join(blocks))
Esempio n. 9
0
def _is_sufficently_covered(filepath, min_coverage):
    msa = MSA.from_file(filepath)
    if msa.seqlen() % 3:
        return False

    total_bases_not_covered = 0
    for fasta_record in msa:
        total_bases_not_covered += fasta_record.sequence.upper().count("N")
        total_bases_not_covered += fasta_record.sequence.count("-")

    total_bases = float(len(msa) * msa.seqlen())
    frac_covered = 1.0 - total_bases_not_covered / total_bases
    return frac_covered >= min_coverage
Esempio n. 10
0
    def _run(self, _config, temp):
        merged_msas = []
        for (name, files_dd) in sorted(self._infiles.iteritems()):
            partitions = files_dd["partitions"]
            msas = dict((key, []) for key in partitions)
            for filename in files_dd["filenames"]:
                msa = MSA.from_file(filename)
                if self._excluded:
                    msa = msa.exclude(self._excluded)

                for (key, msa_part) in msa.split(partitions).iteritems():
                    msas[key].append(msa_part)

            msas.pop("X", None)
            for (key, msa_parts) in sorted(msas.iteritems()):
                merged_msa = MSA.join(*msa_parts)
                if self._reduce:
                    merged_msa = merged_msa.reduce()

                if merged_msa is not None:
                    merged_msas.append(("%s_%s" % (name, key),
                                        merged_msa))

        out_fname_phy = reroot_path(temp, self._out_prefix + ".phy")
        with open(out_fname_phy, "w") as output_phy:
            final_msa = MSA.join(*(msa for (_, msa) in merged_msas))
            output_phy.write(interleaved_phy(final_msa))

        partition_end = 0
        out_fname_parts = reroot_path(temp, self._out_prefix + ".partitions")
        with open(out_fname_parts, "w") as output_part:
            for (name, msa) in merged_msas:
                length = msa.seqlen()
                output_part.write("DNA, %s = %i-%i\n"
                                  % (name,
                                     partition_end + 1,
                                     partition_end + length))
                partition_end += length
Esempio n. 11
0
    def _run(self, _config, temp):
        merged_msas = []
        for (name, files_dd) in sorted(self._infiles.iteritems()):
            partitions = files_dd["partitions"]
            msas = dict((key, []) for key in partitions)
            for filename in files_dd["filenames"]:
                msa = MSA.from_file(filename)
                if self._excluded:
                    msa = msa.exclude(self._excluded)

                for (key, msa_part) in msa.split(partitions).iteritems():
                    msas[key].append(msa_part)

            msas.pop("X", None)
            for (key, msa_parts) in sorted(msas.iteritems()):
                merged_msa = MSA.join(*msa_parts)
                if self._reduce:
                    merged_msa = merged_msa.reduce()

                if merged_msa is not None:
                    merged_msas.append(("%s_%s" % (name, key), merged_msa))

        out_fname_phy = reroot_path(temp, self._out_prefix + ".phy")
        with open(out_fname_phy, "w") as output_phy:
            final_msa = MSA.join(*(msa for (_, msa) in merged_msas))
            output_phy.write(interleaved_phy(final_msa))

        partition_end = 0
        out_fname_parts = reroot_path(temp, self._out_prefix + ".partitions")
        with open(out_fname_parts, "w") as output_part:
            for (name, msa) in merged_msas:
                length = msa.seqlen()
                output_part.write(
                    "DNA, %s = %i-%i\n" %
                    (name, partition_end + 1, partition_end + length))
                partition_end += length
Esempio n. 12
0
    def _run(self, _config, temp):
        msa = MSA.join(*(MSA.from_file(filename)
                         for filename in sorted(self.input_files)))

        with open(reroot_path(temp, self._out_phy), "w") as output:
            output.write(interleaved_phy(msa, add_flag=self._add_flag))
Esempio n. 13
0
    def _run(self, _config, temp):
        msa = MSA.join(*(MSA.from_file(filename) for filename in sorted(self.input_files)))

        with open(reroot_path(temp, self._out_phy), "w") as output:
            output.write(interleaved_phy(msa, add_flag = self._add_flag))
Esempio n. 14
0
def test_msa_from_file__compressed_bz2():
    expected = MSA([FASTA("This_is_BZ_FASTA!", None, "CGTNA"),
                    FASTA("This_is_ALSO_BZ_FASTA!", None,  "ACGTN")])
    results  = MSA.from_file("tests/data/fasta_file.fasta.bz2")
    assert_equal(results, expected)