def test_write_msa(temp_folder):
    """Round-trip a two-sequence MSA through write_msa() and read_msa().

    The MSA is written to "out.afa" inside temp_folder and read back; the
    result must compare equal to the input. Both steps run inside a
    RequiredCall context on _VALIDATION_PATH with the MSA as the expected
    argument (presumably asserting that the validation function is invoked
    with that MSA -- confirm against RequiredCall).
    """
    expected = {"seq1": "ACGTA", "seq2": "CGTAC"}
    destination = os.path.join(temp_folder, "out.afa")

    # Writing step
    with RequiredCall(_VALIDATION_PATH, args=[expected]):
        write_msa(expected, destination)

    # Reading step; the comparison stays inside the context, as before
    with RequiredCall(_VALIDATION_PATH, args=[expected]):
        observed = read_msa(destination)
        assert_equal(observed, expected)
def test_write_msa(temp_folder):
    """Check that an MSA written with write_msa() is read back unchanged.

    A small two-sequence MSA is written to "out.afa" under temp_folder and
    then re-read with read_msa(). Each step is wrapped in RequiredCall on
    _VALIDATION_PATH, passing the MSA as the expected call argument
    (NOTE(review): RequiredCall semantics are defined elsewhere; verify).
    """
    sequences = {"seq1": "ACGTA", "seq2": "CGTAC"}
    out_path = os.path.join(temp_folder, "out.afa")

    with RequiredCall(_VALIDATION_PATH, args=[sequences]):
        write_msa(sequences, out_path)

    with RequiredCall(_VALIDATION_PATH, args=[sequences]):
        round_tripped = read_msa(out_path)
        assert_equal(round_tripped, sequences)
def test_read_msa__compressed_bz2():
    """Check that read_msa() returns the expected records for the bz2 fixture.

    The read is performed inside a RequiredCall context on _VALIDATION_PATH
    with the expected MSA as argument (presumably asserting that validation
    is invoked with it -- confirm against RequiredCall); the comparison is
    done after the context has exited.
    """
    fixture = "tests/data/fasta_file.fasta.bz2"
    expected = {
        "This_is_BZ_FASTA!": "CGTNA",
        "This_is_ALSO_BZ_FASTA!": "ACGTN",
    }

    with RequiredCall(_VALIDATION_PATH, args=[expected]):
        observed = read_msa(fixture)

    assert_equal(observed, expected)
def _run(self, _config, temp):
    """Write every input MSA as a sequential-phy block to the output file.

    Input files are processed in sorted filename order. Each MSA is
    rendered with sequential_phy() and the blocks are written, separated
    by a blank line, to self._out_phy re-rooted into the `temp` folder.
    """
    # Read and check that MSAs share groups
    alignments = []
    for path in sorted(self.input_files):
        alignments.append(read_msa(path))
    # The joined result is discarded; the call is made only for its checks
    join_msa(*alignments)

    blocks = [sequential_phy(alignment, add_flag=self._add_flag)
              for alignment in alignments]

    destination = reroot_path(temp, self._out_phy)
    with open(destination, "w") as output:
        output.write("\n\n".join(blocks))
def _read_sequences(filenames):
    """Yield (filename, msa) for each file, in sorted filename order.

    The set of groups (the keys obtained by iterating over the MSA) found
    in the first file is used as the reference; every subsequent file must
    contain exactly the same groups.

    Raises:
        NodeError: if a file's groups differ from those of the first file.
            The message lists the unexpected/missing group names in sorted
            order.
    """
    expected_groups = None
    for filename in sorted(filenames):
        msa = read_msa(filename)
        groups = set(msa)

        # Compare against None rather than relying on truthiness: an empty
        # set of groups from the first file is still a valid baseline and
        # must not be silently replaced by the groups of the next file.
        if expected_groups is None:
            expected_groups = groups
        elif groups != expected_groups:
            difference = expected_groups.symmetric_difference(groups)
            # Sorted so that the error message is deterministic
            raise NodeError("Unexpected/missing groups for sequence (%s): %s"
                            % (filename, ", ".join(sorted(difference))))

        yield (filename, msa)
def _run(self, _config, temp):
    """Write the input MSA, minus excluded groups, as a plain-text file.

    The first line holds the number of sequences and the length of one of
    them (whichever itervalues() yields first). Each sequence, in sorted
    name order, then follows as: an empty line, the name, and the
    upper-cased sequence passed through fragment(60, ...) with every
    resulting piece passed through fragment(3, ...) and joined by spaces
    (presumably 60-character lines in space-separated triplets -- confirm
    against fragment()).

    The file is written to self._output_file re-rooted into `temp`.
    """
    alignment = read_msa(self._input_file)
    for group in self._excluded:
        alignment.pop(group)

    n_columns = len(alignment.itervalues().next())
    output_lines = [" %i %i" % (len(alignment), n_columns)]

    for (name, sequence) in sorted(alignment.iteritems()):
        output_lines.extend(("", name))
        output_lines.extend(" ".join(fragment(3, chunk))
                            for chunk in fragment(60, sequence.upper()))

    destination = fileutils.reroot_path(temp, self._output_file)
    with open(destination, "w") as handle:
        handle.write("\n".join(output_lines))
def _run(self, _config, temp):
    """Mask singleton nucleotides in selected sequences of an alignment.

    For every (to_filter, groups) entry in self._filter_by, each position
    of the `to_filter` sequence is compared against the column built from
    the sequences named in `groups`. A character that is not 'N', 'n' or
    '-' and that occurs exactly once in that column is replaced by 'n'.

    The result is written to a temporary file under `temp`, which is then
    moved to self._output_file.
    """
    alignment = msa.read_msa(self._input_file)

    for (target, groups) in self._filter_by.iteritems():
        reference_seqs = [alignment[group] for group in groups]
        filtered = list(alignment[target])

        # Only positions covered by every reference sequence are visited;
        # any remaining positions of the target are left untouched.
        for (position, column) in enumerate(zip(*reference_seqs)):
            current = filtered[position]
            if current in "Nn-":
                continue
            if column.count(current) != 1:
                continue

            filtered[position] = 'n'

        alignment[target] = "".join(filtered)

    temp_filename = fileutils.reroot_path(temp, self._output_file)
    msa.write_msa(alignment, temp_filename)
    fileutils.move_file(temp_filename, self._output_file)
def _run(self, _config, temp):
    """Replace singleton bases with 'n' in the sequences listed in _filter_by.

    self._filter_by maps the name of a sequence to filter onto the names
    of the groups it is compared against. At each position, if the base of
    the filtered sequence is not one of 'N', 'n' or '-' and is found
    exactly once among the bases of the comparison groups at that
    position, it is replaced by 'n'.

    The filtered alignment is first written below `temp` and afterwards
    moved to self._output_file.
    """
    alignment = msa.read_msa(self._input_file)

    for (name, compare_with) in self._filter_by.iteritems():
        others = [alignment[group] for group in compare_with]
        bases = list(alignment[name])

        for (index, column) in enumerate(zip(*others)):
            base = bases[index]
            is_masked = base in "Nn-"
            if not is_masked and column.count(base) == 1:
                bases[index] = 'n'

        alignment[name] = "".join(bases)

    temp_filename = fileutils.reroot_path(temp, self._output_file)
    msa.write_msa(alignment, temp_filename)
    fileutils.move_file(temp_filename, self._output_file)
def _run(self, _config, temp):
    """Dump the input MSA (without excluded groups) to a text file.

    Layout of the output: a header line with the sequence count and the
    length of the first sequence returned by itervalues(), then for each
    sequence in sorted name order an empty line, the sequence name, and
    the upper-cased sequence split via fragment(60, ...), each piece being
    further split via fragment(3, ...) and joined with single spaces
    (NOTE(review): fragment() is defined elsewhere; presumably it cuts its
    input into fixed-size pieces -- verify).

    Output goes to self._output_file re-rooted into the `temp` folder.
    """
    sequences = read_msa(self._input_file)
    for excluded in self._excluded:
        sequences.pop(excluded)

    first_sequence = sequences.itervalues().next()
    header = " %i %i" % (len(sequences), len(first_sequence))

    body = []
    for (name, sequence) in sorted(sequences.iteritems()):
        body.append("")
        body.append(name)
        for row in fragment(60, sequence.upper()):
            body.append(" ".join(fragment(3, row)))

    out_path = fileutils.reroot_path(temp, self._output_file)
    with open(out_path, "w") as output:
        output.write("\n".join([header] + body))
def _run(self, _config, temp):
    """Write a joined interleaved-phy file and a matching partitions file.

    Each input file (in sorted filename order) is read and split with
    split_msa() using its own "partition_by" setting, falling back to
    self._part_by. Excluded groups are removed from every partition, and
    each partition is named "<name>_<key>" from the file's "name" entry
    and the partition key.

    Two files are written under `temp`:
      <out_prefix>.phy         the joined partitions via interleaved_phy()
      <out_prefix>.partitions  one "DNA, <name> = <first>-<last>" line per
                               partition, using 1-based inclusive ranges
    """
    named_partitions = []
    for filename in sorted(self._infiles):
        file_info = self._infiles[filename]
        split_by = file_info.get("partition_by", self._part_by)
        partitions = split_msa(read_msa(filename), split_by)

        for (key, partition) in sorted(partitions.items()):
            for excluded_group in self._excluded:
                partition.pop(excluded_group)

            label = "%s_%s" % (file_info["name"], key)
            named_partitions.append((label, partition))

    combined = join_msa(*[partition for (_, partition) in named_partitions])

    phy_path = reroot_path(temp, self._out_prefix + ".phy")
    with open(phy_path, "w") as output:
        output.write(interleaved_phy(combined, add_flag=self._add_flag))

    partitions_path = reroot_path(temp, self._out_prefix + ".partitions")
    with open(partitions_path, "w") as output:
        # Ranges are 1-based and inclusive; `start` is the first column of
        # the current partition in the joined alignment.
        start = 1
        for (label, partition) in named_partitions:
            length = len(partition.itervalues().next())
            output.write("DNA, %s = %i-%i\n" % (label, start, start + length - 1))
            start += length
def test_read_msa__compressed_bz2():
    """Read the ".fasta.bz2" test fixture and compare against known records.

    read_msa() is called inside a RequiredCall context on _VALIDATION_PATH
    with the expected MSA as argument (NOTE(review): RequiredCall is
    defined elsewhere; presumably it asserts that the call happens);
    the equality check runs once the context has been left.
    """
    filename = "tests/data/fasta_file.fasta.bz2"
    expected = {}
    expected["This_is_BZ_FASTA!"] = "CGTNA"
    expected["This_is_ALSO_BZ_FASTA!"] = "ACGTN"

    with RequiredCall(_VALIDATION_PATH, args=[expected]):
        results = read_msa(filename)

    assert_equal(results, expected)
def _run(self, _config, temp):
    """Join all input MSAs and write the result as interleaved phy.

    Input files are read in sorted filename order, combined with
    join_msa(), rendered with interleaved_phy() and written to
    self._out_phy re-rooted into the `temp` folder.
    """
    alignments = [read_msa(path) for path in sorted(self.input_files)]
    combined = join_msa(*alignments)

    destination = reroot_path(temp, self._out_phy)
    with open(destination, "w") as output:
        output.write(interleaved_phy(combined, add_flag=self._add_flag))