def test_trimal(key, hash3, hash07, alb_resources, hf):
    """
    Run ``Alb.trimal`` on one alignment with two thresholds and compare the result hashes.

    :param key: Resource code handed to ``alb_resources.get_one``
    :param hash3: Expected hash after calling ``Alb.trimal`` with a threshold of 3
    :param hash07: Expected hash after calling ``Alb.trimal`` with a threshold of 0.7
    :param alb_resources: Fixture supplying AlignBuddy resources
    :param hf: Helper fixture providing ``buddy2hash``
    """
    alignbuddy = alb_resources.get_one(key)

    for threshold, expected_hash in ((3, hash3), (0.7, hash07)):
        # Each threshold gets its own fresh copy so the two runs cannot influence each other
        tester = Alb.make_copy(alignbuddy)
        Alb.trimal(tester, threshold)
        # The message expression is only evaluated when the comparison fails; it calls
        # alignbuddy.write() with the path error_files<sep><expected hash>
        assert hf.buddy2hash(tester) == expected_hash, \
            alignbuddy.write("error_files%s%s" % (os.path.sep, expected_hash))
def test_fasttree_inputs(alb_resources, hf):
    """
    Build FastTree trees from a nucleotide and a peptide alignment and check the result hashes.

    Each result is accepted if its hash matches any entry of the corresponding list.

    :param alb_resources: Fixture supplying AlignBuddy resources
    :param hf: Helper fixture providing ``buddy2hash``
    """
    scratch_dir = br.TempDir()

    nucleotide_hashes = [
        'd7f505182dd1a1744b45cc326096f70c',
        'da8a67cae6f3f70668f7cf04060b7cd8',
        '732c5e9a978cebb1cfce6af6d64950c2',
    ]
    peptide_hashes = [
        '57eace9bdd2074297cbd2692c1f4cd38',
        '82d5a9d4f44fbedf29565686a7cdfcaa',
        '682210ef16beedee0e9f43c05edac112',
    ]

    # Nucleotide: once through a copy with the mixed-case tool name...
    nucleotide_alb = alb_resources.get_one("o d n")
    tree = Pb.generate_tree(Alb.make_copy(nucleotide_alb), 'FastTree', '-seed 12345')
    assert hf.buddy2hash(tree) in nucleotide_hashes

    # ...and once on the object itself with quiet=True
    tree = Pb.generate_tree(nucleotide_alb, 'fasttree', '-seed 12345', quiet=True)
    assert hf.buddy2hash(tree) in nucleotide_hashes

    # Peptide, passing a keep_temp location below the scratch directory
    peptide_alb = alb_resources.get_one("o p n")
    keep_path = "%s%snew_dir" % (scratch_dir.path, os.sep)
    tree = Pb.generate_tree(peptide_alb, 'fasttree', '-seed 12345', keep_temp=keep_path)
    assert hf.buddy2hash(tree) in peptide_hashes
def get(self, code="", mode="objs"):
    """
    Returns copies of AlignBuddy objects or the paths to their resource files

    :param code: Letter codes (explained in Class definition)
    :type code: str
    :param mode: Return either AlignBuddy "objs" (default) or "paths"
    :type mode: str
    :return: AlignBuddy objects or resource paths as controlled by mode {key: resource}
    :rtype: dict
    :raises ValueError: If mode is neither "objs" nor "paths"
    """
    # Validate up front so a bad mode is reported even when the code selects nothing
    if mode not in ("objs", "paths"):
        raise ValueError("The 'mode' parameter only accepts 'objs' or 'paths' as input.")

    files = self.parse_code(code)
    output = OrderedDict()
    slc = self.single_letter_codes
    source = self.resources if mode == "paths" else self.alb_objs

    for molecule in files["molecule"]:
        for num_aligns in files["num_aligns"]:
            for _format in files["format"]:
                try:
                    new_obj = source[slc[molecule]][slc[num_aligns]][slc[_format]]
                except KeyError:
                    # Combinations without a matching entry are skipped silently
                    continue

                if mode == "objs":
                    # Hand out a copy so callers cannot modify the stored object
                    new_obj = Alb.make_copy(new_obj)
                output["%s %s %s" % (num_aligns, molecule, _format)] = new_obj
    return output
def score_sequences(_pair, args):
    """
    Score one pair of aligned sequences and append the result to a CSV-style file.

    The final score is 0.7 * a substitution-matrix score plus 0.3 * a secondary-structure
    score. The substitution-matrix score is the observed pairwise alignment score divided by
    each sequence's best possible (self vs. self) score, averaged over both sequences.

    Relies on the names ``BLOSUM62``, ``gap_open``, ``gap_extend`` and ``lock`` being defined
    outside this function.

    :param _pair: The two record ids to compare, as (id1, id2)
    :param args: (alb_obj, psi_pred_files, outfile) -- the alignment holding both records, a
                 mapping of record id to a table with ``indx``, ``coil_prob``, ``helix_prob``
                 and ``sheet_prob`` columns (presumably pandas DataFrames -- confirm against
                 the caller), and the path of the file to append to
    :return: None; the result is written to ``outfile`` as "\\n<id1>,<id2>,<score>"
    """
    id1, id2 = _pair
    alb_obj, psi_pred_files, outfile = args

    # Work on a copy that has been reduced to the two records of interest
    id_regex = "^%s$|^%s$" % (id1, id2)
    alb_copy = Alb.make_copy(alb_obj)
    Alb.pull_records(alb_copy, id_regex)

    # Calculate the best possible score of each sequence, then divide the observed score by them
    observed_score = 0
    seq1_best = 0
    seq2_best = 0
    seq1, seq2 = alb_copy.records()
    prev_aa1 = "-"
    prev_aa2 = "-"

    for aa_pos in range(alb_copy.lengths()[0]):
        aa1 = seq1.seq[aa_pos]
        aa2 = seq2.seq[aa_pos]

        if aa1 != "-":
            seq1_best += BLOSUM62[aa1, aa1]
        if aa2 != "-":
            seq2_best += BLOSUM62[aa2, aa2]

        if aa1 == "-" or aa2 == "-":
            # A gapped column directly after another gapped column extends that gap,
            # otherwise it opens a new one
            if prev_aa1 == "-" or prev_aa2 == "-":
                observed_score += gap_extend
            else:
                observed_score += gap_open
        else:
            observed_score += BLOSUM62[aa1, aa2]

        prev_aa1 = str(aa1)
        prev_aa2 = str(aa2)

    # Normalise against BOTH sequences' best scores and take the mean
    subs_mat_score = ((observed_score / seq1_best) + (observed_score / seq2_best)) / 2

    # PSI PRED comparison
    psi_pred1 = psi_pred_files[id1]
    psi_pred2 = psi_pred_files[id2]
    num_gaps = 0
    ss_score = 0
    for row1 in psi_pred1.itertuples():
        matching = psi_pred2["indx"] == row1.indx
        if matching.any():
            # NOTE(review): float() below assumes exactly one row of id2 shares this indx -- confirm
            row2 = psi_pred2.loc[matching]
            row_score = 0
            row_score += 1 - abs(float(row1.coil_prob) - float(row2.coil_prob))
            row_score += 1 - abs(float(row1.helix_prob) - float(row2.helix_prob))
            row_score += 1 - abs(float(row1.sheet_prob) - float(row2.sheet_prob))
            ss_score += row_score / 3
        else:
            # Position of id1 with no counterpart in id2
            num_gaps += 1

    align_len = len(psi_pred2) + num_gaps
    ss_score /= align_len

    final_score = (ss_score * 0.3) + (subs_mat_score * 0.7)

    # Hold the shared lock while appending to the output file
    with lock:
        with open(outfile, "a") as _ofile:
            _ofile.write("\n%s,%s,%s" % (id1, id2, final_score))
def test_raxml_inputs():
    """Build RAxML trees from a nucleotide and a peptide alignment and check the result hashes."""
    peptide_hash = 'fc35569091eeba49ac4dcec7fc6890bf'

    # Nucleotide
    cds_alb = Alb.AlignBuddy(resource("Mnemiopsis_cds.nex"))
    cds_tree = Pb.generate_tree(cds_alb, 'raxml')
    assert phylo_to_hash(cds_tree) == '706ba436f8657ef3aee7875217dd07c0'

    # Peptide (run on a copy, the object itself is reused below)
    pep_alb = Alb.AlignBuddy(resource("Mnemiopsis_pep.nex"))
    pep_tree = Pb.generate_tree(Alb.make_copy(pep_alb), 'raxml')
    assert phylo_to_hash(pep_tree) == peptide_hash

    # Quiet
    quiet_tree = Pb.generate_tree(pep_alb, 'raxml', quiet=True)
    assert phylo_to_hash(quiet_tree) == peptide_hash
def test_concat_alignments_ui(capsys, alb_resources, hf):
    """
    Drive the concat_alignments option through ``Alb.command_line_ui`` and check its output.

    :param capsys: pytest fixture used to capture stdout/stderr
    :param alb_resources: Fixture supplying AlignBuddy resources
    :param hf: Helper fixture providing ``resource_path`` and ``string2hash``
    """
    test_in_args = deepcopy(in_args)

    # Two-alignment object: three pulled records, with the first alignment appended again
    seqbuddy = Sb.SeqBuddy("%s/Cnidaria_pep.nexus" % hf.resource_path)
    Sb.pull_recs(seqbuddy, "Ccr|Cla|Hec")
    tester = Alb.AlignBuddy(str(seqbuddy))
    tester.alignments.append(tester.alignments[0])
    tester.set_format("genbank")

    # (arguments given to concat_alignments, expected hash of stdout)
    hashed_cases = [
        ([], "86349e715f41e0bdd91bbd1dc0914769"),
        (["(.).(.)-Panx(.)"], "cd2b6594b22c431aea67fa45899f933a"),
        (["...", "Panx.*"], "e49b26f695c910a93f93d70563fd9dd9"),
        ([3, "Panx.*"], "e49b26f695c910a93f93d70563fd9dd9"),
        ([-9, "Panx.*"], "aaa9d9b717a5f79cfdf5d2666fb0f687"),
        ([3, 3], "2f37a4e395162032bf43fab291c882f4"),
        ([3, -3], "7fa8cd803df82414a5e1e190916456d8"),
    ]
    for concat_args, expected_hash in hashed_cases:
        test_in_args.concat_alignments = [concat_args]
        Alb.command_line_ui(test_in_args, Alb.make_copy(tester), skip_exit=True)
        out, err = capsys.readouterr()
        assert hf.string2hash(out) == expected_hash

    # Error reporting: this call reuses the arguments of the last hashed case
    Alb.command_line_ui(test_in_args, alb_resources.get_one("p o g"), skip_exit=True)
    out, err = capsys.readouterr()
    assert "Please provide at least two alignments." in err

    test_in_args.concat_alignments = [["foo"]]
    Alb.command_line_ui(test_in_args, alb_resources.get_one("m p c"), skip_exit=True)
    out, err = capsys.readouterr()
    assert "No match found for record" in err
def test_fasttree_inputs():
    """Build FastTree trees from a nucleotide and a peptide alignment and check the result hashes."""
    scratch_dir = MyFuncs.TempDir()
    nucleotide_hash = 'd7f505182dd1a1744b45cc326096f70c'

    # Nucleotide: first through a copy, then on the object itself with quiet=True
    cds_alb = Alb.AlignBuddy(resource("Mnemiopsis_cds.nex"))
    tree = Pb.generate_tree(Alb.make_copy(cds_alb), 'fasttree', '-seed 12345')
    assert phylo_to_hash(tree) == nucleotide_hash

    tree = Pb.generate_tree(cds_alb, 'fasttree', '-seed 12345', quiet=True)
    assert phylo_to_hash(tree) == nucleotide_hash

    # Peptide, passing a keep_temp location below the scratch directory
    pep_alb = Alb.AlignBuddy(resource("Mnemiopsis_pep.nex"))
    keep_path = "%s/new_dir" % scratch_dir.path
    tree = Pb.generate_tree(pep_alb, 'fasttree', '-seed 12345', keep_temp=keep_path)
    assert phylo_to_hash(tree) == '57eace9bdd2074297cbd2692c1f4cd38'
def test_concat_alignments_ui(capsys, alb_resources, hf):
    """
    Drive the concat_alignments option through ``Alb.command_line_ui`` and check its output.

    :param capsys: pytest fixture used to capture stdout/stderr
    :param alb_resources: Fixture supplying AlignBuddy resources
    :param hf: Helper fixture providing ``resource_path`` and ``string2hash``
    """
    test_in_args = deepcopy(in_args)

    def run_ui(alignbuddy):
        # Run the UI with the current arguments and hand back the captured (stdout, stderr)
        Alb.command_line_ui(test_in_args, alignbuddy, skip_exit=True)
        return capsys.readouterr()

    # Two-alignment object: three pulled records, with the first alignment appended again
    seqbuddy = Sb.SeqBuddy("%s/Cnidaria_pep.nexus" % hf.resource_path)
    Sb.pull_recs(seqbuddy, "Ccr|Cla|Hec")
    tester = Alb.AlignBuddy(str(seqbuddy))
    tester.alignments.append(tester.alignments[0])
    tester.set_format("genbank")

    # (arguments given to concat_alignments, expected hash of stdout)
    hashed_cases = (
        ([], "d21940f3dad2295dd647f632825d8541"),
        (["(.).(.)-Panx(.)"], "5ac908ebf7918a45664a31da480fda58"),
        (["...", "Panx.*"], "e754350b0397cf54f531421d1e85774f"),
        ([3, "Panx.*"], "e754350b0397cf54f531421d1e85774f"),
        ([-9, "Panx.*"], "9d2886afc640d35618754e05223032a2"),
        ([3, 3], "4e4101f9b5a6d44d524a9783a8c4004b"),
        ([3, -3], "5d9d9ac8fae604be74c436e5f0b5b6db"),
    )
    for concat_args, expected_hash in hashed_cases:
        test_in_args.concat_alignments = [concat_args]
        out, err = run_ui(Alb.make_copy(tester))
        assert hf.string2hash(out) == expected_hash

    # Error reporting: this call reuses the arguments of the last hashed case
    out, err = run_ui(alb_resources.get_one("p o g"))
    assert "Please provide at least two alignments." in err

    test_in_args.concat_alignments = [["foo"]]
    out, err = run_ui(alb_resources.get_one("m p c"))
    assert "No match found for record" in err
def test_concat_alignments(alb_resources, hf):
    """
    Check the error handling and the output hashes of ``Alb.concat_alignments``.

    :param alb_resources: Fixture supplying AlignBuddy resources
    :param hf: Helper fixture providing ``resource_path`` and ``buddy2hash``
    """
    # Error cases. The message is checked on the raised exception itself (e.value) rather
    # than on the string form of pytest's ExceptionInfo wrapper.
    with pytest.raises(AttributeError) as e:
        Alb.concat_alignments(alb_resources.get_one("p o g"), '.*')
    assert "Please provide at least two alignments." in str(e.value)

    tester = alb_resources.get_one("o p g")
    tester.alignments.append(alb_resources.get_one("o p g").alignments[0])

    with pytest.raises(ValueError) as e:
        Alb.concat_alignments(tester, 'foo')
    assert "No match found for record" in str(e.value)

    with pytest.raises(ValueError) as e:
        Alb.concat_alignments(tester, 'Panx')
    assert "Replicate matches" in str(e.value)

    # Two-alignment object: three pulled records, with the first alignment appended again
    seqbuddy = Sb.SeqBuddy("%sCnidaria_pep.nexus" % hf.resource_path)
    Sb.pull_recs(seqbuddy, "Ccr|Cla|Hec")
    tester = Alb.AlignBuddy(str(seqbuddy))
    tester.alignments.append(tester.alignments[0])
    assert hf.buddy2hash(Alb.concat_alignments(
        Alb.make_copy(tester))) == '32a507107b7dcd044ea7760c8812441c'

    # (extra arguments given to concat_alignments, expected hash); each case listed once
    pattern_cases = [
        (("(.).(.)-Panx(.)",), '5ac908ebf7918a45664a31da480fda58'),
        (("...", "Panx.*"), 'e754350b0397cf54f531421d1e85774f'),
        (("...", "(P)an(x)(.)"), '5c6653aec09489cadcbed68fbd2f7465'),
    ]
    for patterns, expected_hash in pattern_cases:
        # The output format is set on the source object before every copy is taken
        tester.set_format("gb")
        tester2 = Alb.concat_alignments(Alb.make_copy(tester), *patterns)
        assert hf.buddy2hash(tester2) == expected_hash

    # Replace the second alignment with one that has had the "Ccr" records deleted
    shorten = Alb.delete_records(Alb.make_copy(tester), "Ccr")
    tester.alignments[1] = shorten.alignments[1]
    assert hf.buddy2hash(Alb.concat_alignments(
        Alb.make_copy(tester))) == 'f3ed9139ab6f97042a244d3f791228b6'
def test_concat_alignments(alb_resources, hf):
    """
    Check the error handling and the output hashes of ``Alb.concat_alignments``.

    :param alb_resources: Fixture supplying AlignBuddy resources
    :param hf: Helper fixture providing ``resource_path`` and ``buddy2hash``
    """
    # Error cases. The message is checked on the raised exception itself (e.value) rather
    # than on the string form of pytest's ExceptionInfo wrapper.
    with pytest.raises(AttributeError) as e:
        Alb.concat_alignments(alb_resources.get_one("p o g"), '.*')
    assert "Please provide at least two alignments." in str(e.value)

    tester = alb_resources.get_one("o p g")
    tester.alignments.append(alb_resources.get_one("o p g").alignments[0])

    with pytest.raises(ValueError) as e:
        Alb.concat_alignments(tester, 'foo')
    assert "No match found for record" in str(e.value)

    with pytest.raises(ValueError) as e:
        Alb.concat_alignments(tester, 'Panx')
    assert "Replicate matches" in str(e.value)

    # Two-alignment object: three pulled records, with the first alignment appended again
    seqbuddy = Sb.SeqBuddy("%sCnidaria_pep.nexus" % hf.resource_path)
    Sb.pull_recs(seqbuddy, "Ccr|Cla|Hec")
    tester = Alb.AlignBuddy(str(seqbuddy))
    tester.alignments.append(tester.alignments[0])
    assert hf.buddy2hash(Alb.concat_alignments(
        Alb.make_copy(tester))) == '32a507107b7dcd044ea7760c8812441c'

    # (extra arguments given to concat_alignments, expected hash); each case listed once
    pattern_cases = [
        (("(.).(.)-Panx(.)",), 'cd2b6594b22c431aea67fa45899f933a'),
        (("...", "Panx.*"), 'e49b26f695c910a93f93d70563fd9dd9'),
        (("...", "(P)an(x)(.)"), '3abfdf2217050ac2170c0de27352a8c6'),
    ]
    for patterns, expected_hash in pattern_cases:
        # The output format is set on the source object before every copy is taken
        tester.set_format("gb")
        tester2 = Alb.concat_alignments(Alb.make_copy(tester), *patterns)
        assert hf.buddy2hash(tester2) == expected_hash

    # Replace the second alignment with one that has had the "Ccr" records deleted
    shorten = Alb.delete_records(Alb.make_copy(tester), "Ccr")
    tester.alignments[1] = shorten.alignments[1]
    assert hf.buddy2hash(Alb.concat_alignments(
        Alb.make_copy(tester))) == '685f24ee1fc88860dd9465035040c91e'