Beispiel #1
0
def test_trimal(key, hash3, hash07, alb_resources, hf):
    """
    Run Alb.trimal() on a resource with two different settings and compare hashes

    :param key: Resource code passed to alb_resources.get_one()
    :param hash3: Expected hash when trimal is called with 3 as its second argument
    :param hash07: Expected hash when trimal is called with 0.7 as its second argument
    :param alb_resources: Fixture supplying AlignBuddy test resources
    :param hf: Fixture supplying helper functions (buddy2hash is used here)
    """
    alignbuddy = alb_resources.get_one(key)

    # The assert message is only evaluated on failure; it then writes the source
    # alignment to error_files<sep><expected hash> so it can be inspected.
    tester = Alb.make_copy(alignbuddy)
    Alb.trimal(tester, 3)
    assert hf.buddy2hash(tester) == hash3, alignbuddy.write("error_files%s%s" % (os.path.sep, hash3))

    tester = Alb.make_copy(alignbuddy)
    Alb.trimal(tester, 0.7)
    assert hf.buddy2hash(tester) == hash07, alignbuddy.write("error_files%s%s" % (os.path.sep, hash07))
def test_fasttree_inputs(alb_resources, hf):
    """
    Build trees with Pb.generate_tree() using FastTree and check the resulting hashes

    Each tree hash has to be one of the accepted values listed for its input.

    :param alb_resources: Fixture supplying AlignBuddy test resources
    :param hf: Fixture supplying helper functions (buddy2hash is used here)
    """
    temp_dir = br.TempDir()
    seed_flag = '-seed 12345'
    first_input_hashes = [
        'd7f505182dd1a1744b45cc326096f70c', 'da8a67cae6f3f70668f7cf04060b7cd8',
        '732c5e9a978cebb1cfce6af6d64950c2'
    ]
    second_input_hashes = [
        '57eace9bdd2074297cbd2692c1f4cd38', '82d5a9d4f44fbedf29565686a7cdfcaa',
        '682210ef16beedee0e9f43c05edac112'
    ]

    # Nucleotide
    source_alb = alb_resources.get_one("o d n")

    # First run works on a copy and spells the tool name 'FastTree'
    tree = Pb.generate_tree(Alb.make_copy(source_alb), 'FastTree', seed_flag)
    assert hf.buddy2hash(tree) in first_input_hashes

    # Second run uses the resource itself, lower-case tool name, and quiet=True
    tree = Pb.generate_tree(source_alb, 'fasttree', seed_flag, quiet=True)
    assert hf.buddy2hash(tree) in first_input_hashes

    # Different resource, this time passing a keep_temp directory
    source_alb = alb_resources.get_one("o p n")
    keep_dir = "%s%snew_dir" % (temp_dir.path, os.sep)
    tree = Pb.generate_tree(source_alb, 'fasttree', seed_flag, keep_temp=keep_dir)
    assert hf.buddy2hash(tree) in second_input_hashes
Beispiel #3
0
 def get(self, code="", mode="objs"):
     """
     Returns copies of AlignBuddy objects or the path to their resource files
     :param code: Letter codes (explained in Class definition)
     :type code: str
     :param mode: Return either AlignBuddy "objs" (default) or "paths"
     :type mode: str
     :return: AlignBuddy objects or resource paths as controlled by mode {key: resource}
     :rtype: dict
     :raises ValueError: If mode is neither "objs" nor "paths"
     """
     # Validate mode once, up front, so a bad value is reported even when the
     # code selects nothing.
     if mode == "paths":
         source = self.resources
     elif mode == "objs":
         source = self.alb_objs
     else:
         raise ValueError("The 'mode' parameter only accepts 'objs' or 'paths' as input.")

     files = self.parse_code(code)
     slc = self.single_letter_codes
     output = OrderedDict()
     for molecule in files["molecule"]:
         for num_aligns in files["num_aligns"]:
             for _format in files["format"]:
                 # Combinations with no entry in the lookup tables are skipped.
                 # Only the lookup is guarded, so a KeyError raised while
                 # copying is not silently hidden.
                 try:
                     new_obj = source[slc[molecule]][slc[num_aligns]][slc[_format]]
                 except KeyError:
                     continue
                 if mode == "objs":
                     new_obj = Alb.make_copy(new_obj)
                 output["%s %s %s" % (num_aligns, molecule, _format)] = new_obj
     return output
Beispiel #4
0
def score_sequences(_pair, args):
    """
    Score one pair of aligned sequences and append the result to an output file

    The final score is 0.7 * substitution-matrix score + 0.3 * secondary-structure score.
    The substitution-matrix score is the observed pairwise score divided by each
    sequence's best possible (self-match) score, averaged over the two sequences.

    :param _pair: The two record ids to compare (id1, id2)
    :param args: Tuple of (alb_obj, psi_pred_files, outfile); psi_pred_files is indexed by
                 record id and its values are used like pandas DataFrames with indx,
                 coil_prob, helix_prob and sheet_prob columns
    :return: None (a line "id1,id2,score" is appended to outfile)
    """
    id1, id2 = _pair
    alb_obj, psi_pred_files, outfile = args

    # Work on a copy that has been reduced to the two records of interest
    id_regex = "^%s$|^%s$" % (id1, id2)
    alb_copy = Alb.make_copy(alb_obj)
    Alb.pull_records(alb_copy, id_regex)
    seq1, seq2 = alb_copy.records()

    observed_score = 0
    seq1_best = 0
    seq2_best = 0
    prev_aa1 = "-"
    prev_aa2 = "-"

    for aa_pos in range(alb_copy.lengths()[0]):
        aa1 = seq1.seq[aa_pos]
        aa2 = seq2.seq[aa_pos]

        # Best possible score: every residue matched against itself
        if aa1 != "-":
            seq1_best += BLOSUM62[aa1, aa1]
        if aa2 != "-":
            seq2_best += BLOSUM62[aa2, aa2]

        if aa1 == "-" or aa2 == "-":
            # A gap directly following a gapped column counts as an extension
            if prev_aa1 == "-" or prev_aa2 == "-":
                observed_score += gap_extend
            else:
                observed_score += gap_open
        else:
            observed_score += BLOSUM62[aa1, aa2]
        prev_aa1 = str(aa1)
        prev_aa2 = str(aa2)

    # Normalise against each sequence's own best score, then average the two
    subs_mat_score = ((observed_score / seq1_best) + (observed_score / seq2_best)) / 2

    # PSI PRED comparison
    psi_pred1 = psi_pred_files[id1]
    psi_pred2 = psi_pred_files[id2]
    num_gaps = 0
    ss_score = 0
    for row1 in psi_pred1.itertuples():
        matches = psi_pred2.loc[psi_pred2["indx"] == row1.indx]
        if matches.empty:
            num_gaps += 1
            continue

        # Take the first matching row explicitly; calling float() on a
        # one-element Series is deprecated in recent pandas releases.
        row2 = matches.iloc[0]
        row_score = 0
        row_score += 1 - abs(float(row1.coil_prob) - float(row2["coil_prob"]))
        row_score += 1 - abs(float(row1.helix_prob) - float(row2["helix_prob"]))
        row_score += 1 - abs(float(row1.sheet_prob) - float(row2["sheet_prob"]))
        ss_score += row_score / 3

    align_len = len(psi_pred2) + num_gaps
    ss_score /= align_len
    final_score = (ss_score * 0.3) + (subs_mat_score * 0.7)

    # The append is done while holding the module-level lock
    with lock, open(outfile, "a") as _ofile:
        _ofile.write("\n%s,%s,%s" % (id1, id2, final_score))
Beispiel #5
0
def test_raxml_inputs():
    """
    Build trees with Pb.generate_tree() using raxml and compare them to known hashes
    """
    peptide_hash = 'fc35569091eeba49ac4dcec7fc6890bf'

    # Nucleotide
    cds_alb = Alb.AlignBuddy(resource("Mnemiopsis_cds.nex"))
    cds_tree = Pb.generate_tree(cds_alb, 'raxml')
    assert phylo_to_hash(cds_tree) == '706ba436f8657ef3aee7875217dd07c0'

    # Peptide (run on a copy, so the quiet run below gets the untouched object)
    pep_alb = Alb.AlignBuddy(resource("Mnemiopsis_pep.nex"))
    pep_tree = Pb.generate_tree(Alb.make_copy(pep_alb), 'raxml')
    assert phylo_to_hash(pep_tree) == peptide_hash

    # Quiet
    quiet_tree = Pb.generate_tree(pep_alb, 'raxml', quiet=True)
    assert phylo_to_hash(quiet_tree) == peptide_hash
Beispiel #6
0
def test_concat_alignments_ui(capsys, alb_resources, hf):
    """
    Drive Alb.command_line_ui() with a range of concat_alignments arguments

    :param capsys: pytest fixture used to read captured stdout/stderr
    :param alb_resources: Fixture supplying AlignBuddy test resources
    :param hf: Fixture supplying helper functions (string2hash and resource_path are used here)
    """
    test_in_args = deepcopy(in_args)

    # Three records pulled from the peptide file, with the alignment duplicated
    seqbuddy = Sb.SeqBuddy("%s/Cnidaria_pep.nexus" % hf.resource_path)
    Sb.pull_recs(seqbuddy, "Ccr|Cla|Hec")
    tester = Alb.AlignBuddy(str(seqbuddy))
    tester.alignments.append(tester.alignments[0])
    tester.set_format("genbank")

    # (concat_alignments arguments, expected hash of stdout)
    hashed_cases = [
        ([], "86349e715f41e0bdd91bbd1dc0914769"),
        (["(.).(.)-Panx(.)"], "cd2b6594b22c431aea67fa45899f933a"),
        (["...", "Panx.*"], "e49b26f695c910a93f93d70563fd9dd9"),
        ([3, "Panx.*"], "e49b26f695c910a93f93d70563fd9dd9"),
        ([-9, "Panx.*"], "aaa9d9b717a5f79cfdf5d2666fb0f687"),
        ([3, 3], "2f37a4e395162032bf43fab291c882f4"),
        ([3, -3], "7fa8cd803df82414a5e1e190916456d8"),
    ]
    for concat_args, expected_hash in hashed_cases:
        test_in_args.concat_alignments = [concat_args]
        Alb.command_line_ui(test_in_args, Alb.make_copy(tester), skip_exit=True)
        out, err = capsys.readouterr()
        assert hf.string2hash(out) == expected_hash

    # Error message check; concat_alignments is still [[3, -3]] from the last case
    single_alignment = alb_resources.get_one("p o g")
    Alb.command_line_ui(test_in_args, single_alignment, skip_exit=True)
    out, err = capsys.readouterr()
    assert "Please provide at least two alignments." in err

    # Pattern that matches no record
    test_in_args.concat_alignments = [["foo"]]
    multi_alignment = alb_resources.get_one("m p c")
    Alb.command_line_ui(test_in_args, multi_alignment, skip_exit=True)
    out, err = capsys.readouterr()
    assert "No match found for record" in err
Beispiel #7
0
def test_fasttree_inputs():
    """
    Build trees with Pb.generate_tree() using fasttree and compare them to known hashes
    """
    temp_dir = MyFuncs.TempDir()
    seed_flag = '-seed 12345'
    cds_hash = 'd7f505182dd1a1744b45cc326096f70c'

    # Nucleotide
    cds_alb = Alb.AlignBuddy(resource("Mnemiopsis_cds.nex"))

    # First on a copy, then on the object itself with quiet=True
    cds_tree = Pb.generate_tree(Alb.make_copy(cds_alb), 'fasttree', seed_flag)
    assert phylo_to_hash(cds_tree) == cds_hash

    cds_tree = Pb.generate_tree(cds_alb, 'fasttree', seed_flag, quiet=True)
    assert phylo_to_hash(cds_tree) == cds_hash

    # Second input file, this time passing a keep_temp directory
    pep_alb = Alb.AlignBuddy(resource("Mnemiopsis_pep.nex"))
    keep_dir = "%s/new_dir" % temp_dir.path
    pep_tree = Pb.generate_tree(pep_alb, 'fasttree', seed_flag, keep_temp=keep_dir)
    assert phylo_to_hash(pep_tree) == '57eace9bdd2074297cbd2692c1f4cd38'
Beispiel #8
0
def test_concat_alignments_ui(capsys, alb_resources, hf):
    """
    Drive Alb.command_line_ui() with a range of concat_alignments arguments

    :param capsys: pytest fixture used to read captured stdout/stderr
    :param alb_resources: Fixture supplying AlignBuddy test resources
    :param hf: Fixture supplying helper functions (string2hash and resource_path are used here)
    """
    test_in_args = deepcopy(in_args)

    # Three records pulled from the peptide file, with the alignment duplicated
    seqbuddy = Sb.SeqBuddy("%s/Cnidaria_pep.nexus" % hf.resource_path)
    Sb.pull_recs(seqbuddy, "Ccr|Cla|Hec")
    tester = Alb.AlignBuddy(str(seqbuddy))
    tester.alignments.append(tester.alignments[0])
    tester.set_format("genbank")

    # (concat_alignments arguments, expected hash of stdout)
    hashed_cases = (
        ([], "d21940f3dad2295dd647f632825d8541"),
        (["(.).(.)-Panx(.)"], "5ac908ebf7918a45664a31da480fda58"),
        (["...", "Panx.*"], "e754350b0397cf54f531421d1e85774f"),
        ([3, "Panx.*"], "e754350b0397cf54f531421d1e85774f"),
        ([-9, "Panx.*"], "9d2886afc640d35618754e05223032a2"),
        ([3, 3], "4e4101f9b5a6d44d524a9783a8c4004b"),
        ([3, -3], "5d9d9ac8fae604be74c436e5f0b5b6db"),
    )
    for concat_args, expected_hash in hashed_cases:
        test_in_args.concat_alignments = [concat_args]
        Alb.command_line_ui(test_in_args, Alb.make_copy(tester), skip_exit=True)
        out, err = capsys.readouterr()
        assert hf.string2hash(out) == expected_hash

    # Error message check; concat_alignments is still [[3, -3]] from the last case
    single_alignment = alb_resources.get_one("p o g")
    Alb.command_line_ui(test_in_args, single_alignment, skip_exit=True)
    out, err = capsys.readouterr()
    assert "Please provide at least two alignments." in err

    # Pattern that matches no record
    test_in_args.concat_alignments = [["foo"]]
    multi_alignment = alb_resources.get_one("m p c")
    Alb.command_line_ui(test_in_args, multi_alignment, skip_exit=True)
    out, err = capsys.readouterr()
    assert "No match found for record" in err
Beispiel #9
0
def test_concat_alignments(alb_resources, hf):
    """
    Exercise Alb.concat_alignments(): its error messages and the hashes of its output

    :param alb_resources: Fixture supplying AlignBuddy test resources
    :param hf: Fixture supplying helper functions (buddy2hash and resource_path are used here)
    """
    # Messages are checked on e.value (the raised exception itself) rather than
    # on the string form of pytest's ExceptionInfo wrapper.
    with pytest.raises(AttributeError) as e:
        Alb.concat_alignments(alb_resources.get_one("p o g"), '.*')
    assert "Please provide at least two alignments." in str(e.value)

    tester = alb_resources.get_one("o p g")
    tester.alignments.append(alb_resources.get_one("o p g").alignments[0])

    with pytest.raises(ValueError) as e:
        Alb.concat_alignments(tester, 'foo')
    assert "No match found for record" in str(e.value)

    with pytest.raises(ValueError) as e:
        Alb.concat_alignments(tester, 'Panx')
    assert "Replicate matches" in str(e.value)

    # Three records pulled from the peptide file, with the alignment duplicated
    tester = Sb.SeqBuddy("%sCnidaria_pep.nexus" % hf.resource_path)
    Sb.pull_recs(tester, "Ccr|Cla|Hec")
    tester = Alb.AlignBuddy(str(tester))
    tester.alignments.append(tester.alignments[0])
    assert hf.buddy2hash(Alb.concat_alignments(
        Alb.make_copy(tester))) == '32a507107b7dcd044ea7760c8812441c'

    # Every call below receives a fresh copy, so the format is set only once
    tester.set_format("gb")
    pattern_cases = [
        (("(.).(.)-Panx(.)",), '5ac908ebf7918a45664a31da480fda58'),
        (("...", "Panx.*"), 'e754350b0397cf54f531421d1e85774f'),
        (("...", "(P)an(x)(.)"), '5c6653aec09489cadcbed68fbd2f7465'),
    ]
    for patterns, expected_hash in pattern_cases:
        tester2 = Alb.concat_alignments(Alb.make_copy(tester), *patterns)
        assert hf.buddy2hash(tester2) == expected_hash

    # Swap in a second alignment that has had the "Ccr" records deleted
    shorten = Alb.delete_records(Alb.make_copy(tester), "Ccr")
    tester.alignments[1] = shorten.alignments[1]
    assert hf.buddy2hash(Alb.concat_alignments(
        Alb.make_copy(tester))) == 'f3ed9139ab6f97042a244d3f791228b6'
Beispiel #10
0
def test_concat_alignments(alb_resources, hf):
    """
    Exercise Alb.concat_alignments(): its error messages and the hashes of its output

    :param alb_resources: Fixture supplying AlignBuddy test resources
    :param hf: Fixture supplying helper functions (buddy2hash and resource_path are used here)
    """
    # Messages are checked on e.value (the raised exception itself) rather than
    # on the string form of pytest's ExceptionInfo wrapper.
    with pytest.raises(AttributeError) as e:
        Alb.concat_alignments(alb_resources.get_one("p o g"), '.*')
    assert "Please provide at least two alignments." in str(e.value)

    tester = alb_resources.get_one("o p g")
    tester.alignments.append(alb_resources.get_one("o p g").alignments[0])

    with pytest.raises(ValueError) as e:
        Alb.concat_alignments(tester, 'foo')
    assert "No match found for record" in str(e.value)

    with pytest.raises(ValueError) as e:
        Alb.concat_alignments(tester, 'Panx')
    assert "Replicate matches" in str(e.value)

    # Three records pulled from the peptide file, with the alignment duplicated
    tester = Sb.SeqBuddy("%sCnidaria_pep.nexus" % hf.resource_path)
    Sb.pull_recs(tester, "Ccr|Cla|Hec")
    tester = Alb.AlignBuddy(str(tester))
    tester.alignments.append(tester.alignments[0])
    assert hf.buddy2hash(Alb.concat_alignments(
        Alb.make_copy(tester))) == '32a507107b7dcd044ea7760c8812441c'

    # Every call below receives a fresh copy, so the format is set only once
    tester.set_format("gb")
    pattern_cases = [
        (("(.).(.)-Panx(.)",), 'cd2b6594b22c431aea67fa45899f933a'),
        (("...", "Panx.*"), 'e49b26f695c910a93f93d70563fd9dd9'),
        (("...", "(P)an(x)(.)"), '3abfdf2217050ac2170c0de27352a8c6'),
    ]
    for patterns, expected_hash in pattern_cases:
        tester2 = Alb.concat_alignments(Alb.make_copy(tester), *patterns)
        assert hf.buddy2hash(tester2) == expected_hash

    # Swap in a second alignment that has had the "Ccr" records deleted
    shorten = Alb.delete_records(Alb.make_copy(tester), "Ccr")
    tester.alignments[1] = shorten.alignments[1]
    assert hf.buddy2hash(Alb.concat_alignments(
        Alb.make_copy(tester))) == '685f24ee1fc88860dd9465035040c91e'