Ejemplo n.º 1
0
def test_bootstrap_ui(capsys, alb_resources):
    """Run the command line UI with two bootstrap settings and check the lengths of the printed alignments."""
    args = deepcopy(in_args)

    # (value assigned to args.bootstrap, expected result of AlignBuddy.lengths() on the captured output)
    expectations = (([False], [481, 683]),
                    ([3], [481, 481, 481, 683, 683, 683]))

    for bootstrap_arg, expected_lengths in expectations:
        args.bootstrap = bootstrap_arg
        Alb.command_line_ui(args, alb_resources.get_one("m p s"), skip_exit=True)
        stdout, _ = capsys.readouterr()
        assert Alb.AlignBuddy(stdout).lengths() == expected_lengths
Ejemplo n.º 2
0
def test_phylip_sequential_read(alb_odd_resources, hf, capsys):
    """
    Exercise br.phylip_sequential_read on well-formed and malformed sequential Phylip input.

    The two Mnemiopsis resource files are parsed (default and relaxed=False) and the resulting
    AlignBuddy objects are compared against known hashes. The remaining cases feed malformed
    records and check the message carried by the raised br.PhylipError.
    """
    # Context managers guarantee the resource files are closed once their content has been read
    with open("{0}Mnemiopsis_cds.physr".format(RESOURCE_PATH), "r", encoding="utf-8") as ifile:
        records = br.phylip_sequential_read(ifile.read())
    buddy = Alb.AlignBuddy(records, out_format="phylipsr")
    assert hf.buddy2hash(buddy) == "c5fb6a5ce437afa1a4004e4f8780ad68"

    with open("{0}Mnemiopsis_cds.physs".format(RESOURCE_PATH), "r", encoding="utf-8") as ifile:
        records = br.phylip_sequential_read(ifile.read(), relaxed=False)
    buddy = Alb.AlignBuddy(records, out_format="phylipss")
    assert hf.buddy2hash(buddy) == "4c0c1c0c63298786e6fb3db1385af4d5"

    with open(alb_odd_resources['dna']['single']['phylipss_cols'], "r", encoding="utf-8") as ifile:
        records = ifile.read()
    with pytest.raises(br.PhylipError) as err:
        br.phylip_sequential_read(records)
    assert "Malformed Phylip --> Less sequence found than expected" in str(err)

    with open(alb_odd_resources['dna']['single']['phylipss_recs'], "r", encoding="utf-8") as ifile:
        records = ifile.read()
    with pytest.raises(br.PhylipError) as err:
        br.phylip_sequential_read(records)
    assert "Malformed Phylip --> 9 sequences expected, 4 found." in str(err)

    # Discard anything captured on stdout/stderr so far
    capsys.readouterr()

    # Header declares 15 columns, but the rows are longer
    records = """  3 15
Mle-Panxα4  M--VIE---------A
Mle-Panxα8  M--VLE---------A
Mle-Panxα6  M--LLE----------A
"""
    with pytest.raises(br.PhylipError) as err:
        br.phylip_sequential_read(records)
    assert "Malformed Phylip --> Sequence Mle-Panxα4 has 16 columns, 15 expected." in str(err)

    # Same ID used for two records
    records = """  3 15
Mle-Panxα4  M--VIE--------A
Mle-Panxα8  M--VLE--------A
Mle-Panxα8  M--LLE--------A
"""
    with pytest.raises(br.PhylipError) as err:
        br.phylip_sequential_read(records)
    assert "Malformed Phylip --> Repeat ID Mle-Panxα8." in str(err)

    # No whitespace between ID and sequence, parsed with relaxed=False
    records = """  3 15
Mle-Panxα4M--VIE--------A
Mle-Panxα8M--VLE--------A
Mle-Panxα8M--LLE--------A
"""
    with pytest.raises(br.PhylipError) as err:
        br.phylip_sequential_read(records, relaxed=False)
    assert "Malformed Phylip --> Repeat id 'Mle-Panxα8' after strict truncation. " in str(err)
Ejemplo n.º 3
0
def test_trimal2(alb_resources, hf):
    """Check Alb.trimal output hashes for the 'all', 'clean' and 'gappyout' settings, plus a tiny hand-made alignment."""
    trimmed_all = Alb.trimal(alb_resources.get_one("o p n"), 'all')
    assert hf.buddy2hash(trimmed_all) == "8faaf09741ddb3137653cb77ee66974a"

    # Keep only the first five records; the hash is taken from the object that was passed in to trimal
    tester = alb_resources.get_one("o p n")
    tester.alignments[0]._records = tester.alignments[0]._records[:5]
    Alb.trimal(tester, 'clean')
    assert hf.buddy2hash(tester) == "93a2aa21e6baf5ca70eb2de52ae8dbea"

    tester = alb_resources.get_one("o p n")
    tester_dir = TEMPDIR.subdir()
    trimal_path = "%s%strimal" % (tester_dir, os.path.sep)
    tester.write(trimal_path)
    gappyout_hash = hf.buddy2hash(Alb.trimal(tester, 'gappyout'))
    assert gappyout_hash == "2877ecfb201fc35211a4625f34c7afdd"
    """ Probably not a good idea to be calling binaries like this...
    real_trimal = Popen("trimal -in %s%strimal -gappyout" % (tester_dir, os.path.sep),
                        stdout=PIPE, shell=True).communicate()
    real_trimal = real_trimal[0].decode()
    with open("%s%strimal" % (tester_dir, os.path.sep), "w") as ofile:
        ofile.write(real_trimal)
    tester = Alb.AlignBuddy("%s%strimal" % (tester_dir, os.path.sep))
    assert hf.buddy2hash(tester) == "2877ecfb201fc35211a4625f34c7afdd"
    """
    # The test expects 'gappyout' to leave no residues at all in this alignment
    gappy_rows = ("A--G-", "--T--", "--TG-", "A---C")
    records = [SeqRecord(Seq(row)) for row in gappy_rows]
    tester = Alb.AlignBuddy([MultipleSeqAlignment(records)])
    Alb.trimal(tester, "gappyout")
    remaining = "".join(str(rec.seq) for rec in tester.records())
    assert remaining == ""
Ejemplo n.º 4
0
def test_delete_invariant_sites_ui(capsys, hf, alb_odd_resources):
    """Run the delete_invariant_sites UI option on the 'ambiguous' DNA resource and hash what is printed."""
    ambiguous_resource = alb_odd_resources['dna']['single']['ambiguous']

    args = deepcopy(in_args)
    args.delete_invariant_sites = [[]]

    Alb.command_line_ui(args, Alb.AlignBuddy(ambiguous_resource), skip_exit=True)
    stdout, _ = capsys.readouterr()
    assert hf.string2hash(stdout) == "27233a416437eabc72aa5d57cb695036"
Ejemplo n.º 5
0
def test_clean_seqs(alb_resources, hf):
    """Check the hash of Alb.clean_seq output for a peptide alignment and for an ambiguous DNA alignment."""
    # Amino acid file, clean_seq called with its default arguments
    cleaned_pep = Alb.clean_seq(alb_resources.get_one("m p py"))
    assert hf.buddy2hash(cleaned_pep) == "07a861a1c80753e7f89f092602271072"

    # Ambiguous DNA file, with ambiguous=False and "X" as the replacement character
    ambiguous_dna = Alb.AlignBuddy("%sambiguous_dna_alignment.fa" % hf.resource_path)
    cleaned_dna = Alb.clean_seq(ambiguous_dna, ambiguous=False, rep_char="X")
    assert hf.buddy2hash(cleaned_dna) == "6755ea1408eddd0e5f267349c287d989"
Ejemplo n.º 6
0
    def __init__(self):
        """
        Index the alignment resource files by molecule / number of alignments / format, build an
        AlignBuddy object for every indexed file, and set up the short-code lookup tables.
        """
        def located(pairs):
            # Fill the {path} placeholder of every file name template with RESOURCE_PATH
            return {file_format: template.format(path=RESOURCE_PATH) for file_format, template in pairs}

        def empty_structure():
            # Fresh molecule -> single/multi -> {} skeleton
            return {mol: {'single': {}, 'multi': {}} for mol in ('dna', 'rna', 'pep')}

        self.resources = empty_structure()

        dna = self.resources['dna']
        dna['single'] = located([
            ("clustal", "{path}Mnemiopsis_cds.clus"),
            ("fasta", "{path}Mnemiopsis_cds_aln.fa"),
            ("gb", "{path}Mnemiopsis_cds_aln.gb"),
            ("nexus", "{path}Mnemiopsis_cds.nex"),
            ("phylip", "{path}Mnemiopsis_cds.phy"),
            ("phylipr", "{path}Mnemiopsis_cds.phyr"),
            ("phylipss", "{path}Mnemiopsis_cds.physs"),
            ("phylipsr", "{path}Mnemiopsis_cds.physr"),
            ("stockholm", "{path}Mnemiopsis_cds.stklm")])
        dna['multi'] = located([
            ("clustal", "{path}Alignments_cds.clus"),
            ("phylip", "{path}Alignments_cds.phy"),
            ("phylipr", "{path}Alignments_cds.phyr"),
            ("phylipss", "{path}Alignments_cds.physs"),
            ("phylipsr", "{path}Alignments_cds.physr"),
            ("stockholm", "{path}Alignments_cds.stklm")])

        rna = self.resources['rna']
        rna['single'] = located([("nexus", "{path}Mnemiopsis_rna.nex")])
        rna['multi'] = {}

        pep = self.resources['pep']
        pep['single'] = located([
            ("gb", "{path}Mnemiopsis_pep_aln.gb"),
            ("nexus", "{path}Mnemiopsis_pep.nex"),
            ("phylip", "{path}Mnemiopsis_pep.phy"),
            ("phylipr", "{path}Mnemiopsis_pep.phyr"),
            ("phylipss", "{path}Mnemiopsis_pep.physs"),
            ("phylipsr", "{path}Mnemiopsis_pep.physr"),
            ("stockholm", "{path}Mnemiopsis_pep.stklm")])
        pep['multi'] = located([
            ("clustal", "{path}Alignments_pep.clus"),
            ("phylip", "{path}Alignments_pep.phy"),
            ("phylipr", "{path}Alignments_pep.phyr"),
            ("phylipss", "{path}Alignments_pep.physs"),
            ("phylipsr", "{path}Alignments_pep.physr"),
            ("stockholm", "{path}Alignments_pep.stklm")])

        # One AlignBuddy object per resource file, stored under the same three keys as its path
        self.alb_objs = empty_structure()
        for mol, by_count in self.resources.items():
            for num, files in by_count.items():
                for file_format, file_path in files.items():
                    self.alb_objs[mol][num][file_format] = Alb.AlignBuddy(file_path)

        molecule_codes = {"p": "pep", "d": "dna", "r": "rna"}
        num_align_codes = {"o": "single", "m": "multi"}
        format_codes = {"c": "clustal", "f": "fasta", "g": "gb", "n": "nexus", "py": "phylip",
                        "pr": "phylipr", "pss": "phylipss", "psr": "phylipsr", "s": "stockholm"}

        self.code_dict = {"molecule": molecule_codes,
                          "num_aligns": num_align_codes,
                          "format": format_codes}

        # Flat lookup holding every short code from the three groups above
        self.single_letter_codes = {}
        for code_group in (molecule_codes, num_align_codes, format_codes):
            self.single_letter_codes.update(code_group)
Ejemplo n.º 7
0
def test_faux_align_ui(capsys, alb_resources):
    """Run the faux_align UI option and check the length of the first record that is printed."""
    args = deepcopy(in_args)
    args.alignments = [alb_resources.get_one("o p g", "paths")]
    args.faux_align = [None]

    # The AlignBuddy class itself (not an instance) is handed to the UI here
    Alb.command_line_ui(args, Alb.AlignBuddy, skip_exit=True)
    stdout, _ = capsys.readouterr()

    first_record = Alb.AlignBuddy(stdout).alignments[0][0]
    assert len(first_record) == 625
Ejemplo n.º 8
0
def test_concat_alignments_ui(capsys, alb_resources, hf):
    """Run the concat_alignments UI option with several argument sets, then check its two error messages."""
    args = deepcopy(in_args)

    # Pull three groups of records, then append the first alignment again so there is something to concatenate
    tester = Sb.SeqBuddy("%s/Cnidaria_pep.nexus" % hf.resource_path)
    Sb.pull_recs(tester, "Ccr|Cla|Hec")
    tester = Alb.AlignBuddy(str(tester))
    tester.alignments.append(tester.alignments[0])
    tester.set_format("genbank")

    # (arguments given to concat_alignments, hash expected for the printed result)
    hashed_cases = (
        ([], "86349e715f41e0bdd91bbd1dc0914769"),
        (["(.).(.)-Panx(.)"], "cd2b6594b22c431aea67fa45899f933a"),
        (["...", "Panx.*"], "e49b26f695c910a93f93d70563fd9dd9"),
        ([3, "Panx.*"], "e49b26f695c910a93f93d70563fd9dd9"),
        ([-9, "Panx.*"], "aaa9d9b717a5f79cfdf5d2666fb0f687"),
        ([3, 3], "2f37a4e395162032bf43fab291c882f4"),
        ([3, -3], "7fa8cd803df82414a5e1e190916456d8"),
    )
    for concat_args, expected_hash in hashed_cases:
        args.concat_alignments = [concat_args]
        Alb.command_line_ui(args, Alb.make_copy(tester), skip_exit=True)
        stdout, _ = capsys.readouterr()
        assert hf.string2hash(stdout) == expected_hash

    # Still using the last argument set ([3, -3]), now with the "p o g" resource
    Alb.command_line_ui(args, alb_resources.get_one("p o g"), skip_exit=True)
    _, stderr = capsys.readouterr()
    assert "Please provide at least two alignments." in stderr

    args.concat_alignments = [["foo"]]
    Alb.command_line_ui(args, alb_resources.get_one("m p c"), skip_exit=True)
    _, stderr = capsys.readouterr()
    assert "No match found for record" in stderr
Ejemplo n.º 9
0
def test_clean_seqs_ui(capsys, alb_resources, alb_odd_resources, hf):
    """Run the clean_seq UI option with [[None]] and with ['strict', 'X'], hashing the printed output each time."""
    args = deepcopy(in_args)

    args.clean_seq = [[None]]
    Alb.command_line_ui(args, alb_resources.get_one("m p pr"), skip_exit=True)
    stdout, _ = capsys.readouterr()
    assert hf.string2hash(stdout) == "73b5d11dd25dd100648870228ab10d3d"

    args.clean_seq = [['strict', 'X']]
    ambiguous_dna = Alb.AlignBuddy(alb_odd_resources['dna']['single']['ambiguous'])
    Alb.command_line_ui(args, ambiguous_dna, skip_exit=True)
    stdout, _ = capsys.readouterr()
    assert hf.string2hash(stdout) == "6755ea1408eddd0e5f267349c287d989"
Ejemplo n.º 10
0
def test_concat_alignments_ui(capsys, alb_resources, hf):
    """Drive the concat_alignments UI option through a table of argument sets, then check its two error messages."""
    ui_args = deepcopy(in_args)

    # Build an AlignBuddy holding the same alignment twice, formatted as genbank
    seqs = Sb.SeqBuddy("%s/Cnidaria_pep.nexus" % hf.resource_path)
    Sb.pull_recs(seqs, "Ccr|Cla|Hec")
    tester = Alb.AlignBuddy(str(seqs))
    tester.alignments.append(tester.alignments[0])
    tester.set_format("genbank")

    def printed_by_ui(alignbuddy):
        # Run the UI on the given object and hand back the captured (stdout, stderr)
        Alb.command_line_ui(ui_args, alignbuddy, skip_exit=True)
        return capsys.readouterr()

    # Arguments for concat_alignments, paired with the hash expected for the printed result
    expected_hashes = [
        ([], "d21940f3dad2295dd647f632825d8541"),
        (["(.).(.)-Panx(.)"], "5ac908ebf7918a45664a31da480fda58"),
        (["...", "Panx.*"], "e754350b0397cf54f531421d1e85774f"),
        ([3, "Panx.*"], "e754350b0397cf54f531421d1e85774f"),
        ([-9, "Panx.*"], "9d2886afc640d35618754e05223032a2"),
        ([3, 3], "4e4101f9b5a6d44d524a9783a8c4004b"),
        ([3, -3], "5d9d9ac8fae604be74c436e5f0b5b6db"),
    ]
    for concat_args, digest in expected_hashes:
        ui_args.concat_alignments = [concat_args]
        out, err = printed_by_ui(Alb.make_copy(tester))
        assert hf.string2hash(out) == digest

    # concat_alignments is still [[3, -3]] for this call
    out, err = printed_by_ui(alb_resources.get_one("p o g"))
    assert "Please provide at least two alignments." in err

    ui_args.concat_alignments = [["foo"]]
    out, err = printed_by_ui(alb_resources.get_one("m p c"))
    assert "No match found for record" in err
Ejemplo n.º 11
0
def create_all_by_all_scores(seqbuddy, quiet=False):
    """
    Generate a multiple sequence alignment and pull out all-by-all similarity graph
    :param seqbuddy: SeqBuddy object
    :param quiet: Suppress multicore output
    :return: tuple (alignment, sim_scores); sim_scores is a DataFrame with the columns seq1, seq2 and score
             (empty when seqbuddy holds a single sequence)
    """
    if len(seqbuddy) == 1:
        # Nothing to compare against, so there are no scores to compute
        alignment = Alb.AlignBuddy(str(seqbuddy))
        sim_scores = pd.DataFrame(data=None, columns=["seq1", "seq2", "score"])
        return alignment, sim_scores

    ss_columns = ["indx", "aa", "ss", "coil_prob", "helix_prob", "sheet_prob"]
    alignment = Alb.generate_msa(Sb.make_copy(seqbuddy), tool="mafft", params="--globalpair --thread -1", quiet=True)

    # Need to specify what columns the PsiPred files map to now that there are gaps.
    psi_pred_files = {}
    for rec in alignment.records_iter():
        # sep=r"\s+" replaces the deprecated delim_whitespace=True
        ss_file = pd.read_csv("%s/psi_pred/%s.ss2" % (in_args.outdir, rec.id), comment="#",
                              header=None, sep=r"\s+")
        ss_file.columns = ss_columns
        ss_counter = 0
        for indx, residue in enumerate(rec.seq):
            if residue != "-":
                # DataFrame.set_value() no longer exists in pandas >= 1.0; .at is the scalar setter
                ss_file.at[ss_counter, "indx"] = indx
                ss_counter += 1
        psi_pred_files[rec.id] = ss_file

    alignment = Alb.trimal(alignment, "gappyout")

    # Re-update PsiPred files, now that some columns are removed
    for rec in alignment.records_iter():
        new_psi_pred = []
        for row in psi_pred_files[rec.id].itertuples():
            # row[0] is the DataFrame index, row[1] is the 'indx' column
            if alignment.alignments[0].position_map[int(row[1])][1]:
                new_psi_pred.append(list(row)[1:])
        psi_pred_files[rec.id] = pd.DataFrame(new_psi_pred, columns=ss_columns)

    # Pair every record with each record that comes after it (same pairs and order as before)
    ids = [rec.id for rec in alignment.records_iter()]
    all_by_all = [(rec1, rec2) for i, rec1 in enumerate(ids) for rec2 in ids[i + 1:]]

    outfile = MyFuncs.TempFile()
    outfile.write("seq1,seq2,score")
    printer.clear()
    MyFuncs.run_multicore_function(all_by_all, score_sequences, [alignment, psi_pred_files, outfile.path], quiet=quiet)
    sim_scores = pd.read_csv(outfile.path, index_col=False)
    return alignment, sim_scores
Ejemplo n.º 12
0
def test_delete_invariant_sites(alb_resources, hf, alb_odd_resources):
    """Check Alb.delete_invariant_sites hashes on ambiguous DNA, on a peptide alignment, and with an empty alignment appended."""
    ambiguous_dna = Alb.AlignBuddy(alb_odd_resources['dna']['single']['ambiguous'])
    ambiguous_dna = Alb.delete_invariant_sites(ambiguous_dna)
    assert hf.buddy2hash(ambiguous_dna) == "27233a416437eabc72aa5d57cb695036"

    # First pass with consider_ambiguous=False, second pass with the default on the already-reduced object
    peptides = Alb.delete_invariant_sites(alb_resources.get_one("o p py"), consider_ambiguous=False)
    assert hf.buddy2hash(peptides) == "f0b16bb8133bfc9e29ad43bdfc4ad2ee"

    peptides = Alb.delete_invariant_sites(peptides)
    fully_reduced_hash = "c13031016c1f7382e808bd4e68d8f406"
    assert hf.buddy2hash(peptides) == fully_reduced_hash

    # Catch empty alignment: the hash is expected to stay the same
    peptides.alignments.append([])
    peptides = Alb.delete_invariant_sites(peptides)
    assert hf.buddy2hash(peptides) == fully_reduced_hash
Ejemplo n.º 13
0
def test_concat_alignments(alb_resources, hf):
    """Check the errors raised by Alb.concat_alignments and the hashes of its output for several patterns."""
    with pytest.raises(AttributeError) as e:
        Alb.concat_alignments(alb_resources.get_one("p o g"), '.*')
    assert "Please provide at least two alignments." in str(e)

    tester = alb_resources.get_one("o p g")
    tester.alignments.append(alb_resources.get_one("o p g").alignments[0])

    # Patterns that are expected to raise ValueError, with a fragment of the expected message
    for bad_pattern, message in (('foo', "No match found for record"),
                                 ('Panx', "Replicate matches")):
        with pytest.raises(ValueError) as e:
            Alb.concat_alignments(tester, bad_pattern)
        assert message in str(e)

    seqs = Sb.SeqBuddy("%sCnidaria_pep.nexus" % hf.resource_path)
    Sb.pull_recs(seqs, "Ccr|Cla|Hec")
    tester = Alb.AlignBuddy(str(seqs))
    tester.alignments.append(tester.alignments[0])
    concatenated = Alb.concat_alignments(Alb.make_copy(tester))
    assert hf.buddy2hash(concatenated) == '32a507107b7dcd044ea7760c8812441c'

    # Positional pattern arguments paired with the expected hash; the first case is deliberately run twice
    pattern_cases = (
        (("(.).(.)-Panx(.)",), '5ac908ebf7918a45664a31da480fda58'),
        (("(.).(.)-Panx(.)",), '5ac908ebf7918a45664a31da480fda58'),
        (("...", "Panx.*"), 'e754350b0397cf54f531421d1e85774f'),
        (("...", "(P)an(x)(.)"), '5c6653aec09489cadcbed68fbd2f7465'),
    )
    for patterns, expected_hash in pattern_cases:
        tester.set_format("gb")
        tester2 = Alb.concat_alignments(Alb.make_copy(tester), *patterns)
        assert hf.buddy2hash(tester2) == expected_hash

    # Swap in a second alignment that had delete_records(..., "Ccr") applied to it
    shorten = Alb.delete_records(Alb.make_copy(tester), "Ccr")
    tester.alignments[1] = shorten.alignments[1]
    concatenated = Alb.concat_alignments(Alb.make_copy(tester))
    assert hf.buddy2hash(concatenated) == 'f3ed9139ab6f97042a244d3f791228b6'
Ejemplo n.º 14
0
def test_concat_alignments(alb_resources, hf):
    """Exercise Alb.concat_alignments: three error conditions first, then output hashes for a range of patterns."""
    def concat_hash(alignbuddy, *patterns):
        # Concatenate a copy of the given object and return the hash of the result
        return hf.buddy2hash(Alb.concat_alignments(Alb.make_copy(alignbuddy), *patterns))

    with pytest.raises(AttributeError) as e:
        Alb.concat_alignments(alb_resources.get_one("p o g"), '.*')
    assert "Please provide at least two alignments." in str(e)

    tester = alb_resources.get_one("o p g")
    extra_alignment = alb_resources.get_one("o p g").alignments[0]
    tester.alignments.append(extra_alignment)

    with pytest.raises(ValueError) as e:
        Alb.concat_alignments(tester, 'foo')
    assert "No match found for record" in str(e)

    with pytest.raises(ValueError) as e:
        Alb.concat_alignments(tester, 'Panx')
    assert "Replicate matches" in str(e)

    tester = Sb.SeqBuddy("%sCnidaria_pep.nexus" % hf.resource_path)
    Sb.pull_recs(tester, "Ccr|Cla|Hec")
    tester = Alb.AlignBuddy(str(tester))
    tester.alignments.append(tester.alignments[0])
    assert concat_hash(tester) == '32a507107b7dcd044ea7760c8812441c'

    # Same pattern twice in a row, each time on a fresh copy
    for _ in range(2):
        tester.set_format("gb")
        assert concat_hash(tester, "(.).(.)-Panx(.)") == 'cd2b6594b22c431aea67fa45899f933a'

    tester.set_format("gb")
    assert concat_hash(tester, "...", "Panx.*") == 'e49b26f695c910a93f93d70563fd9dd9'

    tester.set_format("gb")
    assert concat_hash(tester, "...", "(P)an(x)(.)") == '3abfdf2217050ac2170c0de27352a8c6'

    # Replace the second alignment with one that had delete_records(..., "Ccr") applied to it
    shorten = Alb.delete_records(Alb.make_copy(tester), "Ccr")
    tester.alignments[1] = shorten.alignments[1]
    assert concat_hash(tester) == '685f24ee1fc88860dd9465035040c91e'
Ejemplo n.º 15
0
def test_inplace(capsys, alb_resources, hf):
    """Check the in_place UI flag: the input file is over-written, and a warning is issued for a missing path."""
    tmp_dir = br.TempDir()
    align_path = "%s/align" % tmp_dir.path
    tester = alb_resources.get_one("o d f")
    tester.write(align_path)

    args = deepcopy(in_args)
    args.transcribe = True
    args.in_place = True
    args.alignments = [align_path]

    Alb.command_line_ui(args, tester, skip_exit=True)
    _, stderr = capsys.readouterr()
    # Re-read the file from disk to see what the UI wrote there
    tester = Alb.AlignBuddy(align_path)
    assert "File over-written at:" in stderr
    # On a hash mismatch the assertion message expression writes the object to 'temp.del'
    assert hf.buddy2hash(tester) == "8f78e0c99e2d6d7d9b89b8d854e02bcd", tester.write("temp.del")

    args.alignments = ["I/do/not/exist"]
    Alb.command_line_ui(args, alb_resources.get_one("o d f"), skip_exit=True)
    _, stderr = capsys.readouterr()
    assert "Warning: The -i flag was passed in, but the positional argument doesn't seem to be a file." in stderr
Ejemplo n.º 16
0
def main(in_args):
    """
    Submit sequences to the TOPCONS web service, or check on a previously submitted job.

    If in_args.input is an existing file, it is read as a sequence file, cleaned, and submitted
    as a new job. Otherwise in_args.input is sent to the service as a job ID: when the job is
    finished, the result archive is downloaded (unless already present) and extracted into
    in_args.outpath, the per-algorithm topology predictions in query.result.txt are reduced to a
    consensus, and the sequences are written to <outpath>/<jobid>.gb with the matched regions
    annotated as TMD1, TMD2, ...

    :param in_args: object providing the attributes input, jobname, email and outpath
    :return: 1 if the input could not be read or was too large, the service returned nothing,
             the output directory could not be created, or the result archive could not be
             retrieved; 0 otherwise
    """
    wsdl_url = "http://v2.topcons.net/pred/api_submitseq/?wsdl"
    fixtop = ""

    if os.path.isfile(in_args.input):
        try:
            seqbuddy = Sb.SeqBuddy(in_args.input, out_format="fasta")
            Sb.clean_seq(seqbuddy)
            # Sb.hash_ids(seqbuddy)

        except br.GuessError:
            print("Unable to read the provided input file, is it a properly formatted sequence file?")
            return 1

        if len(str(seqbuddy)) >= MAX_FILESIZE:
            print("You input seqfile is too large! Please split the file into chunks less than %d Mb."
                  % MAX_FILESIZE_IN_MB, file=sys.stderr)
            return 1

        # ***** Here's the meat ***** #
        myclient = Client(wsdl_url, cache=None)
        ret_value = myclient.service.submitjob(str(seqbuddy), fixtop, in_args.jobname, in_args.email)
        if len(ret_value) >= 1:
            jobid, result_url, numseq_str, errinfo, warninfo = ret_value[0][:5]
            if jobid != "None" and jobid != "":
                print("You have successfully submitted your job with %s sequences. jobid = %s" % (numseq_str, jobid))
                if warninfo != "" and warninfo != "None":
                    print("Warning message:\n%s" % warninfo)
            else:
                print("Failed to submit job!")
                if errinfo != "" and errinfo != "None":
                    print("Error message:\n%s" % errinfo)
                if warninfo != "" and warninfo != "None":
                    print("Warning message:\n%s" % warninfo)
        else:
            print("Failed to submit job!")
            return 1
    else:
        myclient = Client(wsdl_url, cache=None)
        ret_value = myclient.service.checkjob(in_args.input)
        if len(ret_value) >= 1:
            status, result_url, errinfo = ret_value[0][:3]

            if status == "Failed":
                print("Your job with jobid %s is failed!" % in_args.input)
                if errinfo != "" and errinfo != "None":
                    print("Error message:\n%s" % errinfo)
            elif status == "Finished":
                print("Your job with jobid %s is finished!" % in_args.input)
                if not os.path.exists(in_args.outpath):
                    try:
                        os.makedirs(in_args.outpath)
                    except OSError:
                        print("Failed to create the outpath %s" % in_args.outpath)
                        return 1
                outfile = "%s/%s.zip" % (in_args.outpath, in_args.input)
                if not os.path.exists(outfile):
                    print("Retrieving")
                    urllib.request.urlretrieve(result_url, outfile)
                    if os.path.exists(outfile):
                        print("The result file %s has been retrieved for jobid %s" % (outfile, in_args.input))
                    else:
                        print("Failed to retrieve result for jobid %s" % in_args.input)
                        # Without the archive there is nothing to unpack; stop instead of crashing below
                        return 1

                with zipfile.ZipFile(outfile) as zf:
                    zf.extractall(in_args.outpath)

                with open("%s/%s/query.result.txt" % (in_args.outpath, in_args.input), "r") as ifile:
                    topcons = ifile.read()

                # Records are delimited by lines of '#'; the first two chunks and the last one are dropped
                topcons = topcons.split("##############################################################################")[2:-1]

                records = []
                for rec in topcons:
                    seq_id = re.search("Sequence name: (.*)", rec).group(1).strip()
                    seq = re.search("Sequence:\n([A-Z]+)", rec).group(1).strip()
                    alignment = ""
                    for algorithm in ["TOPCONS", "OCTOPUS", "Philius", "PolyPhobius", "SCAMPI", "SPOCTOPUS"]:
                        try:
                            top_file = re.search("%s predicted topology:\n([ioMS]+)" % algorithm, rec).group(1).strip()
                            alignment += ">%s\n%s\n\n" % (algorithm, top_file)
                        except AttributeError:
                            # re.search() returned None: this algorithm has no prediction for the
                            # sequence, so report it and carry on with the remaining algorithms
                            print("%s: %s" % (seq_id, algorithm))

                    alignment = Alb.AlignBuddy(alignment)
                    # print(alignment)
                    Alb.consensus_sequence(alignment)
                    cons_seq = Sb.SeqBuddy(">%s\n%s\n" % (seq_id, seq), out_format="genbank")
                    counter = 1
                    # Every run of M/X characters in the first record becomes one numbered TMD annotation
                    for tmd in re.finditer("([MX]+)", str(alignment.records()[0].seq)):
                        Sb.annotate(cons_seq, "TMD%s" % counter, "%s-%s" % (tmd.start(), tmd.end()))
                        counter += 1
                    records.append(cons_seq.records[0])

                seqbuddy = Sb.SeqBuddy(records, out_format="genbank")
                seqbuddy.write("%s/%s.gb" % (in_args.outpath, in_args.input))

            elif status == "None":
                print("Your job with jobid %s does not exist! Please check you typing!" % in_args.input)
            else:
                print("Your job with jobid %s is not ready, status = %s" % (in_args.input, status))
        else:
            print("Failed to get job!")
            return 1

    return 0