Beispiel #1
0
def test_record_guess_refseq():
    """Check the database/type guessed for RefSeq-style accessions.

    Three accession families are exercised: transcripts (NM_/NR_/XM_/XR_),
    chromosomes (NC_/XC_) and proteins (NP_/XP_). Each accession is wrapped
    in a ``Db.Record`` and ``guess_database()`` is called before checking the
    ``database`` and ``type`` attributes.
    """
    expectations = [
        # RefSeq transcripts
        (["NM_123456789", "NR_123456789", "XM_123456789", "XR_123456789"],
         "ncbi_nuc", "nucleotide"),
        # RefSeq chromosomes
        (["NC_123456789", "XC_123456789"], "ncbi_nuc", "nucleotide"),
        # RefSeq proteins. This group previously repeated the transcript
        # accessions, so protein prefixes were never actually tested.
        # NOTE(review): assumes guess_database() reports RefSeq protein
        # prefixes as ncbi_prot/protein, like the other protein accession
        # tests in this file -- confirm against Db.Record.
        (["NP_123456789", "XP_123456789"], "ncbi_prot", "protein"),
    ]
    for accessions, expected_db, expected_type in expectations:
        for accn in accessions:
            rec = Db.Record(accn)
            rec.guess_database()
            assert rec.database == expected_db
            assert rec.type == expected_type
Beispiel #2
0
def test_record_update():
    """Verify that ``Record.update()`` adopts the fields of another record.

    A record created with only an accession is updated from a fully
    populated UniProt record; afterwards its accession, summary keys, size,
    database, type, search term and string form must match the donor.
    """
    target = Db.Record("K9WMR5XBZ1")

    summary_items = [
        ("ACCN", "F6SBJ1"),
        ("DB", "uniprot"),
        ("entry_name", "F6SBJ1_HORSE"),
        ("length", "451"),
        ("organism-id", "9796"),
        ("organism", "Equus caballus (Horse)"),
        ("protein_names", "Caspase"),
        ("comments", "Caution (1); Sequence similarities (1)"),
        ("record", "summary"),
    ]
    donor_kwargs = dict(gi=None,
                        _version=None,
                        _record=None,
                        summary=OrderedDict(summary_items),
                        _size=451,
                        _database="uniprot",
                        _type="protein",
                        _search_term="casp9")
    donor = Db.Record("F6SBJ1", **donor_kwargs)

    target.update(donor)

    assert target.accession == "F6SBJ1"
    # These three were passed as None on the donor and must stay falsy.
    assert not target.gi
    assert not target.version
    assert not target.record
    # Key order of the summary must be preserved.
    assert list(target.summary) == [key for key, _ in summary_items]
    assert target.size == 451
    assert target.database == "uniprot"
    assert target.type == "protein"
    assert target.search_term == "casp9"

    expected_str = ("Accession:\tF6SBJ1\n"
                    "Database:\tuniprot\n"
                    "Record:\tNone\n"
                    "Type:\tprotein\n")
    assert str(target) == expected_str
Beispiel #3
0
def test_record_instantiation():
    """Check the state of a ``Db.Record`` built from an accession alone.

    Also checks that a ``_size`` given as a string is exposed as an int.
    """
    bare = Db.Record("Foo")
    assert bare.accession == "Foo"

    for attr_name in ("version", "record", "summary"):
        assert not getattr(bare, attr_name)
    # The empty summary must be exactly an OrderedDict, not a subclass.
    assert type(bare.summary) is OrderedDict
    for attr_name in ("size", "database", "type", "search_term"):
        assert not getattr(bare, attr_name)

    expected_str = ("Accession:\tFoo\n"
                    "Database:\tNone\n"
                    "Record:\tNone\n"
                    "Type:\tNone\n")
    assert str(bare) == expected_str

    sized = Db.Record("Foo", _size='5746')
    assert sized.size == 5746
Beispiel #4
0
def test_ensembl_fetch_nucleotide(monkeypatch, capsys, hf):
    """Run ``EnsemblRestClient.fetch_nucleotide()`` against canned responses.

    ``perform_rest_action`` is replaced with a stub that serves files from
    the mock resources directory, and the printed DbBuddy output is compared
    against a known hash.
    """
    test_files = "%s/mock_resources/test_databasebuddy_clients/" % hf.resource_path

    def patch_ensembl_perform_rest_action(*args, **kwargs):
        # Stand-in for the REST call: picks a canned file based on the
        # endpoint string found among the positional arguments.
        print("patch_ensembl_perform_rest_action\nargs: %s\nkwargs: %s" %
              (args, kwargs))
        if "info/species" in args:
            with open("%s/ensembl_species.json" % test_files, "r") as species_file:
                return json.load(species_file)
        if "sequence/id" in args:
            with open("%s/ensembl_sequence.seqxml" % test_files, "r") as seq_file:
                seqxml_text = seq_file.read()
            tmp_file = br.TempFile(byte_mode=True)
            tmp_file.write(seqxml_text.encode())
            return Db.SeqIO.parse(tmp_file.get_handle("r"), "seqxml")
        return None

    monkeypatch.setattr(Db.EnsemblRestClient, "perform_rest_action",
                        patch_ensembl_perform_rest_action)

    accn_string = ", ".join(ACCNS[7:])
    dbbuddy = Db.DbBuddy(accn_string)
    dbbuddy.records['ENSAMEG00000011912'] = Db.Record('ENSAMEG00000011912')
    dbbuddy.records['ENSCJAG00000008732'].summary = OrderedDict([
        ('organism', 'macropus_eugenii'),
        ('comments', 'Blahh blahh blahh'),
        ('name', 'Foo1'),
    ])

    client = Db.EnsemblRestClient(dbbuddy)
    client.fetch_nucleotide()

    # Discard everything captured so far; only the print() output is hashed.
    capsys.readouterr()
    client.dbbuddy.print()
    captured_out, captured_err = capsys.readouterr()
    digest = hf.string2hash(captured_out + captured_err)
    assert digest == "bc7610d5373db0b0fd9835410a182f10"
Beispiel #5
0
def test_record_guess_genbank_pdb():
    """PDB-style accessions must be guessed as ``ncbi_prot`` / ``protein``."""
    pdb_accessions = ("2OOX", "4M7U", "700Y", "6TNH_2", "5CTC_C", "52O0", "3QNM")
    for pdb_accn in pdb_accessions:
        record = Db.Record(pdb_accn)
        record.guess_database()
        assert record.database == "ncbi_prot"
        assert record.type == "protein"
Beispiel #6
0
def test_record_guess_genbank_prot():
    """GenBank protein-style accessions must map to ``ncbi_prot`` / ``protein``."""
    protein_accessions = (
        "TXB10644",
        "DII59567",
        "FTJ23865",
        "SRR43454",
        "OIJ24077",
        "HNP42487",
        "TJS12387",
    )
    for prot_accn in protein_accessions:
        record = Db.Record(prot_accn)
        record.guess_database()
        assert record.database == "ncbi_prot"
        assert record.type == "protein"
Beispiel #7
0
def test_record_guess_genbank_nuc():
    """GenBank nucleotide-style accessions must map to ``ncbi_nuc`` / ``nucleotide``."""
    nucleotide_accessions = (
        "PU844519",
        "I96398",
        "V72255",
        "M06308",
        "KP485089",
        "T79891",
        "R36898",
    )
    for nuc_accn in nucleotide_accessions:
        record = Db.Record(nuc_accn)
        record.guess_database()
        assert record.database == "ncbi_nuc"
        assert record.type == "nucleotide"
def test_search_ensembl(monkeypatch, capsys, hf):
    """Check ``EnsemblRestClient.search_ensembl()`` with and without results.

    ``perform_rest_action`` is stubbed to return the canned species JSON.
    ``br.run_multicore_function`` is stubbed twice: first with a no-op (the
    client must report that Ensembl returned no results), then with a stub
    that writes canned search results into ``client.results_file``.
    """
    def patch_ensembl_perform_rest_action(*args, **kwargs):
        print("patch_ensembl_perform_rest_action\nargs: %s\nkwargs: %s" % (args, kwargs))
        with open("%s/ensembl_species.json" % test_files, "r") as ifile:
            return json.load(ifile)

    def patch_search_ensembl_empty(*args, **kwargs):
        print("patch_search_ensembl_empty\nargs: %s\nkwargs: %s" % (args, kwargs))
        return

    def patch_search_ensembl_results(*args, **kwargs):
        # The label names this stub (it used to print the "empty" stub's name),
        # so captured output shows which stub actually ran.
        print("patch_search_ensembl_results\nargs: %s\nkwargs: %s" % (args, kwargs))
        with open("%s/ensembl_search_results.txt" % test_files, "r") as ifile:
            client.results_file.write(ifile.read())
        return

    test_files = "%s/mock_resources/test_databasebuddy_clients/" % hf.resource_path
    monkeypatch.setattr(Db.EnsemblRestClient, "perform_rest_action", patch_ensembl_perform_rest_action)
    monkeypatch.setattr(br, "run_multicore_function", patch_search_ensembl_empty)

    dbbuddy = Db.DbBuddy(", ".join(ACCNS[7:]))
    client = Db.EnsemblRestClient(dbbuddy)
    client.dbbuddy.search_terms = ["Panx3"]
    client.dbbuddy.records["ENSLAFG00000006034"] = Db.Record("ENSLAFG00000006034")

    # Empty search: only the progress/no-result messages appear on stderr and
    # the pre-existing record is left without a full record.
    client.search_ensembl()
    out, err = capsys.readouterr()
    assert err == "Searching Ensembl for Panx3...\nEnsembl returned no results\n"
    assert not client.dbbuddy.records["ENSLAFG00000006034"].record

    # Populated search: the stub fills client.results_file before parsing.
    monkeypatch.setattr(br, "run_multicore_function", patch_search_ensembl_results)
    client.search_ensembl()
    assert hf.string2hash(str(client.dbbuddy)) == "95dc1ecce077bef84cdf2d85ce154eef"
    assert len(client.dbbuddy.records) == 44
    assert client.dbbuddy.records["ENSLAFG00000006034"].database == "ensembl"
Beispiel #9
0
def test_liveshell_do_write(monkeypatch, capsys, hf):
    """Exercise ``LiveShell.do_write()`` across its success and failure paths.

    A saved session is loaded, then ``do_write`` is driven through: writing
    summaries (path supplied via ``input``), writing accessions, aborting,
    a permission error, refusing to overwrite an existing file, and a
    non-existent target directory. The steps are order dependent because
    the monkeypatches accumulate.
    """
    monkeypatch.setattr(Db.LiveShell, "cmdloop", mock_cmdloop)
    monkeypatch.setattr(Db.LiveShell, "dump_session", lambda _: True)
    dbbuddy = Db.DbBuddy()
    crash_log = br.TempFile(byte_mode=True)
    liveshell = Db.LiveShell(dbbuddy, crash_log)

    session_db = "%s/mock_resources/test_databasebuddy_clients/dbbuddy_save.db" % hf.resource_path
    liveshell.do_load(session_db)
    capsys.readouterr()
    tmp_dir = br.TempDir()

    def target(name):
        # Build an output path inside the temporary directory.
        return "%s/%s" % (tmp_dir.path, name)

    # write a summary
    monkeypatch.setattr("builtins.input", lambda _: target("save1"))
    liveshell.do_write(None)
    assert os.path.isfile(target("save1"))
    with open(target("save1"), "r") as written:
        assert len(written.read()) == 249980
    stdout, stderr = capsys.readouterr()
    assert re.search("1407 summary records.*written to.*save1", stdout)

    # write ids/accns
    dbbuddy.out_format = "ids"
    monkeypatch.setattr(br, "ask", lambda _: True)
    dbbuddy.records['O14727'].record = Db.Record('O14727', _record=True)
    liveshell.do_write(target("save2"))
    assert os.path.isfile(target("save2"))
    with open(target("save2"), "r") as written:
        assert len(written.read()) == 18661
    stdout, stderr = capsys.readouterr()
    assert re.search("1407 accessions.*written to.*save2", stdout)

    # Abort summary
    monkeypatch.setattr(br, "ask", lambda _: False)
    liveshell.do_write(target("save3"))
    assert not os.path.isfile(target("save3"))
    stdout, stderr = capsys.readouterr()
    assert "Abort..." in stdout

    # Permission error (builtins.open stays patched for the rest of the test)
    dbbuddy.out_format = "fasta"
    monkeypatch.setattr("builtins.open", OpenPermissionError)
    liveshell.do_write(target("save4"))
    assert not os.path.isfile(target("save4"))
    stdout, stderr = capsys.readouterr()
    assert "Error: You do not have write privileges in the specified directory.\n\n" in stdout

    # File exists
    monkeypatch.setattr(br, "ask", lambda _: False)
    liveshell.do_write(target("save2"))
    stdout, stderr = capsys.readouterr()
    assert "Abort..." in stdout
    assert "written" not in stdout

    # Not a directory
    liveshell.do_write(target("ghostdir/save5"))
    stdout, stderr = capsys.readouterr()
    assert "The specified directory does not exist. Please create it before continuing" in stdout
    assert "written" not in stdout
Beispiel #10
0
def test_record_guess_genbank_mga():
    """MGA-style GenBank accessions must be guessed as ``ncbi_prot`` / ``protein``."""
    mga_accessions = (
        "BJCKQ0111866", "YXRUT6401652", "PVAGD7038775", "OGSVS5937667",
        "LPMXX1503516", "NTEWQ3440974", "CTDME6774392",
    )
    for mga_accn in mga_accessions:
        record = Db.Record(mga_accn)
        record.guess_database()
        assert record.database == "ncbi_prot"
        assert record.type == "protein"
Beispiel #11
0
def test_record_guess_uniprot():
    """UniProt-style accessions must be guessed as ``uniprot`` / ``protein``."""
    uniprot_accessions = (
        "K2O417", "I0DZU1", "A8GFV0", "J3K7W6", "O3U582",
        "C3YWY7GUS7", "Q0L5K7", "Q5FO16", "K9WMR5XBZ1",
    )
    for uniprot_accn in uniprot_accessions:
        record = Db.Record(uniprot_accn)
        record.guess_database()
        assert record.database == "uniprot"
        assert record.type == "protein"
Beispiel #12
0
def test_record_guess_genbank_genome():
    """Genome-style GenBank accessions must map to ``ncbi_nuc`` / ``nucleotide``."""
    genome_accessions = (
        "LJIJ8045260586", "MRMV14919426", "WBGU8744627061", "WYNM11788712",
        "SQVS3339736221", "LVGB461502017", "FAWG101678469",
    )
    for genome_accn in genome_accessions:
        record = Db.Record(genome_accn)
        record.guess_database()
        assert record.database == "ncbi_nuc"
        assert record.type == "nucleotide"
Beispiel #13
0
def test_record_guess_ensembl():
    """ENS*- and FB*-prefixed accessions must map to ``ensembl`` / ``nucleotide``."""
    ensembl_ids = (
        "ENSRNOG00000018630",
        "ENSMUSG00000057666",
        "ENSPTRG00000004577",
        "ENSCAFG00000015077",
        "ENSPPYG00000004189",
        "ENSPCAG00000006928",
        "ENSOPRG00000012514",
        "ENSECAG00000022051",
        "ENSTSYG00000002171",
        "FBgn0001987",
        "FBtr0330306",
        "FBcl0254909",
    )
    for ensembl_id in ensembl_ids:
        record = Db.Record(ensembl_id)
        record.guess_database()
        assert record.database == "ensembl"
        assert record.type == "nucleotide"
Beispiel #14
0
def test_record_guess_genbank_gi():
    """All-digit accessions must be treated as GI numbers.

    The record is expected to report ``ncbi_nuc`` / ``gi_num``, expose the
    number through ``gi`` and keep the original string as its accession.
    """
    gi_numbers = ("13545654", "1445", "9876513546531", "154351",
                  "135464316", "4684315", "21240")
    for gi_number in gi_numbers:
        record = Db.Record(gi_number)
        record.guess_database()
        assert record.database == "ncbi_nuc"
        assert record.type == "gi_num"
        assert str(record.gi) == gi_number
        assert record.accession == gi_number
Beispiel #15
0
 def mock_big_record_no_dl(_dbbuddy):
     """Store a ``Db.Record`` of size 5000001 under accession NP_001287575.1."""
     big_accn = "NP_001287575.1"
     big_record = Db.Record(big_accn, _size=5000001)
     _dbbuddy.records[big_accn] = big_record
Beispiel #16
0
def test_record_search(sb_resources):
    """Exercise ``Record.search()`` with column, length and free-text queries.

    Covers the wildcard, ``(length<op>N)`` comparisons (including malformed
    operators, which must raise ``ValueError``), ``(column) regex`` searches,
    column-less searches against record parameters and summary values, the
    case-insensitivity flags, and searching inside an attached full record.
    """
    summary = {
        "ACCN": "F6SBJ1",
        "DB": "uniprot",
        "entry_name": "F6SBJ1_HORSE",
        "length": "451",
        "organism-id": "9796",
        "organism": "Equus caballus (Horse)",
        "protein_names": "Caspase",
        "comments": "Caution (1); Sequence similarities (1)",
        "record": "summary"
    }
    rec = Db.Record("F6SBJ1", summary=summary, _type="protein")
    assert rec.search("*")
    assert not rec.search("Foo")

    # Length operator True
    assert rec.search("(length=451)")
    assert rec.search("(length >=451)")
    assert rec.search("(length<= 451)")
    assert rec.search("(length > 200)")
    assert rec.search("(length<500)")

    # Length operator False
    assert not rec.search("(length=452)")
    assert not rec.search("(length>=452)")
    assert not rec.search("(length<=450)")
    assert not rec.search("(length>500)")
    assert not rec.search("(length<200)")

    # Length operator errors
    # The message is read from the raised exception itself (err.value);
    # str() of the ExceptionInfo wrapper is not guaranteed to contain it.
    with pytest.raises(ValueError) as err:
        rec.search("(length!<200)")
    assert "Invalid syntax for seaching 'length': length!<200" in str(err.value)

    with pytest.raises(ValueError) as err:
        rec.search("(length<>200)")
    assert "Invalid operator: <>" in str(err.value)

    # Without a 'length' entry a length query simply does not match.
    del rec.summary['length']
    assert not rec.search("(length>200)")

    # Other columns
    assert rec.search("(ACCN) [A-Z0-9]{6}")
    assert not rec.search("(ACCN) [A-Z0-9]{7}")
    assert rec.search("(Type) prot")
    assert not rec.search("(Type) nucl")
    assert rec.search("(DB) uniprot")
    assert not rec.search("(DB) ncbi")
    assert rec.search("(comments)(Caution|Blahh)")
    assert not rec.search("(organism)Sheep")
    assert rec.search("(entry_name)")
    assert rec.search("(entry_name) ")
    assert not rec.search("(foo_name)")

    # No columns -> params
    assert rec.search("F6SBJ1")
    assert rec.search("uniprot")
    assert rec.search("protein")

    # No columns -> summary
    assert rec.search("Equus")
    assert not rec.search("equus")
    assert rec.search("i?equus")
    assert rec.search("?iEqUuS")

    # Genbank record
    sb_obj = sb_resources.get_one("p g")
    rec = Db.Record("Mle-Panxα8", _record=sb_obj.records[4])
    assert rec.search("Innexin")
    assert not rec.search("ML07312abcd")
Beispiel #17
0
def test_uniprotrestclient_fetch_proteins(monkeypatch, capsys, hf):
    """Drive ``UniProtRestClient.fetch_proteins()`` through stubbed queries.

    ``query_uniprot`` (and later ``br.run_multicore_function``) is replaced
    by stubs that write canned content into ``client.results_file``. The
    test walks through: nothing to fetch, a single-call fetch, a fetch
    routed through ``run_multicore_function``, an empty result combined
    with a recorded HTTP error, and an over-long accession that must raise
    ``ValueError``.
    """
    def patch_query_uniprot_search(*args, **kwargs):
        # Canned search output: tab-separated summary rows grouped per search term.
        print("patch_query_uniprot_search\nargs: %s\nkwargs: %s" %
              (args, kwargs))
        client.results_file.write(
            '''# Search: inx15
A8XEF9	A8XEF9_CAEBR	381	6238	Caenorhabditis briggsae	Innexin	Function (1); Sequence similarities (1); \
Subcellular location (2)
O61786	O61786_CAEEL	382	6239	Caenorhabditis elegans	Innexin	Function (1); Sequence similarities (1); \
Subcellular location (2)
A0A0H5SBJ0	A0A0H5SBJ0_BRUMA	129	6279	Brugia malayi (Filarial nematode worm)	Innexin	Function (1); Sequence \
similarities (1); Subcellular location (1)
E3MGD6	E3MGD6_CAERE	384	31234	Caenorhabditis remanei (Caenorhabditis vulgaris)	Innexin	Function (1); \
Sequence similarities (1); Subcellular location (2)
//
# Search: inx16
O61787	INX16_CAEEL	372	6239	Caenorhabditis elegans	Innexin-16 (Protein opu-16)	Function (1); Sequence \
similarities (1); Subcellular location (1)
A0A0V1AZ11	A0A0V1AZ11_TRISP	406	6334	Trichinella spiralis (Trichina worm)	Innexin	Caution (1); Function (1); \
Sequence similarities (1); Subcellular location (2)
A8XEF8	A8XEF8_CAEBR	374	6238	Caenorhabditis briggsae	Innexin	Function (1); Sequence similarities (1); \
Subcellular location (2)
A0A0B2VB60	A0A0B2VB60_TOXCA	366	6265	Toxocara canis (Canine roundworm)	Innexin	Caution (2); Function (1); \
Sequence similarities (1); Subcellular location (1)
A0A0V0W5E2	A0A0V0W5E2_9BILA	410	92179	Trichinella sp. T6	Innexin	Caution (2); Function (1); Sequence \
similarities (1); Subcellular location (1)
//''', "w")
        return

    def patch_query_uniprot_fetch(*args, **kwargs):
        # Canned full-record output read from the mock resources directory.
        print("patch_query_uniprot_fetch\nargs: %s\nkwargs: %s" %
              (args, kwargs))
        with open("%s/mock_resources/test_databasebuddy_clients/uniprot_fetch.txt" % hf.resource_path, "r") \
                as ifile:
            client.results_file.write(ifile.read(), "w")
        return

    def patch_query_uniprot_fetch_nothing(*args, **kwargs):
        # Canned output containing a search header but no records.
        print("patch_query_uniprot_fetch_nothing\nargs: %s\nkwargs: %s" %
              (args, kwargs))
        client.results_file.write(
            "# Search: A8XEF9,O61786,A0A0H5SBJ0,E3MGD6,O61787,A0A0V1AZ11,A8XEF8,A0A0B2VB60,"
            "A0A0V0W5E2\n//\n//", "w")
        return

    monkeypatch.setattr(Db.UniProtRestClient, "query_uniprot", lambda _: True)
    dbbuddy = Db.DbBuddy("inx15,inx16")
    client = Db.UniProtRestClient(dbbuddy)
    client.fetch_proteins()

    # Only search terms are present so far, so nothing should be requested.
    out, err = capsys.readouterr()
    assert client.results_file.read() == ""
    assert "full records from UniProt..." not in err

    # Test a single call to query_uniprot
    monkeypatch.setattr(Db.UniProtRestClient, "query_uniprot",
                        patch_query_uniprot_search)
    client.search_proteins()
    monkeypatch.setattr(Db.UniProtRestClient, "query_uniprot",
                        patch_query_uniprot_fetch)
    client.fetch_proteins()
    out, err = capsys.readouterr()
    assert "Requesting 9 full records from UniProt..." in err

    # Test multicore call to query_uniprot
    monkeypatch.setattr(br, "run_multicore_function",
                        patch_query_uniprot_fetch)
    # Clear the fetched records so they are requested again.
    for rec in client.dbbuddy.records.values():
        rec.record = None
    client.dbbuddy.records["a" * 999] = Db.Record("a" * 999,
                                                  _database="uniprot")
    client.fetch_proteins()
    out, err = capsys.readouterr()
    assert "Requesting 10 full records from UniProt..." in err
    seq = str(client.dbbuddy.records["A8XEF9"].record.seq)
    assert hf.string2hash(seq) == "04f13629336cf6cdd5859c8913b742a5"

    # Some edge cases
    monkeypatch.setattr(Db.UniProtRestClient, "query_uniprot",
                        patch_query_uniprot_fetch_nothing)
    client.http_errors_file.write("inx15\n%s\n//\n" %
                                  URLError("Fake URLError from Mock"))

    client.dbbuddy.records = OrderedDict([("a" * 999,
                                           Db.Record("a" * 999,
                                                     _database="uniprot"))])
    client.fetch_proteins()
    out, err = capsys.readouterr()
    assert "Requesting 1 full records from UniProt..." in err
    assert "No sequences returned\n\n" in err
    assert "The following errors were encountered while querying UniProt with fetch_proteins():" in err
    assert hf.string2hash(str(
        client.dbbuddy.records["a" *
                               999])) == "670bf9c6ae5832b42841798d882a7276"

    # The message is read from the raised exception itself (excinfo.value);
    # str() of the ExceptionInfo wrapper is not guaranteed to contain it.
    with pytest.raises(ValueError) as excinfo:
        client.dbbuddy.records["a" * 1001] = Db.Record("a" * 1001,
                                                       _database="uniprot")
        client.fetch_proteins()
    assert "The provided accession or search term is too long (>1000)." in str(
        excinfo.value)