def test_record_guess_refseq():
    """Every RefSeq-style accession listed here must be guessed as ncbi_nuc / nucleotide."""
    transcript_accns = ["NM_123456789", "NR_123456789", "XM_123456789", "XR_123456789"]
    chromosome_accns = ["NC_123456789", "XC_123456789"]
    # NOTE(review): this third group was named `ref_seq_prot` originally, yet it repeats the
    # transcript accessions and the nucleotide expectations verbatim. Protein prefixes were
    # presumably intended -- confirm against Db.Record.guess_database() before changing it.
    repeated_accns = ["NM_123456789", "NR_123456789", "XM_123456789", "XR_123456789"]

    for group in (transcript_accns, chromosome_accns, repeated_accns):
        for accession in group:
            record = Db.Record(accession)
            record.guess_database()
            assert record.database == "ncbi_nuc"
            assert record.type == "nucleotide"
def test_record_update():
    """After rec.update(new_rec), rec must report new_rec's accession, summary and metadata."""
    summary_fields = [
        ("ACCN", "F6SBJ1"),
        ("DB", "uniprot"),
        ("entry_name", "F6SBJ1_HORSE"),
        ("length", "451"),
        ("organism-id", "9796"),
        ("organism", "Equus caballus (Horse)"),
        ("protein_names", "Caspase"),
        ("comments", "Caution (1); Sequence similarities (1)"),
        ("record", "summary"),
    ]

    target = Db.Record("K9WMR5XBZ1")
    source = Db.Record(
        "F6SBJ1",
        gi=None,
        _version=None,
        _record=None,
        summary=OrderedDict(summary_fields),
        _size=451,
        _database="uniprot",
        _type="protein",
        _search_term="casp9",
    )
    target.update(source)

    assert target.accession == "F6SBJ1"
    # These three were passed as None on the source record, so they must stay falsy.
    for empty_attr in ("gi", "version", "record"):
        assert not getattr(target, empty_attr)

    # The summary keys must come back in the order they were supplied.
    assert list(target.summary) == [key for key, _ in summary_fields]

    assert target.size == 451
    assert target.database == "uniprot"
    assert target.type == "protein"
    assert target.search_term == "casp9"

    expected_text = (
        "Accession:\tF6SBJ1\n"
        "Database:\tuniprot\n"
        "Record:\tNone\n"
        "Type:\tprotein\n"
    )
    assert str(target) == expected_text
def test_record_instantiation():
    """A Record created from only an accession has falsy fields; a string `_size` compares equal to its int."""
    bare = Db.Record("Foo")
    assert bare.accession == "Foo"

    for attr_name in ("version", "record", "summary"):
        assert not getattr(bare, attr_name)
    # Exact type check: the empty summary must be an OrderedDict itself.
    assert type(bare.summary) == OrderedDict
    for attr_name in ("size", "database", "type", "search_term"):
        assert not getattr(bare, attr_name)

    expected_text = (
        "Accession:\tFoo\n"
        "Database:\tNone\n"
        "Record:\tNone\n"
        "Type:\tNone\n"
    )
    assert str(bare) == expected_text

    sized = Db.Record("Foo", _size='5746')
    assert sized.size == 5746
def test_ensembl_fetch_nucleotide(monkeypatch, capsys, hf):
    """Run EnsemblRestClient.fetch_nucleotide() against canned REST responses and hash the printed DbBuddy."""
    resource_dir = "%s/mock_resources/test_databasebuddy_clients/" % hf.resource_path

    def fake_rest_action(*args, **kwargs):
        # Stand-in for EnsemblRestClient.perform_rest_action: serves canned files by endpoint.
        print("patch_ensembl_perform_rest_action\nargs: %s\nkwargs: %s" % (args, kwargs))
        if "info/species" in args:
            with open("%s/ensembl_species.json" % resource_dir, "r") as handle:
                return json.load(handle)
        if "sequence/id" in args:
            with open("%s/ensembl_sequence.seqxml" % resource_dir, "r") as handle:
                seqxml_buffer = br.TempFile(byte_mode=True)
                seqxml_buffer.write(handle.read().encode())
                return Db.SeqIO.parse(seqxml_buffer.get_handle("r"), "seqxml")
        # Any other endpoint falls through and yields None, as before.

    monkeypatch.setattr(Db.EnsemblRestClient, "perform_rest_action", fake_rest_action)

    dbbuddy = Db.DbBuddy(", ".join(ACCNS[7:]))
    extra_accn = 'ENSAMEG00000011912'
    dbbuddy.records[extra_accn] = Db.Record(extra_accn)
    dbbuddy.records['ENSCJAG00000008732'].summary = OrderedDict([
        ('organism', 'macropus_eugenii'),
        ('comments', 'Blahh blahh blahh'),
        ('name', 'Foo1'),
    ])

    client = Db.EnsemblRestClient(dbbuddy)
    client.fetch_nucleotide()
    capsys.readouterr()  # discard everything emitted during the fetch itself

    client.dbbuddy.print()
    out, err = capsys.readouterr()
    assert hf.string2hash(out + err) == "bc7610d5373db0b0fd9835410a182f10"
def test_record_guess_genbank_pdb():
    """Each sample accession below must be guessed as database 'ncbi_prot', type 'protein'."""
    sample_accessions = ("2OOX", "4M7U", "700Y", "6TNH_2", "5CTC_C", "52O0", "3QNM")
    for accession in sample_accessions:
        record = Db.Record(accession)
        record.guess_database()
        assert record.database == "ncbi_prot"
        assert record.type == "protein"
def test_record_guess_genbank_prot():
    """Each sample accession below must be guessed as database 'ncbi_prot', type 'protein'."""
    sample_accessions = (
        "TXB10644",
        "DII59567",
        "FTJ23865",
        "SRR43454",
        "OIJ24077",
        "HNP42487",
        "TJS12387",
    )
    for accession in sample_accessions:
        record = Db.Record(accession)
        record.guess_database()
        assert record.database == "ncbi_prot"
        assert record.type == "protein"
def test_record_guess_genbank_nuc():
    """Each sample accession below must be guessed as database 'ncbi_nuc', type 'nucleotide'."""
    sample_accessions = ("PU844519", "I96398", "V72255", "M06308", "KP485089", "T79891", "R36898")
    for accession in sample_accessions:
        record = Db.Record(accession)
        record.guess_database()
        assert record.database == "ncbi_nuc"
        assert record.type == "nucleotide"
def test_search_ensembl(monkeypatch, capsys, hf):
    """Check search_ensembl() for an empty result set and for a canned result file."""
    resource_dir = "%s/mock_resources/test_databasebuddy_clients/" % hf.resource_path

    def fake_rest_action(*args, **kwargs):
        # Replaces EnsemblRestClient.perform_rest_action; always answers with the species JSON.
        print("patch_ensembl_perform_rest_action\nargs: %s\nkwargs: %s" % (args, kwargs))
        with open("%s/ensembl_species.json" % resource_dir, "r") as handle:
            return json.load(handle)

    def fake_multicore_no_results(*args, **kwargs):
        # Replaces br.run_multicore_function; writes nothing, so the search comes back empty.
        print("patch_search_ensembl_empty\nargs: %s\nkwargs: %s" % (args, kwargs))

    def fake_multicore_with_results(*args, **kwargs):
        # Replaces br.run_multicore_function; copies canned search output into the results file.
        # The printed label is reproduced verbatim from the original (it repeats the "empty" name).
        print("patch_search_ensembl_empty\nargs: %s\nkwargs: %s" % (args, kwargs))
        with open("%s/ensembl_search_results.txt" % resource_dir, "r") as handle:
            client.results_file.write(handle.read())

    monkeypatch.setattr(Db.EnsemblRestClient, "perform_rest_action", fake_rest_action)
    monkeypatch.setattr(br, "run_multicore_function", fake_multicore_no_results)

    dbbuddy = Db.DbBuddy(", ".join(ACCNS[7:]))
    client = Db.EnsemblRestClient(dbbuddy)
    client.dbbuddy.search_terms = ["Panx3"]
    tracked_accn = "ENSLAFG00000006034"
    client.dbbuddy.records[tracked_accn] = Db.Record(tracked_accn)

    # First pass: the multicore stub produces no output.
    client.search_ensembl()
    out, err = capsys.readouterr()
    assert err == "Searching Ensembl for Panx3...\nEnsembl returned no results\n"
    assert not client.dbbuddy.records[tracked_accn].record

    # Second pass: the multicore stub feeds the canned results file.
    monkeypatch.setattr(br, "run_multicore_function", fake_multicore_with_results)
    client.search_ensembl()
    assert hf.string2hash(str(client.dbbuddy)) == "95dc1ecce077bef84cdf2d85ce154eef"
    assert len(client.dbbuddy.records) == 44
    assert client.dbbuddy.records[tracked_accn].database == "ensembl"
def test_liveshell_do_write(monkeypatch, capsys, hf):
    """Drive LiveShell.do_write() through its write, abort and error paths.

    A saved session is loaded from the mock resources, then do_write() is called
    with different output formats and patched `input` / `br.ask` / `open`
    behaviours. Each section checks the file on disk and/or the captured stdout.

    The sections are order dependent: monkeypatches applied in one section stay
    active in the following ones (nothing in this test undoes them).
    """
    # Keep the shell from entering its interactive loop and from dumping the session.
    monkeypatch.setattr(Db.LiveShell, "cmdloop", mock_cmdloop)
    monkeypatch.setattr(Db.LiveShell, "dump_session", lambda _: True)
    dbbuddy = Db.DbBuddy()
    crash_file = br.TempFile(byte_mode=True)
    liveshell = Db.LiveShell(dbbuddy, crash_file)
    load_file = "%s/mock_resources/test_databasebuddy_clients/dbbuddy_save.db" % hf.resource_path
    liveshell.do_load(load_file)
    capsys.readouterr()  # drop whatever do_load printed
    tmp_dir = br.TempDir()

    # write a summary
    # do_write(None) is given no path, so the patched input() supplies it.
    monkeypatch.setattr("builtins.input", lambda _: "%s/save1" % tmp_dir.path)
    liveshell.do_write(None)
    assert os.path.isfile("%s/save1" % tmp_dir.path)
    with open("%s/save1" % tmp_dir.path, "r") as ifile:
        assert len(ifile.read()) == 249980
    out, err = capsys.readouterr()
    assert re.search("1407 summary records.*written to.*save1", out)

    # write ids/accns
    dbbuddy.out_format = "ids"
    monkeypatch.setattr(br, "ask", lambda _: True)
    # NOTE(review): this stores a Record object in another Record's `.record` attribute --
    # presumably just to make the entry look like a fully downloaded record; confirm.
    dbbuddy.records['O14727'].record = Db.Record('O14727', _record=True)
    liveshell.do_write("%s/save2" % tmp_dir.path)
    assert os.path.isfile("%s/save2" % tmp_dir.path)
    with open("%s/save2" % tmp_dir.path, "r") as ifile:
        assert len(ifile.read()) == 18661
    out, err = capsys.readouterr()
    assert re.search("1407 accessions.*written to.*save2", out)

    # Abort summary
    # br.ask now answers False, and no file may be created.
    monkeypatch.setattr(br, "ask", lambda _: False)
    liveshell.do_write("%s/save3" % tmp_dir.path)
    assert not os.path.isfile("%s/save3" % tmp_dir.path)
    out, err = capsys.readouterr()
    assert "Abort..." in out

    # Permission error
    dbbuddy.out_format = "fasta"
    # From here on builtins.open is replaced by OpenPermissionError (defined outside this block).
    monkeypatch.setattr("builtins.open", OpenPermissionError)
    liveshell.do_write("%s/save4" % tmp_dir.path)
    assert not os.path.isfile("%s/save4" % tmp_dir.path)
    out, err = capsys.readouterr()
    assert "Error: You do not have write privileges in the specified directory.\n\n" in out

    # File exists
    # save2 was written above; br.ask still answers False.
    monkeypatch.setattr(br, "ask", lambda _: False)
    liveshell.do_write("%s/save2" % tmp_dir.path)
    out, err = capsys.readouterr()
    assert "Abort..." in out
    assert "written" not in out

    # Not a directory
    liveshell.do_write("%s/ghostdir/save5" % tmp_dir.path)
    out, err = capsys.readouterr()
    assert "The specified directory does not exist. Please create it before continuing" in out
    assert "written" not in out
def test_record_guess_genbank_mga():
    """Each sample accession below must be guessed as database 'ncbi_prot', type 'protein'."""
    sample_accessions = (
        "BJCKQ0111866",
        "YXRUT6401652",
        "PVAGD7038775",
        "OGSVS5937667",
        "LPMXX1503516",
        "NTEWQ3440974",
        "CTDME6774392",
    )
    for accession in sample_accessions:
        record = Db.Record(accession)
        record.guess_database()
        assert record.database == "ncbi_prot"
        assert record.type == "protein"
def test_record_guess_uniprot():
    """Each sample accession below must be guessed as database 'uniprot', type 'protein'."""
    sample_accessions = (
        "K2O417",
        "I0DZU1",
        "A8GFV0",
        "J3K7W6",
        "O3U582",
        "C3YWY7GUS7",
        "Q0L5K7",
        "Q5FO16",
        "K9WMR5XBZ1",
    )
    for accession in sample_accessions:
        record = Db.Record(accession)
        record.guess_database()
        assert record.database == "uniprot"
        assert record.type == "protein"
def test_record_guess_genbank_genome():
    """Each sample accession below must be guessed as database 'ncbi_nuc', type 'nucleotide'."""
    sample_accessions = (
        "LJIJ8045260586",
        "MRMV14919426",
        "WBGU8744627061",
        "WYNM11788712",
        "SQVS3339736221",
        "LVGB461502017",
        "FAWG101678469",
    )
    for accession in sample_accessions:
        record = Db.Record(accession)
        record.guess_database()
        assert record.database == "ncbi_nuc"
        assert record.type == "nucleotide"
def test_record_guess_ensembl():
    """Each ENS*/FB* sample accession below must be guessed as database 'ensembl', type 'nucleotide'."""
    sample_accessions = (
        "ENSRNOG00000018630",
        "ENSMUSG00000057666",
        "ENSPTRG00000004577",
        "ENSCAFG00000015077",
        "ENSPPYG00000004189",
        "ENSPCAG00000006928",
        "ENSOPRG00000012514",
        "ENSECAG00000022051",
        "ENSTSYG00000002171",
        "FBgn0001987",
        "FBtr0330306",
        "FBcl0254909",
    )
    for accession in sample_accessions:
        record = Db.Record(accession)
        record.guess_database()
        assert record.database == "ensembl"
        assert record.type == "nucleotide"
def test_record_guess_genbank_gi():
    """All-digit identifiers must be guessed as ncbi_nuc / gi_num and kept as both gi and accession."""
    numeric_ids = ("13545654", "1445", "9876513546531", "154351", "135464316", "4684315", "21240")
    for gi_text in numeric_ids:
        record = Db.Record(gi_text)
        record.guess_database()
        assert record.database == "ncbi_nuc"
        assert record.type == "gi_num"
        # The gi is compared via str(), the accession directly against the input string.
        assert str(record.gi) == gi_text
        assert record.accession == gi_text
def mock_big_record_no_dl(_dbbuddy):
    """Insert a Record for NP_001287575.1 with `_size=5000001` into `_dbbuddy.records`.

    Returns None; the only effect is the new/overwritten entry in the records mapping.
    """
    accession = "NP_001287575.1"
    large_record = Db.Record(accession, _size=5000001)
    _dbbuddy.records[accession] = large_record
def test_record_search(sb_resources):
    """Exercise Record.search() with wildcard, length-operator, column-scoped and free-text queries.

    The first part runs against a summary-only record; the last two assertions
    run against a record that wraps a sequence object taken from `sb_resources`.
    """
    summary = {
        "ACCN": "F6SBJ1",
        "DB": "uniprot",
        "entry_name": "F6SBJ1_HORSE",
        "length": "451",
        "organism-id": "9796",
        "organism": "Equus caballus (Horse)",
        "protein_names": "Caspase",
        "comments": "Caution (1); Sequence similarities (1)",
        "record": "summary",
    }
    rec = Db.Record("F6SBJ1", summary=summary, _type="protein")
    assert rec.search("*")
    assert not rec.search("Foo")

    # Length operator True
    assert rec.search("(length=451)")
    assert rec.search("(length >=451)")
    assert rec.search("(length<= 451)")
    assert rec.search("(length > 200)")
    assert rec.search("(length<500)")

    # Length operator False
    assert not rec.search("(length=452)")
    assert not rec.search("(length>=452)")
    assert not rec.search("(length<=450)")
    assert not rec.search("(length>500)")
    assert not rec.search("(length<200)")

    # Length operator errors
    # The message is checked on the raised exception itself (`err.value`): the string form of
    # pytest's ExceptionInfo wrapper differs between pytest versions, the exception text does not.
    with pytest.raises(ValueError) as err:
        rec.search("(length!<200)")
    assert "Invalid syntax for seaching 'length': length!<200" in str(err.value)

    with pytest.raises(ValueError) as err:
        rec.search("(length<>200)")
    assert "Invalid operator: <>" in str(err.value)

    # Without a 'length' entry in the summary, a length query simply does not match.
    del rec.summary['length']
    assert not rec.search("(length>200)")

    # Other columns
    assert rec.search("(ACCN) [A-Z0-9]{6}")
    assert not rec.search("(ACCN) [A-Z0-9]{7}")
    assert rec.search("(Type) prot")
    assert not rec.search("(Type) nucl")
    assert rec.search("(DB) uniprot")
    assert not rec.search("(DB) ncbi")
    assert rec.search("(comments)(Caution|Blahh)")
    assert not rec.search("(organism)Sheep")
    assert rec.search("(entry_name)")
    assert rec.search("(entry_name) ")
    assert not rec.search("(foo_name)")

    # No columns -> params
    assert rec.search("F6SBJ1")
    assert rec.search("uniprot")
    assert rec.search("protein")

    # No columns -> summary
    assert rec.search("Equus")
    assert not rec.search("equus")
    # Both "i?" and "?i" prefixes make the otherwise case-sensitive match succeed.
    assert rec.search("i?equus")
    assert rec.search("?iEqUuS")

    # Genbank record
    sb_obj = sb_resources.get_one("p g")
    rec = Db.Record("Mle-Panxα8", _record=sb_obj.records[4])
    assert rec.search("Innexin")
    assert not rec.search("ML07312abcd")
def test_uniprotrestclient_fetch_proteins(monkeypatch, capsys, hf):
    """Exercise UniProtRestClient.fetch_proteins() with query_uniprot replaced by local stubs.

    Scenarios, in order: nothing to fetch, a fetch after a stubbed search, a
    fetch routed through the patched br.run_multicore_function, an empty
    response combined with a recorded HTTP error, and an over-long accession.
    """
    def patch_query_uniprot_search(*args, **kwargs):
        # Stub for query_uniprot during search_proteins(): writes two canned result sets
        # ("inx15" and "inx16") into the client's results file.
        print("patch_query_uniprot_search\nargs: %s\nkwargs: %s" % (args, kwargs))
        # NOTE(review): the literal below is reproduced exactly as it appears in the reviewed
        # copy, where its whitespace looks flattened (the "\ " sequences were presumably
        # backslash line-continuations and the fields/records presumably tab/newline
        # separated). Restore it from the upstream file rather than from this text -- confirm.
        client.results_file.write(
            '''# Search: inx15 A8XEF9 A8XEF9_CAEBR 381 6238 Caenorhabditis briggsae Innexin Function (1); Sequence similarities (1); \ Subcellular location (2) O61786 O61786_CAEEL 382 6239 Caenorhabditis elegans Innexin Function (1); Sequence similarities (1); \ Subcellular location (2) A0A0H5SBJ0 A0A0H5SBJ0_BRUMA 129 6279 Brugia malayi (Filarial nematode worm) Innexin Function (1); Sequence \ similarities (1); Subcellular location (1) E3MGD6 E3MGD6_CAERE 384 31234 Caenorhabditis remanei (Caenorhabditis vulgaris) Innexin Function (1); \ Sequence similarities (1); Subcellular location (2) // # Search: inx16 O61787 INX16_CAEEL 372 6239 Caenorhabditis elegans Innexin-16 (Protein opu-16) Function (1); Sequence \ similarities (1); Subcellular location (1) A0A0V1AZ11 A0A0V1AZ11_TRISP 406 6334 Trichinella spiralis (Trichina worm) Innexin Caution (1); Function (1); \ Sequence similarities (1); Subcellular location (2) A8XEF8 A8XEF8_CAEBR 374 6238 Caenorhabditis briggsae Innexin Function (1); Sequence similarities (1); \ Subcellular location (2) A0A0B2VB60 A0A0B2VB60_TOXCA 366 6265 Toxocara canis (Canine roundworm) Innexin Caution (2); Function (1); \ Sequence similarities (1); Subcellular location (1) A0A0V0W5E2 A0A0V0W5E2_9BILA 410 92179 Trichinella sp. 
T6 Innexin Caution (2); Function (1); Sequence \ similarities (1); Subcellular location (1) //''', "w")
        return

    def patch_query_uniprot_fetch(*args, **kwargs):
        # Stub for the fetch step: copies the canned uniprot_fetch.txt into the results file.
        print("patch_query_uniprot_fetch\nargs: %s\nkwargs: %s" % (args, kwargs))
        with open("%s/mock_resources/test_databasebuddy_clients/uniprot_fetch.txt" % hf.resource_path, "r") \
                as ifile:
            client.results_file.write(ifile.read(), "w")
        return

    def patch_query_uniprot_fetch_nothing(*args, **kwargs):
        # Stub for the fetch step that writes a search header with no records after it.
        print("patch_query_uniprot_fetch_nothing\nargs: %s\nkwargs: %s" % (args, kwargs))
        client.results_file.write(
            "# Search: A8XEF9,O61786,A0A0H5SBJ0,E3MGD6,O61787,A0A0V1AZ11,A8XEF8,A0A0B2VB60,"
            "A0A0V0W5E2\n//\n//", "w")
        return

    # With only search terms and no records yet, fetch_proteins() must produce nothing.
    monkeypatch.setattr(Db.UniProtRestClient, "query_uniprot", lambda _: True)
    dbbuddy = Db.DbBuddy("inx15,inx16")
    client = Db.UniProtRestClient(dbbuddy)
    client.fetch_proteins()
    out, err = capsys.readouterr()
    assert client.results_file.read() == ""
    assert "full records from UniProt..." not in err

    # Test a single call to query_uniprot
    monkeypatch.setattr(Db.UniProtRestClient, "query_uniprot", patch_query_uniprot_search)
    client.search_proteins()
    monkeypatch.setattr(Db.UniProtRestClient, "query_uniprot", patch_query_uniprot_fetch)
    client.fetch_proteins()
    out, err = capsys.readouterr()
    assert "Requesting 9 full records from UniProt..." in err

    # Test multicore call to query_uniprot
    monkeypatch.setattr(br, "run_multicore_function", patch_query_uniprot_fetch)
    # Clear the records fetched above so they are requested again.
    for accn, rec in client.dbbuddy.records.items():
        rec.record = None
    # A 999-character accession is added as a tenth record.
    client.dbbuddy.records["a" * 999] = Db.Record("a" * 999, _database="uniprot")
    client.fetch_proteins()
    out, err = capsys.readouterr()
    assert "Requesting 10 full records from UniProt..." in err
    seq = str(client.dbbuddy.records["A8XEF9"].record.seq)
    assert hf.string2hash(seq) == "04f13629336cf6cdd5859c8913b742a5"

    # Some edge cases
    monkeypatch.setattr(Db.UniProtRestClient, "query_uniprot", patch_query_uniprot_fetch_nothing)
    # Pre-load an HTTP error so the error report printed by fetch_proteins() can be checked.
    client.http_errors_file.write("inx15\n%s\n//\n" % URLError("Fake URLError from Mock"))
    client.dbbuddy.records = OrderedDict([("a" * 999, Db.Record("a" * 999, _database="uniprot"))])
    client.fetch_proteins()
    out, err = capsys.readouterr()
    assert "Requesting 1 full records from UniProt..." in err
    assert "No sequences returned\n\n" in err
    assert "The following errors were encountered while querying UniProt with fetch_proteins():" in err
    assert hf.string2hash(str(
        client.dbbuddy.records["a" * 999])) == "670bf9c6ae5832b42841798d882a7276"

    # A 1001-character accession must be rejected with a ValueError.
    with pytest.raises(ValueError) as err:
        client.dbbuddy.records["a" * 1001] = Db.Record("a" * 1001, _database="uniprot")
        client.fetch_proteins()
    # NOTE(review): str(err) is the string form of pytest's ExceptionInfo wrapper, not of the
    # exception; str(err.value) is presumably what was meant -- confirm.
    assert "The provided accession or search term is too long (>1000)." in str(
        err)