def test_ncbiclient_fetch_sequences(hf, monkeypatch, capsys):
    """Exercise NCBIClient.fetch_sequences() without records and with a mocked query.

    For the second scenario NCBIClient._mc_query is replaced by a stub that
    only reads a stored GenBank resource from disk and writes it into the
    results file of the client under test. The resulting DbBuddy string and
    its printed output are compared against known hashes.
    """
    def patch_entrez_fetch_seq(*args, **kwargs):
        # Stub for NCBIClient._mc_query: echo the call, then copy the mock
        # GenBank file into the results file of whichever `client` is current.
        print("patch_entrez_fetch_seq\nargs: %s\nkwargs: %s" % (args, kwargs))
        gb_template = "{0}mock_resources{1}test_databasebuddy_clients{1}Entrez_efetch_seq.gb"
        gb_path = gb_template.format(hf.resource_path, os.path.sep)
        with open(gb_path, "r") as gb_handle:
            client.results_file.write(gb_handle.read())

    # Scenario 1: a DbBuddy created without any input.
    dbbuddy = Db.DbBuddy()
    client = Db.NCBIClient(dbbuddy)
    client.fetch_sequences("ncbi_prot")
    empty_digest = hf.string2hash(str(dbbuddy))
    assert empty_digest == "016d020dd926f64ac1431f15c5683678"

    # Scenario 2: three accessions, with the query method stubbed out.
    monkeypatch.setattr(Db.NCBIClient, "_mc_query", patch_entrez_fetch_seq)
    dbbuddy = Db.DbBuddy("XP_010103297.1,XP_010103298.1,XM_010104998.1")
    client = Db.NCBIClient(dbbuddy)
    client.fetch_sequences("ncbi_prot")
    dbbuddy.out_format = "gb"
    loaded_digest = hf.string2hash(str(dbbuddy))
    assert loaded_digest == "9bd8017da009696c1b6ebe5d4e3c0a89"

    # Discard everything captured so far so only print() output is hashed.
    capsys.readouterr()
    dbbuddy.print()
    printed, _ = capsys.readouterr()
    # Remove every line containing "sec" before hashing
    # (presumably run-dependent timing output -- TODO confirm).
    printed = re.sub(".*?sec.*?\n", "", printed)
    assert hf.string2hash(printed) == "40b60e455df6ba092dbf96dc028ca82f"
def test_ncbiclient_fetch_summaries(hf, monkeypatch, capsys):
    """Exercise NCBIClient.fetch_summaries() without records and with a mocked query.

    NCBIClient._mc_query is replaced by a stub that writes a stored esummary
    XML resource, followed by an end marker, into the client's results file.
    """
    def patch_entrez_fetch_summaries(*args, **kwargs):
        # Stub for NCBIClient._mc_query; picks the mock XML by inspecting
        # the "func_args" keyword argument.
        print("patch_entrez_fetch_summaries\nargs: %s\nkwargs: %s" % (args, kwargs))
        requested = kwargs["func_args"]
        mock_xml = (
            ("esummary_seq",
             "%s/mock_resources/test_databasebuddy_clients/Entrez_esummary_seq.xml"),
            ("esummary_taxa",
             "%s/mock_resources/test_databasebuddy_clients/Entrez_esummary_taxa.xml"),
        )
        for tool, path_template in mock_xml:
            if tool not in requested:
                continue
            with open(path_template % hf.resource_path, "r") as xml_handle:
                client.results_file.write(xml_handle.read().strip())
                client.results_file.write('\n### END ###\n')
            # At most one resource is written per call; the first match wins.
            break

    # With nothing to look up, the record collection must stay empty.
    dbbuddy = Db.DbBuddy()
    client = Db.NCBIClient(dbbuddy)
    client.fetch_summaries("ncbi_prot")
    assert not client.dbbuddy.records

    # Three accessions, with the query method stubbed out.
    monkeypatch.setattr(Db.NCBIClient, "_mc_query", patch_entrez_fetch_summaries)
    dbbuddy = Db.DbBuddy("XP_010103297,XP_010103298.1,AAY72386.1")
    client = Db.NCBIClient(dbbuddy)
    capsys.readouterr()  # Drop previously captured output
    client.fetch_summaries("ncbi_prot")

    record_summary = dbbuddy.records["AAY72386.1"].summary
    assert record_summary["organism"] == "Unclassified"
    assert hf.string2hash(str(dbbuddy)) == "3a5e3379b12afe4044d2ece852ab2556"
def test_ncbiclient_search_ncbi(hf, monkeypatch, capsys):
    """Exercise NCBIClient.search_ncbi() against a mocked Entrez.esearch.

    The stub returns a readable handle: a small in-memory count document when
    called with a "rettype" keyword, otherwise the stored esearch XML resource.
    """
    def patch_entrez_esearch(*args, **kwargs):
        print("patch_entrez_esearch\nargs: %s\nkwargs: %s" % (args, kwargs))
        if "rettype" not in kwargs:
            # The caller receives the open handle, so it is not closed here.
            return open("%s/mock_resources/test_databasebuddy_clients/Entrez_esearch.xml" % hf.resource_path,
                        "r")

        count_file = br.TempFile()
        count_file.write("""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD esearch 20060628//EN" \
"http://eutils.ncbi.nlm.nih.gov/eutils/dtd/20060628/esearch.dtd">
<eSearchResult>
    <Count>5</Count>
</eSearchResult>
""")
        return count_file.get_handle(mode="r")

    # Stub out the Entrez search, the summary fetch and the sleep call.
    monkeypatch.setattr(Db.Entrez, "esearch", patch_entrez_esearch)
    monkeypatch.setattr(Db.NCBIClient, "fetch_summaries", lambda _: True)
    monkeypatch.setattr(Db, "sleep", lambda _: True)

    dbbuddy = Db.DbBuddy("XP_012618499.1")
    client = Db.NCBIClient(dbbuddy)

    # Without search terms only the record given at construction is present.
    dbbuddy.search_terms = []
    client.search_ncbi("protein")
    assert len(dbbuddy.records) == 1

    # With a search term the accessions below must be present afterwards.
    dbbuddy.search_terms = ["casp9"]
    client.search_ncbi("protein")
    expected_accns = ["XP_012618499.1", "O88738.2", "O88879.3", "Q9NR09.2", "Q13075.3"]
    for expected in expected_accns:
        assert expected in dbbuddy.records
# Example #4
# 0
def test_ncbiclient_init():
    """Verify the attributes an NCBIClient exposes right after construction."""
    dbbuddy = Db.DbBuddy(", ".join(ACCNS[:3]))
    client = Db.NCBIClient(dbbuddy)

    # Entrez is configured from the stored config values and a fixed tool name.
    assert client.Entrez.email == br.config_values()['email']
    assert client.Entrez.tool == "buddysuite"

    # The client must hold the DbBuddy it was constructed with.
    assert hash(dbbuddy) == hash(client.dbbuddy)

    # isinstance() is the recommended way to check object types (PEP 8);
    # comparing type objects with == would also reject subclasses.
    assert isinstance(client.http_errors_file, br.TempFile)
    assert isinstance(client.results_file, br.TempFile)

    assert client.max_url == 1000
    assert client.max_attempts == 5
# Example #5
# 0
def test_ncbiclient_fetch_summaries(hf, monkeypatch):
    """Exercise NCBIClient.fetch_summaries() without records and with a mocked query.

    NCBIClient._mc_query is replaced by a stub that, depending on the
    "func_args" keyword argument, writes either a stored esummary XML resource
    or a fixed list of GI numbers into the client's results file, each
    terminated by an end marker.
    """
    def patch_entrez_fetch_summaries(*args, **kwargs):
        # Stub for NCBIClient._mc_query; writes into the results file of
        # whichever `client` is current when it is called.
        print("patch_entrez_fetch_summaries\nargs: %s\nkwargs: %s" %
              (args, kwargs))
        func_args = kwargs["func_args"]
        if func_args in (["esummary_seq"], ["esummary_taxa"]):
            # Both summary tools load "Entrez_<tool>.xml" from the mock resources.
            test_file = "%s/mock_resources/test_databasebuddy_clients/Entrez_%s.xml" % (
                hf.resource_path, func_args[0])
            with open(test_file, "r") as ifile:
                client.results_file.write(ifile.read().strip())
                client.results_file.write('\n### END ###\n')
        elif func_args == ["efetch_gi"]:
            client.results_file.write("""703125407
703125412
67586143
### END ###
""")

    # No records to fetch
    dbbuddy = Db.DbBuddy()
    client = Db.NCBIClient(dbbuddy)
    client.fetch_summaries("ncbi_prot")
    assert not client.dbbuddy.records

    monkeypatch.setattr(Db.NCBIClient, "_mc_query",
                        patch_entrez_fetch_summaries)
    dbbuddy = Db.DbBuddy("XP_010103297,XP_010103298.1,67586143,257467473")
    client = Db.NCBIClient(dbbuddy)
    client.fetch_summaries("ncbi_prot")

    # Only the record objects are inspected, so iterate the values directly.
    expected_gis = [703125407, 703125412, 67586143, 257467473]
    for rec in dbbuddy.records.values():
        assert rec.gi in expected_gis
    assert dbbuddy.records["AAY72386.1"].summary["organism"] == "Unclassified"
    assert hf.string2hash(str(dbbuddy)) == "0cf7c9ccf058cf3b50d2aab7ecb1f953"
# Example #6
# 0
def test_ncbiclient_search_ncbi(hf, monkeypatch, capsys):
    """Exercise NCBIClient.search_ncbi() against a mocked Entrez.esearch.

    The stub returns a readable handle: a small in-memory count document when
    called with a "rettype" keyword, otherwise the stored esearch XML resource.
    A final call swaps in a mock that raises, and the test checks the message
    written to stderr.
    """
    def patch_entrez_esearch(*args, **kwargs):
        print("patch_entrez_esearch\nargs: %s\nkwargs: %s" % (args, kwargs))
        if "rettype" not in kwargs:
            # The caller receives the open handle, so it is not closed here.
            xml_path = "%s/mock_resources/test_databasebuddy_clients/Entrez_esearch.xml" % hf.resource_path
            return open(xml_path, "r")

        count_file = br.TempFile()
        count_file.write("""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD esearch 20060628//EN" \
"http://eutils.ncbi.nlm.nih.gov/eutils/dtd/20060628/esearch.dtd">
<eSearchResult>
    <Count>5</Count>
</eSearchResult>
""")
        return count_file.get_handle(mode="r")

    # Stub out the Entrez search, the summary fetch and the sleep call.
    monkeypatch.setattr(Db.Entrez, "esearch", patch_entrez_esearch)
    monkeypatch.setattr(Db.NCBIClient, "fetch_summaries", lambda _: True)
    monkeypatch.setattr(Db, "sleep", lambda _: True)

    dbbuddy = Db.DbBuddy("909549231")
    client = Db.NCBIClient(dbbuddy)

    # Without search terms only the record given at construction is present.
    dbbuddy.search_terms = []
    client.search_ncbi("protein")
    assert len(dbbuddy.records) == 1

    # With a search term the identifiers below must be present afterwards.
    dbbuddy.search_terms = ["casp9"]
    client.search_ncbi("protein")
    expected_ids = ["909549231", "909549227", "909549224", "909546647", "306819620"]
    for expected in expected_ids:
        assert expected in dbbuddy.records

    # When esearch is replaced by the raising mock, a "no results" message
    # is expected on stderr.
    monkeypatch.setattr(Db.Entrez, "esearch", mock_raise_keyboardinterrupt)
    client.search_ncbi("protein")
    _, stderr_text = capsys.readouterr()
    assert 'NCBI returned no protein results' in stderr_text
# Example #7
# 0
def test_ncbiclient_mc_query(hf, monkeypatch):
    """Exercise NCBIClient._mc_query() for each tool name and for mocked failures.

    The first half patches Db.Entrez.esummary / Db.Entrez.efetch with stubs
    that return handles onto stored or in-memory data, then checks what
    _mc_query wrote to the client's results file. The second half checks the
    ValueError messages for bad arguments and the text written to the
    client's HTTP-errors file when the Entrez call is replaced by raising
    mocks.
    """
    def patch_entrez_esummary_taxa(*args, **kwargs):
        print("patch_entrez_esummary_taxa\nargs: %s\nkwargs: %s" %
              (args, kwargs))
        test_file = "%s/mock_resources/test_databasebuddy_clients/Entrez_esummary_taxa.xml" % hf.resource_path
        # The open handle is handed to the caller, so it is not closed here.
        return open(test_file, "r")

    def patch_entrez_efetch_gis(*args, **kwargs):
        print("patch_entrez_efetch_gis\nargs: %s\nkwargs: %s" % (args, kwargs))
        tmp_file = br.TempFile()
        tmp_file.write("703125407\n703125412\n67586143\n")
        return tmp_file.get_handle("r")

    def patch_entrez_esummary_seq(*args, **kwargs):
        print("patch_entrez_esummary_seq\nargs: %s\nkwargs: %s" %
              (args, kwargs))
        test_file = "%s/mock_resources/test_databasebuddy_clients/Entrez_esummary_seq.xml" % hf.resource_path
        return open(test_file, "r")

    def patch_entrez_efetch_seq(*args, **kwargs):
        print("patch_entrez_efetch_seq\nargs: %s\nkwargs: %s" % (args, kwargs))
        test_file = "%s/mock_resources/test_databasebuddy_clients/Entrez_efetch_seq.gb" % hf.resource_path
        return open(test_file, "r")

    # No need to wait around for stuff...
    monkeypatch.setattr(Db, "sleep", lambda _: True)
    dbbuddy = Db.DbBuddy()
    client = Db.NCBIClient(dbbuddy)

    # --- Successful queries: one per supported tool name ---
    monkeypatch.setattr(Db.Entrez, "esummary", patch_entrez_esummary_taxa)
    client._mc_query("649,734,1009,2302", ["esummary_taxa"])
    assert hf.string2hash(client.results_file.read()) == "acfb85bbdf7c2f8ea7e925c5bfcaaf06"
    client.results_file.clear()

    monkeypatch.setattr(Db.Entrez, "efetch", patch_entrez_efetch_gis)
    client._mc_query("XP_010103297.1,XP_010103298.1,XP_010103299.1", ["efetch_gi"])
    assert client.results_file.read() == "703125407\n703125412\n67586143\n### END ###\n"
    client.results_file.clear()

    monkeypatch.setattr(Db.Entrez, "esummary", patch_entrez_esummary_seq)
    client._mc_query("703125407,703125412,67586143", ["esummary_seq"])
    assert hf.string2hash(client.results_file.read()) == "e6ba80b5fe2f35002ac2227ca7791c17"
    client.results_file.clear()

    monkeypatch.setattr(Db.Entrez, "efetch", patch_entrez_efetch_seq)
    client._mc_query("703125407,703125412,67586143", ["efetch_seq"])
    assert hf.string2hash(client.results_file.read()) == "0154d7bd9d47ca6abac00f25428b9e7e"

    # --- Invalid arguments ---
    # Drop every patch applied so far, then re-apply only the sleep stub.
    monkeypatch.undo()
    monkeypatch.setattr(Db, "sleep", lambda _: True)

    # Inspect the raised exception itself via `.value`: str() of the
    # ExceptionInfo wrapper is not the exception message in current pytest.
    with pytest.raises(ValueError) as err:
        client._mc_query("703125407", ["foo"])
    assert "'tool' argument must be in 'esummary_taxa', 'efetch_gi', 'esummary_seq', or 'efetch_seq'" \
           in str(err.value)

    with pytest.raises(ValueError) as err:
        client._mc_query("703125407", ["foo", "Bar"])
    assert "Unknown type 'Bar', choose between 'nucleotide' and 'protein" in str(err.value)

    # --- Mocked failures: each should be recorded in the HTTP-errors file ---
    monkeypatch.setattr(Db.Entrez, "efetch", mock_raise_httperror)
    client._mc_query("703125407,703125412,67586143", ["efetch_seq"])
    assert "NCBI request failed: 703125407,703125412,67586143\nHTTP Error 101: Fake HTTPError from Mock\n//" \
           in client.http_errors_file.read()

    # For the remaining cases, first confirm the message is absent so the
    # positive assertion can only be satisfied by the call that follows.
    assert "Service unavailable" not in client.http_errors_file.read()
    monkeypatch.setattr(Db.Entrez, "efetch", mock_raise_503_httperror)
    client._mc_query("703125407", ["efetch_seq"])
    assert "Service unavailable" in client.http_errors_file.read()

    monkeypatch.setattr(Db.Entrez, "efetch", mock_raise_connectionreseterror)
    client._mc_query("703125407", ["efetch_seq"])
    assert "NCBI request failed: 703125407\nFake ConnectionResetError from Mock: [Errno 54] Connection reset by peer"\
           in client.http_errors_file.read()

    assert "are you connected to the internet?" not in client.http_errors_file.read()
    monkeypatch.setattr(Db.Entrez, "efetch", mock_raise_urlerror_8)
    client._mc_query("703125407", ["efetch_seq"])
    assert "are you connected to the internet?" in client.http_errors_file.read()

    assert "<urlopen error Fake URLError from Mock>" not in client.http_errors_file.read()
    monkeypatch.setattr(Db.Entrez, "efetch", mock_raise_urlerror)
    client._mc_query("703125407", ["efetch_seq"])
    assert "<urlopen error Fake URLError from Mock>" in client.http_errors_file.read()