예제 #1
0
def test_search_composite():
    """
    Check that combining two field queries with ``|`` and ``&`` gives
    the union and the intersection of the individual search results.
    """
    # Use the real organism name: a misspelled value would leave
    # `ids_1` empty and turn both assertions below into trivial checks
    query1 = rcsb.FieldQuery("rcsb_entity_host_organism.scientific_name",
                             exact_match="Homo sapiens")
    query2 = rcsb.FieldQuery("exptl.method", exact_match="SOLUTION NMR")
    ids_1 = set(rcsb.search(query1))
    ids_2 = set(rcsb.search(query2))
    ids_or = set(rcsb.search(query1 | query2))
    ids_and = set(rcsb.search(query1 & query2))

    # The composite results must match the set operations performed
    # locally on the individual results
    assert ids_or == ids_1 | ids_2
    assert ids_and == ids_1 & ids_2
예제 #2
0
def test_search_field(field, molecular_definition, params, ref_ids):
    """
    Search and count with a single :class:`FieldQuery` and compare the
    outcome with the reference IDs.

    The arguments are presumably supplied by a test parametrization
    that is not part of this excerpt: `params` holds the keyword
    arguments for the query and `ref_ids` the expected PDB IDs.
    """
    field_query = rcsb.FieldQuery(field, molecular_definition, **params)
    found_ids = rcsb.search(field_query)
    found_count = rcsb.count(field_query)

    # The order of the returned IDs is irrelevant, hence the sets
    expected_ids = set(ref_ids)
    assert set(found_ids) == expected_ids
    # The count must agree with the number of reference IDs
    assert found_count == len(ref_ids)
예제 #3
0
import numpy as np
import matplotlib.pyplot as plt
import biotite
import biotite.database.rcsb as rcsb
from datetime import datetime, time

# One entry per year, from 1990 up to and including the current year
years = np.arange(1990, datetime.today().year + 1)
xray_count = np.zeros(len(years), dtype=int)
nmr_count = np.zeros(len(years), dtype=int)
em_count = np.zeros(len(years), dtype=int)
tot_count = np.zeros(len(years), dtype=int)

# The method queries do not depend on the year,
# so they are created only once
xray_query = rcsb.FieldQuery("exptl.method",
                             exact_match="X-RAY DIFFRACTION")
nmr_query = rcsb.FieldQuery("exptl.method", exact_match="SOLUTION NMR")
em_query = rcsb.FieldQuery("exptl.method",
                           exact_match="ELECTRON MICROSCOPY")

# For each year count the PDB entries released in that year
for i, year in enumerate(years):
    # A query that comprises one year:
    # from the first instant of January 1st
    # to the last instant of December 31st
    date_query = rcsb.FieldQuery(
        "rcsb_accession_info.initial_release_date",
        range_closed=(datetime.combine(datetime(year, 1, 1), time.min),
                      datetime.combine(datetime(year, 12, 31), time.max)))
    # Get the amount of structures, that were released in that year
    # AND were elucidated with the respective method
    xray_count[i], nmr_count[i], em_count[i] = [
        rcsb.count(date_query & method_query)
        for method_query in [xray_query, nmr_query, em_query]
    ]
    # Get the total amount of structures released in that year
    tot_count[i] = rcsb.count(date_query)
예제 #4
0
# Run the search, then print the matching PDB IDs and their number
# NOTE(review): `query` is not defined in this excerpt - it must be
# created before this point
pdb_ids = rcsb.search(query)
print(pdb_ids)
print(rcsb.count(query))
# Download the matching entries in MMTF format
# NOTE(review): `gettempdir` is presumably `tempfile.gettempdir` -
# confirm that it is imported before this point
files = rcsb.fetch(pdb_ids, "mmtf", gettempdir())

########################################################################
# This was a simple search for the occurrence of the search term in any
# field.
# You can also search for a value in a specific field with a
# :class:`FieldQuery`.
# A complete list of the available fields and their supported operators
# is documented
# `on this page <https://search.rcsb.org/search-attributes.html>`_.

# Query for 'lacA' gene
query1 = rcsb.FieldQuery("rcsb_entity_source_organism.rcsb_gene_name.value",
                         exact_match="lacA")
# Query for resolution below 1.5 Å
query2 = rcsb.FieldQuery("reflns.d_resolution_high", less=1.5)

########################################################################
# The search API allows even more complex queries, e.g. for sequence
# or structure similarity. Have a look at the API reference of
# :mod:`biotite.database.rcsb`.
#
# Multiple :class:`Query` objects can be combined using the ``|`` (or)
# or ``&`` (and) operator for a more fine-grained selection.
# A :class:`FieldQuery` is negated with ``~``.

# Entries with a 'lacA' gene AND NOT a resolution below 1.5 Å
composite_query = query1 & ~query2
print(rcsb.search(composite_query))
예제 #5
0
def test_search_invalid(field, params):
    """
    Check that an invalid :class:`FieldQuery` is rejected by both
    :func:`search()` and :func:`count()`.

    Each request must raise a :class:`RequestError` whose message
    contains ``400``.
    """
    bad_query = rcsb.FieldQuery(field, **params)
    # Same expectation for both request types, in the original order
    for request in (rcsb.search, rcsb.count):
        with pytest.raises(RequestError, match="400"):
            request(bad_query)
예제 #6
0
import datetime
import concurrent.futures
import os
import tarfile
import biotite.database.rcsb as rcsb
import biotite.structure.io.mmtf as mmtf


### Download of PDB and archive creation ###

# MMTF files are downloaded into a new directory in this path
# and the .tar archive is created here
base_path = "path/to/directoy"

# Obtain all PDB IDs using a query that includes all entries
# Each PDB entry has a title
all_id_query = rcsb.FieldQuery("struct.title")
pdb_ids = rcsb.search(all_id_query)

# Name for download directory, tagged with the current date (YYYYMMDD)
now = datetime.datetime.now()
mmtf_dir = os.path.join(
    base_path, f"mmtf_{now.year:04d}{now.month:02d}{now.day:02d}"
)
# Create the directory including missing parent directories;
# an already existing directory is reused without a separate
# existence check
os.makedirs(mmtf_dir, exist_ok=True)

# Download all PDB IDs with parallelized HTTP requests
# Leaving the 'with' block waits until all submitted downloads
# have finished
# NOTE: The returned futures are discarded, so an exception raised by
# a single download is not reported
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    for pdb_id in pdb_ids:
        executor.submit(rcsb.fetch, pdb_id, "mmtf", mmtf_dir)