コード例 #1
0
def get_sprot_raw(id):
    """Return a text handle for the raw SwissProt record of ``id``.

    The record is requested from http://www.uniprot.org/uniprot/<id>.txt
    (URL scheme as documented at https://www.expasy.org/expasy_urls.html).

    >>> from Bio import ExPASy
    >>> from Bio import SwissProt
    >>> handle = ExPASy.get_sprot_raw("O23729")
    >>> record = SwissProt.read(handle)
    >>> handle.close()
    >>> print(record.entry_name)
    CHS3_BROFI

    An identifier that does not exist results in an HTTP error:

    >>> # Python2/3 docstring workaround: Revise for 'Python 3 only'
    >>> try:
    ...    ExPASy.get_sprot_raw("DOES_NOT_EXIST")
    ... except Exception as e:
    ...    print('HTTPError: %s' %e)
    HTTPError: ... Error 404: 

    """  # noqa: W291
    response = _urlopen("http://www.uniprot.org/uniprot/%s.txt" % id)
    return _binary_to_string_handle(response)
コード例 #2
0
def get_prosite_raw(id, cgi=None):
    """Return a text handle for a raw PROSITE or PRODOC record at ExPASy.

    ``cgi`` is deprecated because the ExPASy website changed; it is
    accepted for compatibility but not used by this function.

    >>> from Bio import ExPASy
    >>> from Bio.ExPASy import Prosite
    >>> handle = ExPASy.get_prosite_raw('PS00001')
    >>> record = Prosite.read(handle)
    >>> handle.close()
    >>> print(record.accession)
    PS00001

    A key that does not exist results in an HTTP error:

    >>> # Python 2/3 docstring workaround: Revise for 'Python 3 only'
    >>> try:
    ...    handle = ExPASy.get_prosite_raw("does_not_exist")
    ... except Exception as e:
    ...    print('HTTPError: %s' %e)
    HTTPError: ... Error 404: Not Found

    """
    response = _urlopen("https://prosite.expasy.org/%s.txt" % id)
    return _binary_to_string_handle(response)
コード例 #3
0
ファイル: __init__.py プロジェクト: JureFabjan/biopython
def search(text,
           output_format="tab",
           sort="score",
           oragnism="",
           columns=(),
           isoform=False,
           compress=False,
           offset=0,
           limit=0):
    """Run a query against the UniProt API and return a text handle.

    ``query``, ``format``, ``sort`` and ``offset`` are always sent.  The
    remaining fields are only added to the request when the matching
    argument is truthy.  Note that the organism filter is passed through
    the parameter spelled ``oragnism``.

    More at: https://www.uniprot.org/help/api_queries
    """
    query = {
        "query": text,
        "format": output_format,
        "sort": sort,
        "offset": str(offset),
    }
    if oragnism:
        query["organism"] = oragnism
    if columns:
        query["columns"] = ",".join(columns)
    # Boolean switches are transmitted as the literal string "Yes".
    for field, enabled in (("isoform", isoform), ("compress", compress)):
        if enabled:
            query[field] = "Yes"
    if limit:
        query["limit"] = str(limit)

    request_url = "https://www.uniprot.org/uniprot/?" + _urlencode(query)
    return _binary_to_string_handle(_urlopen(request_url))
コード例 #4
0
ファイル: __init__.py プロジェクト: Glchriste/Paralog-Network
def _open(cgi, params=None, post=False):
    """Build the URL and open a handle to it (PRIVATE).

    Open a handle to Entrez.

    Arguments:
     - cgi - URL of the cgi script to access.
     - params - dictionary with the options to pass to it.  A dictionary
       supplied by the caller is updated in place: entries whose value is
       None are removed and "tool"/"email" are filled in when missing.
       When omitted, a fresh dictionary is used for every call.
     - post - if true the options are sent with HTTP POST, otherwise they
       are appended to the URL and sent with HTTP GET.

    Returns the result of ``_binary_to_string_handle`` applied to the
    opened handle.  Errors raised while opening the URL propagate to the
    caller unchanged.

    This function also enforces the "up to three queries per second rule"
    to avoid abusing the NCBI servers.
    """
    # A literal {} default would be shared between calls and would keep
    # the "tool"/"email" entries added below, so create the dict here.
    if params is None:
        params = {}
    # NCBI requirement: At most three queries per second.
    # Equivalently, at least a third of second between queries
    delay = 0.333333334
    current = time.time()
    wait = _open.previous + delay - current
    if wait > 0:
        time.sleep(wait)
        _open.previous = current + wait
    else:
        _open.previous = current
    # Remove None values from the parameters.  The keys are collected
    # first because deleting entries while iterating over items() raises
    # RuntimeError on Python 3.
    for key in [k for k, v in params.items() if v is None]:
        del params[key]
    # Tell Entrez that we are using Biopython (or whatever the user has
    # specified explicitly in the parameters or by changing the default)
    if "tool" not in params:
        params["tool"] = tool
    # Tell Entrez who we are
    if "email" not in params:
        if email is not None:
            params["email"] = email
        else:
            warnings.warn("""
Email address is not specified.

To make use of NCBI's E-utilities, NCBI strongly recommends you to specify
your email address with each request. From June 1, 2010, this will be
mandatory. As an example, if your email address is [email protected], you
can specify it as follows:
   from Bio import Entrez
   Entrez.email = '*****@*****.**'
In case of excessive usage of the E-utilities, NCBI will attempt to contact
a user at the email address provided before blocking access to the
E-utilities.""", UserWarning)
    # Open a handle to Entrez.
    options = _urlencode(params, doseq=True)
    if post:
        # HTTP POST
        handle = _urlopen(cgi, data=_as_bytes(options))
    else:
        # HTTP GET
        handle = _urlopen(cgi + "?" + options)

    return _binary_to_string_handle(handle)
コード例 #5
0
def get_prosite_raw(id, cgi=None):
    """Return a text handle for a raw PROSITE or PRODOC record at ExPASy.

    ``cgi`` is deprecated because the ExPASy website changed; it is
    accepted for compatibility but not used by this function.

    >>> from Bio import ExPASy
    >>> from Bio.ExPASy import Prosite
    >>> with ExPASy.get_prosite_raw('PS00001') as handle:
    ...    record = Prosite.read(handle)
    ...
    >>> print(record.accession)
    PS00001


    A key that does not exist is expected to give an HTTP 404 error:

    >>> handle = get_prosite_raw("does_not_exist")
    Traceback (most recent call last):
    ...
    urllib.error.HTTPError: HTTP Error 404: Not Found

    """
    response = _urlopen("http://prosite.expasy.org/%s.txt" % id)
    return _binary_to_string_handle(response)
コード例 #6
0
ファイル: __init__.py プロジェクト: HuttonICS/biopython
def get_prosite_raw(id, cgi=None):
    """Return a text handle for a raw PROSITE or PRODOC record at ExPASy.

    ``cgi`` is deprecated because the ExPASy website changed; it is
    accepted for compatibility but not used by this function.

    >>> from Bio import ExPASy
    >>> from Bio.ExPASy import Prosite
    >>> with ExPASy.get_prosite_raw('PS00001') as handle:
    ...    record = Prosite.read(handle)
    ...
    >>> print(record.accession)
    PS00001


    A key that does not exist is expected to give an HTTP 404 error:

    >>> handle = get_prosite_raw("does_not_exist")
    Traceback (most recent call last):
    ...
    urllib.error.HTTPError: HTTP Error 404: Not Found

    """
    record_url = "https://prosite.expasy.org/%s.txt" % id
    raw_handle = _urlopen(record_url)
    return _binary_to_string_handle(raw_handle)
コード例 #7
0
ファイル: __init__.py プロジェクト: anntzer/biopython
def _open(url, post=None):
    """Open a rate-limited handle to a TogoWS URL (PRIVATE).

    When ``post`` is truthy it is converted with ``_as_bytes`` and sent
    as the request body; otherwise ``url`` is opened without a body.
    Errors raised while opening the URL propagate to the caller.

    In the absence of clear guidelines, at most three queries per second
    are issued, to avoid abusing the TogoWS servers.
    """
    min_gap = 0.333333333  # one third of a second
    now = time.time()
    pause = _open.previous + min_gap - now
    if pause > 0:
        # Too soon after the previous request: sleep for the remainder.
        time.sleep(pause)
        now = now + pause
    _open.previous = now

    # We now trust TogoWS to have set an HTTP error code, that
    # suffices for my current unit tests. Previously we would
    # examine the start of the data returned back.
    if not post:
        return _binary_to_string_handle(_urlopen(url))
    return _binary_to_string_handle(_urlopen(url, _as_bytes(post)))
コード例 #8
0
def _open(cgi, params=None, post=False):
    """Build the URL and open a handle to it (PRIVATE).

    Open a handle to Entrez.

    Arguments:
     - cgi - URL of the cgi script to access.
     - params - dictionary with the options to pass to it.  A dictionary
       supplied by the caller is updated in place: entries whose value is
       None are removed and "tool"/"email" are filled in when missing.
       When omitted, a fresh dictionary is used for every call.
     - post - if true the options are sent with HTTP POST, otherwise they
       are appended to the URL and sent with HTTP GET.

    Returns the result of ``_binary_to_string_handle`` applied to the
    opened handle.  Errors raised while opening the URL propagate to the
    caller unchanged.

    This function also enforces the "up to three queries per second rule"
    to avoid abusing the NCBI servers.
    """
    # A literal {} default would be shared between calls and would keep
    # the "tool"/"email" entries added below (so later changes to the
    # module-level defaults would be ignored); create the dict here.
    if params is None:
        params = {}
    # NCBI requirement: At most three queries per second.
    # Equivalently, at least a third of second between queries
    delay = 0.333333334
    current = time.time()
    wait = _open.previous + delay - current
    if wait > 0:
        time.sleep(wait)
        _open.previous = current + wait
    else:
        _open.previous = current
    # Remove None values from the parameters (iterate over a snapshot of
    # the keys so the dict can be modified safely).
    for key in [k for k, v in params.items() if v is None]:
        del params[key]
    # Tell Entrez that we are using Biopython (or whatever the user has
    # specified explicitly in the parameters or by changing the default)
    if "tool" not in params:
        params["tool"] = tool
    # Tell Entrez who we are
    if "email" not in params:
        if email is not None:
            params["email"] = email
        else:
            warnings.warn(
                """
Email address is not specified.

To make use of NCBI's E-utilities, NCBI requires you to specify your
email address with each request.  As an example, if your email address
is [email protected], you can specify it as follows:
   from Bio import Entrez
   Entrez.email = '*****@*****.**'
In case of excessive usage of the E-utilities, NCBI will attempt to contact
a user at the email address provided before blocking access to the
E-utilities.""", UserWarning)
    # Open a handle to Entrez.
    options = _urlencode(params, doseq=True)
    if post:
        # HTTP POST
        handle = _urlopen(cgi, data=_as_bytes(options))
    else:
        # HTTP GET
        handle = _urlopen(cgi + "?" + options)

    return _binary_to_string_handle(handle)
コード例 #9
0
ファイル: __init__.py プロジェクト: JustinGibbons/biopython
def _open(url, post=None):
    """Open a rate-limited handle to a TogoWS URL (PRIVATE).

    When ``post`` is truthy it is URL-encoded, converted with
    ``_as_bytes`` and sent as the request body; otherwise ``url`` is
    opened without a body.  An HTTP error is re-raised to the caller.

    In the absence of clear guidelines, at most three queries per second
    are issued, to avoid abusing the TogoWS servers.
    """
    min_gap = 0.333333333  # one third of a second
    now = time.time()
    pause = _open.previous + min_gap - now
    if pause > 0:
        # Too soon after the previous request: sleep for the remainder.
        time.sleep(pause)
        now = now + pause
    _open.previous = now

    try:
        if not post:
            handle = urllib2.urlopen(url)
        else:
            body = _as_bytes(urllib.urlencode(post))
            handle = urllib2.urlopen(url, body)
    except urllib2.HTTPError as err:
        raise err

    # We now trust TogoWS to have set an HTTP error code, that
    # suffices for my current unit tests. Previously we would
    # examine the start of the data returned back.
    return _binary_to_string_handle(handle)
コード例 #10
0
ファイル: __init__.py プロジェクト: YaoIna/PythonStart
def _open(cgi, params=None, post=None, ecitmatch=False):
    """Build the URL and open a handle to it (PRIVATE).

    Open a handle to Entrez.  cgi is the URL for the cgi script to access.
    params is a dictionary with the options to pass to it.

    The argument post should be a boolean to explicitly control if an HTTP
    POST should be used rather an HTTP GET based on the query length.
    By default (post=None), POST is used if the URL encoded parameters would
    be over 1000 characters long.

    The request is attempted up to ``max_tries`` times.  A failure on the
    final attempt, or an HTTP 4XX response on any attempt, is re-raised
    immediately; any other ``_URLError`` is retried after sleeping for
    ``sleep_between_tries`` seconds.

    This function also enforces the "up to three queries per second rule"
    to avoid abusing the NCBI servers.
    """
    # NCBI requirement: At most three queries per second if no API key is provided.
    # Equivalently, at least a third of second between queries
    params = _construct_params(params)
    options = _encode_options(ecitmatch, params)
    # Using just 0.333333334 seconds sometimes hit the NCBI rate limit,
    # the slightly longer pause of 0.37 seconds has been more reliable.
    delay = 0.1 if api_key else 0.37
    current = time.time()
    wait = _open.previous + delay - current
    if wait > 0:
        time.sleep(wait)
        _open.previous = current + wait
    else:
        _open.previous = current

    # By default, post is None. Set to a boolean to over-ride length choice:
    if post is None and len(options) > 1000:
        post = True
    cgi = _construct_cgi(cgi, post, options)

    for i in range(max_tries):
        try:
            if post:
                handle = _urlopen(cgi, data=_as_bytes(options))
            else:
                handle = _urlopen(cgi)
        except _URLError as exception:
            # Reraise if the final try fails
            if i >= max_tries - 1:
                raise

            # Reraise if the exception is triggered by a HTTP 4XX error
            # indicating some kind of bad request.  ``code`` is the
            # documented status attribute of HTTPError; ``status`` is not
            # available on every Python version, and an AttributeError
            # raised here would hide the real error.
            if isinstance(exception, _HTTPError) \
                    and exception.code // 100 == 4:
                raise

            # Treat everything else as a transient error and try again after a
            # brief delay.
            time.sleep(sleep_between_tries)
        else:
            break

    return _binary_to_string_handle(handle)
コード例 #11
0
def sprot_search_ful(text, make_wild=None, swissprot=1, trembl=None,
                     cgi='http://www.expasy.ch/cgi-bin/sprot-search-ful'):
    """Search SwissProt by full text (BROKEN).

    Sends ``text`` as the SEARCH field; each truthy option adds its form
    field with the value 'on'.  Returns a text handle to the response.
    """
    form = {'SEARCH': text}
    switches = (('makeWild', make_wild), ('S', swissprot), ('T', trembl))
    for field, enabled in switches:
        if enabled:
            form[field] = 'on'
    query_url = "%s?%s" % (cgi, _urlencode(form))
    return _binary_to_string_handle(_urlopen(query_url))
コード例 #12
0
ファイル: REST.py プロジェクト: xnlsbunyu/biopython
def _q(op, arg1, arg2=None, arg3=None):
    """Open http://rest.kegg.jp/<op>/<arg1>[/<arg2>[/<arg3>]].

    ``arg3`` is only used when ``arg2`` is also truthy.  When ``arg2``
    equals "image" the opened response is returned as is; otherwise it is
    passed through ``_binary_to_string_handle`` first.
    """
    if not arg2:
        path = "%s/%s" % (op, arg1)
    elif arg3:
        path = "%s/%s/%s/%s" % (op, arg1, arg2, arg3)
    else:
        path = "%s/%s/%s" % (op, arg1, arg2)
    response = _urlopen("http://rest.kegg.jp/%s" % path)

    if arg2 == "image":
        return response
    return _binary_to_string_handle(response)
コード例 #13
0
def sprot_search_de(text, swissprot=1, trembl=None,
                    cgi='http://www.expasy.ch/cgi-bin/sprot-search-de'):
    """Search SwissProt (BROKEN).

    Search by name, description, gene name, species, or organelle.
    ``text`` is sent as the SEARCH field; each truthy option adds its
    form field with the value 'on'.  Returns a text handle to the
    response.
    """
    form = {'SEARCH': text}
    for field, enabled in (('S', swissprot), ('T', trembl)):
        if enabled:
            form[field] = 'on'
    query_url = "%s?%s" % (cgi, _urlencode(form))
    response = _urlopen(query_url)
    return _binary_to_string_handle(response)
コード例 #14
0
ファイル: REST.py プロジェクト: abradle/biopython
def _q(op, arg1, arg2=None, arg3=None):
    """Open http://rest.kegg.jp/<op>/<arg1>[/<arg2>[/<arg3>]].

    ``arg3`` is only used when ``arg2`` is also truthy.  When ``arg2``
    equals "image" the opened response is returned as is; otherwise it is
    passed through ``_binary_to_string_handle`` first.
    """
    if arg2:
        if arg3:
            path = "%s/%s/%s/%s" % (op, arg1, arg2, arg3)
        else:
            path = "%s/%s/%s" % (op, arg1, arg2)
    else:
        path = "%s/%s" % (op, arg1)
    response = _urlopen("http://rest.kegg.jp/%s" % path)

    # Image responses are handed back without text conversion.
    return response if arg2 == "image" else _binary_to_string_handle(response)
コード例 #15
0
ファイル: __init__.py プロジェクト: HuttonICS/biopython
def get_prosite_entry(id,
                      cgi='https://prosite.expasy.org/cgi-bin/prosite/get-prosite-entry'):
    """Return a text handle for a PROSITE entry at ExPASy in HTML format.

    The request URL is ``cgi`` followed by "?" and ``id``.

    >>> from Bio import ExPASy
    >>> with ExPASy.get_prosite_entry('PS00001') as in_handle:
    ...     html = in_handle.read()
    ...
    >>> with open("myprositerecord.html", "w") as out_handle:
    ...     out_handle.write(html)
    ...

    For a non-existing key XXX, ExPASy returns an HTML-formatted page
    containing this text: 'There is currently no PROSITE entry for'
    """
    entry_url = "%s?%s" % (cgi, id)
    return _binary_to_string_handle(_urlopen(entry_url))
コード例 #16
0
def get_prosite_entry(
        id, cgi='http://prosite.expasy.org/cgi-bin/prosite/get-prosite-entry'):
    """Return a text handle for a PROSITE entry at ExPASy in HTML format.

    The request URL is ``cgi`` followed by "?" and ``id``.

    >>> from Bio import ExPASy
    >>> with ExPASy.get_prosite_entry('PS00001') as in_handle:
    ...     html = in_handle.read()
    ...
    >>> with open("myprositerecord.html", "w") as out_handle:
    ...     out_handle.write(html)
    ...

    For a non-existing key XXX, ExPASy returns an HTML-formatted page
    containing this text: 'There is currently no PROSITE entry for'
    """
    entry_url = "%s?%s" % (cgi, id)
    response = _urlopen(entry_url)
    return _binary_to_string_handle(response)
コード例 #17
0
ファイル: __init__.py プロジェクト: YaoIna/PythonStart
def get_prodoc_entry(id,
                     cgi='https://prosite.expasy.org/cgi-bin/prosite/get-prodoc-entry'):
    """Return a text handle for a PRODOC entry at ExPASy in HTML format.

    The request URL is ``cgi`` followed by "?" and ``id``.

    >>> from Bio import ExPASy
    >>> in_handle = ExPASy.get_prodoc_entry('PDOC00001')
    >>> html = in_handle.read()
    >>> in_handle.close()
    ...
    >>> with open("myprodocrecord.html", "w") as out_handle:
    ...     # Python2/3 docstring workaround: Revise for 'Python 3 only'
    ...     _ = out_handle.write(html)
    ...

    For a non-existing key XXX, ExPASy returns an HTML-formatted page
    containing this text: 'There is currently no PROSITE entry for'
    """
    entry_url = "%s?%s" % (cgi, id)
    return _binary_to_string_handle(_urlopen(entry_url))
コード例 #18
0
ファイル: __init__.py プロジェクト: HuttonICS/biopython
def _open(cgi, params=None, post=None, ecitmatch=False):
    """Build the URL and open a handle to it (PRIVATE).

    Open a handle to Entrez.  cgi is the URL for the cgi script to access
    and params is a dictionary with the options to pass to it.  An HTTP
    error raised while opening the URL is re-raised to the caller.

    The argument post should be a boolean to explicitly control whether
    an HTTP POST is used rather than an HTTP GET.  By default (post=None),
    POST is used if the URL encoded parameters would be over 1000
    characters long.

    This function also enforces the "up to three queries per second rule"
    to avoid abusing the NCBI servers.
    """
    query = _construct_params(params)
    encoded = _encode_options(ecitmatch, query)

    # NCBI requirement: At most three queries per second if no API key is
    # provided.  Using just 0.333333334 seconds sometimes hit the NCBI
    # rate limit, the slightly longer pause of 0.37 seconds has been more
    # reliable.
    if api_key:
        min_gap = 0.1
    else:
        min_gap = 0.37
    now = time.time()
    pause = _open.previous + min_gap - now
    if pause > 0:
        time.sleep(pause)
        now = now + pause
    _open.previous = now

    # post=None means "decide from the length of the encoded options".
    use_post = post
    if use_post is None and len(encoded) > 1000:
        use_post = True
    target = _construct_cgi(cgi, use_post, encoded)

    try:
        if use_post:
            handle = _urlopen(target, data=_as_bytes(encoded))
        else:
            handle = _urlopen(target)
    except _HTTPError as err:
        raise err

    return _binary_to_string_handle(handle)
コード例 #19
0
def _open(cgi, params=None, post=None, ecitmatch=False):
    """Build the URL and open a handle to it (PRIVATE).

    Open a handle to Entrez.  cgi is the URL for the cgi script to access
    and params is a dictionary with the options to pass to it.  An HTTP
    error raised while opening the URL is re-raised to the caller.

    The argument post should be a boolean to explicitly control whether
    an HTTP POST is used rather than an HTTP GET.  By default (post=None),
    POST is used if the URL encoded parameters would be over 1000
    characters long.

    This function also enforces the "up to three queries per second rule"
    to avoid abusing the NCBI servers.
    """
    # NCBI requirement: At most three queries per second.
    # Equivalently, at least a third of second between queries
    min_gap = 0.333333334
    now = time.time()
    pause = _open.previous + min_gap - now
    if pause > 0:
        time.sleep(pause)
        now = now + pause
    _open.previous = now

    query = _construct_params(params)
    encoded = _encode_options(ecitmatch, query)

    # post=None means "decide from the length of the encoded options".
    use_post = post
    if use_post is None and len(encoded) > 1000:
        use_post = True
    target = _construct_cgi(cgi, use_post, encoded)

    try:
        if use_post:
            handle = _urlopen(target, data=_as_bytes(encoded))
        else:
            handle = _urlopen(target)
    except _HTTPError as err:
        raise err

    return _binary_to_string_handle(handle)
コード例 #20
0
def get_sprot_raw(id):
    """Return a text handle for the raw SwissProt record of ``id``.

    The record is requested from http://www.uniprot.org/uniprot/<id>.txt
    (URL scheme as documented at http://www.expasy.ch/expasy_urls.html).

    >>> from Bio import ExPASy
    >>> from Bio import SwissProt
    >>> with ExPASy.get_sprot_raw("O23729") as handle:
    ...     record = SwissProt.read(handle)
    ...
    >>> print(record.entry_name)
    CHS3_BROFI

    An identifier that does not exist results in an HTTP error:

    >>> ExPASy.get_sprot_raw("DOES_NOT_EXIST")
    Traceback (most recent call last):
    ...
    urllib.error.HTTPError: HTTP Error 404: Not Found

    """
    response = _urlopen("http://www.uniprot.org/uniprot/%s.txt" % id)
    return _binary_to_string_handle(response)
コード例 #21
0
ファイル: __init__.py プロジェクト: HuttonICS/biopython
def get_sprot_raw(id):
    """Return a text handle for the raw SwissProt record of ``id``.

    The record is requested from http://www.uniprot.org/uniprot/<id>.txt
    (URL scheme as documented at https://www.expasy.org/expasy_urls.html).

    >>> from Bio import ExPASy
    >>> from Bio import SwissProt
    >>> with ExPASy.get_sprot_raw("O23729") as handle:
    ...     record = SwissProt.read(handle)
    ...
    >>> print(record.entry_name)
    CHS3_BROFI

    An identifier that does not exist results in an HTTP error:

    >>> ExPASy.get_sprot_raw("DOES_NOT_EXIST")
    Traceback (most recent call last):
    ...
    urllib.error.HTTPError: HTTP Error 404: Not Found

    """
    record_url = "http://www.uniprot.org/uniprot/%s.txt" % id
    raw_handle = _urlopen(record_url)
    return _binary_to_string_handle(raw_handle)
コード例 #22
0
ファイル: __init__.py プロジェクト: BIGLabHYU/biopython
def _open(cgi, params=None, post=None, ecitmatch=False):
    """Build the URL and open a handle to it (PRIVATE).

    Open a handle to Entrez.  cgi is the URL for the cgi script to access.
    params is a dictionary with the options to pass to it; a dictionary
    supplied by the caller is updated in place (None values are removed,
    "tool" and "email" are filled in when missing).  An HTTP error raised
    while opening the URL is re-raised to the caller.

    The argument post should be a boolean to explicitly control whether
    an HTTP POST is used rather than an HTTP GET.  By default (post=None),
    POST is used if the URL encoded options are over 1000 characters long.

    When ecitmatch is true, pipe characters are left unencoded in the
    options.

    This function also enforces the "up to three queries per second rule"
    to avoid abusing the NCBI servers.
    """
    if params is None:
        params = {}

    # NCBI requirement: At most three queries per second.
    # Equivalently, at least a third of second between queries
    min_gap = 0.333333334
    now = time.time()
    pause = _open.previous + min_gap - now
    if pause > 0:
        time.sleep(pause)
        now = now + pause
    _open.previous = now

    # Drop every parameter whose value is None.
    for unset in [name for name, value in params.items() if value is None]:
        del params[unset]

    # Tell Entrez that we are using Biopython (or whatever the user has
    # specified explicitly in the parameters or by changing the default)
    if "tool" not in params:
        params["tool"] = tool
    # Tell Entrez who we are
    if "email" not in params:
        if email is None:
            warnings.warn("""
Email address is not specified.

To make use of NCBI's E-utilities, NCBI requires you to specify your
email address with each request.  As an example, if your email address
is [email protected], you can specify it as follows:
   from Bio import Entrez
   Entrez.email = '*****@*****.**'
In case of excessive usage of the E-utilities, NCBI will attempt to contact
a user at the email address provided before blocking access to the
E-utilities.""", UserWarning)
        else:
            params["email"] = email

    encoded = _urlencode(params, doseq=True)
    # _urlencode encodes pipes, which NCBI expects in ECitMatch
    if ecitmatch:
        encoded = encoded.replace('%7C', '|')

    # post=None means "decide from the length of the encoded options".
    use_post = post
    if use_post is None and len(encoded) > 1000:
        use_post = True

    try:
        if use_post:
            # HTTP POST
            handle = _urlopen(cgi, data=_as_bytes(encoded))
        else:
            # HTTP GET
            handle = _urlopen(cgi + "?" + encoded)
    except _HTTPError as err:
        raise err

    return _binary_to_string_handle(handle)
コード例 #23
0
ファイル: __init__.py プロジェクト: wojdyr/biopython
def _open(cgi, params=None, ecitmatch=False):
    """Build the URL and open a handle to it (PRIVATE).

    Open a handle to Entrez.

    Arguments:
     - cgi - URL of the cgi script to access.
     - params - dictionary with the options to pass to it.  A dictionary
       supplied by the caller is updated in place: None values are
       removed, "tool"/"email" are filled in when missing, and a list
       given as "id" is replaced by the string "UID,UID,...".
     - ecitmatch - if true, pipe characters are left unencoded in the
       options.

    The "id" option may be a list of UIDs (strings or integers), a
    comma-separated string, or any other value such as a single integer.

    Returns the result of ``_binary_to_string_handle`` applied to the
    opened handle.  Errors raised while opening the URL propagate to the
    caller unchanged.

    This function also enforces the "up to three queries per second rule"
    to avoid abusing the NCBI servers, and makes the request through POST
    rather than GET if more than 200 UIDs are given or the number of
    characters in the resulting query is greater than 1000.
    """
    if params is None:
        params = {}
    # NCBI requirement: At most three queries per second.
    # Equivalently, at least a third of second between queries
    delay = 0.333333334
    current = time.time()
    wait = _open.previous + delay - current
    if wait > 0:
        time.sleep(wait)
        _open.previous = current + wait
    else:
        _open.previous = current
    # Remove None values from the parameters
    for key, value in list(params.items()):
        if value is None:
            del params[key]
    # Tell Entrez that we are using Biopython (or whatever the user has
    # specified explicitly in the parameters or by changing the default)
    if "tool" not in params:
        params["tool"] = tool
    # Tell Entrez who we are
    if "email" not in params:
        if email is not None:
            params["email"] = email
        else:
            warnings.warn(
                """
Email address is not specified.

To make use of NCBI's E-utilities, NCBI requires you to specify your
email address with each request.  As an example, if your email address
is [email protected], you can specify it as follows:
   from Bio import Entrez
   Entrez.email = '*****@*****.**'
In case of excessive usage of the E-utilities, NCBI will attempt to contact
a user at the email address provided before blocking access to the
E-utilities.""", UserWarning)

    # By default, we do not force a POST request
    force_post = False

    # Make sure the UIDs are in the format UID,UID,...
    ids = params.get("id", None)
    if ids is not None:
        if isinstance(ids, list):
            # Convert [UID, UID, ...] into "UID,UID,...".  Each UID goes
            # through str() so that lists of integers are accepted too.
            params["id"] = ",".join(str(uid) for uid in ids)
            id_count = len(ids)
        elif isinstance(ids, str):
            id_count = len(ids.split(","))
        else:
            try:
                id_count = len(ids)
            except TypeError:
                # Unsized value, e.g. a single integer UID.
                id_count = 1

        # If 200+ UIDs are given, force the POST request
        force_post = id_count > 200

    # Open a handle to Entrez.
    options = _urlencode(params, doseq=True)
    # _urlencode encodes pipes, which NCBI expects in ECitMatch
    if ecitmatch:
        options = options.replace('%7C', '|')

    if force_post or len(options) > 1000:
        # HTTP POST
        handle = _urlopen(cgi, data=_as_bytes(options))
    else:
        # HTTP GET
        handle = _urlopen(cgi + "?" + options)

    return _binary_to_string_handle(handle)
コード例 #24
0
E-utilities.""", UserWarning)
    # Open a handle to Entrez.
    options = urllib.urlencode(params, doseq=True)
    #print cgi + "?" + options
    try:
        if post:
            #HTTP POST
            handle = urllib2.urlopen(cgi, data=options)
        else:
            #HTTP GET
            cgi += "?" + options
            handle = urllib2.urlopen(cgi)
    except urllib2.HTTPError, exception:
        raise exception

    return _binary_to_string_handle(handle)


# Initial value of the function attribute ``_open.previous``.
# NOTE(review): presumably the time of the last request, used by _open for
# rate limiting -- that use lies outside this excerpt; confirm.
_open.previous = 0


def _test():
    """Run the module's doctests (PRIVATE)."""
    print "Runing doctests..."
    import doctest
    doctest.testmod()
    print "Done"


# Run the doctests when this file is executed as a script.
if __name__ == "__main__":
    _test()
コード例 #25
0
ファイル: __init__.py プロジェクト: DunbrackLab/biopython
a user at the email address provided before blocking access to the
E-utilities.""", UserWarning)
    # Open a handle to Entrez.
    options = urllib.urlencode(params, doseq=True)
    #print cgi + "?" + options
    try:
        if post:
            #HTTP POST
            handle = urllib2.urlopen(cgi, data=_as_bytes(options))
        else:
            #HTTP GET
            cgi += "?" + options
            handle = urllib2.urlopen(cgi)
    except urllib2.HTTPError, exception:
        raise exception

    return _binary_to_string_handle(handle)

# Initial value of the function attribute ``_open.previous``.
# NOTE(review): presumably the time of the last request, used by _open for
# rate limiting -- that use lies outside this excerpt; confirm.
_open.previous = 0


def _test():
    """Run the module's doctests (PRIVATE)."""
    print "Running doctests..."
    import doctest
    doctest.testmod()
    print "Done"

# Run the doctests when this file is executed as a script.
if __name__ == "__main__":
    _test()
コード例 #26
0
ファイル: __init__.py プロジェクト: tcyb/biopython
def _open(cgi, params=None, ecitmatch=False):
    """Build the URL and open a handle to it (PRIVATE).

    Open a handle to Entrez.

    Arguments:
     - cgi - URL of the cgi script to access.
     - params - dictionary with the options to pass to it.  A dictionary
       supplied by the caller is updated in place: None values are
       removed, "tool"/"email" are filled in when missing, and a list
       given as "id" is replaced by the string "UID,UID,...".
     - ecitmatch - if true, pipe characters are left unencoded in the
       options.

    The "id" option may be a list of UIDs (strings or integers), a
    comma-separated string, or any other value such as a single integer.

    Returns the result of ``_binary_to_string_handle`` applied to the
    opened handle.  Errors raised while opening the URL propagate to the
    caller unchanged.

    This function also enforces the "up to three queries per second rule"
    to avoid abusing the NCBI servers, and makes the request through POST
    rather than GET if more than 200 UIDs are given or the number of
    characters in the resulting query is greater than 1000.
    """
    if params is None:
        params = {}
    # NCBI requirement: At most three queries per second.
    # Equivalently, at least a third of second between queries
    delay = 0.333333334
    current = time.time()
    wait = _open.previous + delay - current
    if wait > 0:
        time.sleep(wait)
        _open.previous = current + wait
    else:
        _open.previous = current
    # Remove None values from the parameters
    for key, value in list(params.items()):
        if value is None:
            del params[key]
    # Tell Entrez that we are using Biopython (or whatever the user has
    # specified explicitly in the parameters or by changing the default)
    if "tool" not in params:
        params["tool"] = tool
    # Tell Entrez who we are
    if "email" not in params:
        if email is not None:
            params["email"] = email
        else:
            warnings.warn("""
Email address is not specified.

To make use of NCBI's E-utilities, NCBI requires you to specify your
email address with each request.  As an example, if your email address
is [email protected], you can specify it as follows:
   from Bio import Entrez
   Entrez.email = '*****@*****.**'
In case of excessive usage of the E-utilities, NCBI will attempt to contact
a user at the email address provided before blocking access to the
E-utilities.""", UserWarning)

    # By default, we do not force a POST request
    force_post = False

    # Make sure the UIDs are in the format UID,UID,...
    ids = params.get("id", None)
    if ids is not None:
        if isinstance(ids, list):
            # Convert [UID, UID, ...] into "UID,UID,...".  Each UID goes
            # through str() so that lists of integers are accepted too.
            params["id"] = ",".join(str(uid) for uid in ids)
            id_count = len(ids)
        elif isinstance(ids, str):
            id_count = len(ids.split(","))
        else:
            try:
                id_count = len(ids)
            except TypeError:
                # Unsized value, e.g. a single integer UID.
                id_count = 1

        # If 200+ UIDs are given, force the POST request
        force_post = id_count > 200

    # Open a handle to Entrez.
    options = _urlencode(params, doseq=True)
    # _urlencode encodes pipes, which NCBI expects in ECitMatch
    if ecitmatch:
        options = options.replace('%7C', '|')

    if force_post or len(options) > 1000:
        # HTTP POST
        handle = _urlopen(cgi, data=_as_bytes(options))
    else:
        # HTTP GET
        handle = _urlopen(cgi + "?" + options)

    return _binary_to_string_handle(handle)