Esempio n. 1
def search_count(db, query):
    """TogoWS search count (returns an integer).

    db - database (string), see the listing at _BASE_URL + "/search"
    query - search term (string)

    You could then use the count to download a large set of search results in
    batches using the offset and limit options to Bio.TogoWS.search(). In
    general however the Bio.TogoWS.search_iter() function is simpler to use.

    If db is not among the database names reported by the search listing a
    warning is issued (not an exception) and the request is still attempted.

    Raises ValueError if the response cannot be converted to an integer.
    """
    global _search_db_names
    # Fetch the list of searchable databases once and cache it at module level.
    if _search_db_names is None:
        _search_db_names = _get_fields(_BASE_URL + "/search")
    if db not in _search_db_names:
        # TODO - Make this a ValueError? Right now despite the HTML website
        # claiming to, the "gene" or "ncbi-gene" don't work and are not listed.
        import warnings
        warnings.warn("TogoWS search does not officially support database '%s'. "
                      "See %s/search/ for options." % (db, _BASE_URL))
    url = _BASE_URL + "/search/%s/%s/count" % (db, _quote(query))
    handle = _open(url)
    # NOTE(review): assumes _open() returns a file-like object offering
    # read() and close() - confirm against its definition.
    try:
        data = handle.read()
    finally:
        # Release the handle even if reading fails.
        handle.close()
    try:
        count = int(data.strip())
    except ValueError:
        raise ValueError("Expected an integer from URL %s, got: %r" % (url, data))
    return count
Esempio n. 2
def search_count(db, query):
    """TogoWS search count (returns an integer).

    db - database (string), see the listing at _BASE_URL + "/search"
    query - search term (string)

    You could then use the count to download a large set of search results in
    batches using the offset and limit options to Bio.TogoWS.search(). In
    general however the Bio.TogoWS.search_iter() function is simpler to use.

    If db is not among the database names reported by the search listing a
    warning is issued (not an exception) and the request is still attempted.

    Raises ValueError if the response cannot be converted to an integer.
    """
    global _search_db_names
    # Fetch the list of searchable databases once and cache it at module level.
    if _search_db_names is None:
        _search_db_names = _get_fields(_BASE_URL + "/search")
    if db not in _search_db_names:
        # TODO - Make this a ValueError? Right now despite the HTML website
        # claiming to, the "gene" or "ncbi-gene" don't work and are not listed.
        import warnings
        warnings.warn(
            "TogoWS search does not officially support database '%s'. "
            "See %s/search/ for options." % (db, _BASE_URL))
    url = _BASE_URL + "/search/%s/%s/count" % (db, _quote(query))
    handle = _open(url)
    # NOTE(review): assumes _open() returns a file-like object offering
    # read() and close() - confirm against its definition.
    try:
        data = handle.read()
    finally:
        # Release the handle even if reading fails.
        handle.close()
    try:
        count = int(data.strip())
    except ValueError:
        raise ValueError("Expected an integer from URL %s, got: %r" %
                         (url, data))
    return count
Esempio n. 3
def entry(db, id, format=None, field=None):
    """TogoWS fetch entry (returns a handle).

    db - database (string), see list below.
    id - identifier (string) or a list of identifiers (either as a list of
         strings or a single string with comma separators).
    format - return data file format (string), options depend on the database
             e.g. "xml", "json", "gff", "fasta", "ttl" (RDF Turtle)
    field - specific field from within the database record (string)
            e.g. "au" or "authors" for pubmed.

    At the time of writing, this includes the following:

    KEGG: compound, drug, enzyme, genes, glycan, orthology, reaction,
          module, pathway
    DDBj: ddbj, dad, pdb
    NCBI: nuccore, nucest, nucgss, nucleotide, protein, gene, onim,
          homologue, snp, mesh, pubmed
    EBI:  embl, uniprot, uniparc, uniref100, uniref90, uniref50

    For the current list, please see the TogoWS entry listing (the names
    accepted here are those returned by _get_entry_dbs()).

    This function is essentially equivalent to the NCBI Entrez service
    EFetch, available in Biopython as Bio.Entrez.efetch(...), but that
    does not offer field extraction.

    Raises ValueError if db, field or format is not among the values
    reported as supported.
    """
    global _entry_db_names, _entry_db_fields, _entry_db_formats
    # The database list is fetched once and cached at module level.
    if _entry_db_names is None:
        _entry_db_names = _get_entry_dbs()
    if db not in _entry_db_names:
        raise ValueError("TogoWS entry fetch does not officially support "
                         "database '%s'." % db)
    if field:
        # Per-database field lists are looked up on first use and cached.
        try:
            fields = _entry_db_fields[db]
        except KeyError:
            fields = _get_entry_fields(db)
            _entry_db_fields[db] = fields
        if db == "pubmed" and field == "ti" and "title" in fields:
            # Backwards compatibility fix for TogoWS change Nov/Dec 2013
            field = "title"
            import warnings
            warnings.warn("TogoWS dropped 'pubmed' field alias 'ti', please use 'title' instead.")
        if field not in fields:
            raise ValueError("TogoWS entry fetch does not explicitly support "
                             "field '%s' for database '%s'. Only: %s"
                             % (field, db, ", ".join(sorted(fields))))
    if format:
        # Per-database format lists are looked up on first use and cached.
        try:
            formats = _entry_db_formats[db]
        except KeyError:
            formats = _get_entry_formats(db)
            _entry_db_formats[db] = formats
        if format not in formats:
            raise ValueError("TogoWS entry fetch does not explicitly support "
                             "format '%s' for database '%s'. Only: %s"
                             % (format, db, ", ".join(sorted(formats))))

    # A list of identifiers is sent as a single comma separated string.
    if isinstance(id, list):
        id = ",".join(id)
    url = _BASE_URL + "/entry/%s/%s" % (db, _quote(id))
    if field:
        url += "/" + field
    if format:
        url += "." + format
    return _open(url)
Esempio n. 4
def search(db, query, offset=None, limit=None, format=None):
    """TogoWS search (returns a handle).

    This is a low level wrapper for the TogoWS search function, which
    can return results in a several formats. In general, the search_iter
    function is more suitable for end users.

    db - database (string), see the listing at _BASE_URL + "/search"
    query - search term (string)
    offset, limit - optional integers specifying which result to start from
            (1 based) and the number of results to return.
    format - return data file format (string), e.g. "json", "ttl" (RDF)
             By default plain text is returned, one result per line.

    At the time of writing, TogoWS applies a default count limit of 100
    search results, and this is an upper bound. To access more results,
    use the offset argument or the search_iter(...) function.

    TogoWS supports a long list of databases, including many from the NCBI
    (e.g. "ncbi-pubmed" or "pubmed", "ncbi-genbank" or "genbank", and
    "ncbi-taxonomy"), EBI (e.g. "ebi-ebml" or "embl", "ebi-uniprot" or
    "uniprot", "ebi-go"), and KEGG (e.g. "kegg-compound" or "compound").
    For the current list, see the same search listing.

    The NCBI provide the Entrez Search service (ESearch) which is similar,
    available in Biopython as the Bio.Entrez.esearch() function.

    See also the function Bio.TogoWS.search_count() which returns the number
    of matches found, and the Bio.TogoWS.search_iter() function which allows
    you to iterate over the search results (taking care of batching for you).

    Raises ValueError if only one of offset and limit is given, or if
    either cannot be converted to an integer of at least one.
    """
    global _search_db_names
    # Fetch the list of searchable databases once and cache it at module level.
    if _search_db_names is None:
        _search_db_names = _get_fields(_BASE_URL + "/search")
    if db not in _search_db_names:
        # TODO - Make this a ValueError? Right now despite the HTML website
        # claiming to, the "gene" or "ncbi-gene" don't work and are not listed.
        import warnings
        warnings.warn("TogoWS search does not explicitly support database '%s'. "
                      "See %s/search/ for options." % (db, _BASE_URL))
    url = _BASE_URL + "/search/%s/%s" % (db, _quote(query))
    if offset is not None and limit is not None:
        # Accept anything int() can convert (e.g. numeric strings).
        try:
            offset = int(offset)
        except ValueError:
            raise ValueError("Offset should be an integer (at least one), not %r" % offset)
        try:
            limit = int(limit)
        except ValueError:
            raise ValueError("Limit should be an integer (at least one), not %r" % limit)
        if offset <= 0:
            raise ValueError("Offset should be at least one, not %i" % offset)
        if limit <= 0:
            raise ValueError("Count should be at least one, not %i" % limit)
        url += "/%i,%i" % (offset, limit)
    elif offset is not None or limit is not None:
        raise ValueError("Expect BOTH offset AND limit to be provided (or neither)")
    if format:
        url += "." + format
    return _open(url)
Esempio n. 5
def entry(db, id, format=None, field=None):
    """TogoWS fetch entry (returns a handle).

    db - database (string), see list below.
    id - identifier (string) or a list of identifiers (either as a list of
         strings or a single string with comma separators).
    format - return data file format (string), options depend on the database
             e.g. "xml", "json", "gff", "fasta", "ttl" (RDF Turtle)
    field - specific field from within the database record (string)
            e.g. "au" or "authors" for pubmed.

    At the time of writing, this includes the following:

    KEGG: compound, drug, enzyme, genes, glycan, orthology, reaction,
          module, pathway
    DDBj: ddbj, dad, pdb
    NCBI: nuccore, nucest, nucgss, nucleotide, protein, gene, onim,
          homologue, snp, mesh, pubmed
    EBI:  embl, uniprot, uniparc, uniref100, uniref90, uniref50

    For the current list, please see the TogoWS entry listing (the names
    accepted here are those returned by _get_entry_dbs()).

    This function is essentially equivalent to the NCBI Entrez service
    EFetch, available in Biopython as Bio.Entrez.efetch(...), but that
    does not offer field extraction.

    Raises ValueError if db, field or format is not among the values
    reported as supported.
    """
    global _entry_db_names, _entry_db_fields, _entry_db_formats
    # The database list is fetched once and cached at module level.
    if _entry_db_names is None:
        _entry_db_names = _get_entry_dbs()
    if db not in _entry_db_names:
        raise ValueError("TogoWS entry fetch does not officially support "
                         "database '%s'." % db)
    if field:
        # Per-database field lists are looked up on first use and cached.
        try:
            fields = _entry_db_fields[db]
        except KeyError:
            fields = _get_entry_fields(db)
            _entry_db_fields[db] = fields
        if db == "pubmed" and field == "ti" and "title" in fields:
            # Backwards compatibility fix for TogoWS change Nov/Dec 2013
            field = "title"
            import warnings
            warnings.warn(
                "TogoWS dropped 'pubmed' field alias 'ti', please use 'title' instead."
            )
        if field not in fields:
            raise ValueError("TogoWS entry fetch does not explicitly support "
                             "field '%s' for database '%s'. Only: %s" %
                             (field, db, ", ".join(sorted(fields))))
    if format:
        # Per-database format lists are looked up on first use and cached.
        try:
            formats = _entry_db_formats[db]
        except KeyError:
            formats = _get_entry_formats(db)
            _entry_db_formats[db] = formats
        if format not in formats:
            raise ValueError("TogoWS entry fetch does not explicitly support "
                             "format '%s' for database '%s'. Only: %s" %
                             (format, db, ", ".join(sorted(formats))))

    # A list of identifiers is sent as a single comma separated string.
    if isinstance(id, list):
        id = ",".join(id)
    url = _BASE_URL + "/entry/%s/%s" % (db, _quote(id))
    if field:
        url += "/" + field
    if format:
        url += "." + format
    return _open(url)
Esempio n. 6
def search(db, query, offset=None, limit=None, format=None):
    """TogoWS search (returns a handle).

    This is a low level wrapper for the TogoWS search function, which
    can return results in a several formats. In general, the search_iter
    function is more suitable for end users.

    db - database (string), see the listing at _BASE_URL + "/search"
    query - search term (string)
    offset, limit - optional integers specifying which result to start from
            (1 based) and the number of results to return.
    format - return data file format (string), e.g. "json", "ttl" (RDF)
             By default plain text is returned, one result per line.

    At the time of writing, TogoWS applies a default count limit of 100
    search results, and this is an upper bound. To access more results,
    use the offset argument or the search_iter(...) function.

    TogoWS supports a long list of databases, including many from the NCBI
    (e.g. "ncbi-pubmed" or "pubmed", "ncbi-genbank" or "genbank", and
    "ncbi-taxonomy"), EBI (e.g. "ebi-ebml" or "embl", "ebi-uniprot" or
    "uniprot", "ebi-go"), and KEGG (e.g. "kegg-compound" or "compound").
    For the current list, see the same search listing.

    The NCBI provide the Entrez Search service (ESearch) which is similar,
    available in Biopython as the Bio.Entrez.esearch() function.

    See also the function Bio.TogoWS.search_count() which returns the number
    of matches found, and the Bio.TogoWS.search_iter() function which allows
    you to iterate over the search results (taking care of batching for you).

    Raises ValueError if only one of offset and limit is given, or if
    either cannot be converted to an integer of at least one.
    """
    global _search_db_names
    # Fetch the list of searchable databases once and cache it at module level.
    if _search_db_names is None:
        _search_db_names = _get_fields(_BASE_URL + "/search")
    if db not in _search_db_names:
        # TODO - Make this a ValueError? Right now despite the HTML website
        # claiming to, the "gene" or "ncbi-gene" don't work and are not listed.
        import warnings
        warnings.warn(
            "TogoWS search does not explicitly support database '%s'. "
            "See %s/search/ for options." % (db, _BASE_URL))
    url = _BASE_URL + "/search/%s/%s" % (db, _quote(query))
    if offset is not None and limit is not None:
        # Accept anything int() can convert (e.g. numeric strings).
        try:
            offset = int(offset)
        except ValueError:
            raise ValueError(
                "Offset should be an integer (at least one), not %r" % offset)
        try:
            limit = int(limit)
        except ValueError:
            raise ValueError(
                "Limit should be an integer (at least one), not %r" % limit)
        if offset <= 0:
            raise ValueError("Offset should be at least one, not %i" % offset)
        if limit <= 0:
            raise ValueError("Count should be at least one, not %i" % limit)
        url += "/%i,%i" % (offset, limit)
    elif offset is not None or limit is not None:
        raise ValueError(
            "Expect BOTH offset AND limit to be provided (or neither)")
    if format:
        url += "." + format
    return _open(url)