def __init__(self, verbose=False, cache=False):
    """.. rubric:: Constructor

    :param bool verbose: prints informative messages (default is off)
    :param bool cache: forwarded unchanged to the underlying ``REST``
        service (default is off)
    """
    self.services = REST(
        name="PDB",
        url="http://www.rcsb.org/pdb/rest",
        verbose=verbose,
        cache=cache,
    )
def __init__(self, verbose=False, cache=False):
    """**Constructor**

    :param bool verbose: forwarded to the ``REST`` service (default is off)
    :param bool cache: forwarded to the ``REST`` service (default is off)
    """
    # The endpoint is the class-level ``COG._url`` attribute.
    rest_options = {"verbose": verbose, "cache": cache}
    self.services = REST(name="cog", url=COG._url, **rest_options)
def __init__(self, verbose=False):
    """Build the ``REST`` client for the EBI MUSCLE endpoint.

    :param bool verbose: forwarded to the ``REST`` service (default is off)
    """
    self.services = REST(
        name='MUSCLE',
        url="http://www.ebi.ac.uk/Tools/services/rest/muscle",
        verbose=verbose,
    )

    # Initial (empty) state of the parameter attributes.
    self._parameters = None
    self._parametersDetails = {}

    # Headers built once from the service's own user agent string.
    user_agent = self.services.getUserAgent()
    self._headers = {"User-Agent": user_agent, "accept": "application/json"}
def __init__(self, verbose=True):
    """.. rubric:: Constructor

    :param bool verbose: forwarded to the ``REST`` service (default is on)
    """
    self.services = REST(
        name="seqret",
        url="https://www.ebi.ac.uk/Tools/services/rest/emboss_seqret",
        verbose=verbose,
    )
    # Initial value; nothing is assigned to it in this constructor.
    self._parameters = None
def __init__(self, verbose=False, cache=False):
    """**Constructor**

    :param verbose: set to False to prevent informative messages
    :param cache: forwarded to the ``REST`` service (default is off)
    """
    rest = REST(name="ENA", url=ENA.url, verbose=verbose, cache=cache)
    self.services = rest
    # Override the TIMEOUT attribute of the service object.
    rest.TIMEOUT = 100
def __init__(self, verbose=False):
    """.. rubric:: NCBIblast constructor

    :param bool verbose: prints informative messages
    """
    self.services = REST(
        name="NCBIblast",
        url="http://www.ebi.ac.uk/Tools/services/rest/ncbiblast",
        verbose=verbose,
    )
    # Initial (empty) state of the parameter attributes.
    self._parameters = None
    self._parametersDetails = {}
    # NOTE(review): presumably the polling interval used while waiting for
    # a job -- confirm the unit against the code that reads it.
    self.checkInterval = 2
def __init__(self, verbose=False, cache=False):
    """.. rubric:: Constructor

    :param bool verbose: prints informative messages (default is off)
    :param bool cache: forwarded to the ``REST`` service (default is off)
    """
    # The service is created without a URL (``url_defined_later=True``);
    # the URL is then set from the class-level ``PDB._url`` attribute.
    self.services = REST(
        name="PDB",
        verbose=verbose,
        cache=cache,
        url_defined_later=True,
    )
    self.services.url = PDB._url
def __init__(self, verbose=True, cache=False):
    """.. rubric:: Constructor

    :param bool verbose: forwarded to the ``REST`` service (default is on)
    :param bool cache: forwarded to the ``REST`` service (default is off)
    """
    rest_options = {"verbose": verbose, "cache": cache}
    self.services = REST(name="BioDBNet", url=BioDBNet._url, **rest_options)
    # Must come after ``self.services`` is set: the result of
    # ``getInputs()`` is stored once at construction time.
    self._valid_inputs = self.getInputs()
def __init__(self, verbose=True, cache=False):
    """.. rubric:: Constructor

    :param bool verbose: prints informative messages
    :param bool cache: forwarded to the ``REST`` service (default is off)
    """
    self.services = REST(
        name='PathwayCommons',
        url=PathwayCommons._url,
        verbose=verbose,
        cache=cache,
    )
    # Defaults: no XML conversion flag, JSON as request extension.
    self.easyXMLConversion = False
    self._default_extension = "json"
def __init__(self, verbose=False, cache=False):
    """.. rubric:: Constructor

    :param bool verbose: prints informative messages (default is off)
    :param bool cache: forwarded to the ``REST`` service (default is off)
    """
    self.services = REST(
        name="PDBe",
        url="https://www.ebi.ac.uk/pdbe/api/pdb/entry/",
        verbose=verbose,
        cache=cache,
    )
def __init__(self, verbose=False, cache=False):
    """.. rubric:: Constructor

    :param bool verbose: prints informative messages (default is off)
    :param bool cache: forwarded to the ``REST`` service (default is off)
    """
    # NOTE(review): the service is named "PDBe" although the URL points to
    # mygene.info -- this looks like a copy-paste leftover. Left unchanged
    # because the name may be used elsewhere (e.g. logging/caching); confirm.
    self.services = REST(
        name="PDBe",
        url="https://mygene.info/v3",
        verbose=verbose,
        cache=cache,
    )
def __init__(self, verbose=False, cache=False):
    """.. rubric:: Constructor

    :param bool verbose: prints informative messages
    :param bool cache: forwarded to the ``REST`` service (default is off)
    """
    endpoint = "http://www.ebi.ac.uk/arrayexpress"
    self.services = REST(name="ArrayExpress", url=endpoint, verbose=verbose, cache=cache)
    # Version label stored on the instance.
    self.version = "v2"
def __init__(self, verbose=True, cache=False):
    """**Constructor**

    :param verbose: set to False to prevent informative messages
    :param cache: forwarded to the ``REST`` service (default is off)
    """
    rest_options = {"verbose": verbose, "cache": cache}
    self.services = REST(name="Panther", url=Panther._url, **rest_options)
    # Address of the HTML page listing all pathways.
    self._allPathwaysURL = "http://www.pantherdb.org/pathway/pathwayList.jsp"
def __init__(self, verbose=True, cache=False):
    """**Constructor**

    :param verbose: set to False to prevent informative messages
    :param cache: forwarded to the ``REST`` service (default is off)
    """
    self.services = REST(
        name="BioCarta",
        url=BioCarta._url,
        verbose=verbose,
        cache=cache,
    )
    self.fname = "biocarta_pathways.txt"

    # No organism selected and no pathway list stored yet.
    self._organism = None
    self._organism_prefix = None
    self._pathways = None
def __init__(self, verbose=True, cache=False):
    """.. rubric:: Rhea constructor

    :param bool verbose: True by default
    :param bool cache: forwarded to the ``REST`` service (default is off)

    ::

        >>> from bioservices import Rhea
        >>> r = Rhea()

    """
    self.services = REST(
        name="Rhea",
        url=Rhea._url,
        verbose=verbose,
        cache=cache,
    )
def __init__(self, version="1.0", verbose=True, cache=False):
    """.. rubric:: Rhea constructor

    :param str version: the current version of the interface (1.0)
    :param bool verbose: True by default
    :param bool cache: forwarded to the ``REST`` service (default is off)

    ::

        >>> from bioservices import Rhea
        >>> r = Rhea()

    """
    rest_options = {"verbose": verbose, "cache": cache}
    self.services = REST(name="Rhea", url=Rhea._url, **rest_options)
    self.version = version
    # Entry formats listed for this interface.
    self.format_entry = ["cmlreact", "biopax2", "rxn"]
class BioCarta():
    """Interface to `BioCarta <http://www.biocarta.com>`_ pages

    This is not a REST interface actually but rather a parser to some of the
    HTML pages related to pathways.

    One can retrieve the pathways names and their list of proteins.

        >>> from bioservices import *
        >>> b = BioCarta()
        >>> b.organism = "Homo sapiens"
        >>> pathways = b.all_pathways
        >>> proteins = b.get_pathway_protein_names(pathways[0])

    .. warning:: biocarta pathways layout can be accessed from PID
    """
    _url = "http://cgap.nci.nih.gov/Pathways/BioCarta_Pathways"

    # Maps the organism name (as normalised by the ``organism`` setter) to the
    # one-letter prefix used in BioCarta pathway URLs.
    _organism_prefixes = {'Homo sapiens': 'h', 'Mus musculus': 'm'}
    organisms = set(_organism_prefixes.keys())

    # Class-level storage: the parsed pathway page is shared by all instances.
    _all_pathways = None
    _pathway_categories = None
    _all_pathways_url = "http://cgap.nci.nih.gov/Pathways/BioCarta_Pathways"

    def __init__(self, verbose=True, cache=False):
        """**Constructor**

        :param verbose: set to False to prevent informative messages
        :param cache: forwarded to the ``REST`` service (default is off)
        """
        self.services = REST(name="BioCarta", url=BioCarta._url, cache=cache,
                             verbose=verbose)
        self.fname = "biocarta_pathways.txt"

        self._organism = None
        self._organism_prefix = None
        self._pathways = None

    # set the default organism used by pathways retrieval
    def _get_organism(self):
        return self._organism

    def _set_organism(self, organism):
        # Normalise the case ("homo SAPIENS" -> "Homo sapiens") before lookup.
        organism = organism[:1].upper() + organism[1:].lower()
        if organism == self._organism:
            return
        if organism not in BioCarta.organisms:
            raise ValueError(
                "Invalid organism. Check the list in :attr:`organisms` attribute"
            )

        self._organism = organism
        self._organism_prefix = BioCarta._organism_prefixes[organism]
        # The pathway list depends on the organism: force a rebuild.
        self._pathways = None

    organism = property(_get_organism, _set_organism,
                        doc="returns the current default organism")

    def _get_pathway_categories(self):
        if self._pathway_categories is None:
            # NOTE(review): ``http_get_ou_post`` is called without arguments
            # and is not defined in this class -- confirm it exists on REST.
            self._pathway_categories = self.services.http_get_ou_post()
        return self._pathway_categories
    pathway_categories = property(_get_pathway_categories)

    def _get_all_pathways(self):
        """returns pathways from biocarta

        human and mouse organisms are available but only those corresponding
        to the organism defined in :attr:`organism` are returned.

        :return: sorted list of the last path component of each matching
            pathway link
        :raises ValueError: if :attr:`organism` has not been set
        """
        if self.organism is None:
            raise ValueError("Please set the organism attribute to one of %s" %
                             sorted(self._organism_prefixes))

        # The page is downloaded and parsed only once for the whole class.
        if BioCarta._all_pathways is None:
            BioCarta._all_pathways = readXML(self._all_pathways_url)

        if self._pathways is None:
            url_pattern = re.compile(
                "http://cgap.nci.nih.gov/Pathways/BioCarta/%s_(.+)[Pp]athway"
                % (self._organism_prefix))

            def is_pathway_url(tag):
                # Anchors without a "class" attribute only.
                return tag.name == "a" and not tag.has_attr("class")

            matches = BioCarta._all_pathways.findAll(is_pathway_url,
                                                     href=url_pattern)

            # Now let us select only the name.
            self._pathways = sorted(
                entry.attrs['href'].rsplit("/", 1)[1] for entry in matches
            )
        return self._pathways
    all_pathways = property(_get_all_pathways)

    def get_pathway_protein_names(self, pathway):
        """returns list of genes for the corresponding pathway

        This function scans an HTML page. We have not found another way to
        get the gene list in a more robust way. This function was tested on
        one pathway. Please use with caution.

        :param str pathway: pathway name appended to the BioCarta base URL
        :return: dictionary keyed by gene name; each value is a dictionary
            built from the two-column rows of the gene information table
        """
        # The logger is reached through the REST service; this class has no
        # ``logging`` attribute of its own.
        log = self.services.logging
        log.info("Fetching the pathway")

        # first identify gene from GeneInfo tag
        # this is not XML but HTML
        url = "http://cgap.nci.nih.gov/Pathways/BioCarta/%s" % pathway
        html_doc = urlopen(url).read()
        soup = BeautifulSoup(html_doc, 'html.parser')

        # <area> tags without an href are skipped instead of raising TypeError.
        links = set(
            area.attrs['href'] for area in soup.find_all('area')
            if 'GeneInfo' in (area.get('href') or '')
        )

        log.info("Scanning information about %s genes" % len(links))

        # open each page and get info
        genes = {}
        for link in links:
            html_doc = urlopen(link).read()
            soup = BeautifulSoup(html_doc, 'html.parser')

            table_gene_info = soup.findAll("table")[1]

            gene_name = link.rsplit("=", 1)[1]
            log.info(" - " + gene_name)

            genes[gene_name] = {}
            # Kept for backward compatibility (looks like a debugging aid):
            # the last parsed table stays available on the instance.
            self.tt = table_gene_info
            for row in table_gene_info.find_all('tr'):
                cells = row.find_all('td')
                # A key/value pair needs two cells; skip any other row.
                if len(cells) < 2:
                    continue
                key = cells[0].text.strip()
                value = cells[1].text.strip()
                if "[Text]" in key:
                    continue
                genes[gene_name][key] = value

        return genes
def __init__(self, verbose=False, cache=False):
    """Create the ``REST`` client for the BiGG API.

    :param bool verbose: forwarded to the ``REST`` service (default is off)
    :param bool cache: forwarded to the ``REST`` service (default is off)
    """
    # http://bigg.ucsd.edu/data_access
    rest_options = {
        "cache": cache,
        "verbose": verbose,
        "requests_per_sec": 10,
    }
    self.services = REST(name="BiGG", url=BiGG._url, **rest_options)
class BiGG():
    """
    Interface to the `BiGG Models <http://bigg.ucsd.edu/>`_ API Service.

    ::

        >>> from bioservices import BiGG
        >>> bigg = BiGG()
        >>> bigg.search("e coli", "models")
        [{'bigg_id': 'e_coli_core',
          'gene_count': 137,
          'reaction_count': 95,
          'organism': 'Escherichia coli str. K-12 substr. MG1655',
          'metabolite_count': 72},
          ...
        ]
    """

    _base_url = "http://bigg.ucsd.edu"
    _api_version = "v2"
    _url = "%s/api/%s" % (_base_url, _api_version)

    def __init__(self, verbose=False, cache=False):
        """Create the ``REST`` client for the BiGG API.

        :param bool verbose: forwarded to the ``REST`` service
        :param bool cache: forwarded to the ``REST`` service
        """
        # http://bigg.ucsd.edu/data_access
        self.services = REST(name="BiGG", url=BiGG._url, cache=cache,
                             requests_per_sec=10, verbose=verbose)

    def __len__(self):
        # Queries the service: the length is the size of :attr:`models`.
        return len(self.models)

    @property
    def version(self):
        """Response of the ``database_version`` request."""
        return self.services.http_get("database_version")

    def _http_get_results(self, *args, **kwargs):
        """Forward the call to ``http_get`` and return its ``"results"`` entry."""
        response = self.services.http_get(*args, **kwargs)
        return response["results"]

    @property
    def models(self):
        """``"results"`` entry of the ``models`` request."""
        return self._http_get_results("models")

    def _get_model_resource(self, type_, model_id, ids=None):
        """Fetch resources of kind *type_* belonging to model *model_id*.

        :param type_: must be in ``_ACCEPTABLE_MODEL_RESOURCE_TYPES``
        :param model_id: identifier of the model
        :param ids: optional identifier(s); when omitted, the listing for
            *type_* is returned, otherwise one query is issued per identifier
        :raises TypeError: if *type_* is not an acceptable resource type
        """
        if type_ not in _ACCEPTABLE_MODEL_RESOURCE_TYPES:
            raise TypeError("Unknown model resource type %s. Acceptable types are %s"
                            % (type_, _ACCEPTABLE_MODEL_RESOURCE_TYPES))

        query = "models/%s/%s" % (model_id, type_)

        if ids is None:
            return self._http_get_results(query)

        ids = sequencify(ids)
        queries = [("%s/%s" % (query, id_)) for id_ in ids]

        response = self.services.http_get(queries)
        return squash(response)

    def metabolites(self, model_id=None, ids=None):
        """Metabolites of *model_id*, or the universal list when it is None."""
        if model_id is None:
            return self._http_get_results("universal/metabolites")

        return self._get_model_resource("metabolites", model_id=model_id,
                                        ids=ids)

    def reactions(self, model_id=None, ids=None):
        """Reactions of *model_id*, or the universal list when it is None."""
        if model_id is None:
            return self._http_get_results("universal/reactions")

        return self._get_model_resource("reactions", model_id=model_id,
                                        ids=ids)

    def genes(self, model_id, ids=None):
        """Genes of *model_id*, optionally restricted to *ids*."""
        return self._get_model_resource("genes", model_id=model_id, ids=ids)

    def search(self, query, type_):
        """Run a search request.

        :param query: text sent as the ``query`` parameter
        :param type_: must be in ``_ACCEPTABLE_SEARCH_TYPES``; sent as
            ``search_type``
        :raises TypeError: if *type_* is not an acceptable search type
        """
        if type_ not in _ACCEPTABLE_SEARCH_TYPES:
            raise TypeError("Unknown type %s. Acceptable types are %s"
                            % (type_, _ACCEPTABLE_SEARCH_TYPES))

        params = {"query": query, "search_type": type_}
        return self._http_get_results("search", params=params)

    def download(self, model_id, format_="json", gzip=True, target=None):
        """Download a model file from the ``static/models`` area.

        :param model_id: identifier of the model
        :param format_: must be in ``_ACCEPTABLE_MODEL_DOWNLOAD_FORMATS``
        :param bool gzip: request the ``.gz`` variant of the file
        :param target: output filename; defaults to the remote file name
        :raises TypeError: if *format_* is not an acceptable format
        :raises: whatever ``response.raise_for_status()`` raises when the
            request is not successful (no file is written in that case)
        """
        if format_ not in _ACCEPTABLE_MODEL_DOWNLOAD_FORMATS:
            raise TypeError("Unknown format %s. Accepted types are %s."
                            % (format_, _ACCEPTABLE_MODEL_DOWNLOAD_FORMATS))

        path = "%s.%s" % (model_id, format_)
        if gzip:
            path += ".gz"

        if not target:
            target = path

        url = self.services._build_url("%s/static/models/%s"
                                       % (BiGG._base_url, path))

        response = self.services.session.get(url, stream=True)

        if response.ok:
            with open(target, "wb") as f:
                # iter_content() defaults to 1-byte chunks; use a real
                # buffer size so the file is not written byte by byte.
                for content in response.iter_content(chunk_size=8192):
                    f.write(content)
        else:
            response.raise_for_status()
class PathwayCommons():
    """Interface to the `PathwayCommons <http://www.pathwaycommons.org/about>`_ service

    >>> from bioservices import *
    >>> pc2 = PathwayCommons(verbose=False)
    >>> res = pc2.get("http://identifiers.org/uniprot/Q06609")

    """

    #: valid formats
    _valid_format = ["GSEA", "SBGN", "BIOPAX", "SIF", "TXT", "JSONLD"]
    _valid_directions = ["BOTHSTREAM", "UPSTREAM", "DOWNSTREAM", "UNDIRECTED"]
    _valid_patterns = [
        "CONTROLS_STATE_CHANGE_OF", "CONTROLS_PHOSPHORYLATION_OF",
        "CONTROLS_TRANSPORT_OF", "CONTROLS_EXPRESSION_OF",
        "IN_COMPLEX_WITH", "INTERACTS_WITH", "CATALYSIS_PRECEDES",
        "NEIGHBOR_OF", "CONSUMPTION_CONTROLLED_BY",
        "CONTROLS_TRANSPORT_OF_CHEMICAL", "CONTROLS_PRODUCTION_OF",
        "CHEMICAL_AFFECTS", "REACTS_WITH", "USED_TO_PRODUCE"]
    _url = "https://www.pathwaycommons.org"

    def __init__(self, verbose=True, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: prints informative messages
        :param bool cache: forwarded to the ``REST`` service
        """
        self.easyXMLConversion = False
        self._default_extension = "json"

        self.services = REST(name='PathwayCommons', url=PathwayCommons._url,
                             verbose=verbose, cache=cache)

    # just a get/set to the default extension
    def _set_default_ext(self, ext):
        self.services.devtools.check_param_in_list(ext, ["json", "xml"])
        self._default_extension = ext

    def _get_default_ext(self):
        return self._default_extension
    default_extension = property(_get_default_ext, _set_default_ext,
        doc="set extension of the requests (default is json). Can be 'json' or 'xml'")

    @staticmethod
    def _uri_query(uri):
        """Return the ``uri=...`` query fragment for one URI or several.

        :raises TypeError: if *uri* is not a string, list or tuple
        :raises ValueError: if *uri* is an empty list/tuple
        """
        if isinstance(uri, str):
            return "uri=" + uri
        if isinstance(uri, (list, tuple)):
            if not uri:
                raise ValueError("uri must contain at least one identifier")
            return "&".join("uri=" + u for u in uri)
        raise TypeError("uri must be a string or a list of strings")

    @staticmethod
    def _join_ids(ids):
        """Return *ids* (one string or a list of strings) comma-separated."""
        if isinstance(ids, str):
            ids = [ids]
        assert isinstance(ids, list)
        return ",".join(ids)

    def search(self, q, page=0, datasource=None, organism=None, type=None):
        """Text search in PathwayCommons using Lucene query syntax

        Some of the parameters are BioPAX properties, others are composite
        relationships.

        All index fields are (case-sensitive): comment, ecnumber,
        keyword, name, pathway, term, xrefdb, xrefid, dataSource, and organism.

        The pathway field maps to all participants of pathways that contain
        the keyword(s) in any of its text fields.

        Finally, keyword is a transitive aggregate field that includes all
        searchable keywords of that element and its child elements.

        All searches can also be filtered by data source and organism.

        It is also possible to restrict the domain class using the
        'type' parameter.

        This query can be used standalone or to retrieve starting points
        for graph searches.

        :param str q: requires a keyword, name, external identifier, or a
            Lucene query string.
        :param int page: (N>=0, default is 0), search result page number.
            A negative value is not sent; a warning is logged instead.
        :param str datasource: filter by data source (use names or URIs of
            pathway data sources or of any existing Provenance object). If
            multiple data source values are specified, a union of hits from
            specified sources is returned. datasource=[reactome,pid] returns
            hits associated with Reactome or PID.
        :param str organism: The organism can be specified either by
            official name, e.g. "homo sapiens" or by NCBI taxonomy id,
            e.g. "9606". Similar to data sources, if multiple organisms
            are declared a union of all hits from specified organisms
            is returned. For example organism=[9606, 10016] returns results
            for both human and mice.
        :param str type: BioPAX class filter. (e.g., 'pathway', 'proteinreference')

        .. doctest::

            >>> from bioservices import PathwayCommons
            >>> pc2 = PathwayCommons(verbose=False)
            >>> pc2.search("Q06609")
            >>> pc2.search("brca2", type="proteinreference",
                    organism="homo sapiens", datasource="pid")
            >>> pc2.search("name:'col5a1'", type="proteinreference", organism=9606)
            >>> pc2.search("a*", page=3)

        Find the FGFR2 keyword::

            pc2.search("FGFR2")

        Find pathways by FGFR2 keyword in any index field.::

            pc2.search("FGFR2", type="pathway")

        Finds control interactions that contain the word binding but not
        transcription in their indexed fields::

            pc2.search("binding NOT transcription", type="control")

        Find all interactions that directly or indirectly participate
        in a pathway that has a keyword match for "immune" (Note the star after
        immune)::

            pc2.search("pathway:immune*", type="conversion")

        Find all Reactome pathways::

            pc2.search("*", type="pathway", datasource="reactome")

        """
        if self.default_extension == "xml":
            url = "pc2/search.xml?q=%s" % q
        elif self.default_extension == "json":
            url = "pc2/search.json?q=%s" % q

        params = {}
        if page >= 0:
            params['page'] = page
        else:
            self.services.logging.warning("page should be >=0")

        if datasource:
            params['datasource'] = datasource
        if type:
            params['type'] = type
        if organism:
            params['organism'] = organism

        res = self.services.http_get(url, frmt=self.default_extension,
                                     params=params)

        if self.default_extension == "xml":
            # This class defines no easyXML method; use the service's.
            # NOTE(review): assumes REST exposes easyXML -- confirm.
            res = self.services.easyXML(res)

        return res

    def get(self, uri, frmt="BIOPAX"):
        """Retrieves full pathway information for a set of elements

        elements can be for example pathway, interaction or physical
        entity given the RDF IDs. Get commands only
        retrieve the BioPAX elements that are directly mapped to the ID.
        Use the :meth:`traverse` query to traverse BioPAX graph and
        obtain child/owner elements.

        :param str uri: valid/existing BioPAX element's URI (RDF ID; for
            utility classes that were "normalized", such as entity references
            and controlled vocabularies, it is usually an Identifiers.org URL).
            Multiple IDs can be provided using list
            uri=['http://identifiers.org/uniprot/Q06609',
            'http://identifiers.org/uniprot/Q549Z0']
            See also about MIRIAM and Identifiers.org.
        :param str frmt: output format, one of :attr:`_valid_format`
        :return: a complete BioPAX representation for the record
            pointed to by the given URI is returned. Other output formats are
            produced by converting the BioPAX record on demand and can be
            specified by the optional format parameter. Please be advised
            that with some output formats it might return "no result found"
            error if the conversion is not applicable for the BioPAX result.
            For example, BINARY_SIF output usually works if there are some
            interactions, complexes, or pathways in the retrieved set and
            not only physical entities.
        :raises TypeError: if *uri* is neither a string nor a list

        .. doctest::

            >>> from bioservices import PathwayCommons
            >>> pc2 = PathwayCommons(verbose=False)
            >>> res = pc2.get("col5a1")
            >>> res = pc2.get("http://identifiers.org/uniprot/Q06609")

        """
        self.services.devtools.check_param_in_list(frmt, self._valid_format)

        # e.g. pc2/get?uri=http://identifiers.org/uniprot/Q06609
        url = "pc2/get?" + self._uri_query(uri)

        if frmt != "BIOPAX":
            url += "&format=%s" % frmt

        # BIOPAX and SBGN answers are requested as xml, the others as text.
        if frmt.lower() in ["biopax", "sbgn"]:
            frmt = "xml"
        else:
            frmt = "txt"
        res = self.services.http_get(url, frmt=frmt)

        return res

    def top_pathways(self, query="*", datasource=None, organism=None):
        """This command returns all *top* pathways

        Pathways can be top or pathways that are neither
        'controlled' nor 'pathwayComponent' of another process.

        :param query: a keyword, name, external identifier or lucene query
            string like in 'search'. Default is "*"
        :param str datasource: filter by data source (same as search)
        :param str organism: organism filter. 9606 for human.

        :return: dictionary with information about top pathways. Check the
            "searchHit" key for information about "dataSource" for instance

        .. doctest::

            >>> from bioservices import PathwayCommons
            >>> pc2 = PathwayCommons(verbose=False)
            >>> res = pc2.top_pathways()

        https://www.pathwaycommons.org/pc2/top_pathways?q=TP53
        """
        if self.default_extension == "json":
            url = "pc2/top_pathways.json"
        else:
            url = "pc2/top_pathways"

        params = {}
        if datasource:
            params['datasource'] = datasource
        if organism:
            params['organism'] = organism
        params['q'] = query

        res = self.services.http_get(url, frmt=self.default_extension,
                                     params=params)

        if self.default_extension == "xml":
            # This class defines no easyXML method; use the service's.
            # NOTE(review): assumes REST exposes easyXML -- confirm.
            res = self.services.easyXML(res)
        return res

    def graph(self, kind, source, target=None, direction=None, limit=1,
              frmt=None, datasource=None, organism=None):
        """Finds connections and neighborhoods of elements

        Connections can be for example the shortest path between two proteins
        or the neighborhood for a particular protein state or all states.

        Graph searches take detailed BioPAX semantics such as generics or
        nested complexes into account and traverse the graph accordingly.
        The starting points can be either physical entities or entity
        references.

        In the case of the latter the graph search starts from ALL
        the physical entities that belong to that particular entity references,
        i.e. all of its states. Note that we integrate BioPAX data from
        multiple databases based on our proteins and small molecules data
        warehouse and consistently normalize UnificationXref, EntityReference,
        Provenance, BioSource, and ControlledVocabulary objects when we are
        absolutely sure that two objects of the same type are equivalent. We,
        however, do not merge physical entities and reactions from different
        sources as matching and aligning pathways at that level is still an
        open research problem. As a result, graph searches can return
        several similar but disconnected sub-networks that correspond to
        the pathway data from different providers (though some physical
        entities often refer to the same small molecule or protein reference
        or controlled vocabulary).

        :param str kind: graph query
        :param str source: source object's URI/ID. Multiple source URIs/IDs
            must be encoded as list of valid URI
            **source=['http://identifiers.org/uniprot/Q06609',
            'http://identifiers.org/uniprot/Q549Z0']**.
        :param str target: required for PATHSFROMTO graph query. target
            URI/ID. Multiple target URIs must be encoded as list (see source
            parameter).
        :param str direction: graph search direction in [BOTHSTREAM,
            DOWNSTREAM, UPSTREAM] see :attr:`_valid_directions` attribute.
        :param int limit: graph query search distance limit (default = 1).
        :param str frmt: output format. see :attr:`_valid_format`
        :param str datasource: datasource filter (same as for 'search').
        :param str organism: organism filter (same as for 'search').

        :return: By default, graph queries return a complete BioPAX
            representation of the subnetwork matched by the algorithm.
            Other output formats are available as specified by the optional
            format parameter. Please be advised that some output format
            choices might cause "no result found" error if the conversion is
            not applicable for the BioPAX result (e.g., BINARY_SIF output
            fails if there are no interactions, complexes, nor pathways
            in the retrieved set).

        .. doctest::

            >>> from bioservices import PathwayCommons
            >>> pc2 = PathwayCommons(verbose=False)
            >>> res = pc2.graph(source="http://identifiers.org/uniprot/P20908",
                    kind="neighborhood", frmt="EXTENDED_BINARY_SIF")

        """
        url = "pc2/graph"

        # Mandatory parameters first, then the optional ones that were given.
        params = {"source": source, "kind": kind, "limit": limit}
        if target:
            params['target'] = target
        if direction:
            params['direction'] = direction
        if frmt:
            params['format'] = frmt
        if datasource:
            params['datasource'] = datasource
        if organism:
            params['organism'] = organism

        res = self.services.http_get(url, frmt="txt", params=params)
        return res

    def traverse(self, uri, path):
        """Provides XPath-like access to the PC.

        The format of the path query is in the form::

            [InitialClass]/[property1]:[classRestriction(optional)]/[property2]...

        A "*" sign after the property instructs path accessor to transitively
        traverse that property. For example, the following path accessor will
        traverse through all physical entity components within a complex::

            "Complex/component*/entityReference/xref:UnificationXref"

        The following will list display names of all participants of
        interactions, which are components (pathwayComponent) of a pathway
        (note: pathwayOrder property, where same or other interactions can be
        reached, is not considered here)::

            "Pathway/pathwayComponent:Interaction/participant*/displayName"

        The optional parameter classRestriction allows to restrict/filter the
        returned property values to a certain subclass of the range of that
        property. In the first example above, this is used to get only the
        Unification Xrefs. Path accessors can use all the official BioPAX
        properties as well as additional derived classes and parameters in
        paxtools such as inverse parameters and interfaces that represent
        anonymous union classes in OWL. (See Paxtools documentation for more
        details).

        :param str uri: a biopax element URI - specified similar to the 'GET'
            command. multiple IDs are allowed as a list of strings.
        :param str path: a BioPAX property path in the form of
            property1[:type1]/property2[:type2]; see above, inverse
            properties, Paxtools,
            org.biopax.paxtools.controller.PathAccessor.

        .. seealso:: `properties
            <http://www.pathwaycommons.org/pc2/#biopax_properties>`_

        :return: XML result that follows the Search Response XML Schema
            (TraverseResponse type; pagination is disabled: returns all values
            at once)
        :raises TypeError: if *uri* is neither a string nor a list

        ::

            from bioservices import PathwayCommons
            pc2 = PathwayCommons(verbose=False)
            res = pc2.traverse(uri=['http://identifiers.org/uniprot/P38398','http://identifiers.org/uniprot/Q06609'], path="ProteinReference/organism")
            res = pc2.traverse(uri="http://identifiers.org/uniprot/Q06609",
                path="ProteinReference/entityReferenceOf:Protein/name")
            res = pc2.traverse("http://identifiers.org/uniprot/P38398",
                path="ProteinReference/entityReferenceOf:Protein")
            res = pc2.traverse(uri=["http://identifiers.org/uniprot/P38398",
                "http://identifiers.org/taxonomy/9606"], path="Named/name")

        """
        # A single "?" starts the query string.
        url = "pc2/traverse?" + self._uri_query(uri)
        url += "&path=" + path

        res = self.services.http_get(url, frmt="json")
        return res

    def get_sifgraph_neighborhood(self, source, limit=1,
                                  direction="BOTHSTREAM", pattern=None):
        """finds the neighborhood sub-network in the Pathway Commons Simple
        Interaction Format (extended SIF) graph (see
        http://www.pathwaycommons.org/pc2/formats#sif)

        :param source: set of gene identifiers (HGNC symbol). Can be a list of
            identifiers or just one string (if only one identifier)
        :param int limit: Graph traversal depth. Limit > 1 value can result
            in very large data or error.
        :param str direction: Graph traversal direction, one of
            :attr:`_valid_directions`. Use UNDIRECTED if you want
            to see interacts-with relationships too.
        :param str pattern: Filter by binary relationship (SIF edge) type,
            one of :attr:`_valid_patterns`.

        returns: the graph in SIF format. The output must be stripped and
            returns one line per relation. In each line, items are separated
            by a tabulation. You can save the text with .sif extensions and
            it should be ready to use e.g. in cytoscape viewer.

        ::

            res = pc.get_sifgraph_neighborhood('BRD4')

        """
        self.services.devtools.check_param_in_list(direction,
                                                   self._valid_directions)
        if pattern:
            self.services.devtools.check_param_in_list(pattern,
                                                       self._valid_patterns)
        assert limit >= 1

        params = {
            "source": self._join_ids(source),
            "limit": limit,
            "direction": direction}

        if pattern:
            params['pattern'] = pattern

        res = self.services.http_get("sifgraph/v1/neighborhood",
                                     params=params,
                                     headers=self.services.get_headers(content="text"))
        return res.content

    def get_sifgraph_common_stream(self, source, limit=1,
                                   direction="DOWNSTREAM", pattern=None):
        """finds the common stream for them; extracts a sub-network from the
        loaded Pathway Commons SIF model.

        :param source: set of gene identifiers (HGNC symbol). Can be a list of
            identifiers or just one string (if only one identifier)
        :param int limit: Graph traversal depth. Limit > 1 value can result
            in very large data or error.
        :param str direction: Graph traversal direction, one of
            :attr:`_valid_directions`. Use UNDIRECTED if you want
            to see interacts-with relationships too.
        :param str pattern: Filter by binary relationship (SIF edge) type,
            one of :attr:`_valid_patterns`.

        returns: the graph in SIF format, or None when the answer has no
            ``content``. The output must be stripped and returns one line
            per relation. In each line, items are separated by a tabulation.
            You can save the text with .sif extensions and it should be
            ready to use e.g. in cytoscape viewer.

        ::

            res = pc.get_sifgraph_common_stream(['BRD4', 'MYC'])
        """
        self.services.devtools.check_param_in_list(direction,
                                                   self._valid_directions)
        if pattern:
            self.services.devtools.check_param_in_list(pattern,
                                                       self._valid_patterns)
        assert limit >= 1

        params = {
            "source": self._join_ids(source),
            "limit": limit,
            "direction": direction}

        if pattern:
            params['pattern'] = pattern

        res = self.services.http_get("sifgraph/v1/commonstream",
                                     params=params,
                                     headers=self.services.get_headers(content="text"))
        try:
            return res.content
        except AttributeError:
            # if no match, returns code 406 and ""
            return None

    def get_sifgraph_pathsbetween(self, source, limit=1, directed=False,
                                  pattern=None):
        """finds the paths between them; extracts a sub-network from the
        Pathway Commons SIF graph.

        :param source: set of gene identifiers (HGNC symbol). Can be a list of
            identifiers or just one string (if only one identifier)
        :param int limit: Graph traversal depth. Limit > 1 value can result
            in very large data or error.
        :param bool directed: Directionality: 'true' is for
            DOWNSTREAM/UPSTREAM, 'false' - UNDIRECTED
        :param str pattern: Filter by binary relationship (SIF edge) type,
            one of :attr:`_valid_patterns`.

        returns: the graph in SIF format. The output must be stripped and
            returns one line per relation. In each line, items are separated
            by a tabulation. You can save the text with .sif extensions and
            it should be ready to use e.g. in cytoscape viewer.
        """
        if pattern:
            self.services.devtools.check_param_in_list(pattern,
                                                       self._valid_patterns)
        assert limit >= 1

        params = {
            "source": self._join_ids(source),
            "limit": limit,
            "directed": directed}

        if pattern:
            params['pattern'] = pattern

        res = self.services.http_get("sifgraph/v1/pathsbetween",
                                     params=params,
                                     headers=self.services.get_headers(content="text"))
        return res.content

    def get_sifgraph_pathsfromto(self, source, target, limit=1, pattern=None):
        """finds the paths between them; extracts a sub-network from the
        Pathway Commons SIF graph.

        :param source: set of gene identifiers (HGNC symbol). Can be a list of
            identifiers or just one string (if only one identifier)
        :param target: A target set of gene identifiers (same forms as
            *source*).
        :param int limit: Graph traversal depth. Limit > 1 value can result
            in very large data or error.
        :param str pattern: Filter by binary relationship (SIF edge) type,
            one of :attr:`_valid_patterns`.

        returns: the graph in SIF format. The output must be stripped and
            returns one line per relation. In each line, items are separated
            by a tabulation. You can save the text with .sif extensions and
            it should be ready to use e.g. in cytoscape viewer.
        """
        if pattern:
            self.services.devtools.check_param_in_list(pattern,
                                                       self._valid_patterns)
        assert limit >= 1

        params = {
            "source": self._join_ids(source),
            "target": self._join_ids(target),
            "limit": limit}

        if pattern:
            params['pattern'] = pattern

        res = self.services.http_get("sifgraph/v1/pathsfromto",
                                     params=params,
                                     headers=self.services.get_headers(content="text"))
        return res.content
class PDBe():
    """Interface to part of the `PDBe <http://www.ebi.ac.uk/pdbe>`_ service

    .. doctest::

        >>> from bioservices import PDBe
        >>> s = PDBe()
        >>> res = s.get_files("1FBV")

    Unless stated otherwise, the *query* argument of each method may be a
    single 4-character PDB id, a string of ids separated by commas, or a
    list of ids. A single id is requested with GET; several ids are sent
    together in one POST request.
    """
    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: prints informative messages (default is off)
        :param bool cache: forwarded to the underlying REST helper
        """
        url = "https://www.ebi.ac.uk/pdbe/api/pdb/entry/"
        self.services = REST(name="PDBe",
                             url=url,
                             verbose=verbose,
                             cache=cache)

    def _check_id(self, pdbid):
        """Validate PDB id(s) and return them as one comma-separated string.

        :param pdbid: a 4-character id, a comma-separated string of ids, or
            a list of ids
        :raises AssertionError: if any id is not 4 characters long
        :raises TypeError: if *pdbid* is neither a string nor a list
        """
        if isinstance(pdbid, list):
            pdbid = ",".join(pdbid)

        if isinstance(pdbid, str):
            for item in pdbid.split(","):
                assert len(item) == 4, "a 4-character PDB id code is required"
        else:
            raise TypeError(
                "pdb id must be either a 4-character pdb id, a list of valid PDB ids, or a string made of pdb ids, separated by commas"
            )
        return pdbid

    def _check_single_id(self, pdbid):
        """Validate that *pdbid* is exactly one 4-character id string.

        Used by the endpoints that have no multi-id form.

        :raises AssertionError: if *pdbid* does not have length 4
        :raises TypeError: if *pdbid* is not a plain string without commas
        """
        assert len(pdbid) == 4, "a 4-character PDB id code is required"
        if not isinstance(pdbid, str) or "," in pdbid:
            raise TypeError("a single 4-character PDB id string is required")
        return pdbid

    def _return(self, res):
        """Return *res*, replacing a 404 result by an empty dictionary."""
        if res == 404:
            return {}
        return res

    def _query(self, endpoint, query, post_endpoint=None):
        """GET ``endpoint/<id>`` for one id, or POST the ids for several.

        :param str endpoint: service name used for the GET request
        :param query: id(s), validated by :meth:`_check_id`
        :param str post_endpoint: service name used for the POST request
            when it differs from *endpoint* (some calls historically use a
            trailing slash); defaults to *endpoint*
        """
        query = self._check_id(query)
        if "," not in query:
            res = self.services.http_get("{}/{}".format(endpoint, query))
        else:
            if post_endpoint is None:
                post_endpoint = endpoint
            res = self.services.http_post(post_endpoint,
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_summary(self, query):
        """Returns summary of a PDB entry

        This can be title of the entry, list of depositors, date of
        deposition, date of release, date of latest revision, experimental
        method, list of related entries in case split entries, etc.

        :param query: a 4-character PDB id code

        ::

            p.get_summary('1cbs')
            p.get_summary('1cbs,2kv8')
            p.get_summary(['1cbs', '2kv8'])

        """
        return self._query("summary", query)

    def get_molecules(self, query):
        """Return details of molecules (or entities in mmcif-speak) modelled in the entry

        This can be entity id, description, type, polymer-type (if
        applicable), number of copies in the entry, sample preparation
        method, source organism(s) (if applicable), etc.

        :param query: a 4-character PDB id code

        ::

            p.get_molecules('1cbs')

        """
        return self._query("molecules", query)

    def get_related_publications(self, query):
        """Return publications obtained from both EuroPMC and UniProt.

        These are articles which cite the primary citation of the entry, or
        open-access articles which mention the entry id without explicitly
        citing the primary citation of an entry.

        :param query: a 4-character PDB id code

        ::

            p.get_related_publications('1cbs')

        """
        return self._query("related_publications", query,
                           post_endpoint="related_publications/")

    def get_experiment(self, query):
        """Provides details of experiment(s) carried out in determining the structure of the entry.

        Each experiment is described in a separate dictionary. For X-ray
        diffraction, the description consists of resolution, spacegroup,
        cell dimensions, R and Rfree, refinement program, etc. For NMR,
        details of spectrometer, sample, spectra, refinement, etc. are
        included. For EM, details of specimen, imaging, acquisition,
        reconstruction, fitting etc. are included.

        :param query: a 4-character PDB id code

        ::

            p.get_experiment('1cbs')

        """
        # The multi-id request previously posted to the literal "experiment/{}".
        return self._query("experiment", query)

    def get_nmr_resources(self, query):
        """This call provides URLs of available additional resources for NMR entries.

        E.g., mapping between structure (PDB) and chemical shift (BMRB)
        entries.

        :param query: a 4-character PDB id code

        ::

            p.get_nmr_resources('1cbs')

        """
        return self._query("nmr_resources", query,
                           post_endpoint="nmr_resources/")

    def get_ligand_monomers(self, query):
        """Provides a list of modelled instances of ligands,

        i.e. 'bound' molecules that are not waters.

        :param query: a 4-character PDB id code

        ::

            p.get_ligand_monomers('1cbs')

        """
        return self._query("ligand_monomers", query)

    def get_modified_residues(self, query):
        """Provides a list of modelled instances of modified amino acids or
        nucleotides in protein, DNA or RNA chains.

        :param query: a 4-character PDB id code

        ::

            p.get_modified_residues('4v5j')

        """
        return self._query("modified_AA_or_NA", query)

    def get_mutated_residues(self, query):
        """Provides a list of modelled instances of mutated amino acids or
        nucleotides in protein, DNA or RNA chains.

        :param query: a 4-character PDB id code

        ::

            p.get_mutated_residues('1bgj')

        """
        # Several ids are now POSTed like every other call (was http_get
        # with a data= payload).
        return self._query("mutated_AA_or_NA", query)

    def get_release_status(self, query):
        """Provides status of a PDB entry (released, obsoleted, on-hold etc)
        along with some other information such as authors, title,
        experimental method, etc.

        :param query: a 4-character PDB id code

        ::

            p.get_release_status('1cbs')

        """
        # Several ids previously went to http_get on the literal "status/{}".
        return self._query("status", query)

    def get_observed_ranges(self, query):
        """Provides observed ranges, i.e., segments of structural coverage of
        polymeric molecules that are modelled fully or partly

        :param query: a 4-character PDB id code

        ::

            p.get_observed_ranges('1cbs')

        """
        return self._query("polymer_coverage", query)

    def get_observed_ranges_in_pdb_chain(self, query, chain_id):
        """Provides observed ranges, i.e., segments of structural coverage of
        polymeric molecules in a particular chain

        :param query: a 4-character PDB id code
        :param chain_id: a PDB chain ID

        ::

            p.get_observed_ranges_in_pdb_chain('1cbs', "A")

        """
        assert len(query) == 4, "a 4-character PDB id code is required"
        res = self.services.http_get("polymer_coverage/{}/chain/{}".format(
            query, chain_id))
        return self._return(res)

    def get_secondary_structure(self, query):
        """Provides residue ranges of regular secondary structure

        (alpha helices and beta strands) found in protein chains of the
        entry. For strands, sheet id can be used to identify a beta sheet.

        :param query: a 4-character PDB id code

        ::

            p.get_secondary_structure('1cbs')

        """
        return self._query("secondary_structure", query,
                           post_endpoint="secondary_structure/")

    def get_residue_listing(self, query):
        """Provides lists all residues (modelled or otherwise) in the entry.

        Except waters, along with details of the fraction of expected atoms
        modelled for the residue and any alternate conformers.

        :param query: a single 4-character PDB id code
        :raises TypeError: if *query* is not one plain id string

        ::

            p.get_residue_listing('1cbs')

        """
        query = self._check_single_id(query)
        res = self.services.http_get("residue_listing/{}".format(query))
        return self._return(res)

    def get_residue_listing_in_pdb_chain(self, query, chain_id):
        """Provides all residues (modelled or otherwise) in one chain of the entry

        Except waters, along with details of the fraction of expected atoms
        modelled for the residue and any alternate conformers.

        :param query: a single 4-character PDB id code
        :param chain_id: a PDB chain ID
        :raises TypeError: if *query* is not one plain id string

        ::

            p.get_residue_listing_in_pdb_chain('1cbs', 'A')

        """
        query = self._check_single_id(query)
        # chain_id used to be passed to format() but never placed in the URL.
        res = self.services.http_get("residue_listing/{}/chain/{}".format(
            query, chain_id))
        return self._return(res)

    def get_binding_sites(self, query):
        """Provides details on binding sites in the entry

        STRUCT_SITE records in PDB files (or mmcif equivalent thereof), such
        as ligand, residues in the site, description of the site, etc.

        :param query: a 4-character PDB id code

        ::

            p.get_binding_sites('1cbs')

        """
        return self._query("binding_sites", query)

    def get_files(self, query):
        """Provides URLs and brief descriptions (labels) for PDB entry

        Also, for mmcif files, biological assembly files, FASTA file for
        sequences, SIFTS cross reference XML files, validation XML files,
        X-ray structure factor file, NMR experimental constraints files, etc.

        :param query: a 4-character PDB id code

        ::

            p.get_files('1cbs')

        """
        return self._query("files", query)

    def get_observed_residues_ratio(self, query):
        """Provides the ratio of observed residues for each chain in each molecule

        The list of chains within an entity is sorted by observed_ratio
        (descending order), partial_ratio (ascending order), and
        number_residues (descending order).

        :param query: a 4-character PDB id code

        ::

            p.get_observed_residues_ratio('1cbs')

        """
        return self._query("observed_residues_ratio", query)

    def get_assembly(self, query):
        """Provides information for each assembly of a given PDB ID.

        This information is broken down at the entity level for each
        assembly. The information given includes the molecule name, type and
        class, the chains where the molecule occur, and the number of copies
        of each entity in the assembly.

        :param query: a 4-character PDB id code

        ::

            p.get_assembly('1cbs')

        """
        return self._query("assembly", query)

    def get_electron_density_statistics(self, query):
        """This call details the statistics for electron density.

        :param query: a 4-character PDB id code

        ::

            p.get_electron_density_statistics('1cbs')

        """
        return self._query("electron_density_statistics", query)

    def get_functional_annotation(self, query):
        """Provides functional annotation of all ligands, i.e. 'bound'

        :param query: a 4-character PDB id code

        ::

            p.get_functional_annotation('1cbs')

        """
        return self._query("cofactor", query)

    def get_drugbank_annotation(self, query):
        """This call provides DrugBank annotation of all ligands, i.e. 'bound'

        :param query: a 4-character PDB id code

        ::

            p.get_drugbank_annotation('5hht')

        """
        return self._query("drugbank", query)

    def get_related_dataset(self, query):
        """Provides DOI's for related raw experimental datasets

        Includes diffraction image data, small-angle scattering data and
        electron micrographs.

        :param query: a 4-character PDB id code

        ::

            p.get_related_dataset('5o8b')

        """
        return self._query("related_experiment_data", query)
def __init__(self, verbose=True, cache=False):
    """Create the REST client used to reach the Reactome service.

    :param verbose: stored as ``self.debugLevel``; it is not forwarded to
        REST, which is always created with ``verbose="ERROR"``.
    :param cache: accepted but not used in this constructor; REST is always
        created with ``cache=False``.
        NOTE(review): confirm whether ignoring this argument is intentional.
    """
    self.services = REST(name="Reactome", url=Reactome._url, verbose="ERROR", cache=False)
    self.debugLevel = verbose
class Rhea():
    """Interface to the `Rhea <http://www.ebi.ac.uk/rhea/rest/1.0/>`_ service

    You can search by compound name, ChEBI ID, reaction ID, cross reference
    (e.g., EC number) or citation (author name, title, abstract text,
    publication ID). You can use double quotes - to match an exact phrase -
    and the following wildcards:

        * ? (question mark = one character),
        * `*` (asterisk = several characters).

    Searching for caffe* will find reactions with participants such as
    caffeine, trans-caffeic acid or caffeoyl-CoA::

        from bioservices import Rhea
        r = Rhea()
        response = r.search("caffe*")

    Searching for a?e?o* will find reactions with participants such as
    acetoin, acetone or adenosine.::

        from bioservices import Rhea
        r = Rhea()
        response = r.search("a?e?o*")

    The :meth:`search` and :meth:`query` methods require a list of valid
    columns. By default all columns are used but you can restrict to only a
    few. Here is the description of the columns:

        rhea-id : reaction identifier (with prefix RHEA)
        equation : textual description of the reaction equation
        chebi : comma-separated list of ChEBI names used as reaction participants
        chebi-id : comma-separated list of ChEBI identifiers used as reaction participants
        ec : comma-separated list of EC numbers (with prefix EC)
        uniprot : number of proteins (UniProtKB entries) annotated with the Rhea reaction
        pubmed : comma-separated list of PubMed identifiers (without prefix)

    and 5 cross-references:

        reaction-xref(EcoCyc)
        reaction-xref(MetaCyc)
        reaction-xref(KEGG)
        reaction-xref(Reactome)
        reaction-xref(M-CSA)
    """
    _url = "https://www.rhea-db.org"

    # Columns requested when the caller does not restrict them.
    _valid_columns = [
        'rhea-id', 'equation', 'chebi', 'chebi-id', 'ec', 'uniprot', 'pubmed',
        'reaction-xref(EcoCyc)', 'reaction-xref(MetaCyc)',
        'reaction-xref(KEGG)', 'reaction-xref(Reactome)',
        'reaction-xref(M-CSA)'
    ]

    def __init__(self, verbose=True, cache=False):
        """.. rubric:: Rhea constructor

        :param bool verbose: True by default
        :param bool cache: forwarded to the underlying REST helper

        ::

            >>> from bioservices import Rhea
            >>> r = Rhea()

        """
        self.services = REST(name="Rhea",
                             url=Rhea._url,
                             verbose=verbose,
                             cache=cache)

    def _request_table(self, endpoint, query, columns, limit, frmt):
        """Send one search request and try to parse the answer as a table.

        The query is passed through ``params`` rather than spliced into the
        URL, so the HTTP layer can escape it.

        :return: a pandas DataFrame when the response can be read as
            tab-separated text, otherwise the raw response.
        """
        params = {"query": query}
        if limit:
            params['limit'] = limit
        if columns:
            params['columns'] = columns
        elif columns is None:
            params['columns'] = ",".join(self._valid_columns)
        params['format'] = frmt

        response = self.services.http_get(endpoint, frmt="txt", params=params)

        # Best effort: pandas may be missing, or the response may not be
        # text; in both cases the raw response is handed back.
        try:
            import pandas as pd
            import io
            return pd.read_csv(io.StringIO(response), sep='\t')
        except Exception:
            return response

    def search(self, query, columns=None, limit=None, frmt='tsv'):
        """Search for Rhea (mimics https://www.rhea-db.org/)

        :param str query: the search term
        :param str columns: comma-separated column names; all columns of
            ``_valid_columns`` are requested when None
        :param int limit: maximum number of results to retrieve
        :param str frmt: the result format sent to the service (default tsv)
        :Returns: A pandas DataFrame, or the raw response if it cannot be
            parsed.

        ::

            >>> r = Rhea()
            >>> df = r.search("caffeine")
            >>> df = r.search("caffeine", columns='rhea-id,equation')

        """
        return self._request_table("rhea/", query, columns, limit, frmt)

    def query(self, query, columns=None, frmt="tsv", limit=None):
        """Retrieve a concrete reaction for the given id in a given format

        :param str query: the entry to retrieve
        :param str columns: comma-separated column names; all columns of
            ``_valid_columns`` are requested when None
        :param str frmt: the result format (tsv); only tsv accepted for now
            (Nov 2020).
        :param int limit: maximum number of results to retrieve
        :Returns: dataframe, or the raw response if it cannot be parsed

        Retrieve Rhea reaction identifiers and equation text::

            r.query("", columns="rhea-id,equation", limit=10)

        Retrieve Rhea reactions with enzymes curated in UniProtKB (only first
        10 entries)::

            r.query("uniprot:*", columns="rhea-id,equation", limit=10)

        To retrieve a specific entry::

            df = r.query("rhea:10661")

        .. versionchanged:: 1.8.0 (entry() method renamed in query() and no
            more format required. Must be given in the entry name e.g.
            query("10281.rxn") instead of entry(10281, format="rxn")
            the option *frmt* is now related to the result format

        """
        return self._request_table("rhea?", query, columns, limit, frmt)

    def get_metabolites(self, rxn_id):
        """Given a Rhea (http://www.rhea-db.org/) reaction id, returns its
        participant metabolites.

        :param rxn_id: Rhea reaction id
        :return: dict with two keys, ``"reactants"`` and ``"products"``,
            each holding the list of ``title`` attributes found in the
            response.
        """
        # NOTE(review): self.entry is not defined in this class as visible
        # here (the versionchanged note on query() says entry() was renamed),
        # so this call presumably fails with AttributeError -- confirm and
        # port to the current API.
        response = self.entry(rxn_id, frmt="cmlreact")

        reactants = [xx.attrs['title'] for xx in response.findAll("reactant")]
        products = [xx.attrs['title'] for xx in response.findAll("product")]
        return {"reactants": reactants, "products": products}
class COG():
    """Interface to the COG service

    ::

        from bioservices import COG
        c = COG()
        cogs = c.get_cogs()   # dictionary with results, count, previous and next page

    Every method sends one GET request and returns what the REST helper
    gives back for a JSON response.
    """

    _url = "https://www.ncbi.nlm.nih.gov/research/cog/api"

    def __init__(self, verbose=False, cache=False):
        """**Constructor**

        :param bool verbose: forwarded to the underlying REST helper
        :param bool cache: forwarded to the underlying REST helper
        """
        self.services = REST(name="cog",
                             url=COG._url,
                             verbose=verbose,
                             cache=cache)

    def get_cogs(self, page=1):
        """Get COGs. Unfortunately, the API sends 10 COGS at a time given a
        specific page.

        The dictionary returned contains the results, count, previous and
        next page.
        """
        res = self.services.http_get("cog", frmt="json", params={"page": page})
        return res

    def get_cogs_by_gene(self, gene):
        """Filter COGs by gene tag: MK0280"""
        res = self.services.http_get("cog", frmt="json", params={"gene": gene})
        return res

    def get_cogs_by_id(self, cog_id):
        """Filter COGs by COG ID tag: COG0003"""
        res = self.services.http_get("cog",
                                     frmt="json",
                                     params={"cog": cog_id})
        return res

    def get_cogs_by_assembly_id(self, assembly_id):
        """Filter COGs by assembly ID: GCA_000007185.1"""
        res = self.services.http_get("cog",
                                     frmt="json",
                                     params={"assembly": assembly_id})
        return res

    def get_cogs_by_organism(self, name):
        """Filter COGs by organism name: Nitrosopumilus_maritimus_SCM1"""
        res = self.services.http_get("cog",
                                     frmt="json",
                                     params={"organism": name})
        return res

    # Misspelled historical name, kept so existing callers keep working.
    get_cogs_by_orgnanism = get_cogs_by_organism

    def get_cogs_by_taxon_id(self, taxon_id):
        """Filter COGs by taxid: 1229908"""
        res = self.services.http_get("cog",
                                     frmt="json",
                                     params={"taxid": taxon_id})
        return res

    def get_cogs_by_category(self, category):
        """Filter COGs by Taxonomic Category: ACTINOBACTERIA"""
        res = self.services.http_get("cog",
                                     frmt="json",
                                     params={"category": category})
        return res

    def get_cogs_by_category_id(self, category):
        """Filter COGs by Taxonomic Category taxid: 651137"""
        res = self.services.http_get("cog",
                                     frmt="json",
                                     params={"cat_taxid": category})
        return res

    def get_cogs_by_protein_name(self, protein):
        """Filter COGs by Protein name: AJP49128.1"""
        res = self.services.http_get("cog",
                                     frmt="json",
                                     params={"protein": protein})
        return res

    # Historical name of the protein filter, kept for backward compatibility.
    get_cogs_by_category_ = get_cogs_by_protein_name

    # The search keywords (cog, assembly, organism, taxid, category,
    # cat_taxid and protein) can be combined to filter the COG lists.
    def get_cogs_by_id_and_category(self, cog_id, category):
        """Filter COGs by COG id and Taxonomy Categories: COG0004 and CYANOBACTERIA"""
        res = self.services.http_get("cog",
                                     frmt="json",
                                     params={
                                         "cog": cog_id,
                                         "category": category
                                     })
        return res

    def get_cogs_by_id_and_organism(self, cog_id, organism):
        """Filter COGs by COG id and organism: COG0004 and Escherichia_coli_K-12_sub_MG1655"""
        # The key used to be "organism," (stray comma), so the organism
        # filter was sent under the wrong parameter name.
        res = self.services.http_get("cog",
                                     frmt="json",
                                     params={
                                         "cog": cog_id,
                                         "organism": organism
                                     })
        return res

    def get_all_cogs_definition(self):
        """Get all COG Definitions:"""
        res = self.services.http_get("cogdef", frmt="json")
        return res

    def get_cog_definition_by_cog_id(self, cog_id):
        """Get specific COG Definitions by COG: COG0003"""
        res = self.services.http_get("cogdef",
                                     frmt="json",
                                     params={"cog": cog_id})
        return res

    def get_cog_definition_by_name(self, cog):
        """Get specific COG Definitions by name: Thiamin-binding stress-response protein YqgV, UPF0045 family"""
        res = self.services.http_get("cogdef",
                                     frmt="json",
                                     params={"name": cog})
        return res

    def get_taxonomic_categories(self):
        """Get all Taxonomic Categories:"""
        res = self.services.http_get("taxonomy", frmt="json")
        return res

    def get_taxonomic_category_by_name(self, name):
        """Get specific Taxonomic Category by name: ALPHAPROTEOBACTERIA"""
        res = self.services.http_get("taxonomy",
                                     frmt="json",
                                     params={"name": name})
        return res
class MyGeneInfo():
    """Interface to `mygene.info <http://mygene.info>`_ service

    .. doctest::

        >>> from bioservices import MyGeneInfo
        >>> s = MyGeneInfo()

    """
    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: prints informative messages (default is off)
        :param bool cache: forwarded to the underlying REST helper
        """
        url = "https://mygene.info/v3"
        self.services = REST(name="MyGeneInfo",
                             url=url,
                             verbose=verbose,
                             cache=cache)

    def get_genes(self,
                  ids,
                  fields="symbol,name,taxid,entrezgene,ensemblgene",
                  species=None,
                  dotfield=True,
                  email=None):
        """Get matching gene objects for a list of gene ids

        :param ids: list of geneinfo IDs
        :param str fields: a comma-separated fields to limit the fields
            returned from the matching gene hits. The supported field names
            can be found from any gene object (e.g.
            http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If
            "fields=all", all available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param str species: can be used to limit the gene hits from given
            species. You can use "common names" for nine common species
            (human, mouse, rat, fruitfly, nematode, zebrafish, thale-cress,
            frog and pig). All other species, you can provide their taxonomy
            ids. Multiple species can be passed using comma as a separator.
            Only sent to the service when given.
        :param dotfield: control the format of the returned fields when
            passed "fields" parameter contains dot notation, e.g.
            "fields=refseq.rna". If True the returned data object contains a
            single "refseq.rna" field, otherwise (False), a single "refseq"
            field with a sub-field of "rna". Default: True.
        :param str email: If you are regular users of this services, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that we can better track the usage or follow up with you.

        ::

            mgi = MyGeneInfo()
            mgi.get_genes(("301345,22637"))
            # first one is rat, second is mouse. This will return a 'notfound'
            # entry and the second entry as expected.
            mgi.get_genes("301345,22637", species="mouse")

        """
        params = {"ids": ids, "fields": fields}
        if email:  # pragma: no cover
            params["email"] = email

        assert dotfield in [True, False]
        params["dotfield"] = dotfield

        if species:
            params["species"] = species

        # The arguments travel in the form-encoded request body.
        res = self.services.http_post(
            "gene",
            data=params,
            frmt="json",
            headers={
                "User-Agent": self.services.getUserAgent(),
                "accept": "application/json",
                "Content-Type": "application/x-www-form-urlencoded"
            })
        return res

    def get_one_gene(self,
                     geneid,
                     fields="symbol,name,taxid,entrezgene,ensemblgene",
                     dotfield=True,
                     email=None):
        """Get matching gene objects for one gene id

        :param geneid: a valid gene ID
        :param str fields: a comma-separated fields to limit the fields
            returned from the matching gene hits. The supported field names
            can be found from any gene object (e.g.
            http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If
            "fields=all", all available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param dotfield: control the format of the returned fields when
            passed "fields" parameter contains dot notation, e.g.
            "fields=refseq.rna". If True the returned data object contains a
            single "refseq.rna" field, otherwise (False), a single "refseq"
            field with a sub-field of "rna". Default: True.
        :param str email: If you are regular users of this services, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that we can better track the usage or follow up with you.

        ::

            mgi = MyGeneInfo()
            mgi.get_one_gene("301345")

        """
        params = {"ids": geneid, "fields": fields}
        if email:  # pragma: no cover
            params["email"] = email

        assert dotfield in [True, False]
        params["dotfield"] = dotfield

        res = self.services.http_get(f"gene/{geneid}",
                                     params=params,
                                     frmt="json")
        return res

    def get_one_query(self,
                      query,
                      email=None,
                      dotfield=True,
                      fields="symbol,name,taxid,entrezgene,ensemblgene",
                      species="human,mouse,rat",
                      size=10,
                      _from=0,
                      sort=None,
                      facets=None,
                      entrezonly=False,
                      ensemblonly=False):
        """Make gene query and return matching gene list. Support JSONP and CORS as well.

        :param str query: Query string. Examples "CDK2", "NM_052827",
            "204639_at", "chr1:151,073,054-151,383,976",
            "hg19.chr1:151073054-151383976". The detailed query syntax can be
            found from our docs.
        :param str fields: a comma-separated fields to limit the fields
            returned from the matching gene hits. The supported field names
            can be found from any gene object (e.g.
            http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If
            "fields=all", all available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param str species: can be used to limit the gene hits from given
            species. You can use "common names" for nine common species
            (human, mouse, rat, fruitfly, nematode, zebrafish, thale-cress,
            frog and pig). All other species, you can provide their taxonomy
            ids. Multiple species can be passed using comma as a separator.
            Default: human,mouse,rat.
        :param int size: the maximum number of matching gene hits to return
            (with a cap of 1000 at the moment). Default: 10.
        :param int _from: the number of matching gene hits to skip, starting
            from 0. Combining with "size" parameter, this can be useful for
            paging. Default: 0.
        :param sort: the comma-separated fields to sort on. Prefix with "-"
            for descending order, otherwise in ascending order. Default: sort
            by matching scores in descending order.
        :param str facets: a single field or comma-separated fields to return
            facets, for example, "facets=taxid", "facets=taxid,type_of_gene".
        :param bool entrezonly: when passed as True, the query returns only
            the hits with valid Entrez gene ids. Default: False.
        :param bool ensemblonly: when passed as True, the query returns only
            the hits with valid Ensembl gene ids. Default: False.
        :param dotfield: control the format of the returned fields when
            passed "fields" parameter contains dot notation, e.g.
            "fields=refseq.rna". If True the returned data object contains a
            single "refseq.rna" field, otherwise (False), a single "refseq"
            field with a sub-field of "rna". Default: True.
        :param str email: If you are regular users of this services, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that we can better track the usage or follow up with you.

        """
        # NOTE(review): *species* is accepted but has never been sent to the
        # service by this method; left as is because sending it would change
        # results for existing callers -- confirm the intended behaviour.
        params = {"fields": fields, "size": size, "from": _from}
        if email:  # pragma: no cover
            params["email"] = email

        assert dotfield in [True, False]
        params["dotfield"] = dotfield

        if sort:
            params["sort"] = sort
        if facets:  # pragma: no cover
            # was `sort`: the facets argument was silently replaced
            params["facets"] = facets

        assert entrezonly in [True, False]
        params["entrezonly"] = entrezonly
        assert ensemblonly in [True, False]
        # was `entrezonly`: the ensemblonly flag was never honoured
        params["ensemblonly"] = ensemblonly

        res = self.services.http_get(f"query?q={query}",
                                     params=params,
                                     frmt="json")
        return res

    def get_queries(
        self,
        query,
        email=None,
        dotfield=True,
        scopes="all",
        species="human,mouse,rat",
        fields="symbol,name,taxid,entrezgene,ensemblgene",
    ):
        """Make gene query and return matching gene list. Support JSONP and CORS as well.

        :param str query: Query string. Examples "CDK2", "NM_052827",
            "204639_at", "chr1:151,073,054-151,383,976",
            "hg19.chr1:151073054-151383976". The detailed query syntax can be
            found from our docs.
        :param str fields: a comma-separated fields to limit the fields
            returned from the matching gene hits. The supported field names
            can be found from any gene object (e.g.
            http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If
            "fields=all", all available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param str species: can be used to limit the gene hits from given
            species. You can use "common names" for nine common species
            (human, mouse, rat, fruitfly, nematode, zebrafish, thale-cress,
            frog and pig). All other species, you can provide their taxonomy
            ids. Multiple species can be passed using comma as a separator.
            Default: human,mouse,rat.
        :param dotfield: control the format of the returned fields when
            passed "fields" parameter contains dot notation, e.g.
            "fields=refseq.rna". If True the returned data object contains a
            single "refseq.rna" field, otherwise (False), a single "refseq"
            field with a sub-field of "rna". Default: True.
        :param str email: If you are regular users of this services, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that we can better track the usage or follow up with you.
        :param str scopes: not documented. Set to 'all'

        """
        # NOTE(review): *species* is accepted but not sent to the service
        # here either -- confirm the intended behaviour.
        params = {"q": query, "fields": fields, "scopes": scopes}
        if email:  # pragma: no cover
            params["email"] = email
        assert dotfield in [True, False]
        params["dotfield"] = dotfield

        res = self.services.http_post("query",
                                      params=params,
                                      frmt="json",
                                      headers={
                                          "User-Agent":
                                          self.services.getUserAgent(),
                                          "accept":
                                          "application/json",
                                          "Content-Type":
                                          "application/x-www-form-urlencoded"
                                      })
        return res

    def get_metadata(self):
        """Return the content of the service's ``metadata`` endpoint."""
        res = self.services.http_get("metadata", frmt="json")
        return res

    def get_taxonomy(self):
        """Return the ``taxonomy`` entry of the ``metadata`` endpoint."""
        res = self.services.http_get("metadata", frmt="json")
        return res['taxonomy']
class Panther():
    """Interface to `Panther <http://www.pantherdb.org/services/oai/pantherdb>`_ pages

    ::

        >>> from bioservices import Panther
        >>> p = Panther()
        >>> p.get_supported_genomes()
        >>> p.get_ortholog("zap70", 9606)

        >>> from bioservices import Panther
        >>> p = Panther()
        >>> taxon = [x['taxon_id'] for x in p.get_supported_genomes()
        ...          if "coli" in x['name'].lower()]
        >>> # you may also use our method called get_taxon_id
        >>> taxon = p.get_taxon_id(pattern="coli")
        >>> res = p.get_mapping("abrB,ackA,acuI", taxon)

    The get_mapping returns for each gene ID the GO terms corresponding to each
    ID. Those go terms may belong to different categories (see
    :meth:`get_annotation_datasets`):

    - MF for molecular function
    - BP for biological process
    - PC for Protein class
    - CC Cellular location
    - Pathway

    Note that results from the website application http://pantherdb.org/ do not
    agree with the output of the get_mapping service... Try out the dgt gene
    from ecoli for example
    """
    _url = "http://www.pantherdb.org/services/oai/pantherdb"

    def __init__(self, verbose=True, cache=False):
        """**Constructor**

        :param verbose: set to False to prevent informative messages
        :param cache: forwarded as-is to the underlying REST helper
        """
        self.services = REST(name="Panther", url=Panther._url,
                             verbose=verbose, cache=cache)
        self._allPathwaysURL = "http://www.pantherdb.org/pathway/pathwayList.jsp"

    def get_pathways(self):
        """Returns all pathways from pantherdb"""
        return self.services.http_get("supportedpantherpathways")

    def get_supported_genomes(self, type=None):
        """Returns list of supported organisms.

        :param type: can be chrLoc to restrict the search
        :return: the content of ``search/output/genomes/genome`` as a list
        """
        # Only send the 'type' filter when the caller supplied one.
        params = {'type': type} if type is not None else {}
        res = self.services.http_get("supportedgenomes", params=params)
        return list(res["search"]["output"]["genomes"]['genome'])

    def get_taxon_id(self, pattern=None):
        """Return the taxon IDs supported by the service

        :param pattern: optional case-insensitive substring used to filter
            the genomes on their name.
        :return: without pattern, the list of all taxon IDs. With a pattern,
            the single matching taxon ID if exactly one genome matches,
            otherwise the list of candidate taxon IDs (possibly empty).
        """
        res = self.get_supported_genomes()
        if not pattern:
            return [x["taxon_id"] for x in res]

        needle = pattern.lower()
        taxon = [x['taxon_id'] for x in res if needle in x['name'].lower()]
        if len(taxon) == 1:
            return taxon[0]
        return taxon

    def get_mapping(self, gene_list, taxon):
        """Map identifiers

        Each identifier to be delimited by comma i.e. ','. Maximum of 1000
        Identifiers can be any of the following: Ensemble gene identifier,
        Ensemble protein identifier, Ensemble transcript identifier, Entrez
        gene id, gene symbol, NCBI GI, HGNC Id, International protein index id,
        NCBI UniGene id, UniProt accession and UniProt id

        :param gene_list: see above
        :param taxon: one taxon ID. See supported
            :meth:`~bioservices.panther.Panther.get_supported_genomes`
        :return: a dictionary with two keys, ``mapped`` and ``unmapped``,
            each of them being a list.

        .. warning:: found and not found identifiers are dispatched into
            unmapped and mapped genes. If there are not found identifiers,
            the input gene list and the mapped genes list do not have the same
            length. The input names are not stored in the output.
            Developers should be aware of that feature.
        """
        params = {"geneInputList": gene_list, "organism": taxon}
        res = self.services.http_post("geneinfo", params=params, frmt='json')
        search = res['search']

        if "mapped_genes" in search:
            mapped_genes = search['mapped_genes']['gene']
            # if only one identifier, returns a dictionary.
            # if several identifiers, returns a list of dictionary.
            # We will be consistent and return a list
            if "accession" in mapped_genes:
                mapped_genes = [mapped_genes]
        else:
            mapped_genes = [{}]

        if "unmapped_list" in search:
            unmapped_genes = search['unmapped_list']["unmapped"]
            if not isinstance(unmapped_genes, list):
                unmapped_genes = [unmapped_genes]
        else:
            unmapped_genes = []

        # Only complain when something was actually left unmapped.
        if unmapped_genes:
            logger.warning("Some identifiers were not found")
        return {"unmapped": unmapped_genes, "mapped": mapped_genes}

    def get_enrichment(self, gene_list, organism, annotation,
                       enrichment_test="Fisher", correction="FDR",
                       ref_gene_list=None):
        """Returns over represented genes

        Compares a test gene list to a reference gene list, and determines
        whether a particular class (e.g. molecular function, biological
        process, cellular component, PANTHER protein class, the PANTHER
        pathway or Reactome pathway) of genes is overrepresented or
        underrepresented.

        :param gene_list: comma-delimited list of identifiers to test
        :param organism: a valid taxon ID
        :param enrichment_test: either **Fisher** or **Binomial** test
        :param correction: correction for multiple testing. Either **FDR**,
            **Bonferroni**, or **None**.
        :param annotation: one of the supported PANTHER annotation data types.
            See :meth:`~bioservices.panther.Panther.get_annotation_datasets`
            to retrieve a list of supported annotation data types
        :param ref_gene_list: if not specified, the system will use all the
            genes for the specified organism. Otherwise, a list delimited by
            comma. Maximum of 100000 Identifiers can be any of the following:
            Ensemble gene identifier, Ensemble protein identifier, Ensemble
            transcript identifier, Entrez gene id, gene symbol, NCBI GI, HGNC
            Id, International protein index id, NCBI UniGene id, UniProt
            accession and UniProt id.
        :return: the ``results`` entry of the service answer when there is
            one; otherwise the raw answer of the service is returned
            unchanged.

        ::

            >>> from bioservices import Panther
            >>> p = Panther()
            >>> res = p.get_enrichment('zap70,mek1,erk', 9606, "GO:0008150")

        For molecular function, use::

            >>> res = p.get_enrichment('zap70,mek1,erk', 9606,
            ...     "ANNOT_TYPE_ID_PANTHER_GO_SLIM_MF")
        """
        assert enrichment_test.lower() in ['fisher', 'binomial']
        if correction is None:
            correction = 'none'
        assert correction.lower() in ['fdr', 'bonferroni', 'none']

        # This is a bug in panther DB where they used bonferonni . should be
        # bonferroni...
        if correction.lower() == "bonferroni":
            correction = "bonferonni"
        assert annotation in [x['id'] for x in self.get_annotation_datasets()]

        params = {'enrichmentTestType': enrichment_test.upper()}
        params['organism'] = organism
        if gene_list:
            params['geneInputList'] = gene_list
        if ref_gene_list:
            params['refInputList'] = ref_gene_list
        params['annotDataSet'] = annotation
        params['correction'] = correction.upper()

        # An exception raised by the request itself now propagates; it used
        # to be replaced by an UnboundLocalError on the unbound 'res'.
        res = self.services.http_post("enrich/overrep", params=params,
                                      frmt="json")
        try:
            return res['results']
        except (KeyError, TypeError):
            # No 'results' entry, or an answer that cannot be indexed by
            # key: hand back whatever the service returned.
            return res

    def get_annotation_datasets(self):
        """Retrieve the list of supported annotation data sets"""
        res = self.services.http_get("supportedannotdatasets")
        return res["search"]["annotation_data_sets"]["annotation_data_type"]

    def get_ortholog(self, gene_list, organism, target_organism=None,
                     ortholog_type="all"):
        """search for matching orthologs in target organisms.

        Searches for matching orthologs in the gene family that contains the
        search gene associated with the search terms. Returns ortholog genes
        in target organisms given a search organism, the search terms and a
        list of target organisms.

        :param gene_list: identifiers to search for
        :param organism: a valid taxon ID
        :param target_organism: zero or more taxon IDs separated by ','. See
            :meth:`~bioservices.panther.Panther.get_supported_genomes`
        :param ortholog_type: optional parameter to specify ortholog type of
            target organism ('LDO' or 'all')
        :return: a dictionary with "mapped" and "unmapped" keys. "unmapped"
            is always a list (empty when the answer holds no unmapped
            entries); "mapped" is the ``mapped`` entry of the answer,
            returned as-is.
        """
        assert ortholog_type in ['LDO', 'all']
        params = {"geneInputList": gene_list, "organism": organism}
        # targetOrganism is optional and must not be sent when unset
        if target_organism is not None:
            params["targetOrganism"] = target_organism
        params["orthologType"] = ortholog_type

        res = self.services.http_get("ortholog/matchortho", frmt='json',
                                     params=params)
        mapping = res['search']['mapping']
        mapped = mapping['mapped']

        try:
            unmapped = mapping['unmapped_ids']['unmapped']
        except (KeyError, TypeError):
            unmapped = []
        else:
            # make sure we always have a list
            if isinstance(unmapped, dict):
                unmapped = [unmapped]
        return {"unmapped": unmapped, "mapped": mapped}

    def get_homolog_position(self, gene, organism, position,
                             ortholog_type="all"):
        """Query the ``ortholog/homologpos`` service for one gene

        :param gene: Can be any of the following: Ensemble gene identifier,
            Ensemble protein identifier, Ensemble transcript identifier,
            Entrez gene id, gene symbol, NCBI GI, HGNC Id, International
            protein index id, NCBI UniGene id, UniProt accession and UniProt
            id
        :param organism: a valid taxon ID
        :param position: position sent as ``pos``; must be >= 1
        :param ortholog_type: optional parameter to specify ortholog type of
            target organism ('LDO' or 'all')
        :return: the ``mapped`` entry of the answer if present, else the
            ``unmapped_ids`` entry (a warning is logged), else None.
        """
        if "," in gene:
            logger.warning(
                "did not expect a comma. Please provide only one gene name")
        assert ortholog_type in ['LDO', 'all']
        assert position >= 1

        params = {
            "gene": gene,
            "organism": organism,
            "pos": position,
            "orthologType": ortholog_type,
        }
        res = self.services.http_get("ortholog/homologpos", params=params,
                                     frmt="json")
        res = res['search']['mapping']
        if "mapped" in res:
            return res['mapped']
        elif "unmapped_ids" in res:
            logger.warning("did not find any match for {}".format(gene))
            return res["unmapped_ids"]

    def get_supported_families(self, N=1000, progress=True):
        """Returns the list of supported PANTHER family IDs

        This services returns only 1000 items per request. This is defined by
        the index. For instance index set to 1 returns the first 1000
        families. Index set to 2 returns families between index 1000 and 2000
        and so on. As of 20 Feb 2020, there was about 15,000 families.

        This function simplifies your life by calling the service as many
        times as required. Therefore it returns all families in one go.

        :param N: expected number of items per request
        :param progress: show a progress bar while fetching the pages
        :raises ValueError: if the first page does not hold exactly N items
        """
        params = {'startIndex': 1}
        res = self.services.http_get("supportedpantherfamilies", params=params)
        results = res['search']['panther_family_subfam_list']['family']
        if len(results) != N:
            msg = "looks like the services changed. Call this function with N={}"
            msg = msg.format(len(results))
            raise ValueError(msg)

        number_of_families = res['search']['number_of_families']
        extra_pages = int(number_of_families / N)

        # Honour progress=False: the bar is only created when requested.
        pb = None
        if progress:
            from easydev import Progress
            pb = Progress(extra_pages)
            pb.animate(1)

        for i in range(1, extra_pages + 1):
            params = {'startIndex': i * N + 1}
            res = self.services.http_get("supportedpantherfamilies",
                                         params=params)
            data = res['search']['panther_family_subfam_list']['family']
            results.extend(data)
            if pb is not None:
                pb.animate(i)
        return results

    def get_family_ortholog(self, family, taxon_list=None):
        """Search for matching orthologs in target organisms

        Also return the corresponding position in the target organism
        sequence. The system searches for matching orthologs in the gene
        family that contains the search gene associated with the search term.

        :param family: Family ID
        :param taxon_list: Zero or more taxon IDs separated by ','.
        """
        params = {"family": family}
        if taxon_list:
            params['taxonFltr'] = taxon_list
        res = self.services.http_get("familyortholog", params=params,
                                     frmt="json")
        return res['search']['ortholog_list']['ortholog']

    def get_family_msa(self, family, taxon_list=None):
        """Returns MSA information for the specified family.

        :param family: family ID
        :param taxon_list: Zero or more taxon IDs separated by ','.
        """
        params = {"family": family}
        if taxon_list:
            params['taxonFltr'] = taxon_list
        res = self.services.http_get("familymsa", params=params, frmt="json")
        return res['search']['MSA_list']['sequence_info']

    def get_tree_info(self, family, taxon_list=None):
        """Returns tree topology information and node attributes for the
        specified family.

        :param family: Family ID
        :param taxon_list: Zero or more taxon IDs separated by ','.
        :return: the whole ``search`` entry of the answer
        """
        params = {"family": family}
        if taxon_list:
            params['taxonFltr'] = taxon_list
        res = self.services.http_get("treeinfo", params=params, frmt="json")
        return res['search']
class Reactome():
    """Interface to the Reactome ContentService REST API

    Every public method is a thin wrapper sending one GET request relative to
    :attr:`_url` through ``self.services`` and returning the answer.

    .. todo:: interactors, orthology, participants, person, query, references,
        schema
    """
    _url = "https://reactome.org/ContentService"

    def __init__(self, verbose=True, cache=False):
        """**Constructor**

        :param verbose: stored in ``debugLevel``; it is not forwarded to the
            REST helper, which is always created with ``verbose="ERROR"``.
        :param cache: accepted but not used; the REST helper is always
            created with ``cache=False``.
        """
        # NOTE(review): 'cache' is ignored here, unlike the sibling services
        # -- confirm whether this is deliberate before forwarding it.
        self.services = REST(name="Reactome", url=Reactome._url,
                             verbose="ERROR", cache=False)
        self.debugLevel = verbose

    @property
    def version(self):
        """Answer of ``data/database/version`` (text)"""
        return self.services.http_get("data/database/version", frmt="txt")

    @property
    def name(self):
        """Answer of ``data/database/name`` (text)"""
        return self.services.http_get("data/database/name", frmt="txt")

    def get_discover(self, identifier):
        """The schema.org for an Event in Reactome knowledgebase

        For each event (reaction or pathway) this method generates a json file
        representing the dataset object as defined by schema.org (http). This
        is mainly used by search engines in order to index the data

        ::

            r.get_discover("R-HSA-446203")
        """
        return self.services.http_get(f"data/discover/{identifier}",
                                      frmt="json")

    def get_diseases(self):
        """list of diseases objects"""
        return self.services.http_get("data/diseases", frmt="json")

    def get_diseases_doid(self):
        """retrieves the list of disease DOIDs annotated in Reactome

        :return: dictionary built from the text answer: each non-blank line
            is split on whitespace into a (key, value) pair. The DOIDs are
            contained in the values().
        """
        res = self.services.http_get("data/diseases/doid", frmt="txt")
        # Skip blank lines (e.g. a trailing newline) that cannot be split
        # into a pair.
        return dict(line.split() for line in res.splitlines() if line.strip())

    def get_interactors_psicquic_molecule_details(self):
        """Retrieve clustered interaction, sorted by score, of a given
        accession by resource."""
        raise NotImplementedError

    def get_interactors_psicquic_molecule_summary(self):
        """Retrieve a summary of a given accession by resource"""
        raise NotImplementedError

    def get_interactors_psicquic_resources(self):
        """Retrieve a list of all Psicquic Registries services"""
        raise NotImplementedError

    def get_interactors_static_molecule_details(self):
        """Retrieve a detailed interaction information of a given accession"""
        raise NotImplementedError

    def get_interactors_static_molecule_pathways(self):
        """Retrieve a list of lower level pathways where the interacting
        molecules can be found"""
        raise NotImplementedError

    def get_interactors_static_molecule_summary(self):
        """Retrieve a summary of a given accession"""
        raise NotImplementedError

    def get_exporter_fireworks(self):
        """Not implemented"""
        raise NotImplementedError

    def get_exporter_reaction(self):
        """Not implemented"""
        raise NotImplementedError

    def get_exporter_diagram(self, identifier, ext="png", quality=5,
                             diagramProfile="Modern",
                             analysisProfile="Standard", filename=None):
        """Export a given pathway diagram to raster file

        This method accepts identifiers for Event class instances. When a
        diagrammed pathway is provided, the diagram is exported to the
        specified format. When a subpathway is provided, the diagram for the
        parent is exported and the events that are part of the subpathways
        are selected. When a reaction is provided, the diagram containing the
        reaction is exported and the reaction is selected.

        :param identifier: Event identifier (it can be a pathway with
            diagram, a subpathway or a reaction)
        :param ext: File extension (defines the image format) in png, jpeg,
            jpg, svg, gif
        :param quality: Result image quality between [1 - 10]. It defines the
            quality of the final image (Default 5)
        :param diagramProfile: Diagram Color Profile
        :param analysisProfile: Analysis Color Profile
        :param filename: if given, save the results in the provided filename

        The service options flg, sel, token and expColumn are not
        implemented.

        :return: raw data if filename parameter is not set. Otherwise, the
            data is saved in the filename and the function returns None
        """
        assert ext in ['png', 'jpg', 'jpeg', 'svg', "gif"]
        assert quality in range(11)
        assert diagramProfile in ["Modern", "Standard"]
        assert analysisProfile in ["Standard", "Strosobar", "Copper Plus"]

        params = {
            "diagramProfile": diagramProfile,
            "analysisProfile": analysisProfile,
            "quality": quality,
        }
        res = self.services.http_get(
            f"exporter/diagram/{identifier}.{ext}", params=params, frmt=ext)

        if not filename:
            return res

        # SVG answers are written in text mode, every other format in
        # binary mode.
        mode = "w" if ext == "svg" else "wb"
        with open(filename, mode) as fout:
            fout.write(res)

    def get_complex_subunits(self, identifier,
                             excludeStructuresSpecifies=False):
        """A list with the entities contained in a given complex

        Retrieves the list of subunits that constitute any given complex. In
        case the complex comprises other complexes, this method recursively
        traverses the content returning each contained PhysicalEntity.
        Contained complexes and entity sets can be excluded setting the
        'excludeStructures' optional parameter to 'true'

        :param identifier: The complex for which subunits are requested
        :param excludeStructuresSpecifies: Specifies whether contained
            complexes and entity sets are excluded in the response. Sent to
            the service as the ``excludeStructures`` query parameter.

        ::

            r.get_complex_subunits("R-HSA-5674003")
        """
        # The service option is named 'excludeStructures'; the Python
        # argument keeps its historical name for backward compatibility.
        params = {"excludeStructures": excludeStructuresSpecifies}
        return self.services.http_get(
            f"data/complex/{identifier}/subunits", params=params,
            frmt="json")

    def get_complexes(self, resources, identifier):
        """A list of complexes containing the pair (identifier, resource)

        Retrieves the list of complexes that contain a given (identifier,
        resource). The method deconstructs the complexes into all its
        participants to do so.

        :param resources: The resource of the identifier for complexes are
            requested (e.g. UniProt)
        :param identifier: The identifier for which complexes are requested

        ::

            r.get_complexes(resources, identifier)
            r.get_complexes("UniProt", "P43403")
        """
        return self.services.http_get(
            f"data/complexes/{resources}/{identifier}", frmt="json")

    def get_entity_componentOf(self, identifier):
        """A list of larger structures containing the entity

        Retrieves the list of structures (Complexes and Sets) that include
        the given entity as their component. It should be mentioned that the
        list includes only simplified entries (type, names, ids) and not full
        information about each item.

        ::

            r.get_entity_componentOf("R-HSA-199420")
        """
        return self.services.http_get(
            f"data/entity/{identifier}/componentOf", frmt="json")

    def get_entity_otherForms(self, identifier):
        """All other forms of PhysicalEntity

        Retrieves a list containing all other forms of the given
        PhysicalEntity. These other forms are PhysicalEntities that share the
        same ReferenceEntity identifier, e.g. PTEN H93R[R-HSA-2318524] and
        PTEN C124R[R-HSA-2317439] are two forms of PTEN.

        ::

            r.get_entity_otherForms("R-HSA-199420")
        """
        return self.services.http_get(
            f"data/entity/{identifier}/otherForms", frmt="json")

    def get_event_ancestors(self, identifier):
        """The ancestors of a given event

        The Reactome definition of events includes pathways and reactions.
        Although events are organised in a hierarchical structure, a single
        event can be in more than one location, i.e. a reaction can take part
        in different pathways while, in the same way, a sub-pathway can take
        part in many pathways. Therefore, this method retrieves a list of all
        possible paths from the requested event to the top level pathway(s).

        :param identifier: The event for which the ancestors are requested

        ::

            r.get_event_ancestors("R-HSA-5673001")
        """
        return self.services.http_get(
            f"data/event/{identifier}/ancestors", frmt="json")

    def get_eventsHierarchy(self, species):
        """The full event hierarchy for a given species

        Events (pathways and reactions) in Reactome are organised in a
        hierarchical structure for every species. By following all 'hasEvent'
        relationships, this method retrieves the full event hierarchy for any
        given species. The result is a list of tree structures, one for each
        TopLevelPathway. Every event in these trees is represented by a
        PathwayBrowserNode. The latter contains the stable identifier, the
        name, the species, the url, the type, and the diagram of the
        particular event.

        :param species: Allowed species filter: SpeciesName (eg: Homo
            sapiens) SpeciesTaxId (eg: 9606)

        ::

            r.get_eventsHierarchy(9606)
        """
        return self.services.http_get(f"data/eventsHierarchy/{species}",
                                      frmt="json")

    def get_exporter_sbml(self, identifier):
        """Export given Pathway to SBML

        :param identifier: DbId or StId of the requested database object

        ::

            r.get_exporter_sbml("R-HSA-68616")
        """
        return self.services.http_get(f"exporter/sbml/{identifier}.xml",
                                      frmt="xml")

    def get_pathway_containedEvents(self, identifier):
        """All the events contained in the given event

        Events are the building blocks used in Reactome to represent all
        biological processes, and they include pathways and reactions.
        Typically, an event can contain other events. For example, a pathway
        can contain smaller pathways and reactions. This method recursively
        retrieves all the events contained in any given event.

        ::

            res = r.get_pathway_containedEvents("R-HSA-5673001")
        """
        return self.services.http_get(
            f"data/pathway/{identifier}/containedEvents", frmt="json")

    def get_pathway_containedEvents_by_attribute(self, identifier, attribute):
        """A single property for each event contained in the given event

        Events are the building blocks used in Reactome to represent all
        biological processes, and they include pathways and reactions.
        Typically, an event can contain other events. For example, a pathway
        can contain smaller pathways (subpathways) and reactions. This method
        recursively retrieves a single attribute for each of the events
        contained in the given event.

        :param identifier: The event for which the contained events are
            requested
        :param attribute: Attribute to be filtered
        :return: the text answer stripped of its first and last character and
            split on commas into a list of stripped strings. If the answer
            cannot be sliced/split that way it is returned unchanged.

        ::

            r.get_pathway_containedEvents_by_attribute("R-HSA-5673001", "stId")
        """
        res = self.services.http_get(
            f"data/pathway/{identifier}/containedEvents/{attribute}",
            frmt="txt")
        try:
            # res[1:-1] drops the enclosing pair of characters of the answer
            res = [x.strip() for x in res[1:-1].split(",")]
        except (TypeError, AttributeError):
            # answer is not a string-like object: return it untouched
            pass
        return res

    def get_pathways_low_diagram_entity(self, identifier):
        """A list of lower level pathways with diagram containing a given
        entity or event

        This method traverses the event hierarchy and retrieves the list of
        all lower level pathways that have a diagram and contain the given
        PhysicalEntity or Event.

        :param identifier: The entity that has to be present in the pathways

        ::

            r.get_pathways_low_diagram_entity("R-HSA-199420")
        """
        return self.services.http_get(
            f"data/pathways/low/diagram/entity/{identifier}", frmt="json")

    def get_pathways_low_diagram_entity_allForms(self, identifier):
        """Query ``data/pathways/low/diagram/entity/<identifier>/allForms``

        ::

            r.get_pathways_low_diagram_entity_allForms("R-HSA-199420")
        """
        return self.services.http_get(
            f"data/pathways/low/diagram/entity/{identifier}/allForms",
            frmt="json")

    def get_pathways_low_diagram_identifier_allForms(self, identifier):
        """Query ``data/pathways/low/diagram/identifier/<identifier>/allForms``

        ::

            r.get_pathways_low_diagram_identifier_allForms("PTEN")
        """
        return self.services.http_get(
            f"data/pathways/low/diagram/identifier/{identifier}/allForms",
            frmt="json")

    def get_pathways_low_entity(self, identifier):
        """A list of lower level pathways containing a given entity or event

        This method traverses the event hierarchy and retrieves the list of
        all lower level pathways that contain the given PhysicalEntity or
        Event.

        ::

            r.get_pathways_low_entity("R-HSA-199420")
        """
        return self.services.http_get(
            f"data/pathways/low/entity/{identifier}", frmt="json")

    def get_pathways_low_entity_allForms(self, identifier):
        """A list of lower level pathways containing any form of a given
        entity

        This method traverses the event hierarchy and retrieves the list of
        all lower level pathways that contain the given PhysicalEntity in any
        of its variant forms. These variant forms include for example
        different post-translationally modified versions of a single protein,
        or the same chemical in different compartments.

        ::

            r.get_pathways_low_entity_allForms("R-HSA-199420")
        """
        return self.services.http_get(
            f"data/pathways/low/entity/{identifier}/allForms", frmt="json")

    def get_pathways_top(self, species):
        """Query ``data/pathways/top/<species>``"""
        return self.services.http_get(f"data/pathways/top/{species}",
                                      frmt="json")

    def get_references(self, identifier):
        """All referenceEntities for a given identifier

        Retrieves a list containing all the reference entities for a given
        identifier.

        ::

            r.get_references(15377)
        """
        return self.services.http_get(f"references/mapping/{identifier}",
                                      frmt="json")

    def get_mapping_identifier_pathways(self, resource, identifier):
        """Query ``data/mapping/<resource>/<identifier>/pathways``"""
        return self.services.http_get(
            f"data/mapping/{resource}/{identifier}/pathways", frmt="json")

    def get_mapping_identifier_reactions(self, resource, identifier):
        """Query ``data/mapping/<resource>/<identifier>/reactions``"""
        return self.services.http_get(
            f"data/mapping/{resource}/{identifier}/reactions", frmt="json")

    def search_facet(self):
        """A list of facets corresponding to the whole Reactome search data

        This method retrieves faceting information on the whole Reactome
        search data.
        """
        return self.services.http_get("search/facet", frmt="json")

    def search_facet_query(self, query):
        """A list of facets corresponding to a specific query

        This method retrieves faceting information on a specific query
        """
        return self.services.http_get(f"search/facet_query?query={query}",
                                      frmt="json")

    def search_query(self, query):
        """Queries Solr against the Reactome knowledgebase

        This method performs a Solr query on the Reactome knowledgebase.
        Results can be provided in a paginated format.
        """
        return self.services.http_get(f"search/query?query={query}",
                                      frmt="json")

    def search_spellcheck(self, query):
        """Spell-check suggestions for a given query

        This method retrieves a list of spell-check suggestions for a given
        search term.
        """
        return self.services.http_get(f"search/spellcheck?query={query}",
                                      frmt="json")

    def search_suggest(self, query):
        """Autosuggestions for a given query

        This method retrieves a list of suggestions for a given search term.

        ::

            >>> r.search_suggest("apopt")
            ['apoptosis', 'apoptosome', 'apoptosome-mediated', 'apoptotic']
        """
        return self.services.http_get(f"search/suggest?query={query}",
                                      frmt="json")

    def get_species_all(self):
        """the list of all species in Reactome"""
        return self.services.http_get("data/species/all", frmt="json")

    def get_species_main(self):
        """the list of main species in Reactome

        ::

            r.get_species_main()
        """
        return self.services.http_get("data/species/main", frmt="json")
class Seqret():
    """Interface to the `Seqret <http://www.ebi.ac.uk/readseq>`_ service

    ::

        >>> from bioservices import *
        >>> s = Seqret()

    The ReadSeq service was replaced by the Seqret services (2015).

    .. versionchanged:: 0.15
    """

    def __init__(self, verbose=True):
        """.. rubric:: Constructor

        :param bool verbose: forwarded to the REST helper
        """
        self.services = REST(
            name="seqret",
            url="https://www.ebi.ac.uk/Tools/services/rest/emboss_seqret",
            verbose=verbose)
        # Filled lazily on first access of the ``parameters`` property.
        self._parameters = None

    def get_parameters(self):
        """Get a list of the parameter names.

        :returns: the ``parameters`` entry of the service answer.
        """
        answer = self.services.http_get("parameters", frmt="json")
        return answer['parameters']

    def _get_parameters(self):
        # Ask the service only while nothing (truthy) has been cached yet.
        if not self._parameters:
            self._parameters = self.get_parameters()
        return self._parameters

    parameters = property(_get_parameters, doc="Get list of parameter names")

    def get_parameter_details(self, parameterId):
        """Get details of a specific parameter.

        :param str parameterId: identifier/name of the parameter to fetch
            details of.
        :return: a data structure describing the parameter and its values.
        :raises ValueError: if parameterId is not one of :attr:`parameters`

        ::

            rs = Seqret()
            print(rs.get_parameter_details("stype"))
        """
        known = self.parameters
        if parameterId not in known:
            raise ValueError(
                "Invalid parameterId provided(%s). See parameters attribute"
                % parameterId)
        return self.services.http_get("parameterdetails/" + parameterId,
                                      frmt="json")

    def run(self, email, title, **kargs):
        """Submit a job to the service.

        :param str email: user e-mail address.
        :param str title: job title.
        :param kargs: parameters for the tool as returned by
            :meth:`get_parameter_details`. ``sequence`` is mandatory.
        :return: string containing the job identifier (jobId), which is also
            stored in ``_jobid``.

        Deprecated (old readseq service)::

            Format Name         Value
            Auto-detected       0
            EMBL                4
            GenBank             2
            Fasta(Pearson)      8
            Clustal/ALN         22
            ACEDB               25
            BLAST               20
            DNAStrider          6
            FlatFeat/FFF        23
            GCG                 5
            GFF                 24
            IG/Stanford         1
            MSF                 15
            NBRF                3
            PAUP/NEXUS          17
            Phylip(Phylip4)     12
            Phylip3.2           11
            PIR/CODATA          14
            Plain/Raw           13
            SCF                 21
            XML                 19

        As output, you also have Pretty 18

        ::

            s = Seqret()
            jobid = s.run("name@example.org", "test", sequence=fasta,
                          inputformat=8, outputformat=2)
            genbank = s.get_result(s._jobid)
        """
        checker = self.services.devtools.check_param_in_list

        # Every keyword must be a parameter name known by the service.
        for name in kargs:
            checker(name, self.parameters)
        assert "sequence" in kargs

        payload = {"email": email, "title": title}
        option_names = ('stype', 'inputformat', 'outputformat', "feature",
                        "firstonly", "reverse", 'outputcase', 'seqrange')
        for option in option_names:
            if option not in kargs:
                continue
            chosen = kargs[option]
            # The value (as a string) must be one of the values listed in
            # the parameter details.
            description = self.get_parameter_details(option)
            allowed = [entry['value']
                       for entry in description['values']['values']]
            checker(str(chosen), allowed)
            payload[option] = chosen

        payload['sequence'] = kargs['sequence']
        self._jobid = self.services.http_post("run", frmt="txt", data=payload)
        return self._jobid

    def get_status(self, jobid=None):
        """Get the status of a submitted job.

        :param str jobid: job identifier.
        :return: string containing the status.

        The values for the status are:

        - RUNNING: the job is currently being processed.
        - FINISHED: job has finished, and the results can then be retrieved.
        - ERROR: an error occurred attempting to get the job status.
        - FAILURE: the job failed.
        - NOT_FOUND: the job cannot be found.
        """
        return self.services.http_get("status/{}".format(jobid), frmt="txt")

    def get_result_types(self, jobid):
        """Get the available result types for a finished job.

        :param str jobid: job identifier.
        :return: the ``identifier`` of each entry found under ``types`` in
            the service answer.
        """
        answer = self.services.http_get("resulttypes/{}".format(jobid),
                                        frmt="json")
        return [entry['identifier'] for entry in answer["types"]]

    def get_result(self, jobid, result_type="out"):
        """Get the result of a job of the specified type.

        :param str jobid: job identifier.
        :param result_type: name of the result to retrieve.
        :return: the text answer of the service, or None (after a warning)
            when the job status is not FINISHED.
        """
        status = self.get_status(jobid)
        if status != 'FINISHED':
            self.services.logging.warning(
                "Your job is not finished yet. Try again later.")
            return
        endpoint = "result/{}/{}".format(jobid, result_type)
        return self.services.http_get(endpoint, frmt="txt")
class ENA():
    """Interface to the `ENA <http://www.ebi.ac.uk/ena/index.php>`_ service

    Here is a quick example to create the service

    .. doctest::

        >>> from bioservices import ENA
        >>> e = ENA(verbose=False)

    Retrieve read domain metadata in XML format::

        print(e.get_data('ERA000092', 'xml'))

    Retrieve assembled and annotated sequences in fasta format::

        print(e.get_data('A00145', 'fasta'))

    The range parameter can be used in combination to retrieve a subsequence
    from sequence entry A00145 from bases 3 to 63 using ::

        e.get_data('A00145', 'fasta', fasta_range=[3,63])

    Retrieve assembled and annotated subsequences in HTML format (same as
    above but in HTML page)::

        e.view_data('A00145')

    Retrieve expanded CON records:

    To retrieve expanded CON records use the expanded=true parameter. For
    example, the expanded CON entry AL513382 in flat file format can be
    obtained as follows::

        e.get_data('AL513382', frmt='text', expanded=True)

    Expanded CON records are different from CON records in two ways. Firstly,
    the expanded CON records contain the full sequence in addition to the
    contig assembly instructions. Secondly, if a CON record contains only
    source or gap features the expanded CON records will also display all
    features from the segment records.

    Retrieve assembled and annotated sequence header in flat file format

    To retrieve assembled and annotated sequence header in flat file format
    please use the header=true parameter, e.g.::

        e.get_data('BN000065', 'text', header=True)

    Retrieve assembled and annotated sequence records using sequence
    versions::

        e.get_data('AM407889.1', 'fasta')
        e.get_data('AM407889.2', 'fasta')

    .. warning:: :meth:`get_data` currently accepts ``fasta_range``,
        ``expanded``, ``header`` and ``download`` for backward compatibility
        but does not forward them to the service; no ``view_data`` method is
        defined in this class either.
    """
    url = "http://www.ebi.ac.uk/ena/browser/api"

    # Maps each supported ENA output format onto the ``frmt`` argument given
    # to ``services.http_get`` for that format.
    _http_formats = {
        "text": "txt",
        "fasta": "txt",
        "fastq": "txt",
        "embl": "txt",
        "html": "default",
        "xml": "xml",
    }

    def __init__(self, verbose=False, cache=False):
        """**Constructor**

        :param verbose: set to False to prevent informative messages
        :param cache: forwarded to the underlying REST service
        """
        self.services = REST(name="ENA", url=ENA.url, verbose=verbose,
                             cache=cache)
        self.services.TIMEOUT = 100

    def get_data(self, identifier, frmt, fasta_range=None, expanded=None,
                 header=None, download=None):
        """Retrieve the record *identifier* in the requested format.

        :param str identifier: accession of the entry (e.g. ``"AL513382"``)
        :param str frmt: one of xml, text, fasta, fastq, html, embl; which
            ones are actually available depends on the entry
        :param fasta_range: accepted for backward compatibility, ignored
        :param expanded: accepted for backward compatibility, ignored
        :param header: accepted for backward compatibility, ignored
        :param download: accepted for backward compatibility, ignored
        :return: whatever ``services.http_get`` returns for the request
        :raises ValueError: if *frmt* is not one of the supported formats

        Example::

            get_data("AL513382", "embl")

        ENA API changed in 2020 but we tried to keep the same services in
        this method.
        """
        try:
            http_frmt = self._http_formats[frmt]
        except KeyError:
            # Previously an unknown format fell through every branch and
            # crashed with UnboundLocalError on the return statement.
            raise ValueError(
                "Unsupported format {!r}. Correct values are {}".format(
                    frmt, sorted(self._http_formats))) from None

        url = f"{self.url}/{frmt}/{identifier}"
        return self.services.http_get(url, frmt=http_frmt)

    def data_warehouse(self):
        """Placeholder for the ENA data warehouse search (not implemented)."""
        #http://www.ebi.ac.uk/ena/data/warehouse/search?query="geo_circ(-0.587,-90.5713,170)"&result=sequence_release&display=text&download=gzip
        pass

    def get_taxon(self, taxon):
        """Deprecated: only prints a deprecation notice and returns None."""
        print("deprecated since v.7.8 due to ENA update")
class ArrayExpress():
    """Interface to the `ArrayExpress <http://www.ebi.ac.uk/arrayexpress>`_ service

    ArrayExpress allows to retrieve data sets used in various experiments.

    **QuickStart** Given an experiment name (e.g., E-MEXP-31), type::

        s = ArrayExpress()
        s.getAE('E-MEXP-31')

    You can also quickly retrieve experiments matching some search queries as
    follows::

        s.queryAE(keywords="pneumonia", species='homo+sapiens')

    Now let us look at other methods. If you know the file and experiment
    name, you can retrieve a specific file as follows::

        >>> from bioservices import ArrayExpress
        >>> s = ArrayExpress()
        >>> # retrieve a specific file from a experiment
        >>> res = s.retrieveFile("E-MEXP-31", "E-MEXP-31.idf.txt")

    The main issue is that you may not know the experiment you are looking
    for. You can query experiments by keyword::

        >>> # Search for experiments
        >>> res = s.queryExperiments(keywords="cancer+breast", wholewords=True)

    keywords used in queries follows these rules:

    * Accession number and keyword searches are case insensitive
    * More than one keyword can be searched for using the + sign
      (e.g. keywords="cancer+breast")
    * Use an asterisk as a multiple character wild card
      (e.g. keywords="colo*")
    * use a question mark ? as a single character wild card
      (e.g. keywords="te?t")

    More complex queries can be constructed using the operators AND, OR or
    NOT. AND is the default if no operator is specified. Either experiments
    or files can be searched for. Examples are::

        keywords="prostate+AND+breast"
        keywords="prostate+breast"      # same as above
        keywords="prostate+OR+breast"
        keywords="prostate+NOT+breast "

    .. note:: the examples below were written for the XML interface (objects
        exposing ``getchildren``). The search methods of this class request
        the ``json`` endpoint, and the helper methods index the result as
        ``res['experiments']['experiment']``.

    The returned objects are XML parsed with beautifulSoup. You can get all
    experiments using the getChildren method:

    .. doctest::
        :options: +SKIP

        >>> res = s.queryExperiments(keywords="breast+cancer")
        >>> len(res.getchildren())
        1487

    If you know what you are looking for, you can give the experiment name::

        >>> res = s.retrieveExperiment("E-MEXP-31")
        >>> exp = res.getchildren()[0]   # it contains only one experiment
        >>> [x.text for x in exp.getchildren() if x.tag == "name"]
        ['Transcription profiling of mammalian male germ cells undergoing mitotic growth, meiosis and gametogenesis in highly enriched cell populations']

    Using the same example, you can retrieve the names of the files related
    to the experiment::

        >>> files = [x.getchildren() for x in exp.getchildren() if x.tag == "files"]
        >>> [x.get("name") for x in files[0]]
        ['E-MEXP-31.raw.1.zip',
         'E-MEXP-31.processed.1.zip',
         'E-MEXP-31.idf.txt',
         'E-MEXP-31.sdrf.txt']

    New in version 1.3.7 you can use the method :meth:`getAE`

    Then, you may want to download a particular file::

        >>> s.retrieveFile("E-MEXP-31", "E-MEXP-31.idf.txt")

    .. seealso:: :meth:`queryFiles` for more details about the parameters to
        be used in queries.

    .. warning:: supports only new style (v2). You can still use the old
        style by setting the request manually using the :attr:`version`
        attribute.

    .. warning:: some syntax requires the + character, which is a special
        character for http requests. It is replaced internally by spaces if
        found

    .. warning:: filtering is not implemented (e.g., assaycount:[x TO y]
        syntax.)
    """

    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: prints informative messages
        :param bool cache: forwarded to the underlying REST service
        """
        self.services = REST(name="ArrayExpress",
                             url="http://www.ebi.ac.uk/arrayexpress",
                             cache=cache, verbose=verbose)
        self.version = "v2"

    def _search(self, mode, **kargs):
        """common function to search for files or experiments

        :param str mode: either "experiments" or "files"
        :param kargs: search parameters, see :meth:`queryFiles`
        :return: the result of ``services.http_get`` with ``frmt="json"``
        :raises ValueError: on an unknown parameter name, or when ``gxa`` /
            ``directsub`` is given something that is neither true nor false
        """
        assert mode in ["experiments", "files"]
        url = "{0}/{1}/{2}".format("json", self.version, mode)

        defaults = {
            "accession": None,  # ex: E-MEXP-31
            "keywords": None,
            "species": None,
            "wholewords": "on",
            "expdesign": None,
            "exptype": None,
            "gxa": "true",
            "pmid": None,
            "sa": None,
            "ef": None,  # e.g., CellType
            "efv": None,  # e.g., HeLa
            "array": None,  # ex: A-AFFY-33
            "expandfo": "on",
            "directsub": "true",
            "sortby": [
                "accession", "name", "assays", "species", "releasedate",
                "fgem", "raw", "atlas"
            ],
            "sortorder": ["ascending", "descending"],
        }

        for k in kargs:
            if k not in defaults:
                raise ValueError(
                    "Incorrect value provided ({}). Correct values are {}".
                    format(k, sorted(defaults.keys())))

        truthy = ["on", True, "true", "TRUE", "True"]
        falsy = [False, "false", "False"]

        params = {}
        for k, v in kargs.items():
            if k in ["expandfo", "wholewords"]:
                # These flags are only ever sent as "on"; any other value
                # leaves the parameter out of the request.
                if v in truthy:
                    params[k] = "on"
            elif k in ["gxa", "directsub"]:
                if v in truthy:
                    params[k] = "true"
                elif v in falsy:
                    params[k] = "false"
                else:
                    raise ValueError("{} must be true or false".format(k))
            else:
                if k in ["sortby", "sortorder"]:
                    self.services.devtools.check_param_in_list(v, defaults[k])
                # NOTE: + is a special character that is replaced by %2B
                # The + character is the proper encoding for a space when
                # quoting GET or POST data. Thus, a literal + character needs
                # to be escaped as well, lest it be decoded to a space on the
                # other end.
                # Only strings are rewritten: values such as an integer pmid
                # have no ``replace`` method and are passed through as is.
                params[k] = v.replace("+", " ") if isinstance(v, str) else v

        self.services.logging.info(url)
        res = self.services.http_get(url, frmt="json", params=params)
        return res

    def queryFiles(self, **kargs):
        """Retrieve a list of files associated with a set of experiments

        The following parameters are used to search for experiments/files:

        :param str accession: experiment primary or secondary accession
            e.g. E-MEXP-31
        :param str array: array design accession or name e.g., A-AFFY-33
        :param str ef: Experimental factor, the name of the main variables in
            an experiment. (e.g., CellType)
        :param str efv: Experimental factor value. Has EFO expansion.
            (e.g., HeLa)
        :param str expdesign: Experiment design type (e.g., "dose+response")
        :param str exptype: Experiment type. Has EFO expansion.
            (e.g., "RNA-seq")
        :param str gxa: Presence in the Gene Expression Atlas. Only value is
            gxa=true.
        :param str keywords: e.g. "cancer+breast"
        :param str pmid: PubMed identifier (e.g., 16553887)
        :param str sa: Sample attribute values. Has EFO expansion. fibroblast
        :param str species: Species of the samples. Has EFO expansion.
            (e.g., "homo+sapiens")
        :param bool wholewords:

        The following parameters can filter the experiments:

        :param str directsub: only experiments directly submitted to
            ArrayExpress (true) or only imported from GEO database (false)

        The following parameters can sort the results:

        :param str sortby: sorting by grouping (can be accession, name,
            assays, species, releasedate, fgem, raw, atlas)
        :param str sortorder: sorting by ordering. Can be either ascending or
            descending (default)

        .. doctest::
            :options: +SKIP

            >>> from bioservices import ArrayExpress
            >>> s = ArrayExpress()
            >>> res = s.queryFiles(keywords="cancer+breast", wholewords=True)
            >>> res = s.queryExperiments(array="A-AFFY-33", species="Homo Sapiens")
            >>> res = s.queryExperiments(array="A-AFFY-33", species="Homo Sapiens",
            ...                          sortby="releasedate")
            >>> res = s.queryExperiments(array="A-AFFY-33", species="Homo+Sapiens",
            ...     expdesign="dose response", sortby="releasedate", sortorder="ascending")
            >>> dates = [x.findall("releasedate")[0].text for x in res.getchildren()]

        """
        res = self._search("files", **kargs)
        return res

    def queryExperiments(self, **kargs):
        """Retrieve experiments

        .. seealso:: :meth:`~bioservices.arrayexpress.ArrayExpress.queryFiles`
            for all possible keywords

        .. doctest::
            :options: +SKIP

            >>> res = s.queryExperiments(keywords="cancer+breast", wholewords=True)

        """
        res = self._search("experiments", **kargs)
        return res

    def retrieveExperiment(self, experiment):
        """alias to queryExperiments if you know the experiment name

        ::

            >>> s.retrieveExperiment("E-MEXP-31")
            >>> # equivalent to
            >>> s.queryExperiments(keywords="E-MEXP-31")

        """
        res = self.queryExperiments(keywords=experiment)
        return res

    def _fetch_file(self, experiment, filename):
        """Download *filename* of *experiment* without validating the name."""
        url = "files/" + experiment + "/" + filename
        return self.services.http_get(url, frmt="txt")

    @staticmethod
    def _write_file(filename, content):
        """Write *content* to *filename*, in binary mode when it is bytes."""
        if isinstance(content, (bytes, bytearray)):
            with open(filename, "wb") as fout:
                fout.write(content)
        else:
            with open(filename, "w") as fout:
                fout.write(content)

    def retrieveFile(self, experiment, filename, save=False):
        """Retrieve a specific file from an experiment

        :param str experiment: a valid experiment name
        :param str filename: name of a file attached to that experiment
        :param bool save: if True, also write the content to *filename* in
            the current working directory
        :return: the downloaded content

        ::

            >>> s.retrieveFile("E-MEXP-31", "E-MEXP-31.idf.txt")

        """
        files = self.retrieveFilesFromExperiment(experiment)

        assert filename in files, """Error. Provided filename does not seem to be correct.
            Files available for %s experiment are %s """ % (experiment, files)

        res = self._fetch_file(experiment, filename)
        if save:
            # The context manager inside the helper closes the handle even if
            # the write fails.
            self._write_file(filename, res)
        return res

    def retrieveFilesFromExperiment(self, experiment):
        """Given an experiment, returns the list of files found in its description

        :param str experiment: a valid experiment name
        :return: the experiment files

        .. doctest::

            >>> from bioservices import ArrayExpress
            >>> s = ArrayExpress(verbose=False)
            >>> s.retrieveFilesFromExperiment("E-MEXP-31")
            ['E-MEXP-31.raw.1.zip', 'E-MEXP-31.processed.1.zip', 'E-MEXP-31.idf.txt', 'E-MEXP-31.sdrf.txt']

        """
        res = self.queryExperiments(keywords=experiment)
        exp = res['experiments']['experiment']
        files = exp['files']
        output = [v['name'] for k, v in files.items() if k]
        return output

    def queryAE(self, **kargs):
        """Returns list of experiments

        See :meth:`queryExperiments` for parameters and usage

        This is a wrapper around :meth:`queryExperiments` that returns only
        the accession values.

        ::

            a.queryAE(keywords="pneumonia", species='homo+sapiens')
        """
        sets = self.queryExperiments(**kargs)
        experiments = sets['experiments']['experiment']
        # A lone match may come back as one mapping rather than a list (the
        # shape retrieveFilesFromExperiment relies on); normalise it so that
        # the comprehension below does not iterate over its keys.
        if isinstance(experiments, dict):
            experiments = [experiments]
        return [x['accession'] for x in experiments]

    def getAE(self, accession, type='full'):
        """retrieve all files from an experiments and save them locally

        :param str accession: a valid experiment name
        :param type: unused, kept for backward compatibility

        Files are written in the current working directory under the names
        reported by :meth:`retrieveFilesFromExperiment`.
        """
        filenames = self.retrieveFilesFromExperiment(accession)
        self.services.logging.info("Found %s files" % len(filenames))
        for filename in filenames:
            self.services.logging.info("Downloading %s" % filename)
            # The names come from the experiment listing itself, so fetch
            # directly instead of re-listing the experiment for every file.
            content = self._fetch_file(accession, filename)
            self._write_file(filename, content)