Ejemplo n.º 1
0
class BiGG():
    """
    Interface to the `BiGG Models <http://bigg.ucsd.edu/>`_ API Service.

    ::

        >>> from bioservices import BiGG
        >>> bigg = BiGG()
        >>> bigg.search("e coli", "models")
        [{'bigg_id': 'e_coli_core',
          'gene_count': 137,
          'reaction_count': 95,
          'organism': 'Escherichia coli str. K-12 substr. MG1655',
          'metabolite_count': 72},
          ...
        ]
    """

    _base_url = "http://bigg.ucsd.edu"
    _api_version = "v2"
    _url = "%s/api/%s" % (_base_url, _api_version)

    # Chunk size (in bytes) requested while streaming a download. The
    # ``iter_content`` default of 1 would write the file byte by byte.
    _download_chunk_size = 8192

    def __init__(self, verbose=False, cache=False):
        """Create the underlying REST service.

        :param verbose: forwarded to the REST service
        :param cache: forwarded to the REST service
        """
        # http://bigg.ucsd.edu/data_access
        self.services = REST(name="BiGG",
            url=BiGG._url, cache=cache, requests_per_sec=10,
            verbose=verbose)

    def __len__(self):
        """Number of entries in :attr:`models` (one request per call)."""
        return len(self.models)

    @property
    def version(self):
        """Response of the ``database_version`` endpoint."""
        return self.services.http_get("database_version")

    def _http_get_results(self, *args, **kwargs):
        """GET a resource and return the ``"results"`` entry of the response.

        All arguments are forwarded to ``self.services.http_get``.
        """
        response = self.services.http_get(*args, **kwargs)
        return response["results"]

    @property
    def models(self):
        """The ``"results"`` entry of the ``models`` endpoint."""
        return self._http_get_results("models")

    def _get_model_resource(self, type_, model_id, ids=None):
        """Fetch resources of kind *type_* that belong to model *model_id*.

        :param type_: resource kind; must be listed in
            ``_ACCEPTABLE_MODEL_RESOURCE_TYPES``
        :param model_id: identifier of the model
        :param ids: optional identifier(s) of individual resources. When
            omitted, the ``"results"`` of ``models/<model_id>/<type_>``
            are returned.
        :raises TypeError: if *type_* is not an acceptable resource type
            (kept as TypeError for backward compatibility)
        """
        if type_ not in _ACCEPTABLE_MODEL_RESOURCE_TYPES:
            raise TypeError("Unknown model resource type %s. Acceptable types are %s"
                % (type_, _ACCEPTABLE_MODEL_RESOURCE_TYPES))

        query = "models/%s/%s" % (model_id, type_)

        if ids is None:
            return self._http_get_results(query)

        # One query per requested identifier, sent in a single http_get
        # call; ``sequencify`` and ``squash`` are helpers defined outside
        # this class (presumably wrapping a scalar into a sequence and
        # unwrapping single-element results -- TODO confirm).
        ids = sequencify(ids)
        queries = ["%s/%s" % (query, id_) for id_ in ids]

        response = self.services.http_get(queries)
        return squash(response)

    def metabolites(self, model_id=None, ids=None):
        """Metabolites of a model, or the universal list if no model is given.

        :param model_id: model identifier; ``None`` queries
            ``universal/metabolites`` (*ids* is ignored in that case)
        :param ids: optional metabolite identifier(s) within the model
        """
        if model_id is None:
            return self._http_get_results("universal/metabolites")

        return self._get_model_resource("metabolites", model_id=model_id, ids=ids)

    def reactions(self, model_id=None, ids=None):
        """Reactions of a model, or the universal list if no model is given.

        :param model_id: model identifier; ``None`` queries
            ``universal/reactions`` (*ids* is ignored in that case)
        :param ids: optional reaction identifier(s) within the model
        """
        if model_id is None:
            return self._http_get_results("universal/reactions")

        return self._get_model_resource("reactions", model_id=model_id, ids=ids)

    def genes(self, model_id, ids=None):
        """Genes of the model *model_id*, optionally restricted to *ids*."""
        return self._get_model_resource("genes", model_id=model_id, ids=ids)

    def search(self, query, type_):
        """Query the ``search`` endpoint.

        :param query: search text
        :param type_: search type; must be listed in
            ``_ACCEPTABLE_SEARCH_TYPES``
        :raises TypeError: if *type_* is not an acceptable search type
        :return: the ``"results"`` entry of the response
        """
        if type_ not in _ACCEPTABLE_SEARCH_TYPES:
            raise TypeError("Unknown type %s. Acceptable types are %s"
                % (type_, _ACCEPTABLE_SEARCH_TYPES))

        params = {"query": query, "search_type": type_}
        return self._http_get_results("search", params=params)

    def download(self, model_id, format_="json", gzip=True, target=None):
        """Download a model file from ``<base url>/static/models``.

        :param model_id: identifier of the model
        :param format_: file format; must be listed in
            ``_ACCEPTABLE_MODEL_DOWNLOAD_FORMATS``
        :param gzip: if true, ``.gz`` is appended to the requested file name
        :param target: path of the file to write; defaults to the requested
            file name (``<model_id>.<format_>[.gz]``)
        :raises TypeError: if *format_* is not an acceptable format
        :return: None; the payload is written to *target*. An HTTP error
            status is raised through ``raise_for_status`` and no file is
            created in that case.
        """
        if format_ not in _ACCEPTABLE_MODEL_DOWNLOAD_FORMATS:
            raise TypeError("Unknown format %s. Accepted types are %s."
                % (format_, _ACCEPTABLE_MODEL_DOWNLOAD_FORMATS))

        path = "%s.%s" % (model_id, format_)

        if gzip:
            path += ".gz"

        if not target:
            target = path

        url = self.services._build_url("%s/static/models/%s" %
            (BiGG._base_url, path))

        response = self.services.session.get(url, stream=True)
        try:
            # Fail before touching the file system on an error status.
            response.raise_for_status()
            with open(target, "wb") as f:
                for chunk in response.iter_content(
                        chunk_size=self._download_chunk_size):
                    f.write(chunk)
        finally:
            # A streamed response keeps its connection until closed.
            response.close()
Ejemplo n.º 2
0
class Reactome():
    """Interface to the Reactome Content Service.

    Every implemented method sends one request through
    ``self.services.http_get`` to a path below :attr:`_url` and returns
    what that call returns (unless stated otherwise).

    .. todo:: interactors, orthology, participants, person,
        query, references, schema

    """

    _url = "https://reactome.org/ContentService"

    def __init__(self, verbose=True, cache=False):
        """.. rubric:: Constructor

        :param verbose: stored in ``debugLevel``; the REST service itself
            is always created with ``verbose="ERROR"``
        :param cache: forwarded to the REST service (previously ignored)
        """
        self.services = REST(name="Reactome",
                             url=Reactome._url,
                             verbose="ERROR",
                             cache=cache)
        self.debugLevel = verbose

    @property
    def version(self):
        """Result of ``data/database/version`` requested as text."""
        return self.services.http_get("data/database/version", frmt="txt")

    @property
    def name(self):
        """Result of ``data/database/name`` requested as text."""
        return self.services.http_get("data/database/name", frmt="txt")

    def get_discover(self, identifier):
        """The schema.org for an Event in Reactome knowledgebase

        For each event (reaction or pathway) this method generates a
        json file representing the dataset object as defined by
        schema.org (http). This is mainly used by search engines in
        order to index the data

        ::

            r.get_discover("R-HSA-446203")

        """
        return self.services.http_get("data/discover/{}".format(identifier),
                                      frmt="json")

    def get_diseases(self):
        """list of diseases objects"""
        return self.services.http_get("data/diseases", frmt="json")

    def get_diseases_doid(self):
        """retrieves the list of disease DOIDs annotated in Reactome

        The text answer is split into lines and each non-blank line into
        two whitespace-separated fields (key, value).

        return: dictionary with DOID contained in the values()
        """
        res = self.services.http_get("data/diseases/doid", frmt="txt")
        # Blank lines (e.g. after a trailing newline) carry no pair and
        # would make dict() fail, so they are skipped.
        return dict(line.split() for line in res.splitlines() if line.strip())

    def get_interactors_psicquic_molecule_details(self):
        """Retrieve clustered interaction, sorted by score, of a given accession by resource."""
        raise NotImplementedError

    def get_interactors_psicquic_molecule_summary(self):
        """Retrieve a summary of a given accession by resource"""
        raise NotImplementedError

    def get_interactors_psicquic_resources(self):
        """Retrieve a list of all Psicquic Registries services"""
        raise NotImplementedError

    def get_interactors_static_molecule_details(self):
        """Retrieve a detailed interaction information of a given accession"""
        raise NotImplementedError

    def get_interactors_static_molecule_pathways(self):
        """Retrieve a list of lower level pathways where the interacting molecules can be found"""
        raise NotImplementedError

    def get_interactors_static_molecule_summary(self):
        """Retrieve a summary of a given accession"""
        raise NotImplementedError

    def get_exporter_fireworks(self):
        """Not implemented."""
        raise NotImplementedError

    def get_exporter_reaction(self):
        """Not implemented."""
        raise NotImplementedError

    def get_exporter_diagram(self,
                             identifier,
                             ext="png",
                             quality=5,
                             diagramProfile="Modern",
                             analysisProfile="Standard",
                             filename=None):
        """Export a given pathway diagram to raster file

        This method accepts identifiers for Event class instances.
        When a diagrammed pathway is provided, the diagram is exported
        to the specified format. When a subpathway is provided, the
        diagram for the parent is exported and the events that are part
        of the subpathways are selected. When a reaction is provided,
        the diagram containing the reaction is exported and the reaction
        is selected.

        :param identifier: Event identifier (it can be a pathway with
            diagram, a subpathway or a reaction)
        :param ext: File extension (defines the image format) in png,
            jpeg, jpg, svg, gif
        :param quality: Result image quality between [1 - 10]. It
            defines the quality of the final image (Default 5). The value
            0 is also let through, as it always has been.
        :param diagramProfile: Diagram Color Profile ("Modern" or
            "Standard")
        :param analysisProfile: Analysis Color Profile ("Standard",
            "Strosobar" or "Copper Plus")
        :param filename: if given, save the results in the provided filename

        The service options flg, sel, token and expColumn are not
        implemented.

        :raises AssertionError: if one of the arguments is outside its
            accepted values

        return: raw data if filename parameter is not set. Otherwise, the data
            is saved in the filename and the function returns None

        """
        # AssertionError is kept for backward compatibility but raised
        # explicitly so the validation is not stripped by ``python -O``.
        checks = (
            ("ext", ext, ("png", "jpg", "jpeg", "svg", "gif")),
            ("quality", quality, range(11)),
            ("diagramProfile", diagramProfile, ("Modern", "Standard")),
            ("analysisProfile", analysisProfile,
             ("Standard", "Strosobar", "Copper Plus")),
        )
        for label, value, accepted in checks:
            if value not in accepted:
                raise AssertionError("{} must be one of {}; got {!r}".format(
                    label, list(accepted), value))

        params = {
            "diagramProfile": diagramProfile,
            "analysisProfile": analysisProfile,
            "quality": quality
        }

        res = self.services.http_get("exporter/diagram/{}.{}".format(
            identifier, ext),
                                     params=params,
                                     frmt=ext)
        if not filename:
            return res

        # SVG is written in text mode, every other format in binary mode.
        mode = "w" if ext == "svg" else "wb"
        with open(filename, mode) as fout:
            fout.write(res)

    def get_complex_subunits(self,
                             identifier,
                             excludeStructuresSpecifies=False):
        """A list with the entities contained in a given complex

        Retrieves the list of subunits that constitute any given complex.
        In case the complex comprises other complexes, this method
        recursively traverses the content returning each contained
        PhysicalEntity. Contained complexes and entity sets can be
        excluded setting the 'excludeStructures' optional parameter to 'true'

        :param identifier: The complex for which subunits are requested
        :param excludeStructuresSpecifies: Specifies whether contained
            complexes and entity sets are excluded in the response. It is
            sent as the ``excludeStructures`` query parameter.

        ::

            r.get_complex_subunits("R-HSA-5674003")
        """
        # The Python argument keeps its historical name; the service
        # expects the query key ``excludeStructures``.
        params = {"excludeStructures": excludeStructuresSpecifies}
        return self.services.http_get(
            "data/complex/{}/subunits".format(identifier),
            params=params,
            frmt="json")

    def get_complexes(self, resources, identifier):
        """A list of complexes containing the pair (identifier, resource)

        Retrieves the list of complexes that contain a given (identifier,
        resource). The method deconstructs the complexes into all its
        participants to do so.

        :param resources: The resource of the identifier for complexes are
            requested (e.g. UniProt)
        :param identifier: The identifier for which complexes are requested

        ::

            r.get_complexes(resources, identifier)
            r.get_complexes("UniProt", "P43403")

        """
        return self.services.http_get("data/complexes/{}/{}".format(
            resources, identifier),
                                      frmt="json")

    def get_entity_componentOf(self, identifier):
        """A list of larger structures containing the entity

        Retrieves the list of structures (Complexes and Sets) that
        include the given entity as their component. It should be
        mentioned that the list includes only simplified entries
        (type, names, ids) and not full information about each item.

        ::

            r.get_entity_componentOf("R-HSA-199420")

        """
        return self.services.http_get(
            "data/entity/{}/componentOf".format(identifier), frmt="json")

    def get_entity_otherForms(self, identifier):
        """All other forms of PhysicalEntity

        Retrieves a list containing all other forms of the given
        PhysicalEntity. These other forms are PhysicalEntities that
        share the same ReferenceEntity identifier, e.g. PTEN
        H93R[R-HSA-2318524] and PTEN C124R[R-HSA-2317439] are two
        forms of PTEN.

        ::

            r.get_entity_otherForms("R-HSA-199420")

        """
        return self.services.http_get(
            "data/entity/{}/otherForms".format(identifier), frmt="json")

    def get_event_ancestors(self, identifier):
        """The ancestors of a given event

        The Reactome definition of events includes pathways and reactions.
        Although events are organised in a hierarchical structure, a single
        event can be in more than one location, i.e. a reaction can take
        part in different pathways while, in the same way, a sub-pathway
        can take part in many pathways. Therefore, this method retrieves
        a list of all possible paths from the requested event to the top
        level pathway(s).

        :param identifier: The event for which the ancestors are requested

        ::

            r.get_event_ancestors("R-HSA-5673001")

        """
        return self.services.http_get(
            "data/event/{}/ancestors".format(identifier), frmt="json")

    def get_eventsHierarchy(self, species):
        """The full event hierarchy for a given species

        Events (pathways and reactions) in Reactome are organised in a
        hierarchical structure for every species. By following all
        'hasEvent' relationships, this method retrieves the full event
        hierarchy for any given species. The result is a list of tree
        structures, one for each TopLevelPathway. Every event in these
        trees is represented by a PathwayBrowserNode. The latter contains
        the stable identifier, the name, the species, the url, the type,
        and the diagram of the particular event.

        :param species: Allowed species filter: SpeciesName (eg: Homo sapiens)
            SpeciesTaxId (eg: 9606)

        ::

            r.get_eventsHierarchy(9606)
        """
        return self.services.http_get(
            "data/eventsHierarchy/{}".format(species), frmt="json")

    def get_exporter_sbml(self, identifier):
        """Export given Pathway to SBML

        :param identifier: DbId or StId of the requested database object

        ::

            r.get_exporter_sbml("R-HSA-68616")

        """
        return self.services.http_get(
            "exporter/sbml/{}.xml".format(identifier), frmt="xml")

    def get_pathway_containedEvents(self, identifier):
        """All the events contained in the given event

        Events are the building blocks used in Reactome to represent
        all biological processes, and they include pathways and reactions.
        Typically, an event can contain other events. For example, a
        pathway can contain smaller pathways and reactions. This method
        recursively retrieves all the events contained in any given event.

        ::

            res = r.get_pathway_containedEvents("R-HSA-5673001")

        """
        return self.services.http_get(
            "data/pathway/{}/containedEvents".format(identifier), frmt="json")

    def get_pathway_containedEvents_by_attribute(self, identifier, attribute):
        """A single property for each event contained in the given event

        Events are the building blocks used in Reactome to represent all
        biological processes, and they include pathways and reactions.
        Typically, an event can contain other events. For example, a
        pathway can contain smaller pathways (subpathways) and reactions.
        This method recursively retrieves a single attribute for each of
        the events contained in the given event.

        :param identifier: The event for which the contained events are requested
        :param attribute: Attribute to be filtered

        return: the text answer with its first and last character removed,
            split on commas and stripped. If the answer cannot be sliced
            and split like a string, it is returned unchanged.

        ::

             r.get_pathway_containedEvents_by_attribute("R-HSA-5673001", "stId")

        """
        res = self.services.http_get(
            "data/pathway/{}/containedEvents/{}".format(identifier, attribute),
            frmt="txt")
        try:
            return [x.strip() for x in res[1:-1].split(",")]
        except (TypeError, AttributeError, KeyError):
            # Not a string: hand the raw answer back to the caller
            # instead of hiding every possible exception.
            return res

    def get_pathways_low_diagram_entity(self, identifier):
        """A list of lower level pathways with diagram containing
        a given entity or event

        This method traverses the event hierarchy and retrieves the
        list of all lower level pathways that have a diagram and
        contain the given PhysicalEntity or Event.

        :param identifier: The entity that has to be present in the pathways

        The species filter mentioned by the service documentation
        (taxonomy identifier, eg: 9606, or species name, eg: 'Homo sapiens')
        is not implemented.

        ::

            r.get_pathways_low_diagram_entity("R-HSA-199420")

        """
        return self.services.http_get(
            "data/pathways/low/diagram/entity/{}".format(identifier),
            frmt="json")

    def get_pathways_low_diagram_entity_allForms(self, identifier):
        """Result of ``data/pathways/low/diagram/entity/<identifier>/allForms``

        ::

            r.get_pathways_low_diagram_entity_allForms("R-HSA-199420")
        """
        return self.services.http_get(
            "data/pathways/low/diagram/entity/{}/allForms".format(identifier),
            frmt="json")

    def get_pathways_low_diagram_identifier_allForms(self, identifier):
        """Result of ``data/pathways/low/diagram/identifier/<identifier>/allForms``

        ::

            r.get_pathways_low_diagram_identifier_allForms("PTEN")

        """
        return self.services.http_get(
            "data/pathways/low/diagram/identifier/{}/allForms".format(
                identifier),
            frmt="json")

    def get_pathways_low_entity(self, identifier):
        """A list of lower level pathways containing a given entity or event

        This method traverses the event hierarchy and retrieves the
        list of all lower level pathways that contain the given
        PhysicalEntity or Event.

        ::

            r.get_pathways_low_entity("R-HSA-199420")
        """
        return self.services.http_get(
            "data/pathways/low/entity/{}".format(identifier), frmt="json")

    def get_pathways_low_entity_allForms(self, identifier):
        """A list of lower level pathways containing any form of a given entity

        This method traverses the event hierarchy and retrieves the list of all
        lower level pathways that contain the given PhysicalEntity in any of
        its variant forms. These variant forms include for example different
        post-translationally modified versions of a single protein, or the
        same chemical in different compartments.

        ::

            r.get_pathways_low_entity_allForms("R-HSA-199420")
        """
        return self.services.http_get(
            "data/pathways/low/entity/{}/allForms".format(identifier),
            frmt="json")

    def get_pathways_top(self, species):
        """Result of ``data/pathways/top/<species>`` requested as json."""
        return self.services.http_get("data/pathways/top/{}".format(species),
                                      frmt="json")

    def get_references(self, identifier):
        """All referenceEntities for a given identifier

        Retrieves a list containing all the reference entities for a given
        identifier.

        ::

            r.get_references(15377)

        """
        return self.services.http_get(
            "references/mapping/{}".format(identifier), frmt="json")

    def get_mapping_identifier_pathways(self, resource, identifier):
        """Result of ``data/mapping/<resource>/<identifier>/pathways``."""
        return self.services.http_get("data/mapping/{}/{}/pathways".format(
            resource, identifier),
                                      frmt="json")

    def get_mapping_identifier_reactions(self, resource, identifier):
        """Result of ``data/mapping/<resource>/<identifier>/reactions``."""
        # The answer used to be computed and then dropped (no return).
        return self.services.http_get("data/mapping/{}/{}/reactions".format(
            resource, identifier),
                                      frmt="json")

    def search_facet(self):
        """A list of facets corresponding to the whole Reactome search data

        This method retrieves faceting information on the whole Reactome
        search data.

        """
        return self.services.http_get("search/facet", frmt="json")

    def search_facet_query(self, query):
        """A list of facets corresponding to a specific query

        This method retrieves faceting information on a specific query

        """
        # The text goes through ``params`` so that it is URL-encoded
        # rather than pasted verbatim into the URL.
        return self.services.http_get("search/facet_query",
                                      params={"query": query},
                                      frmt="json")

    def search_query(self, query):
        """Queries Solr against the Reactome knowledgebase

        This method performs a Solr query on the Reactome knowledgebase.
        Results can be provided in a paginated format.

        """
        return self.services.http_get("search/query",
                                      params={"query": query},
                                      frmt="json")

    def search_spellcheck(self, query):
        """Spell-check suggestions for a given query

        This method retrieves a list of spell-check suggestions
        for a given search term.

        """
        return self.services.http_get("search/spellcheck",
                                      params={"query": query},
                                      frmt="json")

    def search_suggest(self, query):
        """Autosuggestions for a given query

        This method retrieves a list of suggestions for a given search term.

        ::

            >>> r.search_suggest("apopt")
            ['apoptosis', 'apoptosome', 'apoptosome-mediated', 'apoptotic']

        """
        # Previously referenced the undefined name ``identifier``.
        return self.services.http_get("search/suggest",
                                      params={"query": query},
                                      frmt="json")

    def get_species_all(self):
        """the list of all species in Reactome"""
        return self.services.http_get("data/species/all", frmt="json")

    def get_species_main(self):
        """the list of main species in Reactome

        ::

            r.get_species_main()

        """
        return self.services.http_get("data/species/main", frmt="json")
Ejemplo n.º 3
0
class PDBe():
    """Interface to part of the `PDBe <http://www.ebi.ac.uk/pdbe>`_ service

    .. doctest::

        >>> from bioservices import PDBe
        >>> s = PDBe()
        >>> res = s.get_file("1FBV", "pdb")

    """
    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: prints informative messages (default is off)
        :param bool cache: forwarded unchanged to the REST service

        """
        settings = dict(
            name="PDBe",
            url="https://www.ebi.ac.uk/pdbe/api/pdb/entry/",
            verbose=verbose,
            cache=cache,
        )
        self.services = REST(**settings)

    def _check_id(self, pdbid):
        if isinstance(pdbid, list):
            pdbid = ",".join(pdbid)

        if isinstance(pdbid, str):
            for item in pdbid.split(","):
                assert len(item) == 4, "a 4-character PDB id code is required"
        else:
            raise TypeError(
                "pdb id must be either a 4-character pdb id, a list of valid PDB ids, or a string made of pdb ids, separated by commas"
            )

        return pdbid

    def _return(self, res):
        if res == 404:
            return {}
        return res

    def get_summary(self, query):
        """Returns summary of a PDB entry

        This can be title of the entry, list of depositors, date of deposition,
        date of release, date of latest revision, experimental method, list
        of related entries in case split entries, etc.

        :param query: a 4-character PDB id code

        ::

            p.get_summary('1cbs')
            p.get_summary('1cbs,2kv8')
            p.get_summary(['1cbs', '2kv8'])

        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("summary/{}".format(query))
        else:
            res = self.services.http_post("summary", data=query, frmt="json")
        return self._return(res)

    def get_molecules(self, query):
        """Return details of molecules  (or entities in mmcif-speak) modelled in the entry

        This can be entity id, description, type, polymer-type (if applicable), number
        of copies in the entry, sample preparation method, source organism(s)
        (if applicable), etc.

        :param query: a 4-character PDB id code

        ::

            p.get_molecules('1cbs')

        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("molecules/{}".format(query))
        else:
            res = self.services.http_post("molecules", data=query, frmt="json")
        return self._return(res)

    def get_related_publications(self, query):
        """Return publications obtained from both EuroPMC and UniProt. T


        These are articles which cite the primary citation of the entry, or
        open-access articles which mention the entry id without explicitly citing the
        primary citation of an entry.


        :param query: a 4-character PDB id code

        ::

            p.get_related_publications('1cbs')

        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get(
                "related_publications/{}".format(query))
        else:
            res = self.services.http_post("related_publications/",
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_experiment(self, query):
        """Provides details of experiment(s) carried out in determining the structure of the entry.

        Each experiment is described in a separate dictionary.
        For X-ray diffraction, the description consists of resolution, spacegroup, cell
        dimensions, R and Rfree, refinement program, etc.
        For NMR, details of spectrometer, sample, spectra, refinement, etc. are
        included.
        For EM, details of specimen, imaging, acquisition, reconstruction, fitting etc.
        are included.

        :param query: a 4-character PDB id code

        ::

            p.get_experiment('1cbs')

        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("experiment/{}".format(query))
        else:
            res = self.services.http_post("experiment/{}",
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_nmr_resources(self, query):
        """This call provides URLs of available additional resources for NMR
        entries. E.g., mapping between structure (PDB) and chemical shift (BMRB)
        entries.
        :param query: a 4-character PDB id code

        ::

            p.get_nmr_resources('1cbs')

        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("nmr_resources/{}".format(query))
        else:
            res = self.services.http_post("nmr_resources/",
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_ligand_monomers(self, query):
        """Provides a a list of modelled instances of ligands,

        ligands i.e. 'bound' molecules that are not waters.

        :param query: a 4-character PDB id code

        ::

            p.get_ligand_monomers('1cbs')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("ligand_monomers/{}".format(query))
        else:
            res = self.services.http_post("ligand_monomers",
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_modified_residues(self, query):
        """Provides a list of modelled instances of modified amino acids or
        nucleotides in protein, DNA or RNA chains.


        :param query: a 4-character PDB id code

        ::

            p.get_modified_residues('4v5j')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("modified_AA_or_NA/{}".format(query))
        else:
            res = self.services.http_post("modified_AA_or_NA",
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_mutated_residues(self, query):
        """Provides a list of modelled instances of mutated amino acids or
        nucleotides in protein, DNA or RNA chains.


        :param query: a 4-character PDB id code

        ::

            p.get_mutated_residues('1bgj')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("mutated_AA_or_NA/{}".format(query))
        else:
            res = self.services.http_get("mutated_AA_or_NA",
                                         data=query,
                                         frmt="json")
        return self._return(res)

    def get_release_status(self, query):
        """Provides status of a PDB entry (released, obsoleted, on-hold etc)
        along with some other information such as authors, title, experimental method,
        etc.

        :param query: a 4-character PDB id code

        ::

            p.get_release_status('1cbs')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("status/{}".format(query))
        else:
            res = self.services.http_get("status/{}", data=query, frmt="json")
        return self._return(res)

    def get_observed_ranges(self, query):
        """Provides observed ranges, i.e., segments of structural coverage of
         polymeric molecues that are modelled fully or partly

        :param query: a 4-character PDB id code

        ::

            p.get_observed_ranges('1cbs')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("polymer_coverage/{}".format(query))
        else:
            res = self.services.http_post("polymer_coverage",
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_observed_ranges_in_pdb_chain(self, query, chain_id):
        """Provides observed ranges, i.e., segments of structural coverage of
         polymeric molecules in a particular chain

        :param query: a 4-character PDB id code
        :param query: a PDB chain ID

        ::

            p.get_observed_ranges_in_pdb_chain('1cbs', "A")


        """
        assert len(query) == 4, "a 4-character PDB id code is required"
        res = self.services.http_get("polymer_coverage/{}/chain/{}".format(
            query, chain_id))
        return self._return(res)

    def get_secondary_structure(self, query):
        """Provides residue ranges of regular secondary structure 

        (alpha helices and beta strands) found in protein chains of the entry.
        For strands, sheet id can be used to identify a beta sheet.



        :param query: a 4-character PDB id code

        ::

            p.get_secondary_structure('1cbs')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get(
                "secondary_structure/{}".format(query))
        else:
            res = self.services.http_post("secondary_structure/",
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_residue_listing(self, query):
        """Provides the list of all residues (modelled or otherwise) in the entry.

        Waters are excluded. The listing comes along with details of the
        fraction of expected atoms modelled for the residue and any alternate
        conformers.

        :param query: a single 4-character PDB id code (string)
        :raises AssertionError: if *query* is not a comma-free string of
            exactly four characters

        ::

            p.get_residue_listing('1cbs')


        """
        # Only one identifier can be requested here. Checking the type and the
        # absence of a comma up front guarantees that a request is always
        # issued: a 4-item list or a string such as "1,bs" used to pass the
        # length check and then fail with an unbound result.
        assert (isinstance(query, str) and len(query) == 4
                and "," not in query), "a 4-character PDB id code is required"
        res = self.services.http_get("residue_listing/{}".format(query))
        return self._return(res)

    def get_residue_listing_in_pdb_chain(self, query, chain_id):
        """Provides all residues (modelled or otherwise) of one chain of the entry

        Waters are excluded. The listing comes along with details of the
        fraction of expected atoms modelled for the residue and any alternate
        conformers.

        :param query: a single 4-character PDB id code (string)
        :param chain_id: a PDB chain ID
        :raises AssertionError: if *query* is not a comma-free string of
            exactly four characters

        ::

            p.get_residue_listing_in_pdb_chain('1cbs', 'A')


        """
        # Same validation rationale as for the whole-entry listing: reject
        # anything that is not a single identifier instead of reaching the
        # return statement with an unbound result.
        assert (isinstance(query, str) and len(query) == 4
                and "," not in query), "a 4-character PDB id code is required"
        # The chain must be part of the path; the former template had a single
        # placeholder, so chain_id was silently ignored and the listing of the
        # whole entry was requested.
        res = self.services.http_get("residue_listing/{}/chain/{}".format(
            query, chain_id))
        return self._return(res)

    def get_binding_sites(self, query):
        """Provides details on binding sites in the entry

        Details come from the STRUCT_SITE records in PDB files (or the mmcif
        equivalent thereof), such as ligand, residues in the site,
        description of the site, etc.

        :param query: a 4-character PDB id code

        ::

            p.get_binding_sites('1cbs')

        """
        pdb_ids = self._check_id(query)
        if not isinstance(pdb_ids, str) or "," in pdb_ids:
            # not a single comma-free identifier: send it in a POST request
            sites = self.services.http_post("binding_sites",
                                            data=pdb_ids,
                                            frmt="json")
            return self._return(sites)
        sites = self.services.http_get(f"binding_sites/{pdb_ids}")
        return self._return(sites)

    def get_files(self, query):
        """Provides URLs and brief descriptions (labels) for a PDB entry

        Also covers mmcif files, biological assembly files, FASTA file for
        sequences, SIFTS cross reference XML files, validation XML files,
        X-ray structure factor file, NMR experimental constraints files, etc.

        :param query: a 4-character PDB id code

        ::

            p.get_files('1cbs')

        """
        pdb_ids = self._check_id(query)
        single_entry = isinstance(pdb_ids, str) and "," not in pdb_ids
        if single_entry:
            listing = self.services.http_get(f"files/{pdb_ids}")
        else:
            listing = self.services.http_post("files",
                                              data=pdb_ids,
                                              frmt="json")
        return self._return(listing)

    def get_observed_residues_ratio(self, query):
        """Provides the ratio of observed residues for each chain in each molecule

        The list of chains within an entity is sorted by observed_ratio
        (descending order), partial_ratio (ascending order), and
        number_residues (descending order).

        :param query: a 4-character PDB id code

        ::

            p.get_observed_residues_ratio('1cbs')

        """
        pdb_ids = self._check_id(query)
        if not isinstance(pdb_ids, str) or "," in pdb_ids:
            # not a single comma-free identifier: send it in a POST request
            ratios = self.services.http_post("observed_residues_ratio",
                                             data=pdb_ids,
                                             frmt="json")
        else:
            ratios = self.services.http_get(
                f"observed_residues_ratio/{pdb_ids}")
        return self._return(ratios)

    def get_assembly(self, query):
        """Provides information for each assembly of a given PDB ID.

        This information is broken down at the entity level for each
        assembly. The information given includes the molecule name, type and
        class, the chains where the molecule occur, and the number of copies
        of each entity in the assembly.

        :param query: a 4-character PDB id code

        ::

            p.get_assembly('1cbs')

        """
        pdb_ids = self._check_id(query)
        single_entry = isinstance(pdb_ids, str) and "," not in pdb_ids
        if single_entry:
            assemblies = self.services.http_get(f"assembly/{pdb_ids}")
        else:
            assemblies = self.services.http_post("assembly",
                                                 data=pdb_ids,
                                                 frmt="json")
        return self._return(assemblies)

    def get_electron_density_statistics(self, query):
        """Provides the statistics for electron density.

        :param query: a 4-character PDB id code

        ::

            p.get_electron_density_statistics('1cbs')

        """
        pdb_ids = self._check_id(query)
        if not isinstance(pdb_ids, str) or "," in pdb_ids:
            # not a single comma-free identifier: send it in a POST request
            stats = self.services.http_post("electron_density_statistics",
                                            data=pdb_ids,
                                            frmt="json")
            return self._return(stats)
        stats = self.services.http_get(
            f"electron_density_statistics/{pdb_ids}")
        return self._return(stats)

    def get_functional_annotation(self, query):
        """Provides functional annotation of all ligands, i.e. 'bound' molecules

        The request is sent to the ``cofactor`` endpoint.

        :param query: a 4-character PDB id code

        ::

            p.get_functional_annotation('1cbs')

        """
        pdb_ids = self._check_id(query)
        single_entry = isinstance(pdb_ids, str) and "," not in pdb_ids
        if single_entry:
            annotation = self.services.http_get(f"cofactor/{pdb_ids}")
        else:
            annotation = self.services.http_post("cofactor",
                                                 data=pdb_ids,
                                                 frmt="json")
        return self._return(annotation)

    def get_drugbank_annotation(self, query):
        """Provides DrugBank annotation of all ligands, i.e. 'bound' molecules

        :param query: a 4-character PDB id code

        ::

            p.get_drugbank_annotation('5hht')

        """
        pdb_ids = self._check_id(query)
        if not isinstance(pdb_ids, str) or "," in pdb_ids:
            # not a single comma-free identifier: send it in a POST request
            annotation = self.services.http_post("drugbank",
                                                 data=pdb_ids,
                                                 frmt="json")
        else:
            annotation = self.services.http_get(f"drugbank/{pdb_ids}")
        return self._return(annotation)

    def get_related_dataset(self, query):
        """Provides DOIs for related raw experimental datasets

        Includes diffraction image data, small-angle scattering data and
        electron micrographs.

        :param query: a 4-character PDB id code

        ::

            p.get_related_dataset('5o8b')

        """
        pdb_ids = self._check_id(query)
        single_entry = isinstance(pdb_ids, str) and "," not in pdb_ids
        if single_entry:
            datasets = self.services.http_get(
                f"related_experiment_data/{pdb_ids}")
        else:
            datasets = self.services.http_post("related_experiment_data",
                                               data=pdb_ids,
                                               frmt="json")
        return self._return(datasets)
Ejemplo n.º 4
0
class ArrayExpress():
    """Interface to the `ArrayExpress <http://www.ebi.ac.uk/arrayexpress>`_ service

    ArrayExpress allows to retrieve data sets used in various experiments.

    **QuickStart** Given an experiment name (e.g., E-MEXP-31), type::

        s = ArrayExpress()
        s.getAE('E-MEXP-31')

    You can also quickly retrieve experiments matching some search queries as
    follows::

        a.queryAE(keywords="pneumonia", species='homo+sapiens')

    Now let us look at other methods. If you know the file and experiment
    name, you can retrieve a specific file as follows::

        >>> from bioservices import ArrayExpress
        >>> s = ArrayExpress()
        >>> # retrieve a specific file from an experiment
        >>> res = s.retrieveFile("E-MEXP-31", "E-MEXP-31.idf.txt")

    The main issue is that you may not know the experiment you are looking for.
    You can query experiments by keyword::

        >>> # Search for experiments
        >>> res = s.queryExperiments(keywords="cancer+breast", wholewords=True)

    keywords used in queries follows these rules:

    * Accession number and keyword searches are case insensitive
    * More than one keyword can be searched for using the + sign (e.g. keywords="cancer+breast")
    * Use an asterisk as a multiple character wild card (e.g. keywords="colo*")
    * use a question mark ? as a single character wild card (e.g. keywords="te?t")

    More complex queries can be constructed using the operators AND, OR or NOT.
    AND is the default if no operator is specified. Either experiments or
    files can be searched for. Examples are::

        keywords="prostate+AND+breast"
        keywords="prostate+breast"      # same as above
        keywords="prostate+OR+breast"
        keywords="prostate+NOT+breast"

    The returned objects are XML parsed with beautifulSoup. You can get all
    experiments using the getchildren method:

    .. doctest::
        :options: +SKIP

        >>> res = s.queryExperiments(keywords="breast+cancer")
        >>> len(res.getchildren())
        1487


    If you know what you are looking for, you can give the experiment name::

        >>> res = s.retrieveExperiment("E-MEXP-31")
        >>> exp = res.getchildren()[0]   # it contains only one experiment
        >>> [x.text for x in exp.getchildren() if x.tag == "name"]
        ['Transcription profiling of mammalian male germ cells undergoing mitotic
        growth, meiosis and gametogenesis in highly enriched cell populations']

    Using the same example, you can retrieve the names of the files related to
    the experiment::

        >>> files = [x.getchildren() for x in exp.getchildren() if x.tag == "files"]
        >>> [x.get("name") for x in files[0]]
        ['E-MEXP-31.raw.1.zip',
         'E-MEXP-31.processed.1.zip',
         'E-MEXP-31.idf.txt',
         'E-MEXP-31.sdrf.txt']

    New in version 1.3.7 you can use the method :meth:`getAE`

    Then, you may want to download a particular file::

        >>> s.retrieveFile("E-MEXP-31", "E-MEXP-31.idf.txt")


    .. seealso:: :meth:`queryFiles` for more details about the parameters to be
        used in queries.

    .. warning:: supports only new style (v2). You can still use the old style by
        setting the request manually using the :attr:`version`.

    .. warning:: some syntax requires the + character, which is a special character
        for http requests. It is replaced internally by spaces if found
    .. warning:: filtering is not implemented (e.g., assaycount:[x TO y]syntax.)
    """
    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: prints informative messages
        :param bool cache: forwarded to the underlying REST service

        """
        self.services = REST(name="ArrayExpress",
                             url="http://www.ebi.ac.uk/arrayexpress",
                             cache=cache,
                             verbose=verbose)

        self.version = "v2"

    def _search(self, mode, **kargs):
        """Common function to search for files or experiments

        :param str mode: either "experiments" or "files"
        :param kargs: search parameters; see :meth:`queryFiles` for the
            accepted names
        :return: the result of the GET request, issued with ``frmt="json"``
        :raises ValueError: if a keyword is unknown, or if ``gxa`` or
            ``directsub`` is neither a true nor a false value
        """
        assert mode in ["experiments", "files"]
        url = "{0}/{1}/{2}".format("json", self.version, mode)

        # The keys list the accepted keywords; only the "sortby" and
        # "sortorder" values are used, as the lists of permitted choices.
        defaults = {
            "accession": None,  # ex: E-MEXP-31
            "keywords": None,
            "species": None,
            "wholewords": "on",
            "expdesign": None,
            "exptype": None,
            "gxa": "true",
            "pmid": None,
            "sa": None,
            "ef": None,  # e.g., CellType
            "efv": None,  # e.g., HeLa
            "array": None,  # ex: A-AFFY-33
            "expandfo": "on",
            "directsub": "true",
            "sortby": [
                "accession", "name", "assays", "species", "releasedate",
                "fgem", "raw", "atlas"
            ],
            "sortorder": ["ascending", "descending"],
        }

        for k in kargs:
            if k not in defaults:
                raise ValueError(
                    "Incorrect value provided ({}). Correct values are {}".
                    format(k, sorted(defaults.keys())))

        enabled = ["on", True, "true", "TRUE", "True"]
        disabled = [False, "false", "False"]
        params = {}

        for k, v in kargs.items():
            if k in ["expandfo", "wholewords"]:
                # on/off switches are only sent when they are enabled
                if v in enabled:
                    params[k] = "on"
            elif k in ["gxa", "directsub"]:
                if v in enabled:
                    params[k] = "true"
                elif v in disabled:
                    params[k] = "false"
                else:
                    # name the offending keyword (it may be gxa)
                    raise ValueError("{} must be true or false".format(k))
            else:
                if k in ["sortby", "sortorder"]:
                    self.services.devtools.check_param_in_list(v, defaults[k])
                params[k] = v

        # The + sign is accepted as a keyword separator in the user-facing
        # syntax; it is turned into a space before the request is sent.
        # Non-string values (e.g. an integer pmid) are passed through as is
        # instead of failing on a missing ``replace`` method.
        params = {
            k: v.replace("+", " ") if isinstance(v, str) else v
            for k, v in params.items()
        }

        self.services.logging.info(url)
        res = self.services.http_get(url, frmt="json", params=params)
        return res

    def queryFiles(self, **kargs):
        """Retrieve a list of files associated with a set of experiments

        The following parameters are used to search for experiments/files:

        :param str accession: experiment primary or secondary accession e.g. E-MEXP-31
        :param str array: array design accession or name e.g., A-AFFY-33
        :param str ef: Experimental factor, the name of the main variables in an
            experiment. (e.g., CellType)
        :param str efv:  Experimental factor value. Has EFO expansion. (e.g.,
            HeLa)
        :param str expdesign: Experiment design type  (e.g., "dose+response")
        :param str exptype:  Experiment type. Has EFO expansion. (e.g.,
            "RNA-seq")
        :param str gxa: Presence in the Gene Expression Atlas. Only value is gxa=true.
        :param str keywords: e.g. "cancer+breast"
        :param str pmid: PubMed identifier (e.g., 16553887)
        :param str sa: Sample attribute values. Has EFO expansion. fibroblast
        :param str species: Species of the samples. Has EFO expansion. (e.g., "homo+sapiens")
        :param bool wholewords:

        The following parameters can filter the experiments:

        :param str directsub: only experiments directly submitted to
            ArrayExpress (true) or only imported from GEO database (false)


        The following parameters can sort the results:

        :param str sortby: sorting by grouping (can be accession, name, assays,
            species, releasedate, fgem, raw, atlas)
        :param str sortorder: sorting by ordering. Can be either ascending or
            descending (default)

        .. doctest::
            :options: +SKIP

            >>> from bioservices import ArrayExpress
            >>> s = ArrayExpress()
            >>> res = s.queryFiles(keywords="cancer+breast", wholewords=True)
            >>> res = s.queryExperiments(array="A-AFFY-33", species="Homo Sapiens")
            >>> res = s.queryExperiments(array="A-AFFY-33", species="Homo Sapiens",
            ...                          sortby="releasedate")
            >>> res = s.queryExperiments(array="A-AFFY-33", species="Homo+Sapiens",
            ...     expdesign="dose response", sortby="releasedate", sortorder="ascending")
            >>> dates = [x.findall("releasedate")[0].text for x in res.getchildren()]

        """
        res = self._search("files", **kargs)
        return res

    def queryExperiments(self, **kargs):
        """Retrieve experiments

        .. seealso:: :meth:`~bioservices.arrayexpress.ArrayExpress.queryFiles` for
            all possible keywords

        .. doctest::
            :options: +SKIP

            >>> res = s.queryExperiments(keywords="cancer+breast", wholewords=True)

        """
        res = self._search("experiments", **kargs)
        return res

    def retrieveExperiment(self, experiment):
        """alias to queryExperiments if you know the experiment name

        The experiment name is passed as the ``keywords`` argument::

            >>> s.retrieveExperiment("E-MEXP-31")
            >>> # equivalent to
            >>> s.queryExperiments(keywords="E-MEXP-31")

        """
        res = self.queryExperiments(keywords=experiment)
        return res

    def retrieveFile(self, experiment, filename, save=False):
        """Retrieve a specific file from an experiment

        :param str experiment: name of the experiment the file belongs to
        :param str filename: one of the files listed by
            :meth:`retrieveFilesFromExperiment` for this experiment
        :param bool save: if True, the content is written to a local file
            called *filename* and nothing is returned
        :return: the file content when *save* is False, None otherwise
        :raises AssertionError: if *filename* is not one of the files of the
            experiment

        ::

            >>> s.retrieveFile("E-MEXP-31", "E-MEXP-31.idf.txt")
        """
        files = self.retrieveFilesFromExperiment(experiment)

        assert filename in files, """Error. Provided filename does not seem to be correct.
            Files available for %s experiment are %s """ % (experiment, files)

        url = "files/" + experiment + "/" + filename
        res = self.services.http_get(url, frmt="txt")

        if not save:
            return res

        # the context manager closes the file even if the write fails
        with open(filename, "w") as fout:
            fout.write(res)

    def retrieveFilesFromExperiment(self, experiment):
        """Given an experiment, returns the list of files found in its description


        :param str experiment: a valid experiment name
        :return: the experiment files

        .. doctest::

            >>> from bioservices import ArrayExpress
            >>> s = ArrayExpress(verbose=False)
            >>> s.retrieveFilesFromExperiment("E-MEXP-31")
            ['E-MEXP-31.raw.1.zip', 'E-MEXP-31.processed.1.zip', 'E-MEXP-31.idf.txt', 'E-MEXP-31.sdrf.txt']


        """
        res = self.queryExperiments(keywords=experiment)
        exp = res['experiments']['experiment']
        files = exp['files']
        # entries stored under an empty/false key are skipped
        output = [v['name'] for k, v in files.items() if k]
        return output

    def queryAE(self, **kargs):
        """Returns list of experiments

        See :meth:`queryExperiments` for parameters and usage

        This is a wrapper around :meth:`queryExperiments` that returns only
        the accession values.

        ::

            a.queryAE(keywords="pneumonia", species='homo+sapiens')
        """
        sets = self.queryExperiments(**kargs)
        return [x['accession'] for x in sets['experiments']['experiment']]

    def getAE(self, accession, type='full'):
        """Retrieve all files from an experiment and save them locally

        Each file is written under its own name; names ending in ``.zip``
        are opened in binary mode, all others in text mode.

        :param str accession: name of the experiment (e.g., E-MEXP-31)
        :param type: not used by the implementation; kept so that existing
            calls remain valid
        """
        filenames = self.retrieveFilesFromExperiment(accession)
        self.services.logging.info("Found %s files" % len(filenames))
        for filename in filenames:
            res = self.retrieveFile(accession, filename)
            # NOTE(review): the content is requested with frmt="txt"; confirm
            # that it is bytes-compatible for the '.zip' (binary mode) case.
            mode = 'wb' if filename.endswith('.zip') else 'w'
            with open(filename, mode) as fout:
                self.services.logging.info("Downloading %s" % filename)
                fout.write(res)
Ejemplo n.º 5
0
class MyGeneInfo():
    """Interface to `mygene.info <http://mygene.info>`_ service

    .. doctest::

        >>> from bioservices import MyGeneInfo
        >>> s = MyGeneInfo()

    """
    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: prints informative messages (default is off)
        :param bool cache: forwarded to the underlying REST service

        """
        url = "https://mygene.info/v3"
        # the service is registered under its own name (it used to be
        # labelled "PDBe", a leftover from another module)
        self.services = REST(name="MyGeneInfo",
                             url=url,
                             verbose=verbose,
                             cache=cache)

    def get_genes(self,
                  ids,
                  fields="symbol,name,taxid,entrezgene,ensemblgene",
                  species=None,
                  dotfield=True,
                  email=None):
        """Get matching gene objects for a list of gene ids


        :param ids: list of geneinfo IDs
        :param str fields: a comma-separated fields to limit the fields returned
            from the matching gene hits. The supported field names can be found from any
            gene object (e.g. http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If "fields=all", all
            available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param str species:  can be used to limit the gene hits from given
            species. You can use "common names" for nine common species (human, mouse, rat,
            fruitfly, nematode, zebrafish, thale-cress, frog and pig). All other species,
            you can provide their taxonomy ids. Multiple species can be passed using comma
            as a separator. Only sent when provided (default: None).
        :param dotfield: control the format of the returned fields when passed
            "fields" parameter contains dot notation, e.g. "fields=refseq.rna". If True
            the returned data object contains a single "refseq.rna" field, otherwise
            (False), a single "refseq" field with a sub-field of "rna". Default:
            True.
        :param str email: If you are regular users of this services, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that we can better track the usage or follow up with you.

        ::

            mgi = MyGeneInfo()
            mgi.get_genes(("301345,22637"))
            # first one is rat, second is mouse. This will return a 'notfound'
            # entry and the second entry as expected.
            mgi.get_genes("301345,22637", species="mouse")

        """
        params = {"ids": ids, "fields": fields}
        if email:  # pragma: no cover
            params["email"] = email

        assert dotfield in [True, False]
        params["dotfield"] = dotfield

        if species:
            params["species"] = species

        # the parameters travel in the body of the POST request
        res = self.services.http_post(
            "gene",
            data=params,
            frmt="json",
            headers={
                "User-Agent": self.services.getUserAgent(),
                "accept": "application/json",
                "Content-Type": "application/x-www-form-urlencoded"
            })
        return res

    def get_one_gene(self,
                     geneid,
                     fields="symbol,name,taxid,entrezgene,ensemblgene",
                     dotfield=True,
                     email=None):
        """Get matching gene objects for one gene id

        :param geneid: a valid gene ID
        :param str fields: a comma-separated fields to limit the fields returned
            from the matching gene hits. The supported field names can be found from any
            gene object (e.g. http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If "fields=all", all
            available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param dotfield: control the format of the returned fields when passed
            "fields" parameter contains dot notation, e.g. "fields=refseq.rna". If True
            the returned data object contains a single "refseq.rna" field, otherwise
            (False), a single "refseq" field with a sub-field of "rna". Default:
            True.
        :param str email: If you are regular users of this services, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that we can better track the usage or follow up with you.

        ::

            mgi = MyGeneInfo()
            mgi.get_one_gene("301345")
        """
        params = {"ids": geneid, "fields": fields}
        if email:  # pragma: no cover
            params["email"] = email

        assert dotfield in [True, False]
        params["dotfield"] = dotfield

        res = self.services.http_get(f"gene/{geneid}",
                                     params=params,
                                     frmt="json")
        return res

    def get_one_query(self,
                      query,
                      email=None,
                      dotfield=True,
                      fields="symbol,name,taxid,entrezgene,ensemblgene",
                      species="human,mouse,rat",
                      size=10,
                      _from=0,
                      sort=None,
                      facets=None,
                      entrezonly=False,
                      ensemblonly=False):
        """Make gene query and return matching gene list. Support JSONP and CORS as well.

        :param str query: Query string. Examples "CDK2", "NM_052827", "204639_at",
            "chr1:151,073,054-151,383,976", "hg19.chr1:151073054-151383976". The detailed
            query syntax can be found from our docs.
        :param str fields: a comma-separated fields to limit the fields returned
            from the matching gene hits. The supported field names can be found from any
            gene object (e.g. http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If "fields=all", all
            available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param str species: can be used to limit the gene hits from given species. You can use
            "common names" for nine common species (human, mouse, rat, fruitfly, nematode,
            zebrafish, thale-cress, frog and pig). All other species, you can provide their
            taxonomy ids. Multiple species can be passed using comma as a separator.
            Default: human,mouse,rat. (Currently accepted but not sent with
            the request, see the note in the code.)
        :param int size: the maximum number of matching gene hits to return
            (with a cap of 1000 at the moment). Default: 10.
        :param int _from: the number of matching gene hits to skip, starting
            from 0. Combining with "size" parameter, this can be useful for paging. Default:
            0.
        :param sort: the comma-separated fields to sort on. Prefix with "-" for
            descending order, otherwise in ascending order. Default: sort by matching scores
            in descending order.
        :param str facets: a single field or comma-separated fields to return
            facets, for example, "facets=taxid", "facets=taxid,type_of_gene".
        :param bool entrezonly: when passed as True, the query returns only the hits
            with valid Entrez gene ids. Default: False.
        :param bool ensemblonly: when passed as True, the query returns only the hits
            with valid Ensembl gene ids. Default: False.
        :param dotfield: control the format of the returned fields when passed
            "fields" parameter contains dot notation, e.g. "fields=refseq.rna". If True
            the returned data object contains a single "refseq.rna" field, otherwise
            (False), a single "refseq" field with a sub-field of "rna". Default:
            True.
        :param str email: If you are regular users of this services, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that we can better track the usage or follow up with you.

        """
        # NOTE(review): `species` is part of the signature but is never added
        # to the request; confirm whether it should be forwarded (doing so
        # would change the hits returned by existing default calls).
        params = {"fields": fields, "size": size, "from": _from}
        if email:  # pragma: no cover
            params["email"] = email

        assert dotfield in [True, False]
        params["dotfield"] = dotfield

        if sort:
            params["sort"] = sort
        if facets:  # pragma: no cover
            # forward the facets themselves (the sort value used to be sent)
            params["facets"] = facets
        assert entrezonly in [True, False]
        params["entrezonly"] = entrezonly
        assert ensemblonly in [True, False]
        # forward the Ensembl flag (the Entrez flag used to be sent twice)
        params["ensemblonly"] = ensemblonly

        res = self.services.http_get(f"query?q={query}",
                                     params=params,
                                     frmt="json")
        return res

    def get_queries(
        self,
        query,
        email=None,
        dotfield=True,
        scopes="all",
        species="human,mouse,rat",
        fields="symbol,name,taxid,entrezgene,ensemblgene",
    ):
        """Make gene query and return matching gene list. Support JSONP and CORS as well.

        :param str query: Query string. Examples "CDK2", "NM_052827", "204639_at",
            "chr1:151,073,054-151,383,976", "hg19.chr1:151073054-151383976". The detailed
            query syntax can be found from our docs.
        :param str fields: a comma-separated fields to limit the fields returned
            from the matching gene hits. The supported field names can be found from any
            gene object (e.g. http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If "fields=all", all
            available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param str species: can be used to limit the gene hits from given species. You can use
            "common names" for nine common species (human, mouse, rat, fruitfly, nematode,
            zebrafish, thale-cress, frog and pig). All other species, you can provide their
            taxonomy ids. Multiple species can be passed using comma as a separator.
            Default: human,mouse,rat. (Currently accepted but not sent with
            the request, see the note in the code.)
        :param dotfield: control the format of the returned fields when passed
             "fields" parameter contains dot notation, e.g. "fields=refseq.rna". If True
             the returned data object contains a single "refseq.rna" field, otherwise
             (False), a single "refseq" field with a sub-field of "rna". Default:
             True.
        :param str email: If you are regular users of this services, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that we can better track the usage or follow up with you.
        :param str scopes: not documented. Set to 'all'

        """
        # NOTE(review): `species` is part of the signature but is never added
        # to the request; confirm whether it should be forwarded.
        params = {"q": query, "fields": fields, "scopes": scopes}
        if email:  # pragma: no cover
            params["email"] = email
        assert dotfield in [True, False]
        params["dotfield"] = dotfield

        # NOTE(review): unlike get_genes, the parameters are passed with
        # ``params=`` and not ``data=``; confirm this is intended for a POST.
        res = self.services.http_post("query",
                                      params=params,
                                      frmt="json",
                                      headers={
                                          "User-Agent":
                                          self.services.getUserAgent(),
                                          "accept":
                                          "application/json",
                                          "Content-Type":
                                          "application/x-www-form-urlencoded"
                                      })
        return res

    def get_metadata(self):
        """Return the result of a GET request on the ``metadata`` resource."""
        res = self.services.http_get("metadata", frmt="json")
        return res

    def get_taxonomy(self):
        """Return the ``taxonomy`` entry of the ``metadata`` resource."""
        res = self.services.http_get("metadata", frmt="json")
        return res['taxonomy']
Ejemplo n.º 6
0
class PathwayCommons():
    """Interface to the `PathwayCommons <http://www.pathwaycommons.org/about>`_ service

    Requests are sent through the REST helper that the constructor stores
    in the ``services`` attribute.

    ::

        >>> from bioservices import *
        >>> pc2 = PathwayCommons(verbose=False)
        >>> res = pc2.get("http://identifiers.org/uniprot/Q06609")

    """

    #: output formats accepted by :meth:`get` (its ``frmt`` argument is
    #: checked against this list)
    _valid_format = ["GSEA", "SBGN", "BIOPAX", "SIF", "TXT", "JSONLD"]
    #: traversal directions accepted by the ``get_sifgraph_*`` methods that
    #: take a ``direction`` argument
    _valid_directions = ["BOTHSTREAM", "UPSTREAM", "DOWNSTREAM", "UNDIRECTED"]
    #: SIF edge types accepted as ``pattern`` filter by the
    #: ``get_sifgraph_*`` methods
    _valid_patterns = [
            "CONTROLS_STATE_CHANGE_OF", "CONTROLS_PHOSPHORYLATION_OF", 
            "CONTROLS_TRANSPORT_OF", "CONTROLS_EXPRESSION_OF",
            "IN_COMPLEX_WITH", "INTERACTS_WITH", "CATALYSIS_PRECEDES", "NEIGHBOR_OF",
            "CONSUMPTION_CONTROLLED_BY", "CONTROLS_TRANSPORT_OF_CHEMICAL",
            "CONTROLS_PRODUCTION_OF",
            "CHEMICAL_AFFECTS", "REACTS_WITH", "USED_TO_PRODUCE"]
    #: base URL handed to the REST helper by the constructor
    _url = "https://www.pathwaycommons.org"
    def __init__(self, verbose=True, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: forwarded to the underlying REST helper
            (prints informative messages)
        :param bool cache: forwarded to the underlying REST helper

        """
        self.easyXMLConversion = False
        # extension used by search() and top_pathways(); see the
        # default_extension property
        self._default_extension = "json"

        rest_options = {
            "name": 'PathwayCommons',
            "url": PathwayCommons._url,
            "verbose": verbose,
            "cache": cache,
        }
        self.services = REST(**rest_options)

    # just a get/set to the default extension
    def _set_default_ext(self, ext):
        self.services.devtools.check_param_in_list(ext, ["json", "xml"])
        self._default_extension = ext
    def _get_default_ext(self):
        return self._default_extension
    default_extension = property(_get_default_ext, _set_default_ext,
             doc="set extension of the requests (default is json). Can be 'json' or 'xml'")

    def search(self, q, page=0, datasource=None, organism=None, type=None):
        """Text search in PathwayCommons using Lucene query syntax

        Some of the parameters are BioPAX properties, others are composite
        relationships.

        All index fields are (case-sensitive): comment, ecnumber,
        keyword, name, pathway, term, xrefdb, xrefid, dataSource, and organism.

        The pathway field maps to all participants of pathways that contain
        the keyword(s) in any of its text fields.

        Finally, keyword is a transitive aggregate field that includes all
        searchable keywords of that element and its child elements.

        All searches can also be filtered by data source and organism.

        It is also possible to restrict the domain class using the
        'type' parameter.

        This query can be used standalone or to retrieve starting points
        for graph searches.


        :param str q: requires a keyword , name, external identifier, or a
            Lucene query string.
        :param int page: (N>=0, default is 0), search result page number.
        :param str datasource: filter by data source (use names or URIs of
            pathway data sources or of any existing Provenance object). If
            multiple data source values are specified, a union of hits from
            specified sources is returned. datasource=[reactome,pid] returns
            hits associated with Reactome or PID.
        :param str organism: The organism can be specified either by
            official name, e.g. "h**o sapiens" or by NCBI taxonomy id,
            e.g. "9606". Similar to data sources, if multiple organisms
            are declared a union of all hits from specified organisms
            is returned. For example organism=[9606, 10016] returns results
            for both human and mice.
        :param str type: BioPAX class filter. (e.g., 'pathway', 'proteinreference')


        .. doctest::

            >>> from bioservices import PathwayCommons
            >>> pc2 = PathwayCommons(vverbose=False)
            >>> pc2.search("Q06609")
            >>> pc2.search("brca2", type="proteinreference",
                    organism="h**o sapiens",  datasource="pid")
            >>> pc2.search("name:'col5a1'", type="proteinreference", organism=9606)
            >>> pc2.search("a*", page=3)

        Find the FGFR2 keyword::

            pc2.search("FGFR2")

        Find pathways by FGFR2 keyword in any index field.::

            pc2.search("FGFR2", type="pathway")

        Finds control interactions that contain the word binding but not
        transcription in their indexed fields::

            pc2.search("binding NOT transcription", type="control")

        Find all interactions that directly or indirectly participate
        in a pathway that has a keyword match for "immune" (Note the star after
        immune):

            pc.search("pathway:immune*", type="conversion")


        Find all Reactome pathways::

            pc.search("*", type="pathway", datasource="reactome")

        """
        if self.default_extension == "xml":
            url = "pc2/search.xml?q=%s"  % q
        elif self.default_extension == "json":
            url = "pc2/search.json?q=%s"  % q

        params = {}
        if page>=0:
            params['page'] = page
        else:
            self.services.logging.warning("page should be >=0")

        if datasource:
            params['datasource'] = datasource

        if type:
            params['type'] = type

        if organism:
            params['organism'] = organism

        res = self.services.http_get(url, frmt=self.default_extension,
                params=params)

        #if self.default_extension == "json":
        #    res = json.loads(res)
        if self.default_extension == "xml":
            res = self.easyXML(res)

        return res

    def get(self, uri, frmt="BIOPAX"):
        """Retrieves full pathway information for a set of elements

        elements can be for example pathway, interaction or physical
        entity given the RDF IDs. Get commands only
        retrieve the BioPAX elements that are directly mapped to the ID.
        Use the :meth:`traverse` query to traverse BioPAX graph and
        obtain child/owner elements.

        :param str uri: valid/existing BioPAX element's URI (RDF ID; for
            utility classes that were "normalized", such as entity refereneces
            and controlled vocabularies, it is usually a Identifiers.org URL.
            Multiple IDs can be provided using list
            uri=[http://identifiers.org/uniprot/Q06609,
            http://identifiers.org/uniprot/Q549Z0']
            See also about MIRIAM and Identifiers.org.
        :param str format: output format (values)

        :return: a complete BioPAX representation for the record
            pointed to by the given URI is returned. Other output
            formats are produced by converting the BioPAX record on
            demand and can be specified by the optional format
            parameter. Please be advised that with some output formats
            it might return "no result found" error if the conversion is
            not applicable for the BioPAX result. For example,
            BINARY_SIF output usually works if there are some
            interactions, complexes, or pathways in the retrieved set
            and not only physical entities.


        .. doctest::

            >>> from bioservices import PathwayCommons
            >>> pc2 = PathwayCommons(verbose=False)
            >>> res = pc2.get("col5a1")
            >>> res = pc2.get("http://identifiers.org/uniprot/Q06609")


        """



        self.services.devtools.check_param_in_list(frmt, self._valid_format)

        # validates the URIs
        if isinstance(uri, str):
            url = "pc2/get?uri=" +uri
        elif instance(uri, list):
            url = "pc2/get?uri=" +uri[0]
            if len(uri)>1:
                for u in uri[1:]:
                    url += "&uri=" + u

        # ?uri=http://identifiers.org/uniprot/Q06609
        # http://www.pathwaycommons.org/pc2/get?uri=COL5A1

        if frmt != "BIOPAX":
            url += "&format=%s" % frmt

        if frmt.lower() in ["biopax", "sbgn"]: 
            frmt = "xml"
        else:
            frmt = "txt"
        res = self.services.http_get(url, frmt=frmt)

        return res

    def top_pathways(self, query="*", datasource=None, organism=None):
        """This command returns all *top* pathways

        Pathways can be top or pathways that are neither
        'controlled' nor 'pathwayComponent' of another process.

        :param query: a keyword, name, external identifier or lucene query
            string like in 'search'. Default is "*"
        :param str datasource: filter by data source (same as search)
        :param str organism: organism filter. 9606 for human.

        :return: dictionary with information about top pathways. Check the
            "searchHit" key for information about "dataSource" for instance


        .. doctest::

            >>> from bioservices import PathwayCommons
            >>> pc2 = PathwayCommons(verbose=False)
            >>> res = pc2.top_pathways()


https://www.pathwaycommons.org/pc2/top_pathways?q=TP53

        """
        if self.default_extension == "json":
            url = "pc2/top_pathways.json"
        else:
            url = "pc2/top_pathways"

        params = {}
        if datasource:
            params['datasource'] = datasource
        if organism:
            params['organism'] = organism
        params['q'] = query


        res = self.services.http_get(url, frmt=self.default_extension,
                params=params)

        if self.default_extension == "xml":
            res = self.easyXML(res)
        return res

    def graph(self, kind, source, target=None, direction=None, limit=1,
            frmt=None, datasource=None, organism=None):
        """Finds connections and neighborhoods of elements

        Connections can be for example the shortest path between two proteins
        or the neighborhood for a particular protein state or all states.

        Graph searches take detailed BioPAX semantics such as generics or
        nested complexes into account and traverse the graph accordingly.
        The starting points can be either physical entites or entity references.

        In the case of the latter the graph search starts from ALL
        the physical entities that belong to that particular entity references,
        i.e.  all of its states. Note that we integrate BioPAX data from
        multiple databases  based on our proteins and small molecules data
        warehouse and consistently normalize UnificationXref, EntityReference,
        Provenance, BioSource, and ControlledVocabulary objects when we are
        absolutely sure that two objects of the same type are equivalent. We,
        however, do not merge physical entities and reactions from different
        sources as matching and aligning pathways at that level is still an
        open research problem. As a result, graph searches can return
        several similar but disconnected sub-networks that correspond to
        the pathway data from different providers (though some physical
        entities often refer to the same small molecule or protein reference
        or controlled vocabulary).


        :param str kind: graph query
        :param str source:  source object's URI/ID. Multiple source URIs/IDs
            must be encoded as list of valid URI
            **source=['http://identifiers.org/uniprot/Q06609',
            'http://identifiers.org/uniprot/Q549Z0']**.
        :param str target: required for PATHSFROMTO graph query.  target
            URI/ID. Multiple target URIs must be encoded as list (see source
            parameter).
        :param str direction: graph search  direction in [BOTHSTREAM,
            DOWNSTREAM, UPSTREAM] see :attr:`_valid_directions` attribute.
        :param int limit: graph query search distance limit (default = 1).
        :param str format: output format. see :attr:`_valid-format`
        :param str datasource: datasource filter (same as for 'search').
        :param str organism: organism filter (same as for 'search').


        :return:  By default, graph queries return a complete BioPAX
            representation of the subnetwork matched by the algorithm.
            Other output formats are available as specified by the optional
            format parameter. Please be advised that some output format
            choices might cause "no result found" error if the conversion
            is not applicable for the BioPAX result (e.g., BINARY_SIF output
            fails if there are no interactions, complexes, nor pathways
            in the retrieved set).

        .. doctest::

            >>> from bioservices import PathwayCommons
            >>> pc2 = PathwayCommons(verbose=False)
            >>> res = pc2.graph(source="http://identifiers.org/uniprot/P20908",
                    kind="neighborhood", format="EXTENDED_BINARY_SIF")



        """
        url = "pc2/graph"
        params = {}
        params['source'] = source
        params['kind'] = kind
        params['limit'] = limit

        params = {}
        if target:
            params['target'] = target
        if frmt:
            params['format'] = frmt
        if datasource:
            params['datasource'] = datasource
        if organism:
            params['organism'] = organism

        res = self.services.http_get(url, frmt="txt", params=params)
        return res

    def traverse(self, uri, path):
        """Provides XPath-like access to the PC.


        The format of the path query is in the form::

            [InitialClass]/[property1]:[classRestriction(optional)]/[property2]... A "*"

        sign after the property instructs path accessor to transitively traverse
        that property. For example, the following path accessor will traverse
        through all physical entity components within a complex::

            "Complex/component*/entityReference/xref:UnificationXref"

        The following will list display names of all participants of
        interactions, which are components (pathwayComponent) of a pathway
        (note: pathwayOrder property, where same or other interactions can be
        reached, is not considered here)::

            "Pathway/pathwayComponent:Interaction/participant*/displayName"

        The optional parameter classRestriction allows to restrict/filter the
        returned property values to a certain subclass of the range of that
        property. In the first example above, this is used to get only the
        Unification Xrefs. Path accessors can use all the official BioPAX
        properties as well as additional derived classes and parameters in
        paxtools such as inverse parameters and interfaces that represent
        anonymous union classes in OWL. (See Paxtools documentation for more
        details).

        :param str uri: a biopax element URI - specified similar to the 'GET'
            command. multiple IDs are allowed as a list of strings.
        :param str path: a BioPAX propery path in the form of
                property1[:type1]/property2[:type2]; see above, inverse
                properties, Paxtools,
                org.biopax.paxtools.controller.PathAccessor.

        .. seealso:: `properties
            <http://www.pathwaycommons.org/pc2/#biopax_properties>`_

        :return:  XML result that follows the Search Response XML Schema
            (TraverseResponse type; pagination is disabled: returns all values at
            once)

        ::


            from bioservices import PathwayCommons
            pc2 = PathwayCommons(verbose=False)
            res = pc2.traverse(uri=['http://identifiers.org/uniprot/P38398','http://identifiers.org/uniprot/Q06609'], path="ProteinReference/organism")
            res = pc2.traverse(uri="http://identifiers.org/uniprot/Q06609",
                path="ProteinReference/entityReferenceOf:Protein/name")
            res = pc2.traverse("http://identifiers.org/uniprot/P38398",
                path="ProteinReference/entityReferenceOf:Protein")
            res = pc2.traverse(uri=["http://identifiers.org/uniprot/P38398",
                "http://identifiers.org/taxonomy/9606"], path="Named/name")


        """
        url =  "pc2/traverse?"

        if isinstance(uri, str):
            url += "?uri=" + uri
        elif isinstance(uri, list):
            url += "?uri=" + uri[0]
            for u in uri[1:]:
                url += "&uri=" + u

        url += "&path=" + path

        res = self.services.http_get(url, frmt="json")
        return res

    def get_sifgraph_neighborhood(self, source, limit=1, direction="BOTHSTREAM", pattern=None):
        """finds the neighborhood sub-network in the Pathway Commons Simple Interaction 
        Format (extented SIF) graph (see http://www.pathwaycommons.org/pc2/formats#sif)


        :param source: set of gene identifiers (HGNC symbol). Can be a list of
            identifiers or just one string(if only one identifier)
        :param int limit: Graph traversal depth. Limit > 1 value can result
            in very large data or error.
        :param str direction: Graph traversal direction. Use UNDIRECTED if you want 
            to see interacts-with relationships too.
        :param str pattern: Filter by binary relationship (SIF edge) type(s).
            one of "BOTHSTREAM", "UPSTREAM", "DOWNSTREAM", "UNDIRECTED".

        returns: the graph in SIF format. The output must be stripped and
            returns one line per relation. In each line, items are separated by
            a tabulation. You can save the text with .sif extensions and it
            should be ready to use e.g. in cytoscape viewer.

        ::

            res = pc.get_sifgraph_neighborhood('BRD4')

        """
        self.services.devtools.check_param_in_list(direction, self._valid_directions)
        if pattern:
            self.services.devtools.check_param_in_list(pattern, self._valid_patterns)
        assert limit>=1

        if isinstance(source, str):
            source = [source]
        assert isinstance(source, list)
        source = ",".join(source)

        params = {  "source": source,
                    "limit": limit,
                    "direction": direction}

        if pattern:
            params['pattern'] = pattern

        res = self.services.http_get("sifgraph/v1/neighborhood", params=params,
            headers=self.services.get_headers(content="text"))

        return res.content


    def get_sifgraph_common_stream(self, source, limit=1, direction="DOWNSTREAM", pattern=None):
        """finds the common stream for them; extracts a sub-network from the loaded 
        Pathway Commons SIF model.

        :param source: set of gene identifiers (HGNC symbol). Can be a list of
            identifiers or just one string(if only one identifier)
        :param int limit: Graph traversal depth. Limit > 1 value can result
            in very large data or error.
        :param str direction: Graph traversal direction. Use UNDIRECTED if you want 
            to see interacts-with relationships too.
        :param str pattern: Filter by binary relationship (SIF edge) type(s).
            one of "BOTHSTREAM", "UPSTREAM", "DOWNSTREAM", "UNDIRECTED".

        returns: the graph in SIF format. The output must be stripped and
            returns one line per relation. In each line, items are separated by
            a tabulation. You can save the text with .sif extensions and it
            should be ready to use e.g. in cytoscape viewer.

        ::

            res = pc.get_sifgraph_common_stream(['BRD4', 'MYC'])
        """
        self.services.devtools.check_param_in_list(direction, self._valid_directions)
        if pattern:
            self.services.devtools.check_param_in_list(pattern, self._valid_patterns)
        assert limit>=1

        if isinstance(source, str):
            source = [source]
        assert isinstance(source, list)
        source = ",".join(source)

        params = {  "source": source,
                    "limit": limit,
                    "direction": direction}

        if pattern:
            params['pattern'] = pattern

        res = self.services.http_get("sifgraph/v1/commonstream", params=params,
            headers=self.services.get_headers(content="text"))
        try:
            return res.content
        except:
            # if no match, returns code 406 and ""
            return None


    def get_sifgraph_pathsbetween(self, source, limit=1, directed=False, pattern=None):
        """finds the paths between them; extracts a sub-network from the Pathway Commons SIF graph.

        :param source: set of gene identifiers (HGNC symbol). Can be a list of
            identifiers or just one string(if only one identifier)
        :param int limit: Graph traversal depth. Limit > 1 value can result
            in very large data or error.
        :param bool directed: Directionality: 'true' is for DOWNSTREAM/UPSTREAM, 'false' - UNDIRECTED
        :param str pattern: Filter by binary relationship (SIF edge) type(s).
            one of "BOTHSTREAM", "UPSTREAM", "DOWNSTREAM", "UNDIRECTED".

        returns: the graph in SIF format. The output must be stripped and
            returns one line per relation. In each line, items are separated by
            a tabulation. You can save the text with .sif extensions and it
            should be ready to use e.g. in cytoscape viewer.
        """
        if pattern:
            self.services.devtools.check_param_in_list(pattern, self._valid_patterns)
        assert limit>=1

        if isinstance(source, str):
            source = [source]
        assert isinstance(source, list)
        source = ",".join(source)

        params = {  "source": source,
                    "limit": limit,
                    "directed": directed}

        if pattern:
            params['pattern'] = pattern

        res = self.services.http_get("sifgraph/v1/pathsbetween", params=params,
            headers=self.services.get_headers(content="text"))

        return res.content


    def get_sifgraph_pathsfromto(self, source, target, limit=1, pattern=None):
        """finds the paths between them; extracts a sub-network from the Pathway Commons SIF graph.

        :param source: set of gene identifiers (HGNC symbol). Can be a list of
            identifiers or just one string(if only one identifier)
        param target: A target set of gene identifiers.
        :param int limit: Graph traversal depth. Limit > 1 value can result
            in very large data or error.
        :param str pattern: Filter by binary relationship (SIF edge) type(s).
            one of "BOTHSTREAM", "UPSTREAM", "DOWNSTREAM", "UNDIRECTED".

        returns: the graph in SIF format. The output must be stripped and
            returns one line per relation. In each line, items are separated by
            a tabulation. You can save the text with .sif extensions and it
            should be ready to use e.g. in cytoscape viewer.
        """
        if pattern:
            self.services.devtools.check_param_in_list(pattern, self._valid_patterns)
        assert limit>=1

        if isinstance(source, str):
            source = [source]
        assert isinstance(source, list)
        source = ",".join(source)
        if isinstance(target, str):
            target = [target]
        assert isinstance(target, list)
        target = ",".join(target)

        params = {  "source": source,
                    "target": target,
                    "limit": limit}

        if pattern:
            params['pattern'] = pattern

        res = self.services.http_get("sifgraph/v1/pathsfromto", params=params,
            headers=self.services.get_headers(content="text"))

        return res.content
Ejemplo n.º 7
0
class Panther():
    """Interface to `Panther <http://www.pantherdb.org/services/oai/pantherdb>`_ pages


    ::

        >>> from bioservices import Panther
        >>> p = Panther()
        >>> p.get_supported_genomes()
        >>> p.get_ortholog("zap70", 9606)


        >>> from bioservices import Panther
        >>> p = Panther()
        >>> taxon = [x['taxon_id'] for x in p.get_supported_genomes() if "coli" in x['name'].lower()]
        >>> # you may also use the get_taxon_id method
        >>> taxon = p.get_taxon_id(pattern="coli")
        >>> res = p.get_mapping("abrB,ackA,acuI", taxon)

    The get_mapping returns for each gene ID the GO terms corresponding to each
    ID. Those go terms may belong to different categories (see
    :meth:`get_annotation_datasets`):

    - MF for molecular function
    - BP for biological process
    - PC for Protein class
    - CC Cellular location
    - Pathway

    Note that results from the website application http://pantherdb.org/
    do not agree with the output of the get_mapping service... Try out the dgt
    gene from ecoli for example

    """
    #: base URL handed to the REST helper by the constructor
    _url = "http://www.pantherdb.org/services/oai/pantherdb"

    def __init__(self, verbose=True, cache=False):
        """**Constructor**

        :param verbose: set to False to prevent informative messages
        :param cache: forwarded to the underlying REST helper
        """
        rest_options = {
            "name": "Panther",
            "url": Panther._url,
            "verbose": verbose,
            "cache": cache,
        }
        # all HTTP requests of this class go through this helper
        self.services = REST(**rest_options)

        self._allPathwaysURL = "http://www.pantherdb.org/pathway/pathwayList.jsp"

    def get_pathways(self):
        """Returns all pathways from pantherdb"""
        return self.services.http_get("supportedpantherpathways")

    def get_supported_genomes(self, type=None):
        """Returns list of supported organisms.

        :param type: can be chrLoc to restrict the search


        """
        if type is not None:
            params = {'type': type}
        else:
            params = {}
        res = self.services.http_get("supportedgenomes", params=params)
        res = [x for x in res["search"]["output"]["genomes"]['genome']]
        return res

    def get_taxon_id(self, pattern=None):
        """return all taxons supported by the service

        If pattern is provided, we filter the name to keep those that contain
        the filter. If only one is found, we return the name itself, otherwise a
        list of candidates

        """
        res = self.get_supported_genomes()
        if pattern:
            taxon = [
                x['taxon_id'] for x in res
                if pattern.lower() in x['name'].lower()
            ]
            if len(taxon) == 1:
                return taxon[0]
            else:
                return taxon
        else:
            taxon = [x["taxon_id"] for x in res]
            return taxon

    def get_mapping(self, gene_list, taxon):
        """Map identifiers

        Each identifier to be delimited by comma i.e. ',. Maximum of 1000 Identifiers
        can be any of the following: Ensemble gene identifier, Ensemble protein
        identifier, Ensemble transcript identifier, Entrez gene id, gene symbol, NCBI
        GI, HGNC Id, International protein index id, NCBI UniGene id, UniProt accession
        and UniProt id

        :param gene_list: see above
        :param taxon: one taxon ID. See supported
            :meth:`~bioservices.panther.Panther.get_supported_genomes`

        If an identifier is not found, information can be found in the
        unmapped_genes key while found identifiers are in the mapped_genes key.

        .. warning:: found and not found identifiers are dispatched into
            unmapped and mapped genes. If there are not found identifiers,
            the input gene list and the mapped genes list do not have the same
            length. The input names are not stored in the output.
            Developpers should be aware of that feature.

        """
        params = {"geneInputList": gene_list, "organism": taxon}
        res = self.services.http_post("geneinfo", params=params, frmt='json')

        if "mapped_genes" in res['search']:
            mapped_genes = res['search']['mapped_genes']['gene']
            # if only one identifier, retuns a dictionary.
            # if several identifiers, returns a list of dictionary.
            # We will be consistent and return a list
            if "accession" in mapped_genes:
                mapped_genes = [mapped_genes]
        else:
            mapped_genes = [{}]

        if "unmapped_list" in res['search']:
            unmapped_genes = res['search']['unmapped_list']["unmapped"]
            if isinstance(unmapped_genes, list):
                pass
            else:
                unmapped_genes = [unmapped_genes]
        else:
            unmapped_genes = []

        logger.warning("Some identifiers were not found")
        return {"unmapped": unmapped_genes, "mapped": mapped_genes}

    def get_enrichment(self,
                       gene_list,
                       organism,
                       annotation,
                       enrichment_test="Fisher",
                       correction="FDR",
                       ref_gene_list=None):
        """Returns over represented genes

        Compares a test gene list to a reference gene list,
        and determines whether a particular class (e.g. molecular function,
        biological process, cellular component, PANTHER protein class, the
        PANTHER pathway or Reactome pathway) of genes is overrepresented
        or underrepresented.

        :param organism: a valid taxon ID
        :param enrichment_test: either **Fisher** or **Binomial** test
        :param correction: correction for multiple testing. Either **FDR**,
            **Bonferonni**, or **None**.
        :param annotation: one of the supported PANTHER annotation data types.
            See :meth:`~bioservices.panther.Panther.get_annotation_datasets` to retrieve a list of
            supported annotation data types
        :param ref_gene_list: if not specified, the system will use all the genes
            for the specified organism. Otherwise, a list delimited by
            comma. Maximum of 100000 Identifiers can be any of the
            following: Ensemble gene identifier, Ensemble protein
            identifier, Ensemble transcript identifier, Entrez gene id,
            gene symbol, NCBI GI, HGNC Id, International protein index id,
            NCBI UniGene id, UniProt accession andUniProt id.

        :return: a dictionary with the following keys. 'reference' contains the
            orgnaism, 'input_list' is the input gene list with unmapped genes. 
            'result' contains the list of candidates. 

        ::

            >>> from bioservices import Panther
            >>> p = Panther()
            >>> res = p.get_enrichment('zap70,mek1,erk', 9606, "GO:0008150")
            >>> For molecular function, use :
            >>> res = p.get_enrichment('zap70,mek1,erk', 9606,
                    "ANNOT_TYPE_ID_PANTHER_GO_SLIM_MF")

        """
        assert enrichment_test.lower() in ['fisher', 'binomial']
        if correction is None:
            correction = 'none'

        assert correction.lower() in ['fdr', 'bonferroni', 'none']

        # This is a bug in panther DB where they used bonferonni . should be
        # bonferroni...
        if correction.lower() == "bonferroni":
            correction = "bonferonni"
        assert annotation in [x['id'] for x in self.get_annotation_datasets()]

        params = {'enrichmentTestType': enrichment_test.upper()}
        params['organism'] = organism
        if gene_list:
            params['geneInputList'] = gene_list
        if ref_gene_list:
            params['refInputList'] = ref_gene_list
        params['annotDataSet'] = annotation
        params['correction'] = correction.upper()
        try:
            res = self.services.http_post("enrich/overrep",
                                          params=params,
                                          frmt="json")
            try:
                return res['results']
            except:
                return res
        except:
            return res

    def get_annotation_datasets(self):
        """Return the annotation data sets supported by the service.

        Calls the ``supportedannotdatasets`` endpoint and returns the
        ``annotation_data_type`` entry found under
        ``search -> annotation_data_sets`` in the response.
        """
        payload = self.services.http_get("supportedannotdatasets")
        datasets = payload["search"]["annotation_data_sets"]
        return datasets["annotation_data_type"]

    def get_ortholog(self,
                     gene_list,
                     organism,
                     target_organism=None,
                     ortholog_type="all"):
        """Search for matching orthologs in target organisms.

        Searches for matching orthologs in the gene family that contains
        the search gene associated with the search terms. Returns
        ortholog genes in target organisms given a search organism,
        the search terms and a list of target organisms.

        :param gene_list: gene identifier(s), sent to the service as the
            ``geneInputList`` parameter
        :param organism: a valid taxon ID
        :param target_organism: zero or more taxon IDs separated by ','. See
            :meth:`~bioservices.panther.Panther.get_supported_genomes`.
            Left out of the request when None.
        :param ortholog_type: optional parameter to specify ortholog type of
            target organism; either 'LDO' or 'all' (default)
        :return: a dictionary with "mapped" and "unmapped" keys, each of them
            being a list. For each unmapped gene, a dictionary with id and
            organism is returned. For the mapped gene, a list of ortholog is
            returned. Both default to an empty list when the response does
            not contain the corresponding entry.
        :raises ValueError: if *ortholog_type* is neither 'LDO' nor 'all'

        """
        if ortholog_type not in ('LDO', 'all'):
            raise ValueError(
                "ortholog_type must be 'LDO' or 'all'; got {!r}".format(
                    ortholog_type))

        params = {"geneInputList": gene_list, "organism": organism}
        if target_organism is not None:
            params["targetOrganism"] = target_organism
        params["orthologType"] = ortholog_type

        res = self.services.http_get("ortholog/matchortho",
                                     frmt='json',
                                     params=params)
        mapping = res['search']['mapping']

        # get_homolog_position treats "mapped" as optional in this kind of
        # response, so do the same here instead of failing with a KeyError.
        mapped = mapping.get('mapped', [])

        try:
            unmapped = mapping['unmapped_ids']['unmapped']
        except (KeyError, TypeError):
            # entry absent (or not a mapping): nothing was reported unmapped
            unmapped = []
        else:
            # make sure we always have a list: a single unmapped gene comes
            # back as a bare dictionary
            if isinstance(unmapped, dict):
                unmapped = [unmapped]

        return {"unmapped": unmapped, "mapped": mapped}

    def get_homolog_position(self,
                             gene,
                             organism,
                             position,
                             ortholog_type="all"):
        """Query the ``ortholog/homologpos`` endpoint for a single gene.

        :param gene: Can be any of the following: Ensemble gene identifier,
            Ensemble protein identifier, Ensemble transcript identifier, Entrez gene id,
            gene symbol, NCBI GI, HGNC Id, International protein index id, NCBI UniGene id,
            UniProt accession and UniProt id. Only one gene is expected; a
            warning is logged if the value contains a comma.
        :param organism: a valid taxon ID
        :param position: position sent to the service as ``pos``; must be
            greater than or equal to 1
        :param ortholog_type: optional parameter to specify ortholog type of
            target organism; either 'LDO' or 'all' (default)
        :return: the ``mapped`` entry of the response when there is one;
            otherwise the ``unmapped_ids`` entry (a warning is logged);
            None when the response contains neither.
        :raises ValueError: if *ortholog_type* is not 'LDO' or 'all', or if
            *position* is smaller than 1
        """
        if "," in gene:
            logger.warning(
                "did not expect a comma. Please provide only one gene name")
        if ortholog_type not in ('LDO', 'all'):
            raise ValueError(
                "ortholog_type must be 'LDO' or 'all'; got {!r}".format(
                    ortholog_type))
        if position < 1:
            raise ValueError(
                "position must be >= 1; got {!r}".format(position))

        params = {
            "gene": gene,
            "organism": organism,
            "pos": position,
            "orthologType": ortholog_type
        }
        res = self.services.http_get("ortholog/homologpos",
                                     params=params,
                                     frmt="json")
        mapping = res['search']['mapping']
        if "mapped" in mapping:
            return mapping['mapped']
        if "unmapped_ids" in mapping:
            logger.warning("did not find any match for {}".format(gene))
            return mapping["unmapped_ids"]
        return None

    def get_supported_families(self, N=1000, progress=True):
        """Returns the list of supported PANTHER family IDs

        This services returns only 1000 items per request. This is defined by
        the index. For instance index set to 1 returns the first 1000 families.
        Index set to 2 returns families between index 1000 and 2000 and so on.
        As of 20 Feb 2020, there was about 15,000 families.

        This function simplifies your life by calling the service as many times
        as required. Therefore it returns all families in one go.

        :param int N: number of families the service returns per request. It
            must match the size of the first page actually received.
        :param bool progress: show a progress bar (requires ``easydev``).
            When False, ``easydev`` is not imported at all.
        :return: the list of all families, pages concatenated in order.
        :raises ValueError: if the first page does not contain exactly *N*
            entries (the page size of the service is not the expected one).

        """
        def fetch(start_index):
            # one page of results, starting at the given 1-based index
            res = self.services.http_get("supportedpantherfamilies",
                                         params={'startIndex': start_index})
            return res['search']

        first = fetch(1)
        results = first['panther_family_subfam_list']['family']
        if len(results) != N:
            msg = "looks like the services changed. Call this function with N={}"
            msg = msg.format(len(results))
            raise ValueError(msg)

        total = int(first['number_of_families'])
        # ceiling division: an exact multiple of N must not trigger a
        # request for a page located past the last family
        n_pages = -(-total // N)

        pb = None
        if progress:
            from easydev import Progress
            pb = Progress(n_pages)
            pb.animate(1)

        for page in range(2, n_pages + 1):
            search = fetch((page - 1) * N + 1)
            results.extend(search['panther_family_subfam_list']['family'])
            if pb is not None:
                pb.animate(page)
        return results

    def get_family_ortholog(self, family, taxon_list=None):
        """Search for matching orthologs in target organisms

        Also return the corresponding position in the target
        organism sequence. The system searches for matching
        orthologs in the gene family that contains the search
        gene associated with the search term.

        :param family: Family ID
        :param taxon_list: Zero or more taxon IDs separated by ','. Only
            sent (as ``taxonFltr``) when it is not empty.
        :return: the ``ortholog`` entry found under
            ``search -> ortholog_list`` in the response.
        """
        request = dict(family=family)
        if taxon_list:
            request.update(taxonFltr=taxon_list)
        reply = self.services.http_get("familyortholog",
                                       frmt="json",
                                       params=request)
        orthologs = reply['search']['ortholog_list']
        return orthologs['ortholog']

    def get_family_msa(self, family, taxon_list=None):
        """Returns MSA information for the specified family.

        :param family: family ID
        :param taxon_list: Zero or more taxon IDs separated by ','. Only
            sent (as ``taxonFltr``) when it is not empty.
        :return: the ``sequence_info`` entry found under
            ``search -> MSA_list`` in the response.

        """
        request = {"family": family}
        if taxon_list:
            request['taxonFltr'] = taxon_list
        reply = self.services.http_get("familymsa",
                                       frmt="json",
                                       params=request)
        msa_list = reply['search']['MSA_list']
        return msa_list['sequence_info']

    def get_tree_info(self, family, taxon_list=None):
        """Returns tree topology information and node attributes for the specified family.

        :param family: Family ID
        :param taxon_list: Zero or more taxon IDs separated by ','. Only
            sent (as ``taxonFltr``) when it is not empty.
        :return: the whole ``search`` entry of the response.
        """
        request = {"family": family}
        if taxon_list:
            request['taxonFltr'] = taxon_list
        reply = self.services.http_get("treeinfo",
                                       frmt="json",
                                       params=request)
        # the full "search" entry is returned; narrowing it down to
        # ['tree_topology']['annotation_node'] is left to the caller
        return reply['search']
Ejemplo n.º 8
0
class Rhea():
    """Interface to the `Rhea <http://www.ebi.ac.uk/rhea/rest/1.0/>`_ service

    You can search by compound name, ChEBI ID, reaction ID, cross reference
    (e.g., EC number) or citation (author name, title, abstract text, publication ID).
    You can use double quotes - to match an exact phrase - and the following
    wildcards:

        * ? (question mark = one character),
        * `*` (asterisk = several characters).

    Searching for caffe* will find reactions with participants such as caffeine,
    trans-caffeic acid or caffeoyl-CoA::

        from bioservices import Rhea
        r = Rhea()
        response = r.search("caffe*")

    Searching for a?e?o* will find reactions with participants such as acetoin,
    acetone or adenosine.::

        from bioservices import Rhea
        r = Rhea()
        response = r.search("a?e?o*")

    The :meth:`search` and :meth:`query` methods accept a comma-separated
    list of valid columns. By default all columns are used but you can
    restrict to only a few. Here is the description of the columns:

        rhea-id :   reaction identifier (with prefix RHEA)
        equation :  textual description of the reaction equation
        chebi :     comma-separated list of ChEBI names used as reaction participants
        chebi-id :  comma-separated list of ChEBI identifiers used as reaction participants
        ec :        comma-separated list of EC numbers (with prefix EC)
        uniprot :   number of proteins (UniProtKB entries) annotated with the Rhea reaction
        pubmed :    comma-separated list of PubMed identifiers (without prefix)

    and 5 cross-references:

        reaction-xref(EcoCyc)
        reaction-xref(MetaCyc)
        reaction-xref(KEGG)
        reaction-xref(Reactome)
        reaction-xref(M-CSA)
    """
    _url = "https://www.rhea-db.org"

    # Columns requested when the caller does not restrict them; the names
    # are the ones listed in the class docstring.
    _valid_columns = [
        'rhea-id', 'equation', 'chebi', 'chebi-id', 'ec', 'uniprot', 'pubmed',
        'reaction-xref(EcoCyc)', 'reaction-xref(MetaCyc)',
        'reaction-xref(KEGG)', 'reaction-xref(Reactome)',
        'reaction-xref(M-CSA)'
    ]

    def __init__(self, verbose=True, cache=False):
        """.. rubric:: Rhea constructor

        :param bool verbose: True by default
        :param bool cache: passed on to the underlying REST service
            (False by default)

        ::

            >>> from bioservices import Rhea
            >>> r = Rhea()
        """
        self.services = REST(name="Rhea",
                             url=Rhea._url,
                             verbose=verbose,
                             cache=cache)

    def _request_params(self, query, columns, limit, frmt):
        """Build the request parameters shared by search() and query()."""
        params = {"query": query}
        if limit:
            params['limit'] = limit
        if columns is None:
            # no restriction requested: ask for every known column
            params['columns'] = ",".join(self._valid_columns)
        elif columns:
            params['columns'] = columns
        params['format'] = frmt
        return params

    @staticmethod
    def _to_dataframe(response):
        """Parse a tab-separated response into a pandas DataFrame.

        Best effort on purpose: if pandas is not installed, or the response
        cannot be parsed as tab-separated text, the response is returned
        unchanged so that the caller still gets what the service sent.
        """
        try:
            import io
            import pandas as pd
            return pd.read_csv(io.StringIO(response), sep='\t')
        except Exception:
            return response

    def search(self, query, columns=None, limit=None, frmt='tsv'):
        """Search for Rhea (mimics https://www.rhea-db.org/)

        :param str query: the search term; wildcards are described in the
            class documentation
        :param str columns: comma-separated list of columns to retrieve.
            All valid columns are requested when None.
        :param int limit: maximum number of results to retrieve
        :param str frmt: the result format requested from the service
            (default 'tsv')

        :Returns: A pandas DataFrame, or the raw response if it could not
            be converted.

        ::

            >>> r = Rhea()
            >>> df = r.search("caffeine")
            >>> df = r.search("caffeine", columns='rhea-id,equation')


        """
        # The search term goes through ``params`` (like in query()) rather
        # than being pasted into the URL, so that it is encoded together
        # with the other parameters.
        params = self._request_params(query, columns, limit, frmt)
        response = self.services.http_get("rhea/",
                                          frmt="txt",
                                          params=params)
        return self._to_dataframe(response)

    def query(self, query, columns=None, frmt="tsv", limit=None):
        """Retrieve the reactions matching a query in a given format

        :param str query: the entry to retrieve
        :param str columns: comma-separated list of columns to retrieve.
            All valid columns are requested when None.
        :param str frmt: the result format (tsv); only tsv accepted for now (Nov
            2020).
        :param int limit: maximum number of results to retrieve
        :Returns: A pandas DataFrame, or the raw response if it could not
            be converted.


        Retrieve Rhea reaction identifiers and equation text::

            r.query("", columns="rhea-id,equation", limit=10)

        Retrieve Rhea reactions with enzymes curated in UniProtKB (only first 10
        entries)::

            r.query("uniprot:*", columns="rhea-id,equation", limit=10)

        To retrieve a specific entry::

            df = r.query("rhea:10661")


        .. versionchanged:: 1.8.0 (entry() method renamed in query() and no
            more format required. Must be given in the entry name e.g.
            query("10281.rxn") instead of entry(10281, format="rxn")
            the option *frmt* is now related to the result format

        """
        params = self._request_params(query, columns, limit, frmt)
        response = self.services.http_get("rhea?",
                                          frmt="txt",
                                          params=params)
        return self._to_dataframe(response)

    def get_metabolites(self, rxn_id):
        """Given a Rhea (http://www.rhea-db.org/) reaction id,
        returns the titles of its participants.

        :param rxn_id: Rhea reaction id
        :return: a dictionary with two keys, "reactants" and "products",
            each holding the list of ``title`` attributes of the matching
            elements of the response.
        """
        # NOTE(review): no ``entry`` method is defined on this class in the
        # visible code (the query() documentation says entry() was renamed),
        # so this call looks like it raises AttributeError -- confirm and
        # port to the current API.
        response = self.entry(rxn_id, frmt="cmlreact")

        reactants = [xx.attrs['title'] for xx in response.findAll("reactant")]
        products = [xx.attrs['title'] for xx in response.findAll("product")]
        return {"reactants": reactants, "products": products}
        """ms = defaultdict(lambda: 0)
Ejemplo n.º 9
0
class Seqret():
    """Interface to the `Seqret <http://www.ebi.ac.uk/readseq>`_ service

    ::

        >>> from bioservices import *
        >>> s = Seqret()

    The ReadSeq service was replaced by the Seqret services (2015).

    .. versionchanged:: 0.15

    """

    # Options whose value is checked against the values advertised by the
    # service before a job is submitted (see run()).
    _checked_options = ('stype', 'inputformat', 'outputformat', "feature",
                        "firstonly", "reverse", 'outputcase', 'seqrange')

    def __init__(self, verbose=True):
        """.. rubric:: Constructor

        :param bool verbose:

        """
        url = "https://www.ebi.ac.uk/Tools/services/rest/emboss_seqret"
        self.services = REST(name="seqret", url=url, verbose=verbose)
        self._parameters = None  # cache filled by the `parameters` property
        self._jobid = None  # identifier of the last job submitted by run()

    def get_parameters(self):
        """Get a list of the parameter names.

        :returns: a list of strings giving the names of the parameters.

        """
        parameters = self.services.http_get("parameters", frmt="json")

        return parameters['parameters']

    def _get_parameters(self):
        # fetch once; later accesses reuse the stored list
        if not self._parameters:
            self._parameters = self.get_parameters()
        return self._parameters

    parameters = property(_get_parameters, doc="Get list of parameter names")

    def get_parameter_details(self, parameterId):
        """Get details of a specific parameter.

        :param str parameterId: identifier/name of the parameter to fetch details of.
        :return: a data structure describing the parameter and its values.
        :raises ValueError: if *parameterId* is not one of :attr:`parameters`.

        ::

            s = Seqret()
            print(s.get_parameter_details("stype"))

        """
        if parameterId not in self.parameters:
            raise ValueError(
                "Invalid parameterId provided(%s). See parameters attribute" %
                parameterId)

        request = "parameterdetails/" + parameterId
        return self.services.http_get(request, frmt="json")

    def run(self, email, title, **kargs):
        """Submit a job to the service.

        :param str email: user e-mail address.
        :param str title: job title.
        :param kargs: parameters for the tool as returned by
            :meth:`get_parameter_details`. ``sequence`` is compulsory. Every
            name must be one of :attr:`parameters`; besides ``sequence``,
            the options forwarded to the service are stype, inputformat,
            outputformat, feature, firstonly, reverse, outputcase and
            seqrange, each checked against the values the service accepts.
        :return: string containing the job identifier (jobId).
        :raises ValueError: if ``sequence`` is missing.

        Deprecated (old readseq service)::

            Format Name     Value
            Auto-detected   0
            EMBL            4
            GenBank         2
            Fasta(Pearson)  8
            Clustal/ALN     22
            ACEDB           25
            BLAST           20
            DNAStrider      6
            FlatFeat/FFF    23
            GCG             5
            GFF             24
            IG/Stanford     1
            MSF             15
            NBRF            3
            PAUP/NEXUS      17
            Phylip(Phylip4)     12
            Phylip3.2       11
            PIR/CODATA      14
            Plain/Raw       13
            SCF             21
            XML             19

        As output, you also have

        Pretty 18

        ::

            s = Seqret()
            jobid = s.run("user@example.org", "test", sequence=fasta,
                inputformat=8, outputformat=2)
            genbank = s.get_result(jobid)


        """
        check = self.services.devtools.check_param_in_list

        for name in kargs:
            check(name, self.parameters)

        if "sequence" not in kargs:
            raise ValueError("the 'sequence' argument must be provided")

        params = {"email": email, "title": title}
        for name in self._checked_options:
            if name not in kargs:
                continue
            value = kargs[name]
            details = self.get_parameter_details(name)
            valid_values = [x['value'] for x in details['values']['values']]
            # the service advertises its values as strings
            check(str(value), valid_values)
            params[name] = value

        params['sequence'] = kargs['sequence']

        jobid = self.services.http_post("run", frmt="txt", data=params)
        self._jobid = jobid
        return jobid

    def get_status(self, jobid=None):
        """Get the status of a submitted job.

        :param str jobid: job identifier. When None, the identifier of the
            last job submitted with :meth:`run` is used.
        :return: string containing the status.
        :raises ValueError: if no *jobid* is given and no job was submitted.

        The values for the status are:

        - RUNNING: the job is currently being processed.
        - FINISHED: job has finished, and the results can then be retrieved.
        - ERROR: an error occurred attempting to get the job status.
        - FAILURE: the job failed.
        - NOT_FOUND: the job cannot be found.

        """
        if jobid is None:
            jobid = getattr(self, "_jobid", None)
        if jobid is None:
            raise ValueError(
                "no jobid provided and no job submitted with run() yet")
        return self.services.http_get("status/{}".format(jobid), frmt="txt")

    def get_result_types(self, jobid):
        """Get the available result types for a finished job.

        :param str jobid: job identifier.
        :return: the list of identifiers of the available result types.
        """
        res = self.services.http_get("resulttypes/{}".format(jobid),
                                     frmt="json")
        return [x['identifier'] for x in res["types"]]

    def get_result(self, jobid, result_type="out"):
        """Get the result of a job of the specified type.

        :param str jobid: job identifier.
        :param str result_type: identifier of the result to retrieve
            (default "out"); see :meth:`get_result_types`.
        :return: the result as text, or None (after logging a warning) if
            the job status is not FINISHED.
        """
        if self.get_status(jobid) != 'FINISHED':
            self.services.logging.warning(
                "Your job is not finished yet. Try again later.")
            return None

        return self.services.http_get(
            "result/{}/{}".format(jobid, result_type), frmt="txt")
Ejemplo n.º 10
0
class PDB():
    """Interface to part of the `PDB <http://www.rcsb.org/pdb>`_ service

    :Status: in progress not for production. You can get all ID and retrieve
        uncompressed file in PDB/FASTA formats for now. New features will be
        added on request.

    .. doctest::

        >>> from bioservices import PDB
        >>> s = PDB()
        >>> res = s.get_file("1FBV", "pdb")

    """
    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: prints informative messages (default is off)
        :param bool cache: passed on to the underlying REST service
            (default is off)

        """
        url = "http://www.rcsb.org/pdb/rest"
        self.services = REST(name="PDB", url=url, verbose=verbose, cache=cache)

    def search(self, query):
        """Post an XML query to the ``search`` endpoint.

        :param str query: the XML query, for instance::

            <?xml version="1.0" encoding="UTF-8"?>
            <orgPdbQuery>
            <version>B0907</version>
            <queryType>org.pdb.query.simple.ExpTypeQuery</queryType>
            <description>Experimental Method Search : Experimental Method=SOLID-STATE NMR</description>
            <mvStructure.expMethod.value>SOLID-STATE NMR</mvStructure.expMethod.value>
            </orgPdbQuery>

        :return: the response of the service.
        """
        # http_post lives on the REST helper, not on this class
        return self.services.http_post("search", frmt="xml", data=query)

    def get_current_ids(self):
        """Get a list of all current PDB IDs."""
        res = self.services.http_get("getCurrent", frmt="xml")
        res = self.services.easyXML(res)
        return [x.attrib['structureId'] for x in res.getchildren()]

    def get_file(self, identifier, frmt, compression=False, headerOnly=False):
        """Download a file in a specified format

        :param str identifier: a valid Identifier. See :meth:`get_current_ids`.
        :param str frmt: a valid format in "pdb", "cif", "xml"
        :param bool compression: if True, request the gzipped file (".gz" is
            appended to the request). The response is then returned as
            received.
        :param bool headerOnly: sent to the service as "YES"/"NO".
        :return: for uncompressed "xml", the response wrapped with the
            service's ``easyXML`` helper; otherwise the response as received.

        .. doctest::

            >>> from bioservices import PDB
            >>> s = PDB()
            >>> res = s.get_file("1FBV", "pdb")
            >>> import tempfile
            >>> fh = tempfile.NamedTemporaryFile()
            >>> fh.write(res)
            >>> # manipulate the PDB file with your favorite tool
            >>> # close the file ONLY when finished (this is temporary file)
            >>> # fh.close()

        reference: http://www.rcsb.org/pdb/static.do?p=download/http/index.html
        """
        check = self.services.devtools.check_param_in_list
        check(frmt, ["pdb", "cif", "xml"])
        check(headerOnly, [True, False])

        query = "files/" + identifier + "." + frmt
        if compression is True:
            query += ".gz"

        params = {'headerOnly': "YES" if headerOnly is True else "NO"}

        if frmt != "xml":
            return self.services.http_get(query, frmt="txt", params=params)

        res = self.services.http_get(query, frmt=frmt, params=params)
        if compression is False:
            # easyXML lives on the REST helper, not on this class
            res = self.services.easyXML(res)
        return res

    def get_ligands(self, identifier):
        """List the ligands that can be found in a PDB entry

        :param identifier: a valid PDB identifier (e.g., 4HHB)
        :return: xml document


            >>> from bioservices import PDB
            >>> s = PDB()
            >>> s.get_ligands("4HHB")

        Then, ::

            x = s.get_ligands("4HHB")
            from pyquery import PyQuery as pq
            d = pq(x)


        """
        # The base URL already ends with "/rest": the endpoint is
        # "ligandInfo", exactly as in get_ligand_info().
        return self.services.http_get("ligandInfo",
                                      frmt='xml',
                                      params={'structureId': identifier})

    def get_xml_query(self, query):
        """Send an XML query

        :param str query: the XML query, for instance::

            <?xml version="1.0" encoding="UTF-8"?>
            <orgPdbQuery>
            <version>B0907</version>
            <queryType>org.pdb.query.simple.ExpTypeQuery</queryType>
            <description>Experimental Method Search : Experimental Method=SOLID-STATE NMR</description>
            <mvStructure.expMethod.value>SOLID-STATE NMR</mvStructure.expMethod.value>
            </orgPdbQuery>

        :return: the response of the ``query/post`` endpoint.
        """
        return self.services.http_post(
            "query/post",
            data=query,
            headers=self.services.get_headers(content='default'))

    def _structure_info(self, endpoint, identifier):
        """Fetch an XML endpoint for one structure and unwrap its content."""
        res = self.services.http_get(endpoint,
                                     params={"structureId": identifier},
                                     frmt="xml")
        res = self.services.easyXML(res)
        try:
            return res.content
        except AttributeError:
            # no ``content`` attribute on the parsed object: return it whole
            return res

    def get_go_terms(self, query):
        """Return the ``goTerms`` information of a structure.

        :param query: a PDB identifier, sent as ``structureId``
        :return: the ``content`` attribute of the parsed XML response if
            there is one, the parsed response otherwise.
        """
        return self._structure_info("goTerms", query)

    def get_ligand_info(self, query):
        """Return the ``ligandInfo`` information of a structure.

        :param query: a PDB identifier, sent as ``structureId``
        :return: the ``content`` attribute of the parsed XML response if
            there is one, the parsed response otherwise.
        """
        return self._structure_info("ligandInfo", query)
Ejemplo n.º 11
0
class NCBIblast():
    """Interface to the `NCBIblast <http://blast.ncbi.nlm.nih.gov/>`_ service.

    ::

        >>> from bioservices import *
        >>> s = NCBIblast(verbose=False)
        >>> jobid = s.run(program="blastp", sequence=s._sequence_example,
            stype="protein", database="uniprotkb", email="name@provider")
        >>> s.getResult(jobid, "out")

    .. warning:: It is very important to provide a real e-mail address as your
        job otherwise very likely will be killed and your IP, Organisation or
        entire domain black-listed.

    When running a blast request, a program is required. You can obtain the
    list using::

        >>> s.parametersDetails("program")
        [u'blastp', u'blastx', u'blastn', u'tblastx', u'tblastn']

    * blastn: Search a nucleotide database using a nucleotide query
    * blastp: Search protein database using a protein query
    * blastx: Search protein database using a translated nucleotide query
    * tblastn     Search translated nucleotide database using a protein query
    * tblastx     Search translated nucleotide database using a translated nucleotide query

    """

    _sequence_example = "MDSTNVRSGMKSRKKKPKTTVIDDDDDCMTCSACQSKLVKISDITKVSLDYINTMRGNTLACAACGSSLKLLNDFAS"

    def __init__(self, verbose=False):
        """.. rubric:: NCBIblast constructor

        :param bool verbose: prints informative messages

        """
        # HTTPS, as for the Seqret interface to the same EBI host: job
        # submissions carry the user's e-mail address and should neither
        # travel in clear text nor depend on an http -> https redirect.
        url = "https://www.ebi.ac.uk/Tools/services/rest/ncbiblast"
        self.services = REST(name="NCBIblast", url=url, verbose=verbose)
        # list of parameter names, fetched on first access of `parameters`
        self._parameters = None
        # per-parameter results remembered by get_parameter_details()
        self._parametersDetails = {}
        self.checkInterval = 2

    def get_parameters(self):
        """List parameter names.

        :returns: the ``parameters`` entry of the JSON document returned by
            the ``parameters`` endpoint.

        ::

            >>> from bioservices import ncbiblast
            >>> n = ncbiblast.NCBIblast()
            >>> res = n.get_parameters()

        .. seealso:: :attr:`parameters`, which fetches this list once and
            then reuses it.
        """
        # ask explicitly for JSON and identify ourselves to the service
        request_headers = {
            "User-Agent": self.services.getUserAgent(),
            "Accept": "application/json",
        }
        reply = self.services.http_get("parameters",
                                       frmt="json",
                                       headers=request_headers)
        return reply['parameters']

    def _get_parameters(self):
        if self._parameters:
            return self._parameters
        else:
            # on 2 lines in case it fails, self._parameters remaisn None
            res = self.get_parameters()
            self._parameters = res
        return self._parameters

    parameters = property(_get_parameters)

    def get_parameter_details(self, parameterId):
        """Get detailed information about a parameter.

        The result is remembered per parameter, so the service is queried
        only the first time a given parameter is requested.

        :param str parameterId: name of the parameter; must be one of
            :attr:`parameters`.
        :returns: the list of values that the parameter can take, or the
            response as received if it does not have the expected
            ``values -> values`` layout.
        :raises ValueError: if *parameterId* is not a known parameter.

        For example::

            >>> s.get_parameter_details("matrix")
            [u'BLOSUM45',
             u'BLOSUM50',
             u'BLOSUM62',
             u'BLOSUM80',
             u'BLOSUM90',
             u'PAM30',
             u'PAM70',
             u'PAM250']

        """
        if parameterId not in self.parameters:
            raise ValueError(
                "Invalid parameterId provided(%s). See parameters attribute" %
                parameterId)

        if parameterId not in self._parametersDetails:
            request = "parameterdetails/" + parameterId
            res = self.services.http_get(request,
                                         frmt="json",
                                         headers={
                                             "User-Agent":
                                             self.services.getUserAgent(),
                                             "Accept":
                                             "application/json"
                                         })

            try:
                data = [x['value'] for x in res["values"]["values"]]
            except (KeyError, TypeError):
                # unexpected layout (missing key, or not a mapping at all):
                # hand the response back untouched
                data = res
            self._parametersDetails[parameterId] = data
        return self._parametersDetails[parameterId]

    def run(self,
            program=None,
            database=None,
            sequence=None,
            stype="protein",
            email=None,
            **kargs):
        """ Submit a job with the specified parameters.

        .. python ncbiblast_urllib2.py -D ENSEMBL --email "*****@*****.**" --sequence
        .. MDSTNVRSGMKSRKKKPKTTVIDDDDDCMTCSACQSKLVKISDITKVSLDYINTMRGNTLACAACGSSLKLLNDFAS
        .. --program blastp --database uniprotkb


        .. rubric:: Compulsary arguments

        :param str program: BLAST program to use to perform the search (e.g., blastp)
        :param str sequence: query sequence. The use of fasta formatted sequence is recommended.
        :param list database: list of database names for search or possible a single string (for one database).
            There are some mismatch between the output of parametersDetails("database") and
            the accepted values. For instance UniProt Knowledgebase should be
            given as "uniprotkb".
        :param str email: a valid email address. Will be checked by the service itself.

        .. rubric:: Optional arguments. If not provided, a default value will be used

        :param str type: query sequence type in 'dna', 'rna' or 'protein' (default is protein).
        :param str matrix: scoring matrix to be used in the search (e.g., BLOSUM45).
        :param bool gapalign:  perform gapped alignments.
        :param int alignments:     maximum number of alignments displayed in the output.
        :param exp:     E-value threshold.
        :param bool filter:  low complexity sequence filter to process the query
            sequence before performing the search.
        :param int scores:     maximum number of scores displayed in the output.
        :param int dropoff:     amount score must drop before extension of hits is halted.
        :param match_scores:     match/miss-match scores to generate a scoring matrix 
            for nucleotide searches.
        :param int gapopen:     penalty for the initiation of a gap.
        :param int gapext:     penalty for each base/residue in a gap.
        :param seqrange: region of the query sequence to use for the search. 
            Default: whole sequence.
        :return: A jobid that can be analysed with :meth:`getResult`,
            :meth:`getStatus`, ...

        The up to data values accepted for each of these parameters can be
        retrieved from the :meth:`get_parameter_details`.

        For instance,::

            from bioservices import NCBIblast
            n = NCBIblast()
            n.get_parameter_details("program")

        Example::

            jobid = n.run(program="blastp",
                 sequence=n._sequence_example,
                 stype="protein",
                 database="uniprotkb",
                 email="*****@*****.**")

        Database can be a list of databases::

            database=["uniprotkb", "uniprotkb_swissprot"]

        The returned object is a jobid, which status can be checked. It must be
        finished before analysing/geeting the results.

        .. seealso:: :meth:`getResult`

        .. warning:: Cases are not important. Spaces in the database case should 
            be replaced by underscore.

        .. note:: database returned by the server have meaningless names since
            they do not map to the expected names. An example is "ENA Sequence Release" 
            that should be provided as em_rel

        http://www.ebi.ac.uk/Tools/sss/ncbiblast/help/index-nucleotide.html

        """
        # There are compulsary arguments:
        if program is None or sequence is None or database is None or email is None:
            raise ValueError(
                "program, sequence, email  and database must be provided")

        checkParam = self.services.devtools.check_param_in_list

        # Here, we will check the arguments values (not the type)
        # Arguments will be checked by the service itself but if we can
        # catch some before, it is better
        checkParam(program, self.get_parameter_details("program"))
        checkParam(stype, ["protein", "dna", "rna"])

        # So far, we have these parameters
        params = {
            'program': program,
            'sequence': sequence,
            'email': email,
            'stype': stype
        }

        # all others are optional (actually type is also optional)
        # We can check all of the optional argument provided automatically.
        # this is fine for now but note for instance that stype could not be put
        # here because what is returned by parametersDetails is not exactly what
        # is expected.
        for k, v in kargs.items():
            #print(k, v)
            checkParam(v, self.get_parameter_details(k))
            params[k] = v

        # similarly for the database, we must process it by hand because ther
        # can be more than one database
        #checkParam(database.lower(), [str(x.replace(" ", "_").lower())
        #    for x in self.parametersDetails("database")])
        if isinstance(database, list):
            databases = database[:]
        elif isinstance(database, str):
            databases = [database]
        else:
            raise TypeError("database must be a string or a list of strings")
        params['database'] = databases
        """
parser.add_option('--seqrange', help='region within input to use as query')
# General options
parser.add_option('--title', help='job title')
parser.add_option('--outfile', help='file name for results')
parser.add_option('--outformat', help='output format for results')
parser.add_option('--async', action='store_true', help='asynchronous mode')
parser.add_option('--jobid', help='job identifier')
parser.add_option('--polljob', action="store_true", help='get job result')
parser.add_option('--status', action="store_true", help='get job status')
parser.add_option('--resultTypes', action='store_true', help='get result types')
    """
        # IMPORTANT: use data parameter, not params !!!
        res = self.services.http_post("run",
                                      frmt=None,
                                      data=params,
                                      headers={
                                          "User-Agent":
                                          self.services.getUserAgent(),
                                          "accept":
                                          "text/plain"
                                      })

        return res

    def get_status(self, jobid):
        """Return the current status of a submitted job.

        :param str jobid: a job identifier returned by :meth:`run`.
        :return: the answer of the ``status/<jobid>`` request, i.e. a string
            giving the job status (e.g. FINISHED).

        The values for the status are:

        *   RUNNING: the job is currently being processed.
        *   FINISHED: job has finished, and the results can then be retrieved.
        *   ERROR: an error occurred attempting to get the job status.
        *   FAILURE: the job failed.
        *   NOT_FOUND: the job cannot be found.

        """
        endpoint = "status/{}".format(jobid)
        # The status is requested as plain text.
        request_headers = {
            "User-Agent": self.services.getUserAgent(),
            "accept": "text/plain",
        }
        return self.services.http_get(endpoint,
                                      frmt="txt",
                                      headers=request_headers)

    def get_result_types(self, jobid):
        """Get the available result types for a job.

        If the job is not reported as FINISHED, a warning is logged and
        :meth:`wait` is called before the result types are requested.

        :param str jobid: a job identifier returned by :meth:`run`.
        :return: A list with the ``identifier`` of each item found in the
            ``types`` entry of the JSON answer of the ``resulttypes`` request.
        """
        if self.get_status(jobid) != 'FINISHED':
            self.services.logging.warning(
                "waiting for the job to be finished. May take a while")
            # wait() of this class only takes the job identifier; the former
            # verbose=False keyword made this call raise a TypeError.
            self.wait(jobid)

        request_headers = {
            "User-Agent": self.services.getUserAgent(),
            "accept": "application/json"
        }
        res = self.services.http_get('resulttypes/' + jobid,
                                     frmt="json",
                                     headers=request_headers)
        return [x["identifier"] for x in res['types']]

    def get_result(self, jobid, result_type):
        """Get the job result of the specified type.

        :param str jobid: a job identifier returned by :meth:`run`.
        :param str result_type: type of result to retrieve. Must be one of
            'out', 'error', 'sequence', 'ids' (retrieved as text) or 'xml'
            (retrieved as XML). See :meth:`get_result_types`.
        :return: the answer of the ``result/<jobid>/<result_type>`` request.
        :raises ValueError: if *result_type* is not one of the types listed
            above, or if the job is still not FINISHED after :meth:`wait`.

        The output from the tool itself.
        Use the 'format' parameter to retrieve the output in different formats,
        the 'compressed' parameter to retrieve the xml output in compressed form.
        Format options::

           0 = pairwise,
           1 = query-anchored showing identities,
           2 = query-anchored no identities,
           3 = flat query-anchored showing identities,
           4 = flat query-anchored no identities,
           5 = XML Blast output,
           6 = tabular,
           7 = tabular with comment lines,
           8 = Text ASN.1,
           9 = Binary ASN.1,
           10 = Comma-separated values,
           11 = BLAST archive format (ASN.1).

        See NCBI Blast documentation for details.
        Use the 'compressed' parameter to return the XML output in compressed form.
        e.g. '?format=5&compressed=true'.

        """
        # Pick the decoding format first: an unknown result type used to
        # reach the final return with ``res`` unbound (UnboundLocalError).
        if result_type in ('out', "error", "sequence", "ids"):
            frmt = "txt"
        elif result_type == 'xml':
            frmt = "xml"
        else:
            raise ValueError(
                "Unsupported result_type %r; expected one of 'out', 'error', "
                "'sequence', 'ids' or 'xml'" % (result_type,))

        if self.get_status(jobid) != 'FINISHED':
            self.services.logging.warning(
                "waiting for the job to be finished. May take a while")
            self.wait(jobid)
        # wait() also returns on ERROR/FAILURE/NOT_FOUND, hence the re-check.
        if self.get_status(jobid) != "FINISHED":
            raise ValueError("job is not finished")

        url = 'result/' + jobid + '/' + result_type
        res = self.services.http_get(url,
                                     frmt=frmt,
                                     headers={
                                         "User-Agent":
                                         self.services.getUserAgent(),
                                         "accept":
                                         "text/plain"
                                     })
        return res

    def wait(self, jobId):
        """Poll the status of a job until it is neither RUNNING nor PENDING.

        The delay between two status requests is read from the
        ``checkInterval`` attribute of the instance and handed to
        :func:`time.sleep` (i.e. it is expressed in seconds).

        :param str jobId: a job identifier returned by :meth:`run`.
        :return: the last status returned by :meth:`get_status`.
        :raises ValueError: if ``self.checkInterval`` is lower than 1.

        """
        if self.checkInterval < 1:
            raise ValueError("checkInterval must be at least 1 second")

        while True:
            status = self.get_status(jobId)
            if status not in ('RUNNING', 'PENDING'):
                return status
            # Still in progress: pause before asking again.
            time.sleep(self.checkInterval)

    def _get_database(self):
        """Getter of :attr:`databases`: details of the ``database`` parameter."""
        details = self.get_parameter_details("database")
        return details

    databases = property(_get_database, doc=r"""Returns accepted databases.""")
Ejemplo n.º 12
0
class MUSCLE():
    """Interface to the `MUSCLE <http://www.ebi.ac.uk/Tools/webservices/services/msa/muscle_rest>`_ service.

    ::

        >>> from bioservices import *
        >>> m = MUSCLE(verbose=False)
        >>> with open('filename', 'r') as fh:
        ...     sequences_fasta = fh.read()
        >>> jobid = m.run(frmt="fasta", sequence=sequences_fasta,
        ...               email="name@provider")
        >>> m.get_result(jobid, "out")

    .. warning:: It is very important to provide a real e-mail address as your
        job otherwise very likely will be killed and your IP, Organisation or
        entire domain black-listed.


    Here is another similar example but we use :class:`~bioservices.uniprot.UniProt`
    class provided in bioservices to fetch the FASTA sequences::


        >>> from bioservices import UniProt, MUSCLE
        >>> u = UniProt(verbose=False)
        >>> f1 = u.get_fasta("P18413")
        >>> f2 = u.get_fasta("P18412")
        >>> m = MUSCLE(verbose=False)
        >>> jobid = m.run(frmt="fasta", sequence=f1+f2, email="name@provider")
        >>> m.get_result(jobid, "out")

    """
    def __init__(self, verbose=False):
        """.. rubric:: Constructor

        :param bool verbose: forwarded to the underlying REST service.
        """
        url = "http://www.ebi.ac.uk/Tools/services/rest/muscle"
        self.services = REST(name='MUSCLE', url=url, verbose=verbose)
        # Caches filled lazily by :attr:`parameters` and
        # :meth:`get_parameter_details`.
        self._parameters = None
        self._parametersDetails = {}
        # Headers used by the JSON requests about parameters.
        self._headers = {
            "User-Agent": self.services.getUserAgent(),
            "accept": "application/json"
        }

    def _request_headers(self, accept):
        """Build the headers of a request expecting the *accept* media type."""
        return {
            "User-Agent": self.services.getUserAgent(),
            "accept": accept
        }

    def get_parameters(self):
        """List parameter names.

         :returns: the content of the ``parameters`` entry of the JSON
             answer of the ``parameters`` request.

         ::

             >>> from bioservices import MUSCLE
             >>> m = MUSCLE()
             >>> m.get_parameters()

         .. seealso:: :attr:`parameters` to get the same information without
            sending the request more than once.
        """

        res = self.services.http_get("parameters",
                                     frmt="json",
                                     headers=self._headers)
        return res['parameters']

    def _get_parameters(self):
        if not self._parameters:
            # on 2 lines so that, in case the request fails,
            # self._parameters is left untouched
            res = self.get_parameters()
            self._parameters = res
        return self._parameters

    parameters = property(_get_parameters)

    def get_parameter_details(self, parameterId):
        """Get detailed information about a parameter.

          :param str parameterId: one of the names found in :attr:`parameters`.
          :returns: the JSON answer of the ``parameterdetails/<parameterId>``
              request. The answer is cached, so that the request is sent only
              once for a given parameter.
          :raises ValueError: if *parameterId* is not in :attr:`parameters`.

          For example::

              >>> m.get_parameter_details("format")

        """
        if parameterId not in self.parameters:
            raise ValueError(
                "Invalid parameterId provided(%s). See parameters attribute" %
                parameterId)

        if parameterId not in self._parametersDetails:
            request = "parameterdetails/" + parameterId
            res = self.services.http_get(request,
                                         frmt="json",
                                         headers=self._headers)
            self._parametersDetails[parameterId] = res
        # Always read the cache: returning the local ``res`` failed with an
        # UnboundLocalError whenever the details were already cached.
        return self._parametersDetails[parameterId]

    def run(self, frmt=None, sequence=None, tree="none", email=None):
        """ Submit a job with the specified parameters.

        .. rubric:: Compulsory arguments

        :param str frmt: format; one of 'fasta', 'clw', 'clwstrict', 'html',
            'msf', 'phyi' or 'phys'.
        :param str sequence: query sequence. The use of fasta formatted sequence is recommended.
        :param str email: a valid email address. Will be checked by the service itself.

        .. rubric:: Optional arguments

        :param str tree: tree type ('none','tree1','tree2'). It is sent to
            the service only when it differs from the default 'none'.

        :return: the answer of the ``run`` request, i.e. a jobid that can be
            analysed with :meth:`get_result`, :meth:`get_status`, ...
        :raises ValueError: if *frmt*, *sequence* or *email* is None.

        The up to date values accepted for each of these parameters can be
        retrieved from the :meth:`get_parameter_details`.

        For instance,::

            from bioservices import MUSCLE
            m = MUSCLE()
            m.get_parameter_details("tree")

        Example::

            jobid = m.run(frmt="fasta",
                 sequence=sequence_example,
                 email="*****@*****.**")

        The returned object is a jobid, which status can be checked. It must be
        finished before analysing/getting the results.

        .. seealso:: :meth:`get_result`

        """
        # There are compulsory arguments:
        if frmt is None or sequence is None or email is None:
            raise ValueError("frmt, sequence and email must be provided")

        # Here, we will check the arguments values (not the type)
        # Arguments will be checked by the service itself but if we can
        # catch some before, it is better

        # FIXME: return parameters from server are not valid
        check_param = self.services.devtools.check_param_in_list
        check_param(
            frmt, ['fasta', 'clw', 'clwstrict', 'html', 'msf', 'phyi', 'phys'])
        check_param(tree, ['none', 'tree1', 'tree2'])

        # parameter structure
        params = {'format': frmt, 'sequence': sequence, 'email': email}
        # ``tree`` used to be validated and then silently dropped. It is only
        # added when it is not the default so that default submissions are
        # unchanged.
        # NOTE(review): assumes the service parameter is named "tree" (as in
        # get_parameter_details("tree")) -- confirm against the service.
        if tree != "none":
            params['tree'] = tree

        # Explicit headers are given here: according to the original note,
        # the default headers of bioservices do not work with this service.
        # IMPORTANT: use data parameter, not params !!!
        res = self.services.http_post("run",
                                      data=params,
                                      headers=self._request_headers(
                                          "text/plain"))
        return res

    def get_status(self, jobid):
        """Get status of a submitted job

        :param str jobid: a job identifier returned by :meth:`run`.
        :return: A string giving the jobid status (e.g. FINISHED).

         The values for the status are:

         *   RUNNING: the job is currently being processed.
         *   FINISHED: job has finished, and the results can then be retrieved.
         *   ERROR: an error occurred attempting to get the job status.
         *   FAILURE: the job failed.
         *   NOT_FOUND: the job cannot be found.


        """
        res = self.services.http_get("status/{}".format(jobid),
                                     frmt="txt",
                                     headers=self._request_headers(
                                         "text/plain"))
        return res

    def get_result_types(self, jobid):
        """ Get available result types for a job.

        If the job is not reported as FINISHED, a warning is logged and
        :meth:`wait` is called before the result types are requested.

        :param str jobid: a job identifier returned by :meth:`run`.
        :return: A list with the ``identifier`` of each item found in the
            ``types`` entry of the JSON answer of the ``resulttypes`` request.
        """
        if self.get_status(jobid) != 'FINISHED':
            # The logger belongs to the REST service; this class has no
            # ``logging`` attribute of its own.
            self.services.logging.warning(
                "waiting for the job to be finished. May take a while")
            self.wait(jobid, verbose=False)
        url = 'resulttypes/' + jobid
        res = self.services.http_get(url,
                                     frmt="json",
                                     headers=self._request_headers(
                                         "application/json"))
        return [x["identifier"] for x in res['types']]

    def get_result(self, jobid, result_type):
        """ Get the job result of the specified type.


        :param str jobid: a job identifier returned by :meth:`run`.
        :param str result_type: type of result to retrieve. Must be one of
            the values returned by :meth:`get_result_types`.
        :return: the answer of the ``/result/<jobid>/<result_type>`` request.
        :raises ValueError: if the job is still not FINISHED after
            :meth:`wait`, or if *result_type* is not available for this job.

        """
        if self.get_status(jobid) != 'FINISHED':  #pragma: no cover
            self.services.logging.warning(
                "waiting for the job to be finished. May take a while")
            self.wait(jobid, verbose=False)

        if self.get_status(jobid) != "FINISHED":  #pragma: no cover
            raise ValueError("job is not finished")

        # Explicit check instead of an assert, which is stripped with -O.
        available = self.get_result_types(jobid)
        if result_type not in available:
            raise ValueError("Invalid result_type %r. Available types are %s" %
                             (result_type, available))
        url = '/result/' + jobid + '/' + result_type

        # 'out', 'sequence', 'aln-fasta', 'pim' and 'phylotree' were the only
        # types given a format; any other type left ``frmt`` unbound.
        # NOTE(review): assumes the remaining result types are textual too --
        # confirm against the service.
        frmt = "txt"
        res = self.services.http_get(url,
                                     frmt=frmt,
                                     headers=self._request_headers(
                                         "application/json"))

        return res

    def wait(self, jobId, checkInterval=5, verbose=True):
        """This function checks the status of a jobid while it is running

        :param str jobId: a job identifier returned by :meth:`run`.
        :param int checkInterval: interval between requests in seconds
            (at least 1).
        :param bool verbose: if True, print each status on stderr.
        :return: the first status that is neither RUNNING nor PENDING.
        :raises ValueError: if *checkInterval* is lower than 1.

        """
        if checkInterval < 1:  #pragma: no cover
            raise ValueError("checkInterval must be at least 1 second")
        result = 'PENDING'
        while result in ('RUNNING', 'PENDING'):
            result = self.get_status(jobId)
            if verbose:
                # required from __future__ import print_function
                print("WARNING: ", jobId, " is ", result, file=sys.stderr)

            if result in ('RUNNING', 'PENDING'):
                time.sleep(checkInterval)
        return result
Ejemplo n.º 13
0
class Rhea():
    """Interface to the `Rhea <http://www.ebi.ac.uk/rhea/rest/1.0/>`_ service

    Reactions can be searched by compound name, ChEBI ID, reaction ID, cross
    reference (e.g., EC number) or citation (author name, title, abstract
    text, publication ID). Double quotes match an exact phrase and the
    following wildcards are available:

        * ? (question mark = one character),
        * `*` (asterisk = several characters).

    Searching for caffe* will find reactions with participants such as caffeine,
    trans-caffeic acid or caffeoyl-CoA::

        from bioservices import Rhea
        r = Rhea()
        response = r.search("caffe*")

    Searching for a?e?o* will find reactions with participants such as acetoin,
    acetone or adenosine.::

        from bioservices import Rhea
        r = Rhea()
        response = r.search("a?e?o*")

    See the :meth:`search` and :meth:`entry` methods for more information
    about the formats.

    """
    _url = "http://www.rhea-db.org/rest"

    def __init__(self, version="1.0", verbose=True, cache=False):
        """.. rubric:: Rhea constructor

        :param str version: the version of the interface (1.0); it is used
            as the first element of every request.
        :param bool verbose: True by default
        :param bool cache: forwarded to the underlying REST service.

        ::

            >>> from bioservices import Rhea
            >>> r = Rhea()
        """
        self.version = version
        # Formats accepted by :meth:`entry`.
        self.format_entry = ["cmlreact", "biopax2", "rxn"]

        self.services = REST(name="Rhea",
                             url=Rhea._url,
                             verbose=verbose,
                             cache=cache)

    def search(self, query, frmt=None):
        """Search for reactions

        :param str query: the search term
        :param str frmt: either biopax2 or cmlreact (used when *frmt* is None)
        :Returns: the answer of the request, retrieved with the xml format.
        :raises ValueError: if *frmt* is neither cmlreact nor biopax2.

        ::

            >>> r = Rhea()
            >>> r.search("caffeine")  # id 10280
            >>> r.search("caffeine", frmt="biopax2")  # id 10280

        The output is in XML format. This page from the Rhea web site explains
        what are the `data fields <http://www.ebi.ac.uk/rhea/manual.xhtml>`_ of
        the XML file.

        """
        chosen = "cmlreact" if frmt is None else frmt
        if chosen not in ("biopax2", "cmlreact"):
            raise ValueError(
                "format must be either cmlreact (default) or biopax2")

        # The query is appended as is to the request.
        url = self.version + "/ws/reaction/%s?q=" % chosen + query
        return self.services.http_get(url, frmt="xml")

    def entry(self, id, frmt):
        """Retrieve a concrete reaction for the given id in a given format

        :param int id: the id of a reaction
        :param frmt: can be rxn, biopax2, or cmlreact
        :Returns: for the rxn format, the answer retrieved as text; for the
            other formats, the XML answer passed through ``easyXML``.

        ::

            >>> print(r.entry(10281, frmt="rxn"))

        This page from the Rhea web site explains what are the
        `data fields <http://www.ebi.ac.uk/rhea/manual.xhtml>`_ of the XML
        output.
        """
        self.services.devtools.check_param_in_list(frmt, self.format_entry)
        url = self.version + "/ws/reaction/%s/%s" % (frmt, id)

        if frmt == "rxn":
            return self.services.http_get(url, frmt="txt")

        raw = self.services.http_get(url, frmt="xml")
        return self.services.easyXML(raw)

    def get_metabolites(self, rxn_id):
        """Given a Rhea (http://www.rhea-db.org/) reaction id, return the
        titles of its participants.

        The reaction is retrieved in the cmlreact format and the ``title``
        attribute of every ``reactant`` and ``product`` element is collected.

        :param rxn_id: Rhea reaction id
        :return: dict with two keys, ``reactants`` and ``products``, each
            holding a list of titles.
        """
        document = self.entry(rxn_id, frmt="cmlreact")

        titles = {
            tag: [node.attrs['title'] for node in document.findAll(tag)]
            for tag in ("reactant", "product")
        }
        return {"reactants": titles["reactant"], "products": titles["product"]}
        """ms = defaultdict(lambda: 0)
Ejemplo n.º 14
0
class BioDBNet():
    """Interface to the `BioDBNet <http://biodbnet.abcc.ncifcrf.gov/>`_ service

    ::

        >>> from bioservices import *
        >>> s = BioDBNet()

    Most of the BioDBNet WSDL are available. There are functions added to
    the original interface such as :meth:`extra_getReactomeIds`.

    Use :meth:`db2db` to convert from 1 database to some databases.
    Use :meth:`dbReport` to get the convertion from one database to all
    databases.

    """
    _url = 'https://biodbnet-abcc.ncifcrf.gov/webServices/rest.php/biodbnetRestApi.json'

    def __init__(self, verbose=True, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: forwarded to the underlying REST service.
        :param bool cache: forwarded to the underlying REST service.

        The value returned by :meth:`getInputs` is stored at construction
        time; it is the reference used to validate database names.
        """
        rest_options = dict(name="BioDBNet",
                            url=BioDBNet._url,
                            verbose=verbose,
                            cache=cache)
        self.services = REST(**rest_options)
        self._valid_inputs = self.getInputs()

    def _list_to_string(self, values):
        if isinstance(values, list):
            values = ",".join(values)
        return values

    def _interpret_output_db(self, input_db, output_db):
        # in biodbnet, the database can be provided as
        # in the output of getInputs() that is with capitals and spaces
        # or with no spaces and no big caps.
        # Here, like in _check_db(), with convert everything to small caps and
        # remove spaces so as to compare the input/output databases with the
        # list of databases returned by getInputs
        outputs = self._list_to_string(output_db)
        inputResult = self.getInputs()
        #getOutputsForInput method
        outputResult = self.getOutputsForInput(input_db)
        outputResult = [this.lower().replace(" ", "") for this in outputResult]
        for output in outputs.split(","):
            if output.lower().replace(" ", "") not in outputResult:
                raise ValueError(output + " not found")
        return outputs

    def _check_db(self, value):
        def convert(value):
            return value.lower().replace(" ", "")

        if convert(value) not in [
                convert(this) for this in self._valid_inputs
        ]:
            raise ValueError(
                "Invalid value {} not a known database".format(value))

    def db2db(self, input_db, output_db, input_values, taxon=9606):
        """Convert identifiers from one database into other databases.

        :param input_db: input database.
        :param output_db: list of databases to map to.
        :param input_values: list of identifiers to map to the output databases
        :param taxon: taxon identifier sent as ``taxonId`` (default 9606).
        :return:  dataframe where index correspond to the input database
            identifiers. The columns contains the identifiers for each output
            database (see example here below). If the answer of the service
            cannot be converted into such a dataframe, the error is logged
            and the answer is returned as is.
        :raises ValueError: if *input_db* is not a known database or if one
            of the output databases is not available for *input_db*.

        ::

            >>> from bioservices import BioDBNet
            >>> s = BioDBNet()
            >>> input_db = 'Ensembl Gene ID'
            >>> output_db = ['Gene Symbol']
            >>> input_values = ['ENSG00000121410', 'ENSG00000171428']
            >>> df = s.db2db(input_db, output_db, input_values, 9606)
                            Gene Symbol
            Ensembl Gene ID
            ENSG00000121410        A1BG
            ENSG00000171428        NAT1

        """
        self._check_db(input_db)
        # This also check that the outputs exist and are compatible with the
        # input.
        outputs = self._interpret_output_db(input_db, output_db)

        url = self._url + "?method=db2db"
        url += "&input={}".format(input_db)
        url += "&outputs={}".format(outputs)
        url += "&inputValues={}".format(self._list_to_string(input_values))
        url += "&taxonId={}".format(taxon)
        url += "&format={}".format("row")
        request = self.services.http_get(url)
        try:  # TODO can be removed in v2
            df = pd.DataFrame(request).set_index("InputValue")
            df.index.name = input_db
            return df
        except Exception as err:
            # The logger belongs to the REST service: this class has no
            # ``logging`` attribute, so ``self.logging`` raised an
            # AttributeError instead of returning the raw answer.
            self.services.logging.error(err)
            return request

    def dbFind(self, output_db, input_values, taxon="9606"):
        """dbFind method

        dbFind can be used when you do not know the actual type of your identifiers or
        when you have a mixture of different types of identifiers. The tool finds the
        identifier type and converts them into the selected output if the identifiers
        are within the network.

        :param str output_db: valid database name
        :param list input_values: list of identifiers to look for
        :param taxon: taxon identifier sent as ``taxonId`` (default "9606").
        :return: a dataframe with index set to the input values. If the
            answer of the service cannot be converted into such a dataframe,
            the error is logged and the answer is returned as is.
        :raises ValueError: if *output_db* is not a known database.


        ::

            >>> b.dbFind("Gene ID", ["ZMYM6_HUMAN", "NP_710159", "ENSP00000305919"])
                            Gene ID                Input Type
            InputValue
            ZMYM6_HUMAN        9204        UniProt Entry Name
            NP_710159        203100  RefSeq Protein Accession
            ENSP00000305919  203100        Ensembl Protein ID

        """
        self._check_db(output_db)

        url = self._url + "?method=dbfind"
        url += "&output={}".format(output_db)
        url += "&inputValues={}".format(self._list_to_string(input_values))
        url += "&taxonId={}".format(taxon)
        url += "&format={}".format("row")
        request = self.services.http_get(url)
        try:
            return pd.DataFrame(request).set_index("InputValue")
        except Exception as err:
            # Best effort: fall back on the raw answer. A bare ``except``
            # would also swallow KeyboardInterrupt and SystemExit.
            self.services.logging.error(err)
            return request

    def dbOrtho(self, input_db, output_db, input_values, input_taxon,
                output_taxon):
        """Convert identifiers from one species to identifiers of a different species

        :param input_db: input database
        :param output_db: output database
        :param input_values: list of identifiers to retrieve
        :param input_taxon: input taxon
        :param output_taxon: output taxon
        :return:  dataframe where index correspond to the input database
            identifiers. The columns contains the identifiers for each output
            database (see example here below). If the answer of the service
            cannot be converted into such a dataframe, the error is logged
            and the answer is returned as is.
        :raises ValueError: if *input_db* or *output_db* is not a known
            database.

        ::

            >>> df = b.dbOrtho("Gene Symbol", "Gene ID", ["MYC", "MTOR", "A1BG"],
            ...                    input_taxon=9606, output_taxon=10090)
                 Gene ID InputValue
            0   17869        MYC
            1   56717       MTOR
            2  117586       A1BG

        """
        self._check_db(input_db)
        self._check_db(output_db)
        url = self._url + "?method=dbortho"
        url += "&input={}".format(input_db)
        url += "&output={}".format(output_db)
        url += "&inputValues={}".format(self._list_to_string(input_values))
        url += "&inputTaxon={}".format(input_taxon)
        url += "&outputTaxon={}".format(output_taxon)
        url += "&format={}".format("row")
        request = self.services.http_get(url)

        try:
            df = pd.DataFrame(request).set_index("InputValue")
            df.index.name = input_db
            return df
        except Exception as err:
            # Best effort: fall back on the raw answer. A bare ``except``
            # would also swallow KeyboardInterrupt and SystemExit.
            self.services.logging.error(err)
            return request

    def dbReport(self, input_db, input_values, taxon=9606):
        """Same as :meth:`db2db` but returns results for all possible outputs.

        :param input_db: input database
        :param input_values: list of identifiers to retrieve
        :param taxon: taxon identifier sent as ``taxonId`` (default 9606).
        :return:  dataframe where index correspond to the input database
            identifiers. The columns contains the identifiers for each output
            database (see example here below). If the answer of the service
            cannot be converted into such a dataframe, the error is logged
            and the answer is returned as is.
        :raises ValueError: if *input_db* is not a known database.

        ::

            df = s.dbReport("Ensembl Gene ID", ['ENSG00000121410', 'ENSG00000171428'])

        """
        self._check_db(input_db)
        url = self._url + "?method=dbreport"
        url += "&input={}".format(input_db)
        url += "&inputValues={}".format(self._list_to_string(input_values))
        url += "&taxonId={}".format(taxon)
        url += "&format={}".format("row")
        request = self.services.http_get(url)
        try:  # TODO can be removed in v2
            df = pd.DataFrame(request).set_index("InputValue")
            df.index.name = input_db
            return df
        except Exception as err:
            # The logger belongs to the REST service: this class has no
            # ``logging`` attribute, so ``self.logging`` raised an
            # AttributeError instead of returning the raw answer.
            self.services.logging.error(err)
            return request

    def dbWalk(self, db_path, input_values, taxon=9606):
        """Walk through biological database network

        dbWalk is a form of database to database conversion where the user has
        complete control on the path to follow while doing the conversion. When
        an input/node is added to the path the input selection gets updated
        with all the nodes that it can access directly.

        :param db_path: path to follow in the databases, written as node names
            separated by ``->`` (see example below)
        :param input_values: list of identifiers
        :param taxon: taxon identifier sent to the service as ``taxonId``
            (defaults to 9606)
        :return: a dataframe with columns corresponding to the path nodes. If
            the response cannot be converted to a dataframe, the raw response
            is returned unchanged.

        A typical example is to get the Ensembl mouse homologs for
        Ensembl Gene ID's from human. This conversion is not possible
        through :meth:`db2db` as Homologene does not have
        Ensembl ID's and the input and output nodes to achieve this would both
        be 'Ensembl Gene ID'. It can however be run by using dbWalk as follows.
        Add Ensembl Gene ID to the path, then add Gene Id, Homolog - Mouse Gene
        ID and Ensembl Gene ID to complete the path.

        ::

            db_path = "Ensembl Gene ID->Gene ID->Homolog - Mouse Gene ID->Ensembl Gene ID"
            s.dbWalk(db_path, ["ENSG00000175899"])

        .. todo:: check validity of the path

        """
        url = self._url + "?method=dbwalk"
        url += "&inputValues={}".format(self._list_to_string(input_values))
        url += "&dbPath={}".format(db_path)
        url += "&taxonId={}".format(taxon)
        url += "&format={}".format("row")
        request = self.services.http_get(url)

        try:
            return pd.DataFrame(request)
        except Exception:
            # Best effort: non-tabular payloads are returned as received.
            # ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt/SystemExit propagate.
            return request

    def getDirectOutputsForInput(self, input_db):
        """Gets all the direct output nodes for a given input node

        Outputs reachable by single edge connection in the bioDBnet graph.

        :param input_db: input database (validated with ``_check_db``)
        :return: the ``output`` entry of the service response; if the response
            has no such entry (or is not a mapping), the raw response is
            returned unchanged.

        ::

            b.getDirectOutputsForInput("genesymbol")
            b.getDirectOutputsForInput("Gene Symbol")
            b.getDirectOutputsForInput("pdbid")
            b.getDirectOutputsForInput("PDB ID")
        """
        self._check_db(input_db)

        url = self._url
        url += "?method=getdirectoutputsforinput"
        url += "&input={}&directOutput=1".format(input_db)
        request = self.services.http_get(url)

        try:
            return request['output']
        except (KeyError, TypeError):
            # Missing key, or a non-subscriptable payload such as a status
            # code: fall back to the raw response.
            return request

    def getInputs(self):
        """Return list of possible input database

        :return: the ``input`` entry of the service response; if the response
            has no such entry (or is not a mapping), the raw response is
            returned unchanged.

        ::

            s.getInputs()
        """
        request = self.services.http_get(self._url + "?method=getinputs")
        try:
            return request['input']
        except (KeyError, TypeError):
            # Missing key, or a non-subscriptable payload such as a status
            # code: fall back to the raw response.
            return request

    def getOutputsForInput(self, input_db):
        """Return list of possible output database for a given input database

        :param input_db: input database (validated with ``_check_db``)
        :return: the ``output`` entry of the service response; if the response
            has no such entry (or is not a mapping), the raw response is
            returned unchanged.

        ::

            s.getOutputsForInput("UniProt Accession")

        """
        self._check_db(input_db)
        url = self._url + "?method=getoutputsforinput"
        url += "&input={}".format(input_db)
        request = self.services.http_get(url)
        try:
            return request['output']
        except (KeyError, TypeError):
            # Missing key, or a non-subscriptable payload such as a status
            # code: fall back to the raw response.
            return request