コード例 #1
0
class PDBe():
    """Interface to part of the `PDBe <http://www.ebi.ac.uk/pdbe>`_ service

    .. doctest::

        >>> from bioservices import PDBe
        >>> s = PDBe()
        >>> res = s.get_files("1FBV")

    """
    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: prints informative messages (default is off)
        :param bool cache: forwarded unchanged to the REST helper as its
            ``cache`` argument (default is off)

        """
        # Every endpoint used by the methods of this class is relative to
        # this entry-level base URL.
        self.services = REST(
            name="PDBe",
            url="https://www.ebi.ac.uk/pdbe/api/pdb/entry/",
            verbose=verbose,
            cache=cache,
        )

    def _check_id(self, pdbid):
        if isinstance(pdbid, list):
            pdbid = ",".join(pdbid)

        if isinstance(pdbid, str):
            for item in pdbid.split(","):
                assert len(item) == 4, "a 4-character PDB id code is required"
        else:
            raise TypeError(
                "pdb id must be either a 4-character pdb id, a list of valid PDB ids, or a string made of pdb ids, separated by commas"
            )

        return pdbid

    def _return(self, res):
        if res == 404:
            return {}
        return res

    def get_summary(self, query):
        """Returns summary of a PDB entry

        This can be title of the entry, list of depositors, date of deposition,
        date of release, date of latest revision, experimental method, list
        of related entries in case split entries, etc.

        :param query: a 4-character PDB id code

        ::

            p.get_summary('1cbs')
            p.get_summary('1cbs,2kv8')
            p.get_summary(['1cbs', '2kv8'])

        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("summary/{}".format(query))
        else:
            res = self.services.http_post("summary", data=query, frmt="json")
        return self._return(res)

    def get_molecules(self, query):
        """Return details of molecules  (or entities in mmcif-speak) modelled in the entry

        This can be entity id, description, type, polymer-type (if applicable), number
        of copies in the entry, sample preparation method, source organism(s)
        (if applicable), etc.

        :param query: a 4-character PDB id code

        ::

            p.get_molecules('1cbs')

        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("molecules/{}".format(query))
        else:
            res = self.services.http_post("molecules", data=query, frmt="json")
        return self._return(res)

    def get_related_publications(self, query):
        """Return publications obtained from both EuroPMC and UniProt. T


        These are articles which cite the primary citation of the entry, or
        open-access articles which mention the entry id without explicitly citing the
        primary citation of an entry.


        :param query: a 4-character PDB id code

        ::

            p.get_related_publications('1cbs')

        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get(
                "related_publications/{}".format(query))
        else:
            res = self.services.http_post("related_publications/",
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_experiment(self, query):
        """Provides details of experiment(s) carried out in determining the structure of the entry.

        Each experiment is described in a separate dictionary.
        For X-ray diffraction, the description consists of resolution, spacegroup, cell
        dimensions, R and Rfree, refinement program, etc.
        For NMR, details of spectrometer, sample, spectra, refinement, etc. are
        included.
        For EM, details of specimen, imaging, acquisition, reconstruction, fitting etc.
        are included.

        :param query: a 4-character PDB id code

        ::

            p.get_experiment('1cbs')

        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("experiment/{}".format(query))
        else:
            res = self.services.http_post("experiment/{}",
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_nmr_resources(self, query):
        """This call provides URLs of available additional resources for NMR
        entries. E.g., mapping between structure (PDB) and chemical shift (BMRB)
        entries.
        :param query: a 4-character PDB id code

        ::

            p.get_nmr_resources('1cbs')

        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("nmr_resources/{}".format(query))
        else:
            res = self.services.http_post("nmr_resources/",
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_ligand_monomers(self, query):
        """Provides a a list of modelled instances of ligands,

        ligands i.e. 'bound' molecules that are not waters.

        :param query: a 4-character PDB id code

        ::

            p.get_ligand_monomers('1cbs')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("ligand_monomers/{}".format(query))
        else:
            res = self.services.http_post("ligand_monomers",
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_modified_residues(self, query):
        """Provides a list of modelled instances of modified amino acids or
        nucleotides in protein, DNA or RNA chains.


        :param query: a 4-character PDB id code

        ::

            p.get_modified_residues('4v5j')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("modified_AA_or_NA/{}".format(query))
        else:
            res = self.services.http_post("modified_AA_or_NA",
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_mutated_residues(self, query):
        """Provides a list of modelled instances of mutated amino acids or
        nucleotides in protein, DNA or RNA chains.


        :param query: a 4-character PDB id code

        ::

            p.get_mutated_residues('1bgj')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("mutated_AA_or_NA/{}".format(query))
        else:
            res = self.services.http_get("mutated_AA_or_NA",
                                         data=query,
                                         frmt="json")
        return self._return(res)

    def get_release_status(self, query):
        """Provides status of a PDB entry (released, obsoleted, on-hold etc)
        along with some other information such as authors, title, experimental method,
        etc.

        :param query: a 4-character PDB id code

        ::

            p.get_release_status('1cbs')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("status/{}".format(query))
        else:
            res = self.services.http_get("status/{}", data=query, frmt="json")
        return self._return(res)

    def get_observed_ranges(self, query):
        """Provides observed ranges, i.e., segments of structural coverage of
         polymeric molecues that are modelled fully or partly

        :param query: a 4-character PDB id code

        ::

            p.get_observed_ranges('1cbs')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("polymer_coverage/{}".format(query))
        else:
            res = self.services.http_post("polymer_coverage",
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_observed_ranges_in_pdb_chain(self, query, chain_id):
        """Provides observed ranges, i.e., segments of structural coverage of
         polymeric molecules in a particular chain

        :param query: a 4-character PDB id code
        :param query: a PDB chain ID

        ::

            p.get_observed_ranges_in_pdb_chain('1cbs', "A")


        """
        assert len(query) == 4, "a 4-character PDB id code is required"
        res = self.services.http_get("polymer_coverage/{}/chain/{}".format(
            query, chain_id))
        return self._return(res)

    def get_secondary_structure(self, query):
        """Provides residue ranges of regular secondary structure 

        (alpha helices and beta strands) found in protein chains of the entry.
        For strands, sheet id can be used to identify a beta sheet.



        :param query: a 4-character PDB id code

        ::

            p.get_secondary_structure('1cbs')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get(
                "secondary_structure/{}".format(query))
        else:
            res = self.services.http_post("secondary_structure/",
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_residue_listing(self, query):
        """Provides lists all residues (modelled or otherwise) in the entry.
    
        Except waters, along with details of the fraction of expected atoms modelled for
        the residue and any alternate conformers.


        :param query: a 4-character PDB id code

        ::

            p.get_residue_listing('1cbs')


        """
        assert len(query) == 4, "a 4-character PDB id code is required"
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("residue_listing/{}".format(query))
        return self._return(res)

    def get_residue_listing_in_pdb_chain(self, query, chain_id):
        """Provides all residues (modelled or otherwise) in the entry

        Except waters, along with details of the fraction of expected atoms 
        modelled for the residue and any alternate conformers.

        :param query: a 4-character PDB id code
        :param query: a PDB chain ID

        ::

            p.get_residue_listing_in_pdb_chain('1cbs')


        """
        assert len(query) == 4, "a 4-character PDB id code is required"
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("residue_listing/{}".format(
                query, chain_id))
        return self._return(res)

    def get_binding_sites(self, query):
        """Pprovides details on binding sites in the entry

        STRUCT_SITE records in PDB files (or mmcif equivalent thereof), such as ligand,
        residues in the site, description of the site, etc.


        :param query: a 4-character PDB id code

        ::

            p.get_binding_sites('1cbs')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("binding_sites/{}".format(query))
        else:
            res = self.services.http_post("binding_sites",
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_files(self, query):
        """Provides URLs and brief descriptions (labels) for PDB entry

        Also, for mmcif files, biological assembly files, FASTA file for sequences, 
        SIFTS cross reference XML files, validation XML files, X-ray structure 
        factor file, NMR experimental constraints files, etc. 

        :param query: a 4-character PDB id code

        ::

            p.get_files('1cbs')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("files/{}".format(query))
        else:
            res = self.services.http_post("files", data=query, frmt="json")
        return self._return(res)

    def get_observed_residues_ratio(self, query):
        """Provides the ratio of observed residues for each chain in each molecule

        The list of chains within an entity is sorted by observed_ratio (descending order),
         partial_ratio (ascending order), and number_residues (descending order).

        :param query: a 4-character PDB id code

        ::

            p.get_observed_residues_ratio('1cbs')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get(
                "observed_residues_ratio/{}".format(query))
        else:
            res = self.services.http_post("observed_residues_ratio",
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_assembly(self, query):
        """Provides information for each assembly of a given PDB ID. T

        This information is broken down at the entity level for each assembly. The
        information given includes the molecule name, type and class, the chains where
        the molecule occur, and the number of copies of each entity in the assembly.

        :param query: a 4-character PDB id code

        ::

            p.get_assembly('1cbs')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("assembly/{}".format(query))
        else:
            res = self.services.http_post("assembly", data=query, frmt="json")
        return self._return(res)

    def get_electron_density_statistics(self, query):
        """This call details the statistics for electron density.

        :param query: a 4-character PDB id code

        ::

            p.get_electron_density_statistics('1cbs')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get(
                "electron_density_statistics/{}".format(query))
        else:
            res = self.services.http_post("electron_density_statistics",
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_functional_annotation(self, query):
        """Provides functional annotation of all ligands, i.e. 'bound'

        :param query: a 4-character PDB id code

        ::

            p.get_functional_annotation('1cbs')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("cofactor/{}".format(query))
        else:
            res = self.services.http_post("cofactor", data=query, frmt="json")
        return self._return(res)

    def get_drugbank_annotation(self, query):
        """This call provides DrugBank annotation of all ligands, i.e. 'bound'

        :param query: a 4-character PDB id code

        ::

            p.get_drugbank_annotation('5hht')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get("drugbank/{}".format(query))
        else:
            res = self.services.http_post("drugbank", data=query, frmt="json")
        return self._return(res)

    def get_related_dataset(self, query):
        """Provides DOI’s for related raw experimental datasets

        Includes diffraction image data, small-angle scattering data and
        electron micrographs.


        :param query: a 4-character PDB id code

        ::

            p.get_cofactor('5o8b')


        """
        query = self._check_id(query)
        if isinstance(query, str) and "," not in query:
            res = self.services.http_get(
                "related_experiment_data/{}".format(query))
        else:
            res = self.services.http_post("related_experiment_data",
                                          data=query,
                                          frmt="json")
        return self._return(res)
コード例 #2
0
ファイル: panther.py プロジェクト: shunsunsun/bioservices
class Panther():
    """Interface to `Panther <http://www.pantherdb.org/services/oai/pantherdb>`_ pages


    ::

        >>> from bioservices import Panther
        >>> p = Panther()
        >>> p.get_supported_genomes()
        >>> p.get_ortholog("zap70", 9606)


        >>> from bioservices import Panther
        >>> p = Panther()
        >>> taxon = [x['taxon_id'] for x in p.get_supported_genomes() if "coli" in x['name'].lower()]
        >>> # you may also use our method called search_organism
        >>> taxon = p.get_taxon_id(pattern="coli")
        >>> res = p.get_mapping("abrB,ackA,acuI", taxon)

    The get_mapping returns for each gene ID the GO terms corresponding to each
    ID. Those go terms may belong to different categories (see
    meth:`get_annotation_datasets`):

    - MF for molecular function
    - BP for biological process
    - PC for Protein class
    - CC Cellular location
    - Pathway

    Note that results from the website application http://pantherdb.org/
    do not agree with the oupput of the get_mapping service... Try out the dgt
    gene from ecoli for example




    """
    _url = "http://www.pantherdb.org/services/oai/pantherdb"

    def __init__(self, verbose=True, cache=False):
        """**Constructor**

        :param verbose: set to False to prevent informative messages
        :param cache: forwarded unchanged to the REST helper as its
            ``cache`` argument
        """
        # The REST helper is held as an attribute (composition) and is built
        # from the class-level service URL.
        rest_options = {
            "name": "Panther",
            "url": Panther._url,
            "verbose": verbose,
            "cache": cache,
        }
        self.services = REST(**rest_options)

        self._allPathwaysURL = "http://www.pantherdb.org/pathway/pathwayList.jsp"

    def get_pathways(self):
        """Returns all pathways from pantherdb"""
        return self.services.http_get("supportedpantherpathways")

    def get_supported_genomes(self, type=None):
        """Returns list of supported organisms.

        :param type: can be chrLoc to restrict the search


        """
        if type is not None:
            params = {'type': type}
        else:
            params = {}
        res = self.services.http_get("supportedgenomes", params=params)
        res = [x for x in res["search"]["output"]["genomes"]['genome']]
        return res

    def get_taxon_id(self, pattern=None):
        """return all taxons supported by the service

        If pattern is provided, we filter the name to keep those that contain
        the filter. If only one is found, we return the name itself, otherwise a
        list of candidates

        """
        res = self.get_supported_genomes()
        if pattern:
            taxon = [
                x['taxon_id'] for x in res
                if pattern.lower() in x['name'].lower()
            ]
            if len(taxon) == 1:
                return taxon[0]
            else:
                return taxon
        else:
            taxon = [x["taxon_id"] for x in res]
            return taxon

    def get_mapping(self, gene_list, taxon):
        """Map identifiers

        Each identifier to be delimited by comma i.e. ',. Maximum of 1000 Identifiers
        can be any of the following: Ensemble gene identifier, Ensemble protein
        identifier, Ensemble transcript identifier, Entrez gene id, gene symbol, NCBI
        GI, HGNC Id, International protein index id, NCBI UniGene id, UniProt accession
        and UniProt id

        :param gene_list: see above
        :param taxon: one taxon ID. See supported
            :meth:`~bioservices.panther.Panther.get_supported_genomes`

        If an identifier is not found, information can be found in the
        unmapped_genes key while found identifiers are in the mapped_genes key.

        .. warning:: found and not found identifiers are dispatched into
            unmapped and mapped genes. If there are not found identifiers,
            the input gene list and the mapped genes list do not have the same
            length. The input names are not stored in the output.
            Developpers should be aware of that feature.

        """
        params = {"geneInputList": gene_list, "organism": taxon}
        res = self.services.http_post("geneinfo", params=params, frmt='json')

        if "mapped_genes" in res['search']:
            mapped_genes = res['search']['mapped_genes']['gene']
            # if only one identifier, retuns a dictionary.
            # if several identifiers, returns a list of dictionary.
            # We will be consistent and return a list
            if "accession" in mapped_genes:
                mapped_genes = [mapped_genes]
        else:
            mapped_genes = [{}]

        if "unmapped_list" in res['search']:
            unmapped_genes = res['search']['unmapped_list']["unmapped"]
            if isinstance(unmapped_genes, list):
                pass
            else:
                unmapped_genes = [unmapped_genes]
        else:
            unmapped_genes = []

        logger.warning("Some identifiers were not found")
        return {"unmapped": unmapped_genes, "mapped": mapped_genes}

    def get_enrichment(self,
                       gene_list,
                       organism,
                       annotation,
                       enrichment_test="Fisher",
                       correction="FDR",
                       ref_gene_list=None):
        """Returns over represented genes

        Compares a test gene list to a reference gene list,
        and determines whether a particular class (e.g. molecular function,
        biological process, cellular component, PANTHER protein class, the
        PANTHER pathway or Reactome pathway) of genes is overrepresented
        or underrepresented.

        :param organism: a valid taxon ID
        :param enrichment_test: either **Fisher** or **Binomial** test
        :param correction: correction for multiple testing. Either **FDR**,
            **Bonferonni**, or **None**.
        :param annotation: one of the supported PANTHER annotation data types.
            See :meth:`~bioservices.panther.Panther.get_annotation_datasets` to retrieve a list of
            supported annotation data types
        :param ref_gene_list: if not specified, the system will use all the genes
            for the specified organism. Otherwise, a list delimited by
            comma. Maximum of 100000 Identifiers can be any of the
            following: Ensemble gene identifier, Ensemble protein
            identifier, Ensemble transcript identifier, Entrez gene id,
            gene symbol, NCBI GI, HGNC Id, International protein index id,
            NCBI UniGene id, UniProt accession andUniProt id.

        :return: a dictionary with the following keys. 'reference' contains the
            orgnaism, 'input_list' is the input gene list with unmapped genes. 
            'result' contains the list of candidates. 

        ::

            >>> from bioservices import Panther
            >>> p = Panther()
            >>> res = p.get_enrichment('zap70,mek1,erk', 9606, "GO:0008150")
            >>> For molecular function, use :
            >>> res = p.get_enrichment('zap70,mek1,erk', 9606,
                    "ANNOT_TYPE_ID_PANTHER_GO_SLIM_MF")

        """
        assert enrichment_test.lower() in ['fisher', 'binomial']
        if correction is None:
            correction = 'none'

        assert correction.lower() in ['fdr', 'bonferroni', 'none']

        # This is a bug in panther DB where they used bonferonni . should be
        # bonferroni...
        if correction.lower() == "bonferroni":
            correction = "bonferonni"
        assert annotation in [x['id'] for x in self.get_annotation_datasets()]

        params = {'enrichmentTestType': enrichment_test.upper()}
        params['organism'] = organism
        if gene_list:
            params['geneInputList'] = gene_list
        if ref_gene_list:
            params['refInputList'] = ref_gene_list
        params['annotDataSet'] = annotation
        params['correction'] = correction.upper()
        try:
            res = self.services.http_post("enrich/overrep",
                                          params=params,
                                          frmt="json")
            try:
                return res['results']
            except:
                return res
        except:
            return res

    def get_annotation_datasets(self):
        """Retrieve the list of supported annotation data sets"""
        res = self.services.http_get("supportedannotdatasets")
        res = res["search"]["annotation_data_sets"]["annotation_data_type"]
        return res

    def get_ortholog(self,
                     gene_list,
                     organism,
                     target_organism=None,
                     ortholog_type="all"):
        """search for matching orthologs in target organisms.

        Searches for matching orthologs in the gene family that contains
        the search gene associated with the search terms. Returns
        ortholog genes in target organisms given a search organism,
        the search terms and a list of target organisms.

        :param gene_list:
        :param organism: a valid taxon ID
        :param target_organism: zero or more taxon IDs separated by ','. See
            :meth:`~bioservices.panther.Panther.get_supported_genomes`
        :param ortholog_type: optional parameter to specify ortholog type of target organism
        :return: a dictionary with "mapped" and "unmapped" keys, each of them
            being a list. For each unmapped gene, a dictionary with id and
            organism is is returned. For the mapped gene, a list of ortholog is
            returned.

        """
        assert ortholog_type in ['LDO', 'all']
        params = {
            "geneInputList": gene_list,
            "organism": organism,
            "targetOrganism": target_organism,
            "orthologType": ortholog_type
        }
        if params['targetOrganism'] is None:
            del params['targetOrganism']
        res = self.services.http_get("ortholog/matchortho",
                                     frmt='json',
                                     params=params)
        res = res['search']['mapping']
        mapped = res['mapped']

        try:
            unmapped = res['unmapped_ids']['unmapped']
            # make sure we always have a list
            if isinstance(unmapped, dict):
                unmapped = [unmapped]
        except:
            unmapped = []
        res = {"unmapped": unmapped, "mapped": mapped}

        return res

    def get_homolog_position(self,
                             gene,
                             organism,
                             position,
                             ortholog_type="all"):
        """

        :param gene: Can be any of the following: Ensemble gene identifier,
            Ensemble protein identifier, Ensemble transcript identifier, Entrez gene id,
            gene symbol, NCBI GI, HGNC Id, International protein index id, NCBI UniGene id,
            UniProt accession andUniProt id
        :param organism: a valid taxon ID
        :param ortholog_type: optional parameter to specify ortholog type of target organism
        """
        if "," in gene:
            logger.warning(
                "did not expect a comma. Please provide only one gene name")
        assert ortholog_type in ['LDO', 'all']
        assert position >= 1
        params = {
            "gene": gene,
            "organism": organism,
            "pos": position,
            "orthologType": ortholog_type
        }
        res = self.services.http_get("ortholog/homologpos",
                                     params=params,
                                     frmt="json")
        res = res['search']['mapping']
        if "mapped" in res.keys():
            res = res['mapped']
            return res
        elif "unmapped_ids" in res.keys():
            logger.warning("did not find any match for {}".format(gene))
            return res["unmapped_ids"]

    def get_supported_families(self, N=1000, progress=True):
        """Returns the list of supported PANTHER family IDs

        This services returns only 1000 items per request. This is defined by
        the index. For instance index set to 1 returns the first 1000 families.
        Index set to 2 returns families between index 1000 and 2000 and so on.
        As of 20 Feb 2020, there was about 15,000 families.

        This function simplifies your life by calling the service as many times
        as required. Therefore it returns all families in one go.

        """
        from easydev import Progress
        params = {'startIndex': 1}
        res = self.services.http_get("supportedpantherfamilies", params=params)
        results = res['search']['panther_family_subfam_list']['family']
        if len(results) != N:
            msg = "looks like the services changed. Call this function with N={}"
            msg = msg.format(len(results))
            raise ValueError(msg)

        number_of_families = res['search']['number_of_families']
        pb = Progress(int(number_of_families / N))
        pb.animate(1)
        for i in range(1, int(number_of_families / N) + 1):
            params = {'startIndex': i * N + 1}
            res = self.services.http_get("supportedpantherfamilies",
                                         params=params)
            data = res['search']['panther_family_subfam_list']['family']
            results.extend(data)
            if progress:
                pb.animate(i)
        return results

    def get_family_ortholog(self, family, taxon_list=None):
        """Search for matching orthologs in target organisms

        Also return the corresponding position in the target
        organism sequence. The system searches for matching
        orthologs in the gene family that contains the search
        gene associated with the search term.

        :param family: Family ID
        :param taxon_list: Zero or more taxon IDs separated by ','.
        """

        params = {"family": family}
        if taxon_list:
            params['taxonFltr'] = taxon_list
        res = self.services.http_get("familyortholog",
                                     params=params,
                                     frmt="json")
        return res['search']['ortholog_list']['ortholog']

    def get_family_msa(self, family, taxon_list=None):
        """Returns MSA information for the specified family.

        :param family: family ID
        :param taxon_list: Zero or more taxon IDs separated by ','.

        """
        params = {"family": family}
        if taxon_list:
            params['taxonFltr'] = taxon_list
        res = self.services.http_get("familymsa", params=params, frmt="json")
        return res['search']['MSA_list']['sequence_info']

    def get_tree_info(self, family, taxon_list=None):
        """Returns tree topology information and node attributes for the specified family.

        :param family: Family ID
        :param taxon_list: Zero or more taxon IDs separated by ','.
        """
        params = {"family": family}
        if taxon_list:
            params['taxonFltr'] = taxon_list
        res = self.services.http_get("treeinfo", params=params, frmt="json")
        return res['search']  #['tree_topology']['annotation_node']
コード例 #3
0
ファイル: mygeneinfo.py プロジェクト: shunsunsun/bioservices
class MyGeneInfo():
    """Interface to the `mygene.info <http://mygene.info>`_ service

    .. doctest::

        >>> from bioservices import MyGeneInfo
        >>> s = MyGeneInfo()

    """
    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: prints informative messages (default is off)
        :param bool cache: passed as is to the underlying REST service
            (default is off)

        """
        url = "https://mygene.info/v3"
        self.services = REST(name="MyGeneInfo",
                             url=url,
                             verbose=verbose,
                             cache=cache)

    def get_genes(self,
                  ids,
                  fields="symbol,name,taxid,entrezgene,ensemblgene",
                  species=None,
                  dotfield=True,
                  email=None):
        """Get matching gene objects for a list of gene ids

        :param ids: gene IDs, either as one comma-separated string or as a
            list/tuple of IDs (joined with commas before being posted)
        :param str fields: a comma-separated fields to limit the fields returned
            from the matching gene hits. The supported field names can be found from any
            gene object (e.g. http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If "fields=all", all
            available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param str species: can be used to limit the gene hits from given
            species. You can use "common names" for nine common species (human, mouse, rat,
            fruitfly, nematode, zebrafish, thale-cress, frog and pig). All other species,
            you can provide their taxonomy ids. Multiple species can be passed using comma
            as a separator. Nothing is sent when left to None (the default).
        :param dotfield: control the format of the returned fields when passed
            "fields" parameter contains dot notation, e.g. "fields=refseq.rna". If True
            the returned data object contains a single "refseq.rna" field, otherwise
            (False), a single "refseq" field with a sub-field of "rna". Default:
            True.
        :param str email: If you are a regular user of this service, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that they can better track the usage or follow up with you.
        :return: the answer of the POST request on the ``gene`` resource

        ::

            mgi = MyGeneInfo()
            mgi.get_genes("301345,22637")
            mgi.get_genes(["301345", "22637"])
            # first one is rat, second is mouse. This will return a 'notfound'
            # entry and the second entry as expected.
            mgi.get_genes("301345,22637", species="mouse")

        """
        # the service expects a single comma-separated value for 'ids'
        if isinstance(ids, (list, tuple)):
            ids = ",".join(str(x) for x in ids)

        params = {"ids": ids, "fields": fields}
        if email:  # pragma: no cover
            params["email"] = email

        assert dotfield in [True, False], "dotfield must be True or False"
        params["dotfield"] = dotfield

        if species:
            params["species"] = species

        # the parameters travel in the body of the request (data=), not in
        # the URL, hence the form-urlencoded content type
        res = self.services.http_post(
            "gene",
            data=params,
            frmt="json",
            headers={
                "User-Agent": self.services.getUserAgent(),
                "accept": "application/json",
                "Content-Type": "application/x-www-form-urlencoded"
            })
        return res

    def get_one_gene(self,
                     geneid,
                     fields="symbol,name,taxid,entrezgene,ensemblgene",
                     dotfield=True,
                     email=None):
        """Get matching gene objects for one gene id

        :param geneid: a valid gene ID
        :param str fields: a comma-separated fields to limit the fields returned
            from the matching gene hits. The supported field names can be found from any
            gene object (e.g. http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If "fields=all", all
            available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param dotfield: control the format of the returned fields when passed
            "fields" parameter contains dot notation, e.g. "fields=refseq.rna". If True
            the returned data object contains a single "refseq.rna" field, otherwise
            (False), a single "refseq" field with a sub-field of "rna". Default:
            True.
        :param str email: If you are a regular user of this service, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that they can better track the usage or follow up with you.
        :return: the answer of the GET request on ``gene/<geneid>``

        ::

            mgi = MyGeneInfo()
            mgi.get_one_gene("301345")
        """
        params = {"ids": geneid, "fields": fields}
        if email:  # pragma: no cover
            params["email"] = email

        assert dotfield in [True, False], "dotfield must be True or False"
        params["dotfield"] = dotfield

        res = self.services.http_get(f"gene/{geneid}",
                                     params=params,
                                     frmt="json")
        return res

    def get_one_query(self,
                      query,
                      email=None,
                      dotfield=True,
                      fields="symbol,name,taxid,entrezgene,ensemblgene",
                      species="human,mouse,rat",
                      size=10,
                      _from=0,
                      sort=None,
                      facets=None,
                      entrezonly=False,
                      ensemblonly=False):
        """Make gene query and return matching gene list. Support JSONP and CORS as well.

        :param str query: Query string. Examples "CDK2", "NM_052827", "204639_at",
            "chr1:151,073,054-151,383,976", "hg19.chr1:151073054-151383976". The detailed
            query syntax can be found in the mygene.info documentation.
        :param str fields: a comma-separated fields to limit the fields returned
            from the matching gene hits. The supported field names can be found from any
            gene object (e.g. http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If "fields=all", all
            available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param str species: can be used to limit the gene hits from given species. You can use
            "common names" for nine common species (human, mouse, rat, fruitfly, nematode,
            zebrafish, thale-cress, frog and pig). All other species, you can provide their
            taxonomy ids. Multiple species can be passed using comma as a separator.
            Default: human,mouse,rat. Nothing is sent when the value is empty.
        :param int size: the maximum number of matching gene hits to return
            (with a cap of 1000 at the moment). Default: 10.
        :param int _from: the number of matching gene hits to skip, starting
            from 0. Combining with "size" parameter, this can be useful for paging. Default:
            0.
        :param sort: the comma-separated fields to sort on. Prefix with "-" for
            descending order, otherwise in ascending order. Default: sort by matching scores
            in descending order.
        :param str facets: a single field or comma-separated fields to return
            facets, for example, "facets=taxid", "facets=taxid,type_of_gene".
        :param bool entrezonly: when passed as True, the query returns only the hits
            with valid Entrez gene ids. Default: False.
        :param bool ensemblonly: when passed as True, the query returns only the hits
            with valid Ensembl gene ids. Default: False.
        :param dotfield: control the format of the returned fields when passed
            "fields" parameter contains dot notation, e.g. "fields=refseq.rna". If True
            the returned data object contains a single "refseq.rna" field, otherwise
            (False), a single "refseq" field with a sub-field of "rna". Default:
            True.
        :param str email: If you are a regular user of this service, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that they can better track the usage or follow up with you.
        :return: the answer of the GET request on the ``query`` resource

        """
        # The query string goes through 'params' like every other argument so
        # that special characters are escaped rather than spliced into the URL.
        params = {"q": query, "fields": fields, "size": size, "from": _from}
        if email:  # pragma: no cover
            params["email"] = email

        assert dotfield in [True, False], "dotfield must be True or False"
        params["dotfield"] = dotfield

        if species:
            params["species"] = species
        if sort:
            params["sort"] = sort
        if facets:
            params["facets"] = facets
        assert entrezonly in [True, False], "entrezonly must be True or False"
        params["entrezonly"] = entrezonly
        assert ensemblonly in [True, False], "ensemblonly must be True or False"
        params["ensemblonly"] = ensemblonly

        res = self.services.http_get("query", params=params, frmt="json")
        return res

    def get_queries(
        self,
        query,
        email=None,
        dotfield=True,
        scopes="all",
        species="human,mouse,rat",
        fields="symbol,name,taxid,entrezgene,ensemblgene",
    ):
        """Make gene query and return matching gene list. Support JSONP and CORS as well.

        :param str query: Query string. Examples "CDK2", "NM_052827", "204639_at",
            "chr1:151,073,054-151,383,976", "hg19.chr1:151073054-151383976". The detailed
            query syntax can be found in the mygene.info documentation.
        :param str fields: a comma-separated fields to limit the fields returned
            from the matching gene hits. The supported field names can be found from any
            gene object (e.g. http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If "fields=all", all
            available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param str species: can be used to limit the gene hits from given species. You can use
            "common names" for nine common species (human, mouse, rat, fruitfly, nematode,
            zebrafish, thale-cress, frog and pig). All other species, you can provide their
            taxonomy ids. Multiple species can be passed using comma as a separator.
            Default: human,mouse,rat. Nothing is sent when the value is empty.
        :param dotfield: control the format of the returned fields when passed
             "fields" parameter contains dot notation, e.g. "fields=refseq.rna". If True
             the returned data object contains a single "refseq.rna" field, otherwise
             (False), a single "refseq" field with a sub-field of "rna". Default:
             True.
        :param str email: If you are a regular user of this service, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that they can better track the usage or follow up with you.
        :param str scopes: not documented. Set to 'all'
        :return: the answer of the POST request on the ``query`` resource

        """
        params = {"q": query, "fields": fields, "scopes": scopes}
        if email:  # pragma: no cover
            params["email"] = email
        assert dotfield in [True, False], "dotfield must be True or False"
        params["dotfield"] = dotfield
        if species:
            params["species"] = species

        res = self.services.http_post("query",
                                      params=params,
                                      frmt="json",
                                      headers={
                                          "User-Agent":
                                          self.services.getUserAgent(),
                                          "accept":
                                          "application/json",
                                          "Content-Type":
                                          "application/x-www-form-urlencoded"
                                      })
        return res

    def get_metadata(self):
        """Return the answer of the ``metadata`` resource (JSON)."""
        res = self.services.http_get("metadata", frmt="json")
        return res

    def get_taxonomy(self):
        """Return the ``taxonomy`` entry of the ``metadata`` resource."""
        return self.get_metadata()['taxonomy']
コード例 #4
0
class Seqret():
    """Interface to the `Seqret <http://www.ebi.ac.uk/readseq>`_ service

    ::

        >>> from bioservices import *
        >>> s = Seqret()

    The Seqret service replaced the former ReadSeq service in 2015.

    .. versionchanged:: 0.15

    """
    def __init__(self, verbose=True):
        """.. rubric:: Constructor

        :param bool verbose: passed as is to the underlying REST service

        """
        self.services = REST(
            name="seqret",
            url="https://www.ebi.ac.uk/Tools/services/rest/emboss_seqret",
            verbose=verbose)
        # filled in on first access to the 'parameters' property
        self._parameters = None

    def get_parameters(self):
        """Request the names of the parameters known to the service.

        :returns: the ``parameters`` entry of the JSON answer, i.e. the
            names of the parameters.

        """
        answer = self.services.http_get("parameters", frmt="json")
        return answer['parameters']

    def _get_parameters(self):
        # Request the names only while nothing (or an empty answer) is stored.
        if not self._parameters:
            self._parameters = self.get_parameters()
        return self._parameters

    parameters = property(_get_parameters, doc="Get list of parameter names")

    def get_parameter_details(self, parameterId):
        """Request the description of one parameter.

        :param str parameterId: identifier/name of the parameter to fetch
            details of. It must be one of :attr:`parameters`.
        :return: a data structure describing the parameter and its values.
        :raises ValueError: if *parameterId* is not a known parameter name.

        ::

            s = Seqret()
            print(s.get_parameter_details("stype"))

        """
        if parameterId in self.parameters:
            return self.services.http_get("parameterdetails/" + parameterId,
                                          frmt="json")

        raise ValueError(
            "Invalid parameterId provided(%s). See parameters attribute" %
            parameterId)

    def run(self, email, title, **kargs):
        """Submit a job to the service.

        :param str email: user e-mail address.
        :param str title: job title.
        :param kargs: parameters for the tool. Every name must be one of
            :attr:`parameters` and ``sequence`` is compulsory. The values of
            stype, inputformat, outputformat, feature, firstonly, reverse,
            outputcase and seqrange are checked against the values returned
            by :meth:`get_parameter_details` before being sent.
        :return: string containing the job identifier (jobId). It is also
            stored in the ``_jobid`` attribute.

        Deprecated (old readseq service)::

            Format Name     Value
            Auto-detected   0
            EMBL            4
            GenBank         2
            Fasta(Pearson)  8
            Clustal/ALN     22
            ACEDB           25
            BLAST           20
            DNAStrider      6
            FlatFeat/FFF    23
            GCG             5
            GFF             24
            IG/Stanford     1
            MSF             15
            NBRF            3
            PAUP/NEXUS      17
            Phylip(Phylip4)     12
            Phylip3.2       11
            PIR/CODATA      14
            Plain/Raw       13
            SCF             21
            XML             19

        As output, you also have

        Pretty 18

        ::

            s = Seqret()
            jobid = s.run("name@provider", "test", sequence=fasta, inputformat=8,
                outputformat=2)
            genbank = s.get_result(jobid)


        """
        # 1. every keyword must be a parameter name known to the service
        for name in kargs:
            self.services.devtools.check_param_in_list(name, self.parameters)

        assert "sequence" in kargs
        payload = dict(email=email, title=title)

        # 2. options whose value is validated against the service's own list
        validated_options = ('stype', 'inputformat', 'outputformat',
                             "feature", "firstonly", "reverse", 'outputcase',
                             'seqrange')
        for option in validated_options:
            if option not in kargs:
                continue
            candidate = kargs[option]
            description = self.get_parameter_details(option)
            accepted = [entry['value']
                        for entry in description['values']['values']]
            self.services.devtools.check_param_in_list(str(candidate),
                                                       accepted)
            payload[option] = candidate

        payload['sequence'] = kargs['sequence']

        # 3. submission; the parameters travel in the body of the request
        self._jobid = self.services.http_post("run", frmt="txt", data=payload)
        return self._jobid

    def get_status(self, jobid=None):
        """Get the status of a submitted job.

        :param str jobid: job identifier.
        :return: string containing the status.

        The values for the status are:

        - RUNNING: the job is currently being processed.
        - FINISHED: job has finished, and the results can then be retrieved.
        - ERROR: an error occurred attempting to get the job status.
        - FAILURE: the job failed.
        - NOT_FOUND: the job cannot be found.

        """
        return self.services.http_get(f"status/{jobid}", frmt="txt")

    def get_result_types(self, jobid):
        """Get the available result types for a finished job.

        :param str jobid: job identifier.
        :return: the list of the identifiers of the available result types.
        """
        answer = self.services.http_get(f"resulttypes/{jobid}", frmt="json")
        identifiers = []
        for result_type in answer["types"]:
            identifiers.append(result_type['identifier'])
        return identifiers

    def get_result(self, jobid, result_type="out"):
        """Get the result of a job of the specified type.

        :param str jobid: job identifier.
        :param str result_type: the type of result to retrieve (default is
            "out"). See :meth:`get_result_types`.
        :return: the result as text, or None (after a warning was logged)
            when the status of the job is not FINISHED.
        """
        if self.get_status(jobid) == 'FINISHED':
            return self.services.http_get(f"result/{jobid}/{result_type}",
                                          frmt="txt")

        self.services.logging.warning(
            "Your job is not finished yet. Try again later.")
        return None
コード例 #5
0
ファイル: pdb.py プロジェクト: philloidin/bioservices
class PDB():
    """Interface to part of the `PDB <http://www.rcsb.org/pdb>`_ service

    :Status: in progress not for production. You can get all ID and retrieve
        uncompressed file in PDB/FASTA formats for now. New features will be
        added on request.

    .. doctest::

        >>> from bioservices import PDB
        >>> s = PDB()
        >>> res = s.get_file("1FBV", "pdb")

    """
    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: prints informative messages (default is off)
        :param bool cache: passed as is to the underlying REST service
            (default is off)

        """
        url = "http://www.rcsb.org/pdb/rest"
        self.services = REST(name="PDB", url=url, verbose=verbose, cache=cache)

    def search(self, query):
        """Post an XML query to the ``search`` resource

        :param str query: the XML document describing the search, e.g.::

            <?xml version="1.0" encoding="UTF-8"?>
            <orgPdbQuery>
            <version>B0907</version>
            <queryType>org.pdb.query.simple.ExpTypeQuery</queryType>
            <description>Experimental Method Search : Experimental Method=SOLID-STATE NMR</description>
            <mvStructure.expMethod.value>SOLID-STATE NMR</mvStructure.expMethod.value>
            </orgPdbQuery>

        :return: the answer of the service
        """
        # requests go through the REST helper; this class itself has no
        # http_post method
        res = self.services.http_post("search", frmt="xml", data=query)
        return res

    def get_current_ids(self):
        """Get a list of all current PDB IDs."""
        res = self.services.http_get("getCurrent", frmt="xml")
        res = self.services.easyXML(res)
        res = [x.attrib['structureId'] for x in res.getchildren()]
        return res

    def get_file(self, identifier, frmt, compression=False, headerOnly=False):
        """Download a file in a specified format

        :param str identifier: a valid Identifier. See :meth:`get_current_ids`.
        :param str frmt: a valid format in "pdb", "cif", "xml"
        :param bool compression: if True, ".gz" is appended to the name of
            the requested file.
        :param bool headerOnly: sent to the service as the ``headerOnly``
            parameter ("YES" when True, "NO" otherwise).
        :return: the content of the file. In the "xml" case, the answer is
            wrapped with the ``easyXML`` helper of the REST service when
            *compression* is False.

        .. doctest::

            >>> from bioservices import PDB
            >>> s = PDB()
            >>> res = s.get_file("1FBV", "pdb")
            >>> import tempfile
            >>> fh = tempfile.NamedTemporaryFile(mode="w")
            >>> fh.write(res)
            >>> # manipulate the PDB file with your favorite tool
            >>> # close the file ONLY when finished (this is temporary file)
            >>> # fh.close()

        reference: http://www.rcsb.org/pdb/static.do?p=download/http/index.html
        """
        valid_formats = ["pdb", "cif", "xml"]
        self.services.devtools.check_param_in_list(frmt, valid_formats)
        self.services.devtools.check_param_in_list(headerOnly, [True, False])
        params = {'headerOnly': "YES" if headerOnly is True else "NO"}

        query = "files/" + identifier + "." + frmt
        if compression is True:
            query += ".gz"

        if frmt == "xml":
            res = self.services.http_get(query, frmt=frmt, params=params)
            if compression is False:
                # easyXML is provided by the REST helper, not by this class
                res = self.services.easyXML(res)
        else:
            res = self.services.http_get(query, frmt="txt", params=params)
        return res

    def get_ligands(self, identifier):
        """List the ligands that can be found in a PDB entry

        :param identifier: a valid PDB identifier (e.g., 4HHB)
        :return: xml document


            >>> from bioservices import PDB
            >>> s = PDB()
            >>> s.get_ligands("4HHB")

        Then, ::

            x = s.get_ligands("4HHB")
            from pyquery import PyQuery as pq
            d = pq(x)


        """
        # The base URL already ends with "/rest": the resource name must not
        # repeat it (same resource as in get_ligand_info).
        res = self.services.http_get("ligandInfo",
                                     frmt='xml',
                                     params={'structureId': identifier})
        return res

    def get_xml_query(self, query):
        """Send an XML query

        :param str query: the XML document to post, e.g.::

            <?xml version="1.0" encoding="UTF-8"?>
            <orgPdbQuery>
            <version>B0907</version>
            <queryType>org.pdb.query.simple.ExpTypeQuery</queryType>
            <description>Experimental Method Search : Experimental Method=SOLID-STATE NMR</description>
            <mvStructure.expMethod.value>SOLID-STATE NMR</mvStructure.expMethod.value>
            </orgPdbQuery>

        :return: the answer of the POST request on ``query/post``
        """
        res = self.services.http_post(
            "query/post",
            data=query,
            headers=self.services.get_headers(content='default'))
        return res

    def get_go_terms(self, query):
        """Request the ``goTerms`` resource for a structure

        :param query: sent as the ``structureId`` parameter
        :return: the ``content`` attribute of the parsed XML answer if there
            is one, otherwise the parsed answer itself.
        """
        res = self.services.http_get("goTerms",
                                     params={"structureId": query},
                                     frmt="xml")
        res = self.services.easyXML(res)
        # only a missing attribute triggers the fallback
        return getattr(res, "content", res)

    def get_ligand_info(self, query):
        """Request the ``ligandInfo`` resource for a structure

        :param query: sent as the ``structureId`` parameter
        :return: the ``content`` attribute of the parsed XML answer if there
            is one, otherwise the parsed answer itself.
        """
        res = self.services.http_get("ligandInfo",
                                     params={"structureId": query},
                                     frmt="xml")
        res = self.services.easyXML(res)
        # only a missing attribute triggers the fallback
        return getattr(res, "content", res)
コード例 #6
0
class NCBIblast():
    """Interface to the `NCBIblast <http://blast.ncbi.nlm.nih.gov/>`_ service.

    ::

        >>> from bioservices import *
        >>> s = NCBIblast(verbose=False)
        >>> jobid = s.run(program="blastp", sequence=s._sequence_example,
            stype="protein", database="uniprotkb", email="name@provider")
        >>> s.getResult(jobid, "out")

    .. warning:: It is very important to provide a real e-mail address as your
        job otherwise very likely will be killed and your IP, Organisation or
        entire domain black-listed.

    When running a blast request, a program is required. You can obtain the
    list using::

        >>> s.parametersDetails("program")
        [u'blastp', u'blastx', u'blastn', u'tblastx', u'tblastn']

    * blastn: Search a nucleotide database using a nucleotide query
    * blastp: Search protein database using a protein query
    * blastx: Search protein database using a translated nucleotide query
    * tblastn     Search translated nucleotide database using a protein query
    * tblastx     Search translated nucleotide database using a translated nucleotide query

    """

    _sequence_example = "MDSTNVRSGMKSRKKKPKTTVIDDDDDCMTCSACQSKLVKISDITKVSLDYINTMRGNTLACAACGSSLKLLNDFAS"

    def __init__(self, verbose=False):
        """.. rubric:: NCBIblast constructor

        :param bool verbose: prints informative messages

        """
        self.services = REST(
            name="NCBIblast",
            url="http://www.ebi.ac.uk/Tools/services/rest/ncbiblast",
            verbose=verbose,
        )
        # Both caches start empty: the parameter names are stored by
        # _get_parameters and the per-parameter details by
        # get_parameter_details.
        self._parameters, self._parametersDetails = None, {}
        self.checkInterval = 2

    def get_parameters(self):
        """List parameter names.

        :returns: the ``parameters`` entry of the JSON answer of the
            ``parameters`` resource.

        ::

            >>> from bioservices import ncbiblast
            >>> n = ncbiblast.NCBIblast()
            >>> res = n.get_parameters()

        .. seealso:: :attr:`parameters`, which stores the answer so that
            the request is not repeated.
        """
        # JSON is requested explicitly through the Accept header
        request_headers = {
            "User-Agent": self.services.getUserAgent(),
            "Accept": "application/json",
        }
        answer = self.services.http_get("parameters",
                                        frmt="json",
                                        headers=request_headers)
        return answer['parameters']

    def _get_parameters(self):
        """Return the parameter names, requesting them on first use."""
        if not self._parameters:
            # two steps on purpose: if the request fails, self._parameters
            # is left untouched
            fetched = self.get_parameters()
            self._parameters = fetched
        return self._parameters

    parameters = property(_get_parameters)

    def get_parameter_details(self, parameterId):
        """Get detailed information about a parameter.

        :param str parameterId: one of the names found in :attr:`parameters`.
        :returns: the list of the values that the parameter can take when
            the JSON answer has the expected ``values -> values -> value``
            layout, otherwise the answer itself, unchanged.
        :raises ValueError: if *parameterId* is not a known parameter name.

        The outcome is stored, so that each parameter is requested only once.

        For example::

            >>> s.get_parameter_details("matrix")
            [u'BLOSUM45',
             u'BLOSUM50',
             u'BLOSUM62',
             u'BLOSUM80',
             u'BLOSUM90',
             u'PAM30',
             u'PAM70',
             u'PAM250']

        """
        if parameterId not in self.parameters:
            raise ValueError(
                "Invalid parameterId provided(%s). See parameters attribute" %
                parameterId)

        if parameterId not in self._parametersDetails:
            request = "parameterdetails/" + parameterId
            res = self.services.http_get(request,
                                         frmt="json",
                                         headers={
                                             "User-Agent":
                                             self.services.getUserAgent(),
                                             "Accept":
                                             "application/json"
                                         })

            try:
                data = [x['value'] for x in res["values"]["values"]]
            except (KeyError, TypeError):
                # The answer does not have the expected layout (missing key,
                # or not a mapping at all): hand it back as is. Anything
                # else, e.g. KeyboardInterrupt, is left to propagate.
                data = res
            self._parametersDetails[parameterId] = data
        return self._parametersDetails[parameterId]

    def run(self,
            program=None,
            database=None,
            sequence=None,
            stype="protein",
            email=None,
            **kargs):
        """ Submit a job with the specified parameters.

        .. python ncbiblast_urllib2.py -D ENSEMBL --email "*****@*****.**" --sequence
        .. MDSTNVRSGMKSRKKKPKTTVIDDDDDCMTCSACQSKLVKISDITKVSLDYINTMRGNTLACAACGSSLKLLNDFAS
        .. --program blastp --database uniprotkb


        .. rubric:: Compulsary arguments

        :param str program: BLAST program to use to perform the search (e.g., blastp)
        :param str sequence: query sequence. The use of fasta formatted sequence is recommended.
        :param list database: list of database names for search or possible a single string (for one database).
            There are some mismatch between the output of parametersDetails("database") and
            the accepted values. For instance UniProt Knowledgebase should be
            given as "uniprotkb".
        :param str email: a valid email address. Will be checked by the service itself.

        .. rubric:: Optional arguments. If not provided, a default value will be used

        :param str type: query sequence type in 'dna', 'rna' or 'protein' (default is protein).
        :param str matrix: scoring matrix to be used in the search (e.g., BLOSUM45).
        :param bool gapalign:  perform gapped alignments.
        :param int alignments:     maximum number of alignments displayed in the output.
        :param exp:     E-value threshold.
        :param bool filter:  low complexity sequence filter to process the query
            sequence before performing the search.
        :param int scores:     maximum number of scores displayed in the output.
        :param int dropoff:     amount score must drop before extension of hits is halted.
        :param match_scores:     match/miss-match scores to generate a scoring matrix 
            for nucleotide searches.
        :param int gapopen:     penalty for the initiation of a gap.
        :param int gapext:     penalty for each base/residue in a gap.
        :param seqrange: region of the query sequence to use for the search. 
            Default: whole sequence.
        :return: A jobid that can be analysed with :meth:`getResult`,
            :meth:`getStatus`, ...

        The up to data values accepted for each of these parameters can be
        retrieved from the :meth:`get_parameter_details`.

        For instance,::

            from bioservices import NCBIblast
            n = NCBIblast()
            n.get_parameter_details("program")

        Example::

            jobid = n.run(program="blastp",
                 sequence=n._sequence_example,
                 stype="protein",
                 database="uniprotkb",
                 email="*****@*****.**")

        Database can be a list of databases::

            database=["uniprotkb", "uniprotkb_swissprot"]

        The returned object is a jobid, which status can be checked. It must be
        finished before analysing/geeting the results.

        .. seealso:: :meth:`getResult`

        .. warning:: Cases are not important. Spaces in the database case should 
            be replaced by underscore.

        .. note:: database returned by the server have meaningless names since
            they do not map to the expected names. An example is "ENA Sequence Release" 
            that should be provided as em_rel

        http://www.ebi.ac.uk/Tools/sss/ncbiblast/help/index-nucleotide.html

        """
        # There are compulsary arguments:
        if program is None or sequence is None or database is None or email is None:
            raise ValueError(
                "program, sequence, email  and database must be provided")

        checkParam = self.services.devtools.check_param_in_list

        # Here, we will check the arguments values (not the type)
        # Arguments will be checked by the service itself but if we can
        # catch some before, it is better
        checkParam(program, self.get_parameter_details("program"))
        checkParam(stype, ["protein", "dna", "rna"])

        # So far, we have these parameters
        params = {
            'program': program,
            'sequence': sequence,
            'email': email,
            'stype': stype
        }

        # all others are optional (actually type is also optional)
        # We can check all of the optional argument provided automatically.
        # this is fine for now but note for instance that stype could not be put
        # here because what is returned by parametersDetails is not exactly what
        # is expected.
        for k, v in kargs.items():
            #print(k, v)
            checkParam(v, self.get_parameter_details(k))
            params[k] = v

        # similarly for the database, we must process it by hand because ther
        # can be more than one database
        #checkParam(database.lower(), [str(x.replace(" ", "_").lower())
        #    for x in self.parametersDetails("database")])
        if isinstance(database, list):
            databases = database[:]
        elif isinstance(database, str):
            databases = [database]
        else:
            raise TypeError("database must be a string or a list of strings")
        params['database'] = databases
        """
parser.add_option('--seqrange', help='region within input to use as query')
# General options
parser.add_option('--title', help='job title')
parser.add_option('--outfile', help='file name for results')
parser.add_option('--outformat', help='output format for results')
parser.add_option('--async', action='store_true', help='asynchronous mode')
parser.add_option('--jobid', help='job identifier')
parser.add_option('--polljob', action="store_true", help='get job result')
parser.add_option('--status', action="store_true", help='get job status')
parser.add_option('--resultTypes', action='store_true', help='get result types')
    """
        # IMPORTANT: use data parameter, not params !!!
        res = self.services.http_post("run",
                                      frmt=None,
                                      data=params,
                                      headers={
                                          "User-Agent":
                                          self.services.getUserAgent(),
                                          "accept":
                                          "text/plain"
                                      })

        return res

    def get_status(self, jobid):
        """Return the current status of a submitted job.

        :param str jobid: a job identifier returned by :meth:`run`.
        :return: the status as returned by the service (e.g. FINISHED).

        The values for the status are:

        *   RUNNING: the job is currently being processed.
        *   FINISHED: job has finished, and the results can then be retrieved.
        *   ERROR: an error occurred attempting to get the job status.
        *   FAILURE: the job failed.
        *   NOT_FOUND: the job cannot be found.

        """
        endpoint = "status/{}".format(jobid)
        # The status endpoint is queried as plain text.
        request_headers = {
            "User-Agent": self.services.getUserAgent(),
            "accept": "text/plain"
        }
        status = self.services.http_get(endpoint,
                                        frmt="txt",
                                        headers=request_headers)
        return status

    def get_result_types(self, jobid):
        """Get the available result types for a job.

        If the job is not reported as FINISHED, a warning is logged and the
        call blocks in :meth:`wait` before the result types are requested.

        :param str jobid: a job identifier returned by :meth:`run`.
        :return: list of result type identifiers, i.e. the ``identifier``
            field of every entry found under ``types`` in the JSON response.
        """
        if self.get_status(jobid) != 'FINISHED':
            self.services.logging.warning(
                "waiting for the job to be finished. May take a while")
            # wait() of this class only takes the job identifier; passing
            # ``verbose=False`` (as done previously) raised a TypeError.
            self.wait(jobid)
        url = 'resulttypes/' + jobid
        res = self.services.http_get(url,
                                     frmt="json",
                                     headers={
                                         "User-Agent":
                                         self.services.getUserAgent(),
                                         "accept": "application/json"
                                     })
        return [x["identifier"] for x in res['types']]

    def get_result(self, jobid, result_type):
        """Get the job result of the specified type.

        Only the result types ``out``, ``error``, ``sequence`` and ``ids``
        (fetched as text) and ``xml`` (fetched as XML) are handled here.

        If the job is not reported as FINISHED, a warning is logged and the
        call blocks in :meth:`wait`; the status is then checked once more.

        :param str jobid: a job identifier returned by :meth:`run`.
        :param str result_type: type of result to retrieve. See
            :meth:`get_result_types`.
        :return: whatever the REST helper returns for the requested format.
        :raises ValueError: if *result_type* is not one of the handled types,
            or if the job is still not FINISHED after waiting.

        The output from the tool itself.
        Use the 'format' parameter to retrieve the output in different formats,
        the 'compressed' parameter to retrieve the xml output in compressed form.
        Format options::

           0 = pairwise,
           1 = query-anchored showing identities,
           2 = query-anchored no identities,
           3 = flat query-anchored showing identities,
           4 = flat query-anchored no identities,
           5 = XML Blast output,
           6 = tabular,
           7 = tabular with comment lines,
           8 = Text ASN.1,
           9 = Binary ASN.1,
           10 = Comma-separated values,
           11 = BLAST archive format (ASN.1).

        See NCBI Blast documentation for details.
        Use the 'compressed' parameter to return the XML output in compressed form.
        e.g. '?format=5&compressed=true'.

        """
        # Resolve the decoding format first: previously an unhandled type
        # went through all the requests and then failed with an
        # UnboundLocalError on the return statement.
        if result_type in ['out', "error", "sequence", "ids"]:
            frmt = "txt"
        elif result_type in ['xml']:
            frmt = "xml"
        else:
            raise ValueError(
                "unsupported result_type %r; expected one of "
                "'out', 'error', 'sequence', 'ids' or 'xml'" % (result_type,))

        if self.get_status(jobid) != 'FINISHED':
            self.services.logging.warning(
                "waiting for the job to be finished. May take a while")
            self.wait(jobid)
        if self.get_status(jobid) != "FINISHED":
            raise ValueError("job is not finished")

        url = 'result/' + jobid + '/' + result_type
        # Both branches of the original request used the same headers.
        res = self.services.http_get(url,
                                     frmt=frmt,
                                     headers={
                                         "User-Agent":
                                         self.services.getUserAgent(),
                                         "accept": "text/plain"
                                     })
        return res

    def wait(self, jobId):
        """Poll the status of a job until it is neither RUNNING nor PENDING.

        The delay between two status requests is read from
        ``self.checkInterval`` and passed to ``time.sleep`` (seconds).

        :param str jobId: a job identifier returned by :meth:`run`.
        :return: the last status returned by :meth:`get_status`.
        :raises ValueError: if ``self.checkInterval`` is lower than 1.

        """
        if self.checkInterval < 1:
            # The previous message claimed the interval had to be "less than
            # a second", which is the opposite of what is enforced here.
            raise ValueError("checkInterval must be at least 1 second")
        while True:
            result = self.get_status(jobId)
            if result not in ('RUNNING', 'PENDING'):
                return result
            time.sleep(self.checkInterval)

    def _get_database(self):
        return self.get_parameter_details("database")

    databases = property(_get_database, doc=r"""Returns accepted databases.""")
コード例 #7
0
ファイル: muscle.py プロジェクト: shunsunsun/bioservices
class MUSCLE():
    """Interface to the `MUSCLE <http://www.ebi.ac.uk/Tools/webservices/services/msa/muscle_rest>`_ service.

    ::

        >>> from bioservices import *
        >>> m = MUSCLE(verbose=False)
        >>> sequencesFasta = open('filename','r')
        >>> jobid = m.run(frmt="fasta", sequence=sequencesFasta.read(),
                        email="name@provider")
        >>> m.get_result(jobid, "out")

    .. warning:: It is very important to provide a real e-mail address as your
        job otherwise very likely will be killed and your IP, Organisation or
        entire domain black-listed.


    Here is another similar example but we use :class:`~bioservices.uniprot.UniProt`
    class provided in bioservices to fetch the FASTA sequences::


        >>> from bioservices import UniProt, MUSCLE
        >>> u = UniProt(verbose=False)
        >>> f1 = u.get_fasta("P18413")
        >>> f2 = u.get_fasta("P18412")
        >>> m = MUSCLE(verbose=False)
        >>> jobid = m.run(frmt="fasta", sequence=f1+f2, email="name@provider")
        >>> m.get_result(jobid, "out")

    """

    # Values accepted by run(). They are hard-coded because, as noted in
    # run(), the parameter values returned by the server are not usable.
    _INPUT_FORMATS = ['fasta', 'clw', 'clwstrict', 'html', 'msf', 'phyi', 'phys']
    _TREE_TYPES = ['none', 'tree1', 'tree2']

    # Result types that get_result() knows how to fetch (all as text).
    _TEXT_RESULT_TYPES = ('out', 'sequence', "aln-fasta", "pim", "phylotree")

    def __init__(self, verbose=False):
        """.. rubric:: Constructor

        :param bool verbose: forwarded to the underlying REST helper.

        """
        url = "http://www.ebi.ac.uk/Tools/services/rest/muscle"
        self.services = REST(name='MUSCLE', url=url, verbose=verbose)
        # Caches filled on demand by the parameters property and by
        # get_parameter_details().
        self._parameters = None
        self._parametersDetails = {}
        self._headers = {
            "User-Agent": self.services.getUserAgent(),
            "accept": "application/json"
        }

    def _request_headers(self, accept):
        """Build the headers of a request with the given ``accept`` value."""
        return {
            "User-Agent": self.services.getUserAgent(),
            "accept": accept
        }

    def get_parameters(self):
        """List parameter names.

         :returns: the ``parameters`` entry of the JSON document returned by
            the service.

         ::

             >>> from bioservices import muscle
             >>> n = muscle.MUSCLE()
             >>> res = n.get_parameters()

         .. seealso:: :attr:`parameters` for a cached version of this list.
        """
        res = self.services.http_get("parameters",
                                     frmt="json",
                                     headers=self._headers)
        return res['parameters']

    def _get_parameters(self):
        """Getter of :attr:`parameters`; queries the service only once."""
        if not self._parameters:
            # If the request fails, the exception propagates and
            # self._parameters keeps its previous (empty) value.
            self._parameters = self.get_parameters()
        return self._parameters

    parameters = property(_get_parameters)

    def get_parameter_details(self, parameterId):
        """Get detailed information about a parameter.

          :param str parameterId: one of the names found in :attr:`parameters`.
          :returns: the JSON document describing the parameter. The answer is
              cached, so the service is queried once per parameter.
          :raises ValueError: if *parameterId* is not in :attr:`parameters`.

          For example::

              >>> n.get_parameter_details("format")

        """
        if parameterId not in self.parameters:
            raise ValueError(
                "Invalid parameterId provided(%s). See parameters attribute" %
                parameterId)

        if parameterId not in self._parametersDetails:
            request = "parameterdetails/" + parameterId
            self._parametersDetails[parameterId] = self.services.http_get(
                request, frmt="json", headers=self._headers)
        # Always answer from the cache: returning the local response variable
        # raised an UnboundLocalError on every cache hit.
        return self._parametersDetails[parameterId]

    def run(self, frmt=None, sequence=None, tree="none", email=None):
        """Submit a job with the specified parameters.

        .. rubric:: Compulsory arguments

        :param str frmt: input format (e.g., fasta)
        :param str sequence: query sequence. The use of fasta formatted sequence is recommended.
        :param str tree: tree type ('none','tree1','tree2')
        :param str email: a valid email address. Will be checked by the service itself.

        :return: A jobid that can be analysed with :meth:`get_result`,
            :meth:`get_status`, ...
        :raises ValueError: if frmt, sequence or email is missing.

        The up to date values accepted for each of these parameters can be
        retrieved from the :meth:`get_parameter_details`.

        For instance,::

            from bioservices import MUSCLE
            m = MUSCLE()
            m.get_parameter_details("tree")

        Example::

            jobid = m.run(frmt="fasta",
                 sequence=sequence_example,
                 email="*****@*****.**")

        frmt must be one of::

            ['fasta','clw','clwstrict','html','msf','phyi','phys']

        The returned object is a jobid, which status can be checked. It must be
        finished before analysing/getting the results.

        .. seealso:: :meth:`get_result`

        """
        # There are compulsory arguments:
        if frmt is None or sequence is None or email is None:
            raise ValueError("frmt, sequence and email must be provided")

        # Here, we will check the arguments values (not the type)
        # Arguments will be checked by the service itself but if we can
        # catch some before, it is better

        # FIXME: return parameters from server are not valid
        check_param = self.services.devtools.check_param_in_list
        check_param(frmt, self._INPUT_FORMATS)
        check_param(tree, self._TREE_TYPES)

        # parameter structure
        params = {'format': frmt, 'sequence': sequence, 'email': email}
        # The tree argument used to be validated and then silently dropped.
        # It is only added when it differs from the default so that the
        # default request is unchanged.
        if tree != 'none':
            params['tree'] = tree

        # IMPORTANT: use data parameter, not params !!!
        res = self.services.http_post("run",
                                      data=params,
                                      headers=self._request_headers(
                                          "text/plain"))
        return res

    def get_status(self, jobid):
        """Get status of a submitted job

        :param str jobid: a job identifier returned by :meth:`run`.
        :return: A string giving the jobid status (e.g. FINISHED).

         The values for the status are:

         *   RUNNING: the job is currently being processed.
         *   FINISHED: job has finished, and the results can then be retrieved.
         *   ERROR: an error occurred attempting to get the job status.
         *   FAILURE: the job failed.
         *   NOT_FOUND: the job cannot be found.


        """
        res = self.services.http_get("status/{}".format(jobid),
                                     frmt="txt",
                                     headers=self._request_headers(
                                         "text/plain"))
        return res

    def get_result_types(self, jobid):
        """Get available result types for a job.

        If the job is not reported as FINISHED, a warning is logged and the
        call blocks in :meth:`wait` before the result types are requested.

        :param str jobid: a job identifier returned by :meth:`run`.
        :return: list of result type identifiers, i.e. the ``identifier``
            field of every entry found under ``types`` in the JSON response.
        """
        if self.get_status(jobid) != 'FINISHED':
            # The logger lives on the REST helper; ``self.logging`` does not
            # exist and raised an AttributeError.
            self.services.logging.warning(
                "waiting for the job to be finished. May take a while")
            self.wait(jobid, verbose=False)
        url = 'resulttypes/' + jobid
        res = self.services.http_get(url,
                                     frmt="json",
                                     headers=self._request_headers(
                                         "application/json"))
        return [x["identifier"] for x in res['types']]

    def get_result(self, jobid, result_type):
        """Get the job result of the specified type.

        Only the result types ``out``, ``sequence``, ``aln-fasta``, ``pim``
        and ``phylotree`` are handled; they are fetched as text.

        :param str jobid: a job identifier returned by :meth:`run`.
        :param str result_type: type of result to retrieve. See
            :meth:`get_result_types`.
        :return: whatever the REST helper returns for a text request.
        :raises ValueError: if the job is still not FINISHED after waiting,
            or if *result_type* is available but not handled here.

        """
        if self.get_status(jobid) != 'FINISHED':  #pragma: no cover
            self.services.logging.warning(
                "waiting for the job to be finished. May take a while")
            self.wait(jobid, verbose=False)

        if self.get_status(jobid) != "FINISHED":  #pragma: no cover
            raise ValueError("job is not finished")

        assert result_type in self.get_result_types(jobid)

        # Previously the format variable was left unbound for any other type,
        # which surfaced as an UnboundLocalError.
        if result_type not in self._TEXT_RESULT_TYPES:
            raise ValueError(
                "unsupported result_type %r; expected one of %s" %
                (result_type, ", ".join(self._TEXT_RESULT_TYPES)))

        # No leading slash, like every other endpoint of this class.
        url = 'result/' + jobid + '/' + result_type
        res = self.services.http_get(url,
                                     frmt="txt",
                                     headers=self._request_headers(
                                         "application/json"))

        return res

    def wait(self, jobId, checkInterval=5, verbose=True):
        """Poll the status of a job until it is neither RUNNING nor PENDING.

        :param str jobId: a job identifier returned by :meth:`run`.
        :param int checkInterval: interval between requests in seconds.
        :param bool verbose: print each polled status on standard error.
        :return: the last status returned by :meth:`get_status`.
        :raises ValueError: if *checkInterval* is lower than 1.

        """
        if checkInterval < 1:  # pragma: no cover
            raise ValueError("checkInterval must be at least 1 second")
        result = 'PENDING'
        while result in ('RUNNING', 'PENDING'):
            result = self.get_status(jobId)
            if verbose:
                # required from __future__ import print_function
                print("WARNING: ", jobId, " is ", result, file=sys.stderr)

            if result in ('RUNNING', 'PENDING'):
                time.sleep(checkInterval)
        return result
コード例 #8
0
class PDB():
    """Interface to `PDB <http://search.rcsb.org/>`_ service (new API Jan 2021)

    With the new API, one method called :meth:`~bioservices.pdb.PDB.search` is
    provided by PDB. To perform a search you need to define a query. Here is an
    example

    .. doctest::

        >>> from bioservices import PDB
        >>> s = PDB()
        >>> query = {"query": 
        ...              {"type": "terminal", 
        ...               "service": "text", 
        ...               "parameters": { 
        ...                 "value": "thymidine kinase"
        ...                 }
        ...             },
        ...          "return_type": "entry"}
        >>> res = s.search(query)


    .. note:: as of December 2020, a new API has been set up by PDB.
        Some previous functionalities such as return list of Ligand are not
        supported anymore (Jan 2021). However, many more powerful searches are
        available. I encourage everyone to look at the PDB page for complex
        examples: http://search.rcsb.org/#examples

    As mentioned above, the PDB service provides one method called search available in
    :meth:`~bioservices.pdb.PDB.search`. We will not cover all the power and
    capability of this search function. User should refer to the official PDB help
    for that. Yet, given examples from PDB should all work with this method. 

    When possible, we will add convenient aliases function in this class. For
    now we have for example the :meth:`~bioservices.pdb.PDB.get_current_ids` and
    :meth:`~bioservices.pdb.PDB.get_similarity_sequence` that users may find useful. 

    The main idea behind the PDB API is to create queries that can access to
    different type of services. A query needs at least two keys:

    - **query**
    - **return_type**

    Consider this basic example that searches for the text *thymidine kinase*::

        {
          "query": {
            "type": "terminal",
            "service": "text",
            "parameters": {
              "value": "thymidine kinase"
            }
          },
          "return_type": "entry"
        }

    Here the query is defined by a **query** and a **return_type** indeed. The
    return type is a simple value such as **entry**. The query itself is
    composed of 3 pairs of key/value. Here we have the type, service and
    parameters as defined below.

    The query can have several fields:

    - **type**: the clause type can be either **terminal** or **group**

        - **terminal**: performs an atomic search operation, e.g. searches 
          for a particular value in a particular field. 
        - **group**: wraps other terminal or group nodes and is 
          used to combine multiple queries in a logical fashion.

    - **service**:

        - **text**: linguistic searches against textual annotations.
        - **sequence**: uses MMSeq2 to perform sequence matching searches (blast-like).
          The following targets are available:

          - pdb_protein_sequence, 
          - pdb_dna_sequence, 
          - pdb_na_sequence
        - **seqmotif**: performs short motif searches against nucleotide or protein
          sequences using 3 different inputs:

          - simple (e.g., CXCXXL)
          - prosite (e.g., C-X-C-X(2)-[LIVMYFWC])
          - regex (e.g., CXCX{2}[LIVMYFWC])
        - **structure**: searches matching a global 3D shape of assemblies
          or chains of a given entry (identified by PDB ID), in either strict
          (strict_shape_match) or relaxed (relaxed_shape_match) modes
        - strucmotif: Performs structural motif searches on all available PDB structures.
        - chemical: queries of small-molecule constituents of PDB structures, 
          based on chemical formula and chemical structure. Queries for matching and similar
          chemical structures can be performed using SMILES and InChI descriptors
          as search targets.

          - graph-strict: atom type, formal charge, bond order, atom and bond chirality, 
            aromatic assignment are used as matching criteria for this search type. 
          - graph-relaxed: atom type, formal charge and bond order are used as
            matching criteria for this search type. 
          - graph-relaxed-stereo: atom type, formal charge, bond order, atom
            and bond chirality are used as matching criteria for this search
            type.
          - fingerprint-similarity: Tanimoto similarity is used as the matching criteria 

    Concerning the **return_type** key, it can be one of :

    - entry: a list of PDB IDs.
    - assembly: list of PDB IDs appended with assembly IDs in the format of 
      a [pdb_id]-[assembly_id], corresponding to biological assemblies.
    - polymer_entity: list of PDB IDs appended with entity IDs in the format 
      of a [pdb_id]_[entity_id], corresponding to polymeric molecular entities.
    - non_polymer_entity: list of PDB IDs appended with entity IDs in the 
      format of a [pdb_id]_[entity_id], corresponding to non-polymeric entities (or ligands).
    - polymer_instance: list of PDB IDs appended with asym IDs in the format 
      of a [pdb_id].[asym_id], corresponding to instances of certain polymeric 
      molecular entities, also known as chains.

    **Optional arguments**

    There are many optional arguments. Let us see a couple of them. Pagination can be 
    set (default is 10 entries) using the **request_options** (optional) key.
    Consider this query example::

        {
          "query": {
            "type": "terminal",
            "service": "text",
            "parameters": {
                "attribute": "rcsb_polymer_entity.formula_weight",
                "operator": "greater",
                "value": 500
            }
          },
          "request_options": {
            "pager": {
              "start": 0,
              "rows": 100
            }
          },
          "return_type": "polymer_entity"
        }

    Here, the query searches for the polymer_entity that have a formula weight
    above 500. With the request_options pager rows set to 100, we will get the
    first 100 hits.

    To return all hits, set this field in the request_options::

        "return_all_hits": true

    Coming back at the first basic example, we can reuse it to illustrate how to
    refine the search using attribute and operators::

        {
          "query": {
            "type": "terminal",
            "service": "text",
            "parameters": {
              "value": "thymidine kinase",
              "attribute": "exptl.method",
              "operator": "exact_match"
            }
          },
          "return_type": "entry"
        }

    All valid combo of operators and attributes can be found
    here: http://search.rcsb.org/search-attributes.html

    For instance, in the example above only in, exact_match and exists can be
    used with exptl.method attribute. This is not checked in bioservices.

    Sorting is determined by the sort object in the request_options context. 
    It allows you to add one or more sorting conditions to control the order of
    the search result hits. The sort operation is defined on a per field level, with
    special field name for score to sort by score (the default).

    By default sorting is done in descending order ("desc"). The sort can be
    reversed by setting direction property to "asc". This example demonstrates how
    to sort the search results by release date::

        {
          "query": {
            "type": "terminal",
            "service": "text",
            "parameters": {
              "attribute": "struct.title",
              "operator": "contains_phrase",
              "value": "\"hiv protease\""
            }
          },
          "request_options": {
            "sort": [
              {
                "sort_by": "rcsb_accession_info.initial_release_date",
                "direction": "desc"
              }
            ]
          },
          "return_type": "entry"
        }
    
    Again, many more complex examples can be found on PDB page. 
    """
    _url = "http://search.rcsb.org/rcsbsearch/v1/"

    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: prints informative messages (default is off)
        :param bool cache: forwarded to the underlying REST helper.

        """
        self.services = REST(name="PDB",
                             verbose=verbose,
                             cache=cache,
                             url_defined_later=True)
        self.services.url = PDB._url

    def search(self,
               query,
               request_options=None,
               request_info=None,
               return_type=None):
        """search request represented as a JSON object.

        This is the only function in PDB API. You should be able
        to perform any valid PDB searches here (see the
        :class:`bioservices.pdb.PDB` documentation for details).
        Note, however, that we have aliases methods in BioServices that will be
        added on demand for common searches.

        :param dict query: either a complete request (a dictionary that
            already has a **query** key) or only the search expression. A
            complete request is posted as is, and the three other arguments
            are then ignored.
        :param dict request_options: (optional) controls various aspects of the search request
            including pagination, sorting, scoring and faceting.
        :param dict request_info: additional information about the query, e.g.
            query_id. (optional)
        :param str return_type: type of results to return.
        :return: json results
        :raises ValueError: if the final request has no **return_type** key.

        You must define a query as defined in the PDB web page. For example the
        following query search for macromolecular PDB entities that share 90% sequence
        identity with GTPase HRas protein from Gallus gallus (Chicken)::

            query = {
              "query": {
                "type": "terminal",
                "service": "sequence",
                "parameters": {
                  "evalue_cutoff": 1,
                  "identity_cutoff": 0.9,
                  "target": "pdb_protein_sequence",
                  "value": "MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDLPARTVETRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQHKLRKLNPPDESGPGCMNCKCVIS"
                }
              },
              "request_options": {
                "scoring_strategy": "sequence"
              },
              "return_type": "polymer_entity"
            }

        What is important is that the dictionary called **query** contains 2
        compulsory keys namely **query** and **return_type**. The two other optional
        keys are **request_options** and **request_info**

        You would then call the PDB search as follows::

            from bioservices import PDB
            p = PDB()
            results = p.search(query)

        Now, in BioServices, you can also decompose the query as follows::

            query = {
                "type": "terminal",
                "service": "sequence",
                "parameters": {
                  "evalue_cutoff": 1,
                  "identity_cutoff": 0.9,
                  "target": "pdb_protein_sequence",
                  "value": "MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDLPARTVETRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQHKLRKLNPPDESGPGCMNCKCVIS"
                }}
            request_options =  { "scoring_strategy": "sequence"}
            return_type= "polymer_entity"

        and then use PDB search again::

            from bioservices import PDB
            p = PDB()
            results = p.search(query, request_options=request_options, return_type=return_type)

        or even simpler for the Pythonic lovers, with the complete request
        dictionary of the first example::

            results = p.search(**query)


        """
        if "query" not in query:
            # Only the search expression was given: wrap it into a complete
            # request (a new dictionary, the caller's one is left untouched)
            # and attach the optional parts that were provided.
            query = {"query": query}
            if request_options:
                query['request_options'] = request_options
            if request_info:
                query['request_info'] = request_info
            if return_type:
                query['return_type'] = return_type
        if 'return_type' not in query:  #pragma: no cover
            raise ValueError("Yourr query must have a return_type key")
        # The request used to be printed on standard output here (debugging
        # leftover); a library call should not write to stdout.
        res = self.services.http_post("query", frmt="json", json=query)
        return res

    def get_current_ids(self):
        """Get a list of all current PDB IDs.

        :return: list of the ``identifier`` field of every hit found in the
            ``result_set`` of the search results.
        """
        # Without return_all_hits, only the first page of hits is returned
        # (10 entries by default according to the class documentation).
        request_options = {"return_all_hits": True}

        res = self.search(query={
            'type': 'terminal',
            'service': 'text'
        },
                          request_options=request_options,
                          return_type="entry")

        identifiers = [x['identifier'] for x in res['result_set']]
        return identifiers

    def get_similarity_sequence(self, seq):
        """Sequence similarity search with a protein sequence.

        :param str seq: the protein sequence used as the search value.
        :return: json results of the search (``polymer_entity`` return type).

        ::

            seq = "VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTAVAHVDDMPNAL"
            results = p.get_similarity_sequence(seq)

        """
        res = self.search({
            "query": {
                "type": "terminal",
                "service": "sequence",
                "parameters": {
                    "target": "pdb_protein_sequence",
                    "value": seq
                }
            },
            "return_type": "polymer_entity"
        })
        return res