Example #1
0
class NCBIblast():
    """Interface to the `NCBIblast <http://blast.ncbi.nlm.nih.gov/>`_ service.

    ::

        >>> from bioservices import *
        >>> s = NCBIblast(verbose=False)
        >>> jobid = s.run(program="blastp", sequence=s._sequence_example,
            stype="protein", database="uniprotkb", email="name@provider")
        >>> s.get_result(jobid, "out")

    .. warning:: It is very important to provide a real e-mail address as your
        job otherwise very likely will be killed and your IP, Organisation or
        entire domain black-listed.

    When running a blast request, a program is required. You can obtain the
    list using::

        >>> s.get_parameter_details("program")
        [u'blastp', u'blastx', u'blastn', u'tblastx', u'tblastn']

    * blastn: Search a nucleotide database using a nucleotide query
    * blastp: Search protein database using a protein query
    * blastx: Search protein database using a translated nucleotide query
    * tblastn: Search translated nucleotide database using a protein query
    * tblastx: Search translated nucleotide database using a translated nucleotide query

    """

    _sequence_example = "MDSTNVRSGMKSRKKKPKTTVIDDDDDCMTCSACQSKLVKISDITKVSLDYINTMRGNTLACAACGSSLKLLNDFAS"

    # Result types handled by :meth:`get_result`, mapped to the ``frmt``
    # value handed to the REST helper when fetching them.
    _result_formats = {
        "out": "txt",
        "error": "txt",
        "sequence": "txt",
        "ids": "txt",
        "xml": "xml",
    }

    def __init__(self, verbose=False):
        """.. rubric:: NCBIblast constructor

        :param bool verbose: prints informative messages

        """
        url = "http://www.ebi.ac.uk/Tools/services/rest/ncbiblast"
        self.services = REST(name="NCBIblast", url=url, verbose=verbose)
        # Caches filled on first access by the parameter-related methods.
        self._parameters = None
        self._parametersDetails = {}
        # Seconds slept between two status requests in :meth:`wait`.
        self.checkInterval = 2

    def get_parameters(self):
        """List parameter names.

        :returns: the ``parameters`` entry of the JSON document returned by
            the service (the names of the accepted parameters).

        ::

            >>> from bioservices import ncbiblast
            >>> n = ncbiblast.NCBIblast()
            >>> res = n.get_parameters()

        .. seealso:: :attr:`parameters`, which caches the result of this
            request.
        """
        res = self.services.http_get(
            "parameters",
            frmt="json",
            headers={
                "User-Agent": self.services.getUserAgent(),
                "Accept": "application/json"
            })
        return res['parameters']

    def _get_parameters(self):
        if not self._parameters:
            # Fetch first, assign second: if the request raises, the cache
            # is left untouched.
            res = self.get_parameters()
            self._parameters = res
        return self._parameters

    parameters = property(_get_parameters)

    def get_parameter_details(self, parameterId):
        """Get detailed information about a parameter.

        :param str parameterId: one of the names found in :attr:`parameters`.
        :returns: the list of values the parameter can take when the reply
            has the expected ``values/values`` layout, otherwise the reply
            itself, unprocessed. The result is cached per parameter.
        :raises ValueError: if *parameterId* is not a known parameter.

        For example::

            >>> s.get_parameter_details("matrix")
            [u'BLOSUM45',
             u'BLOSUM50',
             u'BLOSUM62',
             u'BLOSUM80',
             u'BLOSUM90',
             u'PAM30',
             u'PAM70',
             u'PAM250']

        """
        if parameterId not in self.parameters:
            raise ValueError(
                "Invalid parameterId provided(%s). See parameters attribute" %
                parameterId)

        if parameterId not in self._parametersDetails:
            res = self.services.http_get(
                "parameterdetails/" + parameterId,
                frmt="json",
                headers={
                    "User-Agent": self.services.getUserAgent(),
                    "Accept": "application/json"
                })

            try:
                data = [x['value'] for x in res["values"]["values"]]
            except (KeyError, TypeError, IndexError):
                # Unexpected layout (or an error value instead of a
                # document): hand the raw reply back to the caller.
                data = res
            self._parametersDetails[parameterId] = data
        return self._parametersDetails[parameterId]

    def run(self,
            program=None,
            database=None,
            sequence=None,
            stype="protein",
            email=None,
            **kargs):
        """ Submit a job with the specified parameters.

        .. python ncbiblast_urllib2.py -D ENSEMBL --email "*****@*****.**" --sequence
        .. MDSTNVRSGMKSRKKKPKTTVIDDDDDCMTCSACQSKLVKISDITKVSLDYINTMRGNTLACAACGSSLKLLNDFAS
        .. --program blastp --database uniprotkb


        .. rubric:: Compulsory arguments

        :param str program: BLAST program to use to perform the search (e.g., blastp)
        :param str sequence: query sequence. The use of fasta formatted sequence is recommended.
        :param list database: list of database names for search or possibly a single string (for one database).
            There are some mismatches between the output of get_parameter_details("database") and
            the accepted values. For instance UniProt Knowledgebase should be
            given as "uniprotkb".
        :param str email: a valid email address. Will be checked by the service itself.

        .. rubric:: Optional arguments. If not provided, a default value will be used

        :param str stype: query sequence type in 'dna', 'rna' or 'protein' (default is protein).
        :param str matrix: scoring matrix to be used in the search (e.g., BLOSUM45).
        :param bool gapalign:  perform gapped alignments.
        :param int alignments:     maximum number of alignments displayed in the output.
        :param exp:     E-value threshold.
        :param bool filter:  low complexity sequence filter to process the query
            sequence before performing the search.
        :param int scores:     maximum number of scores displayed in the output.
        :param int dropoff:     amount score must drop before extension of hits is halted.
        :param match_scores:     match/miss-match scores to generate a scoring matrix
            for nucleotide searches.
        :param int gapopen:     penalty for the initiation of a gap.
        :param int gapext:     penalty for each base/residue in a gap.
        :param seqrange: region of the query sequence to use for the search.
            Default: whole sequence.
        :return: A jobid that can be analysed with :meth:`get_result`,
            :meth:`get_status`, ...
        :raises ValueError: if one of the compulsory arguments is missing.
        :raises TypeError: if *database* is neither a string nor a list.

        The up to date values accepted for each of these parameters can be
        retrieved from the :meth:`get_parameter_details`.

        For instance,::

            from bioservices import NCBIblast
            n = NCBIblast()
            n.get_parameter_details("program")

        Example::

            jobid = n.run(program="blastp",
                 sequence=n._sequence_example,
                 stype="protein",
                 database="uniprotkb",
                 email="*****@*****.**")

        Database can be a list of databases::

            database=["uniprotkb", "uniprotkb_swissprot"]

        The returned object is a jobid, which status can be checked. It must be
        finished before analysing/getting the results.

        .. seealso:: :meth:`get_result`

        .. warning:: Cases are not important. Spaces in the database case should
            be replaced by underscore.

        .. note:: database returned by the server have meaningless names since
            they do not map to the expected names. An example is "ENA Sequence Release"
            that should be provided as em_rel

        http://www.ebi.ac.uk/Tools/sss/ncbiblast/help/index-nucleotide.html

        """
        # There are compulsory arguments:
        if program is None or sequence is None or database is None or email is None:
            raise ValueError(
                "program, sequence, email  and database must be provided")

        checkParam = self.services.devtools.check_param_in_list

        # Here, we check the argument values (not the type). Arguments are
        # checked by the service itself, but catching mistakes before
        # submitting a job is better.
        checkParam(program, self.get_parameter_details("program"))
        checkParam(stype, ["protein", "dna", "rna"])

        params = {
            'program': program,
            'sequence': sequence,
            'email': email,
            'stype': stype
        }

        # All the other arguments are optional and are validated against the
        # values advertised by the service. stype cannot be handled this way
        # because what the service advertises is not exactly what it expects.
        for k, v in kargs.items():
            checkParam(v, self.get_parameter_details(k))
            params[k] = v

        # The database is processed by hand because there can be more than
        # one; its names are not validated (advertised names do not match
        # the accepted ones).
        if isinstance(database, list):
            databases = database[:]
        elif isinstance(database, str):
            databases = [database]
        else:
            raise TypeError("database must be a string or a list of strings")
        params['database'] = databases

        # IMPORTANT: use data parameter, not params !!!
        res = self.services.http_post(
            "run",
            frmt=None,
            data=params,
            headers={
                "User-Agent": self.services.getUserAgent(),
                "accept": "text/plain"
            })

        return res

    def get_status(self, jobid):
        """Get status of a submitted job

        :param str jobid: a job identifier returned by :meth:`run`.
        :return: A string giving the jobid status (e.g. FINISHED).

        The values for the status are:

        *   RUNNING: the job is currently being processed.
        *   FINISHED: job has finished, and the results can then be retrieved.
        *   ERROR: an error occurred attempting to get the job status.
        *   FAILURE: the job failed.
        *   NOT_FOUND: the job cannot be found.

        """
        res = self.services.http_get(
            "status/{}".format(jobid),
            frmt="txt",
            headers={
                "User-Agent": self.services.getUserAgent(),
                "accept": "text/plain"
            })
        return res

    def get_result_types(self, jobid):
        """ Get available result types for a finished job.

        If the job is not finished yet, this call blocks in :meth:`wait`
        before asking for the result types.

        :param str jobid: a job identifier returned by :meth:`run`.
        :return: the list of result type identifiers (the ``identifier``
            field of each entry in the reply), usable as *result_type* in
            :meth:`get_result`.
        """
        if self.get_status(jobid) != 'FINISHED':
            self.services.logging.warning(
                "waiting for the job to be finished. May take a while")
            # wait() takes the job identifier only.
            self.wait(jobid)
        url = 'resulttypes/' + jobid
        res = self.services.http_get(
            url,
            frmt="json",
            headers={
                "User-Agent": self.services.getUserAgent(),
                "accept": "application/json"
            })
        return [x["identifier"] for x in res['types']]

    def get_result(self, jobid, result_type):
        """ Get the job result of the specified type.

        :param str jobid: a job identifier returned by :meth:`run`.
        :param str result_type: type of result to retrieve, one of
            'out', 'error', 'sequence', 'ids' (fetched as text) or 'xml'.
            See :meth:`get_result_types`.
        :return: the reply of the service for the requested result type.
        :raises ValueError: if *result_type* is not one of the types above,
            or if the job is still not FINISHED after waiting for it.

        The output from the tool itself.
        Use the 'format' parameter to retrieve the output in different formats,
        the 'compressed' parameter to retrieve the xml output in compressed form.
        Format options::

           0 = pairwise,
           1 = query-anchored showing identities,
           2 = query-anchored no identities,
           3 = flat query-anchored showing identities,
           4 = flat query-anchored no identities,
           5 = XML Blast output,
           6 = tabular,
           7 = tabular with comment lines,
           8 = Text ASN.1,
           9 = Binary ASN.1,
           10 = Comma-separated values,
           11 = BLAST archive format (ASN.1).

        See NCBI Blast documentation for details.
        Use the 'compressed' parameter to return the XML output in compressed form.
        e.g. '?format=5&compressed=true'.

        """
        # Reject unsupported types before any (possibly long) waiting.
        if result_type not in self._result_formats:
            raise ValueError(
                "result_type must be one of %s (got %r)" %
                (sorted(self._result_formats), result_type))

        status = self.get_status(jobid)
        if status != 'FINISHED':
            self.services.logging.warning(
                "waiting for the job to be finished. May take a while")
            # wait() returns the first status that is neither RUNNING nor
            # PENDING, so no extra status request is needed afterwards.
            status = self.wait(jobid)
        if status != "FINISHED":
            raise ValueError("job is not finished")

        url = 'result/' + jobid + '/' + result_type
        res = self.services.http_get(
            url,
            frmt=self._result_formats[result_type],
            headers={
                "User-Agent": self.services.getUserAgent(),
                "accept": "text/plain"
            })
        return res

    def wait(self, jobId):
        """This function checks the status of a jobid while it is running

        The status is requested every :attr:`checkInterval` seconds for as
        long as it is RUNNING or PENDING.

        :param str jobId: a job identifier returned by :meth:`run`.
        :return: the first status that is neither RUNNING nor PENDING.
        :raises ValueError: if :attr:`checkInterval` is below one second.

        """
        if self.checkInterval < 1:
            raise ValueError(
                "checkInterval must be at least one second")

        while True:
            result = self.get_status(jobId)
            if result not in ('RUNNING', 'PENDING'):
                return result
            time.sleep(self.checkInterval)

    def _get_database(self):
        return self.get_parameter_details("database")

    databases = property(_get_database, doc=r"""Returns accepted databases.""")
Example #2
0
class MyGeneInfo():
    """Interface to `mygene.info <http://mygene.info>`_ service

    .. doctest::

        >>> from bioservices import MyGeneInfo
        >>> s = MyGeneInfo()

    """
    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: prints informative messages (default is off)
        :param bool cache: forwarded to the underlying REST helper.

        """
        url = "https://mygene.info/v3"
        self.services = REST(name="MyGeneInfo",
                             url=url,
                             verbose=verbose,
                             cache=cache)

    @staticmethod
    def _with_common(params, dotfield, email):
        """Add the ``dotfield`` flag and the optional ``email`` to *params*."""
        assert dotfield in [True, False], "dotfield must be True or False"
        if email:  # pragma: no cover
            params["email"] = email
        params["dotfield"] = dotfield
        return params

    def get_genes(self,
                  ids,
                  fields="symbol,name,taxid,entrezgene,ensemblgene",
                  species=None,
                  dotfield=True,
                  email=None):
        """Get matching gene objects for a list of gene ids

        The request is sent as a POST on ``gene`` with the parameters in the
        request body.

        :param ids: list of geneinfo IDs
        :param str fields: a comma-separated fields to limit the fields returned
            from the matching gene hits. The supported field names can be found from any
            gene object (e.g. http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If "fields=all", all
            available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param str species:  can be used to limit the gene hits from given
            species. You can use "common names" for nine common species (human, mouse, rat,
            fruitfly, nematode, zebrafish, thale-cress, frog and pig). All other species,
            you can provide their taxonomy ids. Multiple species can be passed using comma
            as a separator. Only sent when provided.
        :param dotfield: control the format of the returned fields when passed
            "fields" parameter contains dot notation, e.g. "fields=refseq.rna". If True
            the returned data object contains a single "refseq.rna" field, otherwise
            (False), a single "refseq" field with a sub-field of "rna". Default:
            True.
        :param str email: If you are regular users of this services, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that we can better track the usage or follow up with you.
        :return: the reply of the service, requested as JSON.

        ::

            mgi = MyGeneInfo()
            mgi.get_genes("301345,22637")
            # first one is rat, second is mouse. This will return a 'notfound'
            # entry and the second entry as expected.
            mgi.get_genes("301345,22637", species="mouse")

        """
        params = self._with_common({"ids": ids, "fields": fields},
                                   dotfield, email)
        if species:
            params["species"] = species

        res = self.services.http_post(
            "gene",
            data=params,
            frmt="json",
            headers={
                "User-Agent": self.services.getUserAgent(),
                "accept": "application/json",
                "Content-Type": "application/x-www-form-urlencoded"
            })
        return res

    def get_one_gene(self,
                     geneid,
                     fields="symbol,name,taxid,entrezgene,ensemblgene",
                     dotfield=True,
                     email=None):
        """Get matching gene objects for one gene id

        :param geneid: a valid gene ID
        :param str fields: a comma-separated fields to limit the fields returned
            from the matching gene hits. The supported field names can be found from any
            gene object (e.g. http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If "fields=all", all
            available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param dotfield: control the format of the returned fields when passed
            "fields" parameter contains dot notation, e.g. "fields=refseq.rna". If True
            the returned data object contains a single "refseq.rna" field, otherwise
            (False), a single "refseq" field with a sub-field of "rna". Default:
            True.
        :param str email: If you are regular users of this services, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that we can better track the usage or follow up with you.
        :return: the reply of the service, requested as JSON.

        ::

            mgi = MyGeneInfo()
            mgi.get_one_gene("301345")
        """
        # NOTE(review): "ids" duplicates the id already present in the URL
        # path; kept so that the request sent is unchanged.
        params = self._with_common({"ids": geneid, "fields": fields},
                                   dotfield, email)

        res = self.services.http_get(f"gene/{geneid}",
                                     params=params,
                                     frmt="json")
        return res

    def get_one_query(self,
                      query,
                      email=None,
                      dotfield=True,
                      fields="symbol,name,taxid,entrezgene,ensemblgene",
                      species="human,mouse,rat",
                      size=10,
                      _from=0,
                      sort=None,
                      facets=None,
                      entrezonly=False,
                      ensemblonly=False):
        """Make gene query and return matching gene list. Support JSONP and CORS as well.

        :param str query: Query string. Examples "CDK2", "NM_052827", "204639_at",
            "chr1:151,073,054-151,383,976", "hg19.chr1:151073054-151383976". The detailed
            query syntax can be found from our docs.
        :param str fields: a comma-separated fields to limit the fields returned
            from the matching gene hits. The supported field names can be found from any
            gene object (e.g. http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If "fields=all", all
            available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param str species: can be used to limit the gene hits from given species. You can use
            "common names" for nine common species (human, mouse, rat, fruitfly, nematode,
            zebrafish, thale-cress, frog and pig). All other species, you can provide their
            taxonomy ids. Multiple species can be passed using comma as a separator.
            Default: human,mouse,rat. Pass None (or an empty string) to leave
            the choice to the service.
        :param int size: the maximum number of matching gene hits to return
            (with a cap of 1000 at the moment). Default: 10.
        :param int _from: the number of matching gene hits to skip, starting
            from 0. Combining with "size" parameter, this can be useful for paging. Default:
            0.
        :param sort: the comma-separated fields to sort on. Prefix with "-" for
            descending order, otherwise in ascending order. Default: sort by matching scores
            in descending order.
        :param str facets: a single field or comma-separated fields to return
            facets, for example, "facets=taxid", "facets=taxid,type_of_gene".
        :param bool entrezonly: when passed as True, the query returns only the hits
            with valid Entrez gene ids. Default: False.
        :param bool ensemblonly: when passed as True, the query returns only the hits
            with valid Ensembl gene ids. Default: False.
        :param dotfield: control the format of the returned fields when passed
            "fields" parameter contains dot notation, e.g. "fields=refseq.rna". If True
            the returned data object contains a single "refseq.rna" field, otherwise
            (False), a single "refseq" field with a sub-field of "rna". Default:
            True.
        :param str email: If you are regular users of this services, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that we can better track the usage or follow up with you.
        :return: the reply of the service, requested as JSON.

        """
        assert entrezonly in [True, False], "entrezonly must be True or False"
        assert ensemblonly in [True, False], "ensemblonly must be True or False"

        params = self._with_common(
            {"fields": fields, "size": size, "from": _from}, dotfield, email)

        # Optional filters are only sent when the caller provided them.
        if species:
            params["species"] = species
        if sort:
            params["sort"] = sort
        if facets:  # pragma: no cover
            params["facets"] = facets
        params["entrezonly"] = entrezonly
        params["ensemblonly"] = ensemblonly

        res = self.services.http_get(f"query?q={query}",
                                     params=params,
                                     frmt="json")
        return res

    def get_queries(
        self,
        query,
        email=None,
        dotfield=True,
        scopes="all",
        species="human,mouse,rat",
        fields="symbol,name,taxid,entrezgene,ensemblgene",
    ):
        """Make gene query and return matching gene list. Support JSONP and CORS as well.

        The request is sent as a POST on ``query``.

        :param str query: Query string. Examples "CDK2", "NM_052827", "204639_at",
            "chr1:151,073,054-151,383,976", "hg19.chr1:151073054-151383976". The detailed
            query syntax can be found from our docs.
        :param str fields: a comma-separated fields to limit the fields returned
            from the matching gene hits. The supported field names can be found from any
            gene object (e.g. http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If "fields=all", all
            available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param str species: can be used to limit the gene hits from given species. You can use
            "common names" for nine common species (human, mouse, rat, fruitfly, nematode,
            zebrafish, thale-cress, frog and pig). All other species, you can provide their
            taxonomy ids. Multiple species can be passed using comma as a separator.
            Default: human,mouse,rat. Pass None (or an empty string) to leave
            the choice to the service.
        :param dotfield: control the format of the returned fields when passed
             "fields" parameter contains dot notation, e.g. "fields=refseq.rna". If True
             the returned data object contains a single "refseq.rna" field, otherwise
             (False), a single "refseq" field with a sub-field of "rna". Default:
             True.
        :param str email: If you are regular users of this services, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that we can better track the usage or follow up with you.
        :param str scopes: not documented. Set to 'all'
        :return: the reply of the service, requested as JSON.

        """
        params = self._with_common(
            {"q": query, "fields": fields, "scopes": scopes}, dotfield, email)
        if species:
            params["species"] = species

        res = self.services.http_post(
            "query",
            params=params,
            frmt="json",
            headers={
                "User-Agent": self.services.getUserAgent(),
                "accept": "application/json",
                "Content-Type": "application/x-www-form-urlencoded"
            })
        return res

    def get_metadata(self):
        """Return the reply of the ``metadata`` endpoint, requested as JSON."""
        res = self.services.http_get("metadata", frmt="json")
        return res

    def get_taxonomy(self):
        """Return the ``taxonomy`` entry of the :meth:`get_metadata` reply."""
        return self.get_metadata()['taxonomy']
Example #3
0
class MUSCLE():
    """Interface to the `MUSCLE <http://www.ebi.ac.uk/Tools/webservices/services/msa/muscle_rest>`_ service.

    ::

        >>> from bioservices import *
        >>> m = MUSCLE(verbose=False)
        >>> with open('filename', 'r') as fh:
        ...     sequences_fasta = fh.read()
        >>> jobid = m.run(frmt="fasta", sequence=sequences_fasta,
        ...               email="name@provider")
        >>> m.get_result(jobid, "out")

    .. warning:: It is very important to provide a real e-mail address as your
        job otherwise very likely will be killed and your IP, Organisation or
        entire domain black-listed.


    Here is another similar example but we use :class:`~bioservices.uniprot.UniProt`
    class provided in bioservices to fetch the FASTA sequences::


        >>> from bioservices import UniProt, MUSCLE
        >>> u = UniProt(verbose=False)
        >>> f1 = u.get_fasta("P18413")
        >>> f2 = u.get_fasta("P18412")
        >>> m = MUSCLE(verbose=False)
        >>> jobid = m.run(frmt="fasta", sequence=f1+f2, email="name@provider")
        >>> m.get_result(jobid, "out")

    """
    def __init__(self, verbose=False):
        """.. rubric:: MUSCLE constructor

        :param bool verbose: forwarded to the underlying REST service object.

        """
        url = "http://www.ebi.ac.uk/Tools/services/rest/muscle"
        self.services = REST(name='MUSCLE', url=url, verbose=verbose)
        # Caches filled lazily by :attr:`parameters` and
        # :meth:`get_parameter_details`.
        self._parameters = None
        self._parametersDetails = {}
        # Headers shared by every request that expects a JSON answer.
        self._headers = {
            "User-Agent": self.services.getUserAgent(),
            "accept": "application/json"
        }

    def get_parameters(self):
        """List parameter names.

        :returns: the value stored under the ``parameters`` key of the JSON
            document returned by the service.

        ::

            >>> from bioservices import MUSCLE
            >>> m = MUSCLE()
            >>> m.get_parameters()

        .. seealso:: :attr:`parameters` to get the same list from a cache
            instead of querying the service each time.
        """
        res = self.services.http_get("parameters",
                                     frmt="json",
                                     headers=self._headers)
        return res['parameters']

    def _get_parameters(self):
        """Return the parameter names, querying the service only if needed."""
        if not self._parameters:
            # Done in two steps: if the request fails, self._parameters keeps
            # its previous value.
            res = self.get_parameters()
            self._parameters = res
        return self._parameters

    parameters = property(_get_parameters)

    def get_parameter_details(self, parameterId):
        """Get detailed information about a parameter.

        The answer of the service is cached per parameter, so only the first
        call for a given ``parameterId`` triggers a request.

        :param str parameterId: one of the names found in :attr:`parameters`.
        :returns: the JSON document describing the parameter, as returned by
            the service.
        :raises ValueError: if ``parameterId`` is not in :attr:`parameters`.

        For example::

            >>> m.get_parameter_details("format")

        """
        if parameterId not in self.parameters:
            raise ValueError(
                "Invalid parameterId provided(%s). See parameters attribute" %
                parameterId)

        if parameterId not in self._parametersDetails:
            request = "parameterdetails/" + parameterId
            self._parametersDetails[parameterId] = self.services.http_get(
                request, frmt="json", headers=self._headers)
        # Always answer from the cache: returning the local request result
        # would fail on every call after the first one.
        return self._parametersDetails[parameterId]

    def run(self, frmt=None, sequence=None, tree="none", email=None):
        """Submit a job with the specified parameters.

        .. rubric:: Compulsory arguments

        :param str frmt: input format (e.g., fasta)
        :param str sequence: query sequence. The use of fasta formatted sequence is recommended.
        :param str email: a valid email address. Will be checked by the service itself.

        .. rubric:: Optional arguments

        :param str tree: tree type ('none','tree1','tree2'). It is only sent
            to the service when it differs from the default ``'none'``.

        :return: A jobid that can be analysed with :meth:`get_result`,
            :meth:`get_status`, ...
        :raises ValueError: if ``frmt``, ``sequence`` or ``email`` is None.

        The up to date values accepted for each of these parameters can be
        retrieved from the :meth:`get_parameter_details`.

        For instance,::

            from bioservices import MUSCLE
            m = MUSCLE()
            m.get_parameter_details("tree")

        Example::

            jobid = m.run(frmt="fasta",
                 sequence=sequence_example,
                 email="name@provider")

        frmt must be one of::

            ['fasta','clw','clwstrict','html','msf','phyi','phys']

        The returned object is a jobid, which status can be checked. It must be
        finished before analysing/getting the results.

        .. seealso:: :meth:`get_result`

        """
        # There are compulsory arguments:
        if frmt is None or sequence is None or email is None:
            raise ValueError("frmt, sequence and email must be provided")

        # Here, we check the argument values (not the type). Arguments will
        # be checked by the service itself but if we can catch some before,
        # it is better.

        # FIXME: return parameters from server are not valid
        self.services.devtools.check_param_in_list(
            frmt, ['fasta', 'clw', 'clwstrict', 'html', 'msf', 'phyi', 'phys'])
        self.services.devtools.check_param_in_list(tree,
                                                   ['none', 'tree1', 'tree2'])

        # parameter structure
        params = {'format': frmt, 'sequence': sequence, 'email': email}
        if tree != "none":
            # The tree type used to be validated and then silently dropped.
            # It is omitted for the default value so that the default request
            # is unchanged.
            # NOTE(review): assumes the service field is named 'tree' - confirm.
            params['tree'] = tree

        # The default bioservices headers do not work with this endpoint,
        # hence the explicit headers below.
        # IMPORTANT: use data parameter, not params !!!
        res = self.services.http_post("run",
                                      data=params,
                                      headers={
                                          "User-Agent":
                                          self.services.getUserAgent(),
                                          "accept":
                                          "text/plain"
                                      })
        return res

    def get_status(self, jobid):
        """Get status of a submitted job

        :param str jobid: a job identifier returned by :meth:`run`.
        :return: A string giving the jobid status (e.g. FINISHED).

        The values for the status are:

        *   RUNNING: the job is currently being processed.
        *   FINISHED: job has finished, and the results can then be retrieved.
        *   ERROR: an error occurred attempting to get the job status.
        *   FAILURE: the job failed.
        *   NOT_FOUND: the job cannot be found.

        """
        res = self.services.http_get("status/{}".format(jobid),
                                     frmt="txt",
                                     headers={
                                         "User-Agent":
                                         self.services.getUserAgent(),
                                         "accept": "text/plain"
                                     })
        return res

    def get_result_types(self, jobid):
        """Get available result types for a finished job.

        If the job is not reported as FINISHED, :meth:`wait` is called first.

        :param str jobid: a job identifier returned by :meth:`run`.

        :return: the list of the ``identifier`` fields found in the ``types``
            entry of the JSON document returned by the service.
        """
        if self.get_status(jobid) != 'FINISHED':
            # The logger is reached through the REST service object; this
            # class has no ``logging`` attribute of its own.
            self.services.logging.warning(
                "waiting for the job to be finished. May take a while")
            self.wait(jobid, verbose=False)
        url = 'resulttypes/' + jobid
        res = self.services.http_get(url, frmt="json", headers=self._headers)
        return [x["identifier"] for x in res['types']]

    def get_result(self, jobid, result_type):
        """Get the job result of the specified type.

        If the job is not reported as FINISHED, :meth:`wait` is called first.

        :param str jobid: a job identifier returned by :meth:`run`.
        :param str result_type: type of result to retrieve. See
            :meth:`get_result_types`.
        :return: the answer of the service for that result type.
        :raises ValueError: if the job is still not FINISHED after waiting.
        :raises AssertionError: if ``result_type`` is not one of the types
            returned by :meth:`get_result_types`.

        """
        if self.get_status(jobid) != 'FINISHED':  # pragma: no cover
            self.services.logging.warning(
                "waiting for the job to be finished. May take a while")
            self.wait(jobid, verbose=False)

        if self.get_status(jobid) != "FINISHED":  # pragma: no cover
            raise ValueError("job is not finished")

        assert result_type in self.get_result_types(jobid), \
            "unknown result type: %s" % result_type
        # NOTE(review): unlike the other requests, this path starts with a
        # slash; kept as is - confirm how the REST helper joins URLs.
        url = '/result/' + jobid + '/' + result_type

        # Every result type is fetched as text. The known textual types are
        # listed for reference; any other valid type used to leave ``frmt``
        # undefined and crash with UnboundLocalError.
        # NOTE(review): confirm "txt" is appropriate for types not listed.
        if result_type in ['out', 'sequence', "aln-fasta", "pim", "phylotree"]:
            frmt = "txt"
        else:
            frmt = "txt"
        res = self.services.http_get(url, frmt=frmt, headers=self._headers)

        return res

    def wait(self, jobId, checkInterval=5, verbose=True):
        """This function checks the status of a jobid while it is running

        The status is polled until it is neither RUNNING nor PENDING.

        :param str jobId: a job identifier returned by :meth:`run`.
        :param int checkInterval: interval between requests in seconds.
        :param bool verbose: if True, print each polled status on stderr.
        :return: the last status obtained from :meth:`get_status`.
        :raises ValueError: if ``checkInterval`` is smaller than 1.

        """
        if checkInterval < 1:  # pragma: no cover
            raise ValueError(
                "checkInterval must be positive and less than minute")
        result = 'PENDING'
        while result == 'RUNNING' or result == 'PENDING':
            result = self.get_status(jobId)
            if verbose:
                # required from __future__ import print_function
                print("WARNING: ", jobId, " is ", result, file=sys.stderr)

            # Sleep only when another poll is going to happen.
            if result == 'RUNNING' or result == 'PENDING':
                time.sleep(checkInterval)
        return result