Example #1
0
    def __init__(self,
                 *args,
                 keep_local=False,
                 stay_on_remote=False,
                 provider=None,
                 email=None,
                 db=None,
                 rettype=None,
                 retmode=None,
                 **kwargs):
        """Initialise the remote object and the NCBI helper it queries through.

        All arguments are forwarded unchanged to the parent initialiser.

        Args:
            provider: if truthy, its ``remote_interface()`` supplies the
                helper; otherwise a new ``NCBIHelper`` is built from
                ``*args``, ``email`` and ``**kwargs``.
            email: passed to ``NCBIHelper`` when no provider is given.
            db: database identifier; validated against the helper when truthy.
            rettype: stored on the instance as-is.
            retmode: stored on the instance as-is.
            **kwargs: stored on the instance as ``self.kwargs``.

        Raises:
            NCBIFileException: if ``db`` is truthy and the helper reports it
                as not valid.
        """
        super(RemoteObject, self).__init__(
            *args,
            keep_local=keep_local,
            stay_on_remote=stay_on_remote,
            provider=provider,
            email=email,
            db=db,
            rettype=rettype,
            retmode=retmode,
            **kwargs)

        # Reuse the provider's helper when there is one; otherwise build our own.
        self._ncbi = (provider.remote_interface()
                      if provider else NCBIHelper(*args, email=email, **kwargs))

        # Reject an unknown database before anything is stored on the instance.
        if db and not self._ncbi.is_valid_db(db):
            known_dbs = ", ".join(self._ncbi.valid_dbs)
            raise NCBIFileException(
                "DB specified is not valid. Options include: {dbs}".format(
                    dbs=known_dbs))

        self.db = db
        self.rettype = rettype
        self.retmode = retmode
        self.kwargs = kwargs
Example #2
0
 def mtime(self):
     """Return what the NCBI helper's ``mtime`` reports for this accession.

     The helper is queried with ``self.accession`` and ``db=self.db``.

     Raises:
         NCBIFileException: if ``self.exists()`` is falsy.
     """
     if not self.exists():
         raise NCBIFileException(
             "The record does not seem to exist remotely: %s" %
             self.accession)

     return self._ncbi.mtime(self.accession, db=self.db)
Example #3
0
 def is_valid_db_request(self, db, rettype, retmode):
     """Tell whether ``db`` offers the exact ``rettype``/``retmode`` pair.

     Returns:
         True if any entry of ``self.efetch_options[db]`` has both the given
         ``rettype`` and ``retmode``; False otherwise.

     Raises:
         NCBIFileException: if ``self.is_valid_db(db)`` is falsy.
     """
     if not self.is_valid_db(db):
         known_dbs = ", ".join(self.valid_dbs)
         raise NCBIFileException(
             "DB specified is not valid. Options include: {dbs}".format(
                 dbs=known_dbs))

     return any(
         entry["rettype"] == rettype and entry["retmode"] == retmode
         for entry in self.efetch_options[db])
Example #4
0
    def guess_db_options_for_extension(self,
                                       file_ext,
                                       db=None,
                                       rettype=None,
                                       retmode=None):
        """Work out the db/rettype/retmode/ext combination for a request.

        If ``db``, ``rettype`` and ``retmode`` are all given and form a valid
        request, they are returned together with ``file_ext`` unchanged.
        Otherwise the database is taken from ``db`` or looked up through
        ``self.dbs_for_options`` and the format is resolved through
        ``self.options_for_db_and_extension``.

        Returns:
            A dict with the keys ``"db"``, ``"rettype"``, ``"retmode"`` and
            ``"ext"``, or ``None`` when no candidate database is found.

        Raises:
            NCBIFileException: if more than one database is possible, if
                more than one format matches, or if no format matches.
        """
        if (db and rettype and retmode
                and self.is_valid_db_request(db, rettype, retmode)):
            return {
                "db": db,
                "rettype": rettype,
                "retmode": retmode,
                "ext": file_ext,
            }

        if db:
            candidates = [db]
        else:
            candidates = self.dbs_for_options(file_ext, rettype, retmode)
        n_candidates = len(candidates)

        if n_candidates > 1:
            raise NCBIFileException(
                'Ambigious db for file extension specified: "{}"; possible databases include: {}'
                .format(file_ext, ", ".join(candidates)))
        if n_candidates != 1:
            # No candidate database: there is nothing to resolve.
            return None

        chosen_db = candidates.pop()
        matches = self.options_for_db_and_extension(chosen_db, file_ext,
                                                    rettype, retmode)
        n_matches = len(matches)

        if n_matches > 1:
            raise NCBIFileException(
                "Please clarify the rettype and retmode. Multiple request types are possible for the file extension ({}) specified: {}"
                .format(file_ext, matches))
        if n_matches != 1:
            raise NCBIFileException(
                "No request options found. Please check the file extension ({}), db ({}), rettype ({}), and retmode ({}) specified."
                .format(file_ext, db, rettype, retmode))

        only_match = matches[0]
        return {
            "db": chosen_db,
            "rettype": only_match["rettype"],
            "retmode": only_match["retmode"],
            "ext": only_match["ext"],
        }
Example #5
0
 def download(self):
     """Ask the NCBI helper to fetch this accession.

     The helper's ``fetch_from_ncbi`` receives a one-element accession
     list, the directory part of the accession string as destination, the
     stored ``rettype``, ``retmode``, ``file_ext`` and ``db``, plus
     ``self.kwargs``.

     Raises:
         NCBIFileException: if ``self.exists()`` is falsy.
     """
     if not self.exists():
         raise NCBIFileException(
             "The record does not seem to exist remotely: %s" %
             self.accession)

     target_dir = os.path.dirname(self.accession)
     self._ncbi.fetch_from_ncbi([self.accession],
                                target_dir,
                                rettype=self.rettype,
                                retmode=self.retmode,
                                file_ext=self.file_ext,
                                db=self.db,
                                **self.kwargs)
Example #6
0
    def exists(self, accession, db="nuccore"):
        """Check whether ``accession`` matches exactly one record in ``db``.

        Runs an Entrez ``esearch`` with ``rettype="count"`` and reads the
        first ``Count`` element of the XML response.

        Args:
            accession: search term passed to ``esearch``.
            db: database identifier passed to ``esearch``.

        Returns:
            True if the reported count is exactly 1. Otherwise a warning is
            logged and False is returned.

        Raises:
            NCBIFileException: if the response has no ``Count`` element.
        """
        handle = self.entrez.esearch(db=db, term=accession, rettype="count")
        try:
            payload = handle.read()
        finally:
            # Release the response handle even if reading fails.
            handle.close()

        count_node = ET.fromstring(payload).find(".//Count")
        if count_node is None:
            raise NCBIFileException("The esearch query failed.")

        if int(count_node.text) == 1:
            return True

        logger.warning(
            'The accession specified, "{acc}", could not be found in the database "{db}".\nConsider if you may need to specify a different database via "db=<db_id>".'
            .format(acc=accession, db=db))
        return False
Example #7
0
    def _esummary_and_parse(self,
                            accession,
                            xpath_selector,
                            db="nuccore",
                            return_type=int,
                            raise_on_failure=True,
                            retmode="xml",
                            **kwargs):
        """Run an Entrez ``esummary`` and extract one value from the XML reply.

        Args:
            accession: passed to ``esummary`` as ``id``.
            xpath_selector: ElementTree path evaluated against the response
                root; the first matching element's text is used.
            db: database identifier passed to ``esummary``.
            return_type: callable applied to the matched element's text.
            raise_on_failure: whether a response without any match raises.
            retmode: accepted but not forwarded to ``esummary``; the
                response is always parsed as XML.
            **kwargs: forwarded to ``esummary``.

        Returns:
            ``return_type(text)`` of the first match, or ``0`` when nothing
            matches and ``raise_on_failure`` is falsy.

        Raises:
            NCBIFileException: if nothing matches and ``raise_on_failure``
                is truthy.
        """
        handle = self.entrez.esummary(db=db, id=accession, **kwargs)
        try:
            payload = handle.read()
        finally:
            # Release the response handle even if reading fails.
            handle.close()

        nodes = ET.fromstring(payload).findall(xpath_selector)
        if nodes:
            return return_type(nodes[0].text)

        if raise_on_failure:
            raise NCBIFileException("The esummary query failed.")
        return 0
Example #8
0
    def options_for_db_and_extension(self,
                                     db,
                                     file_ext,
                                     rettype=None,
                                     retmode=None):
        """List the efetch option entries of ``db`` that match ``file_ext``.

        An entry matches when its ``"ext"`` equals ``file_ext`` and, for each
        of ``retmode`` / ``rettype`` that is truthy, the entry carries the
        same value.

        Returns:
            A list of the matching entries from ``self.efetch_options[db]``,
            in table order; empty when nothing matches.

        Raises:
            AssertionError: if ``file_ext`` is falsy.
            NCBIFileException: if ``self.is_valid_db(db)`` is falsy.
        """
        assert file_ext, "file_ext must be defined"

        if not self.is_valid_db(db):
            known_dbs = ", ".join(self.valid_dbs)
            raise NCBIFileException(
                "DB specified is not valid. Options include: {dbs}".format(
                    dbs=known_dbs))

        def is_match(entry):
            # Unset filters (falsy rettype/retmode) accept every entry.
            if file_ext != entry["ext"]:
                return False
            if retmode and entry["retmode"] != retmode:
                return False
            if rettype and entry["rettype"] != rettype:
                return False
            return True

        return [entry for entry in self.efetch_options[db] if is_match(entry)]
Example #9
0
    def __init__(self, *args, email=None, **kwargs):
        """Configure Entrez access and build the table of efetch formats.

        ``self.entrez`` is bound to the ``Entrez`` object, and its ``email``
        and ``tool`` attributes are set on that same object.
        ``self.efetch_options`` maps each database identifier to a list of
        dicts with the keys ``"rettype"``, ``"retmode"`` and ``"ext"``.

        Args:
            email: contact address to set on Entrez; must be truthy.

        Raises:
            NCBIFileException: if ``email`` is falsy.
        """
        if not email:
            raise NCBIFileException(
                "An e-mail address must be provided to either the remote file or the RemoteProvider() as email=<your_address>. The NCBI requires e-mail addresses for queries."
            )

        self.email = email
        self.entrez = Entrez
        self.entrez.email = self.email
        self.entrez.tool = "Snakemake"

        def formats(*triples):
            # Expand (rettype, retmode, ext) triples into fresh option dicts.
            return [{
                "rettype": rettype,
                "retmode": retmode,
                "ext": ext
            } for rettype, retmode, ext in triples]

        # valid NCBI Entrez efetch options
        # via https://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.T._valid_values_of__retmode_and/?report=objectonly
        self.efetch_options = {
            "bioproject": formats(
                ("xml", "xml", "xml"),
            ),
            "biosample": formats(
                ("full", "xml", "xml"),
                ("full", "text", "txt"),
            ),
            "biosystems": formats(
                ("xml", "xml", "xml"),
            ),
            "gds": formats(
                ("summary", "text", "txt"),
            ),
            "gene": formats(
                ("null", "asn.1", "asn1"),
                ("null", "xml", "xml"),
                ("gene_table", "text", "gene_table"),
            ),
            "homologene": formats(
                ("null", "asn.1", "asn1"),
                ("null", "xml", "xml"),
                ("alignmentscores", "text", "alignmentscores"),
                ("fasta", "text", "fasta"),
                ("homologene", "text", "homologene"),
            ),
            "mesh": formats(
                ("full", "text", "txt"),
            ),
            "nlmcatalog": formats(
                ("null", "text", "txt"),
                ("null", "xml", "xml"),
            ),
            "nuccore": formats(
                ("null", "text", "txt"),
                ("null", "asn.1", "asn1"),
                ("native", "xml", "xml"),
                ("acc", "text", "acc"),
                ("fasta", "text", "fasta"),
                ("fasta", "xml", "fasta.xml"),
                ("seqid", "text", "seqid"),
                ("gb", "text", "gb"),
                ("gb", "xml", "gb.xml"),
                ("gbc", "xml", "gbc"),
                ("ft", "text", "ft"),
                ("gbwithparts", "text", "gbwithparts"),
                ("fasta_cds_na", "text", "fasta_cds_na"),
                ("fasta_cds_aa", "text", "fasta_cds_aa"),
            ),
            "nucest": formats(
                ("null", "text", "txt"),
                ("null", "asn.1", "asn1"),
                ("native", "xml", "xml"),
                ("acc", "text", "acc"),
                ("fasta", "text", "fasta"),
                ("fasta", "xml", "fasta.xml"),
                ("seqid", "text", "seqid"),
                ("gb", "text", "gb"),
                ("gb", "xml", "gb.xml"),
                ("gbc", "xml", "gbc"),
                ("est", "text", "est"),
            ),
            "nucgss": formats(
                ("null", "text", "txt"),
                ("null", "asn.1", "asn1"),
                ("native", "xml", "xml"),
                ("acc", "text", "acc"),
                ("fasta", "text", "fasta"),
                ("fasta", "xml", "fasta.xml"),
                ("seqid", "text", "seqid"),
                ("gb", "text", "gb"),
                ("gb", "xml", "gb.xml"),
                ("gbc", "xml", "gbc"),
                ("gss", "text", "gss"),
            ),
            "protein": formats(
                ("null", "text", "txt"),
                ("null", "asn.1", "asn1"),
                ("native", "xml", "xml"),
                ("acc", "text", "acc"),
                ("fasta", "text", "fasta"),
                ("fasta", "xml", "fasta.xml"),
                ("seqid", "text", "seqid"),
                ("ft", "text", "ft"),
                ("gp", "text", "gp"),
                ("gp", "xml", "gp.xml"),
                ("gpc", "xml", "gpc"),
                ("ipg", "xml", "xml"),
            ),
            "popset": formats(
                ("null", "text", "txt"),
                ("null", "asn.1", "asn1"),
                ("native", "xml", "xml"),
                ("acc", "text", "acc"),
                ("fasta", "text", "fasta"),
                ("fasta", "xml", "fasta.xml"),
                ("seqid", "text", "seqid"),
                ("gb", "text", "gb"),
                ("gb", "xml", "gb.xml"),
                ("gbc", "xml", "gbc"),
            ),
            "pmc": formats(
                ("null", "xml", "xml"),
                ("medline", "text", "medline"),
            ),
            "pubmed": formats(
                ("null", "asn.1", "asn1"),
                ("null", "xml", "xml"),
                ("medline", "text", "medline"),
                ("uilist", "text", "uilist"),
                ("abstract", "text", "abstract"),
            ),
            "sequences": formats(
                ("null", "text", "txt"),
                ("acc", "text", "acc"),
                ("fasta", "text", "fasta"),
                ("seqid", "text", "seqid"),
            ),
            "snp": formats(
                ("null", "asn.1", "asn1"),
                ("null", "xml", "xml"),
                ("flt", "text", "flt"),
                ("fasta", "text", "fasta"),
                ("rsr", "text", "rsr"),
                ("ssexemplar", "text", "ssexemplar"),
                ("chr", "text", "chr"),
                ("docset", "text", "docset"),
                ("uilist", "text", "uilist"),
                ("uilist", "xml", "uilist.xml"),
            ),
            "sra": formats(
                ("full", "xml", "xml"),
            ),
            "taxonomy": formats(
                ("null", "xml", "xml"),
                ("uilist", "text", "uilist"),
                ("uilist", "xml", "uilist.xml"),
            ),
        }
Example #10
0
 def list(self):
     """Always raise: listing is not supported by this provider.

     Raises:
         NCBIFileException: unconditionally.
     """
     reason = "The NCBI Remote Provider does not currently support list-based operations like glob_wildcards()."
     raise NCBIFileException(reason)
Example #11
0
 def upload(self):
     """Always raise: uploading is not permitted for this provider.

     Raises:
         NCBIFileException: unconditionally.
     """
     reason = "Upload is not permitted for the NCBI remote provider. Is an output set to NCBI.RemoteProvider.remote()?"
     raise NCBIFileException(reason)
Example #12
0
 def result_ids(json):
     """Return the ``idlist`` stored under ``esearchresult`` in ``json``.

     Args:
         json: mapping to look in. The name shadows the standard ``json``
             module inside this function; it is kept so existing callers
             keep working.

     Returns:
         The value of ``json["esearchresult"]["idlist"]``.

     Raises:
         NCBIFileException: if either key is missing.
     """
     # Read from the argument itself rather than from a same-purpose
     # variable that happens to exist in some enclosing scope.
     if "esearchresult" in json and "idlist" in json["esearchresult"]:
         return json["esearchresult"]["idlist"]

     raise NCBIFileException("ESearch error")