def __init__(
    self,
    *args,
    keep_local=False,
    stay_on_remote=False,
    provider=None,
    email=None,
    db=None,
    rettype=None,
    retmode=None,
    **kwargs
):
    """Initialise the remote object and attach an NCBI helper to it.

    All arguments are first forwarded unchanged to the parent initialiser.
    The helper stored in ``self._ncbi`` is ``provider.remote_interface()``
    when a truthy ``provider`` is given; otherwise a new ``NCBIHelper`` is
    built from ``*args``, ``email`` and ``**kwargs``.

    Raises:
        NCBIFileException: if ``db`` is truthy and the helper's
            ``is_valid_db(db)`` rejects it.
    """
    super(RemoteObject, self).__init__(
        *args,
        keep_local=keep_local,
        stay_on_remote=stay_on_remote,
        provider=provider,
        email=email,
        db=db,
        rettype=rettype,
        retmode=retmode,
        **kwargs
    )

    self._ncbi = (
        provider.remote_interface()
        if provider
        else NCBIHelper(*args, email=email, **kwargs)
    )

    # A falsy db (None, "") is accepted as-is; only named databases are checked.
    if db and not self._ncbi.is_valid_db(db):
        raise NCBIFileException(
            "DB specified is not valid. Options include: {dbs}".format(
                dbs=", ".join(self._ncbi.valid_dbs)))

    self.db = db
    self.rettype = rettype
    self.retmode = retmode
    self.kwargs = kwargs
def mtime(self):
    """Return what the NCBI helper's ``mtime()`` reports for this accession.

    The helper is queried with ``self.accession`` and ``db=self.db``.

    Raises:
        NCBIFileException: if ``self.exists()`` is falsy.
    """
    if not self.exists():
        raise NCBIFileException(
            "The record does not seem to exist remotely: %s" % self.accession)
    return self._ncbi.mtime(self.accession, db=self.db)
def is_valid_db_request(self, db, rettype, retmode):
    """Tell whether ``db`` offers the given rettype/retmode combination.

    Returns:
        True if any entry in ``self.efetch_options[db]`` has both the
        requested ``rettype`` and ``retmode``; False otherwise.

    Raises:
        NCBIFileException: if ``self.is_valid_db(db)`` is falsy.
    """
    if not self.is_valid_db(db):
        raise NCBIFileException(
            "DB specified is not valid. Options include: {dbs}".format(
                dbs=", ".join(self.valid_dbs)))

    return any(
        entry["rettype"] == rettype and entry["retmode"] == retmode
        for entry in self.efetch_options[db]
    )
def guess_db_options_for_extension(self, file_ext, db=None, rettype=None, retmode=None):
    """Work out the efetch request options for a file extension.

    If ``db``, ``rettype`` and ``retmode`` are all given and
    ``self.is_valid_db_request`` accepts them, they are returned directly
    together with ``file_ext``. Otherwise the database is taken from ``db``
    or, when ``db`` is falsy, from ``self.dbs_for_options(...)``, and the
    matching option is looked up via ``self.options_for_db_and_extension``.

    Returns:
        A dict with the keys ``"db"``, ``"rettype"``, ``"retmode"`` and
        ``"ext"``.

    Raises:
        NCBIFileException: if more than one database is possible, if more
            than one option matches, or if nothing matches at all. The
            last case includes "no candidate database", which previously
            fell through and returned ``None`` implicitly.
    """
    # Fully specified and valid: nothing to guess.
    if (db and rettype and retmode
            and self.is_valid_db_request(db, rettype, retmode)):
        return {
            "db": db,
            "rettype": rettype,
            "retmode": retmode,
            "ext": file_ext,
        }

    possible_dbs = [db] if db else self.dbs_for_options(
        file_ext, rettype, retmode)

    if len(possible_dbs) > 1:
        raise NCBIFileException(
            'Ambigious db for file extension specified: "{}"; possible databases include: {}'
            .format(file_ext, ", ".join(list(possible_dbs))))

    if len(possible_dbs) == 1:
        likely_db = possible_dbs.pop()
        likely_options = self.options_for_db_and_extension(
            likely_db, file_ext, rettype, retmode)

        if len(likely_options) == 1:
            chosen = likely_options[0]
            return {
                "db": likely_db,
                "rettype": chosen["rettype"],
                "retmode": chosen["retmode"],
                "ext": chosen["ext"],
            }
        if len(likely_options) > 1:
            raise NCBIFileException(
                "Please clarify the rettype and retmode. Multiple request types are possible for the file extension ({}) specified: {}"
                .format(file_ext, likely_options))

    # Reached when there is no candidate database or no matching option.
    # Fail loudly instead of handing None back to the caller.
    raise NCBIFileException(
        "No request options found. Please check the file extension ({}), db ({}), rettype ({}), and retmode ({}) specified."
        .format(file_ext, db, rettype, retmode))
def download(self):
    """Fetch this accession through the NCBI helper.

    Calls ``self._ncbi.fetch_from_ncbi`` with a one-element accession list,
    the directory part of ``self.accession`` and the stored
    ``rettype``/``retmode``/``file_ext``/``db`` plus ``self.kwargs``.

    Raises:
        NCBIFileException: if ``self.exists()`` is falsy.
    """
    if not self.exists():
        raise NCBIFileException(
            "The record does not seem to exist remotely: %s" % self.accession)

    target_dir = os.path.dirname(self.accession)
    self._ncbi.fetch_from_ncbi(
        [self.accession],
        target_dir,
        rettype=self.rettype,
        retmode=self.retmode,
        file_ext=self.file_ext,
        db=self.db,
        **self.kwargs
    )
def exists(self, accession, db="nuccore"):
    """Check via an Entrez esearch whether ``accession`` is found in ``db``.

    Args:
        accession: search term passed to ``self.entrez.esearch``.
        db: database to search; defaults to ``"nuccore"``.

    Returns:
        True if the first ``Count`` element of the response equals 1.
        For any other count a warning is logged and False is returned.

    Raises:
        NCBIFileException: if the response contains no ``Count`` element.
    """
    result = self.entrez.esearch(db=db, term=accession, rettype="count")
    try:
        payload = result.read()
    finally:
        # Always release the response handle, even if reading fails.
        result.close()

    nodes = ET.fromstring(payload).findall(".//Count")
    if not nodes:
        # This is an esearch call; the message used to name esummary.
        raise NCBIFileException("The esearch query failed.")

    count = int(nodes[0].text)
    if count == 1:
        return True

    logger.warning(
        'The accession specified, "{acc}", could not be found in the database "{db}".\nConsider if you may need to specify a different database via "db=<db_id>".'
        .format(acc=accession, db=db))
    return False
def _esummary_and_parse(
    self,
    accession,
    xpath_selector,
    db="nuccore",
    return_type=int,
    raise_on_failure=True,
    retmode="xml",
    **kwargs
):
    """Run an Entrez esummary and extract one value from the XML response.

    Args:
        accession: passed to ``self.entrez.esummary`` as ``id``.
        xpath_selector: ElementTree path; the text of the first match is used.
        db: database to query; defaults to ``"nuccore"``.
        return_type: callable applied to the matched element's text.
        raise_on_failure: whether a missing match raises or returns 0.
        retmode: accepted but not forwarded to esummary; the response is
            always parsed as XML here.
        **kwargs: forwarded to ``self.entrez.esummary``.

    Returns:
        ``return_type(text)`` of the first matching element, or ``0`` when
        nothing matches and ``raise_on_failure`` is falsy.

    Raises:
        NCBIFileException: if nothing matches and ``raise_on_failure`` is
            truthy.
    """
    result = self.entrez.esummary(db=db, id=accession, **kwargs)
    try:
        payload = result.read()
    finally:
        # Always release the response handle, even if reading fails.
        result.close()

    nodes = ET.fromstring(payload).findall(xpath_selector)
    if nodes:
        return return_type(nodes[0].text)
    if raise_on_failure:
        raise NCBIFileException("The esummary query failed.")
    return 0
def options_for_db_and_extension(self, db, file_ext, rettype=None, retmode=None):
    """List the efetch options of ``db`` that produce ``file_ext``.

    An entry of ``self.efetch_options[db]`` is kept when its ``"ext"``
    equals ``file_ext``. A truthy ``retmode`` and/or ``rettype`` narrows
    the result further to entries carrying exactly that value.

    Returns:
        A new list of the matching option dicts, in table order (possibly
        empty).

    Raises:
        AssertionError: if ``file_ext`` is falsy.
        NCBIFileException: if ``self.is_valid_db(db)`` is falsy.
    """
    assert file_ext, "file_ext must be defined"

    if not self.is_valid_db(db):
        raise NCBIFileException(
            "DB specified is not valid. Options include: {dbs}".format(
                dbs=", ".join(self.valid_dbs)))

    return [
        entry
        for entry in self.efetch_options[db]
        if file_ext == entry["ext"]
        and not (retmode and entry["retmode"] != retmode)
        and not (rettype and entry["rettype"] != rettype)
    ]
def __init__(self, *args, email=None, **kwargs):
    """Configure Entrez access and build the table of valid efetch options.

    Stores ``email`` on the instance and on the shared ``Entrez`` object,
    sets the Entrez tool name to ``"Snakemake"``, and fills
    ``self.efetch_options``: a dict mapping each database name to a list of
    ``{"rettype", "retmode", "ext"}`` dicts.

    Raises:
        NCBIFileException: if ``email`` is falsy.
    """
    if not email:
        raise NCBIFileException(
            "An e-mail address must be provided to either the remote file or the RemoteProvider() as email=<your_address>. The NCBI requires e-mail addresses for queries."
        )

    self.email = email
    self.entrez = Entrez
    self.entrez.email = self.email
    self.entrez.tool = "Snakemake"

    # valid NCBI Entrez efetch options
    # via https://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.T._valid_values_of__retmode_and/?report=objectonly
    #
    # Each row is (rettype, retmode, ext); rows are expanded into dicts below.
    field_names = ("rettype", "retmode", "ext")

    # Rows that nuccore, nucest, nucgss and popset all start with.
    shared_sequence_rows = [
        ("null", "text", "txt"),
        ("null", "asn.1", "asn1"),
        ("native", "xml", "xml"),
        ("acc", "text", "acc"),
        ("fasta", "text", "fasta"),
        ("fasta", "xml", "fasta.xml"),
        ("seqid", "text", "seqid"),
        ("gb", "text", "gb"),
        ("gb", "xml", "gb.xml"),
        ("gbc", "xml", "gbc"),
    ]

    rows_by_db = {
        "bioproject": [("xml", "xml", "xml")],
        "biosample": [
            ("full", "xml", "xml"),
            ("full", "text", "txt"),
        ],
        "biosystems": [("xml", "xml", "xml")],
        "gds": [("summary", "text", "txt")],
        "gene": [
            ("null", "asn.1", "asn1"),
            ("null", "xml", "xml"),
            ("gene_table", "text", "gene_table"),
        ],
        "homologene": [
            ("null", "asn.1", "asn1"),
            ("null", "xml", "xml"),
            ("alignmentscores", "text", "alignmentscores"),
            ("fasta", "text", "fasta"),
            ("homologene", "text", "homologene"),
        ],
        "mesh": [("full", "text", "txt")],
        "nlmcatalog": [
            ("null", "text", "txt"),
            ("null", "xml", "xml"),
        ],
        "nuccore": shared_sequence_rows + [
            ("ft", "text", "ft"),
            ("gbwithparts", "text", "gbwithparts"),
            ("fasta_cds_na", "text", "fasta_cds_na"),
            ("fasta_cds_aa", "text", "fasta_cds_aa"),
        ],
        "nucest": shared_sequence_rows + [("est", "text", "est")],
        "nucgss": shared_sequence_rows + [("gss", "text", "gss")],
        "protein": [
            ("null", "text", "txt"),
            ("null", "asn.1", "asn1"),
            ("native", "xml", "xml"),
            ("acc", "text", "acc"),
            ("fasta", "text", "fasta"),
            ("fasta", "xml", "fasta.xml"),
            ("seqid", "text", "seqid"),
            ("ft", "text", "ft"),
            ("gp", "text", "gp"),
            ("gp", "xml", "gp.xml"),
            ("gpc", "xml", "gpc"),
            ("ipg", "xml", "xml"),
        ],
        "popset": list(shared_sequence_rows),
        "pmc": [
            ("null", "xml", "xml"),
            ("medline", "text", "medline"),
        ],
        "pubmed": [
            ("null", "asn.1", "asn1"),
            ("null", "xml", "xml"),
            ("medline", "text", "medline"),
            ("uilist", "text", "uilist"),
            ("abstract", "text", "abstract"),
        ],
        "sequences": [
            ("null", "text", "txt"),
            ("acc", "text", "acc"),
            ("fasta", "text", "fasta"),
            ("seqid", "text", "seqid"),
        ],
        "snp": [
            ("null", "asn.1", "asn1"),
            ("null", "xml", "xml"),
            ("flt", "text", "flt"),
            ("fasta", "text", "fasta"),
            ("rsr", "text", "rsr"),
            ("ssexemplar", "text", "ssexemplar"),
            ("chr", "text", "chr"),
            ("docset", "text", "docset"),
            ("uilist", "text", "uilist"),
            ("uilist", "xml", "uilist.xml"),
        ],
        "sra": [("full", "xml", "xml")],
        "taxonomy": [
            ("null", "xml", "xml"),
            ("uilist", "text", "uilist"),
            ("uilist", "xml", "uilist.xml"),
        ],
    }

    # A fresh dict is created per row, so no option dict is shared between
    # databases even where the rows are.
    self.efetch_options = {
        db_name: [dict(zip(field_names, row)) for row in rows]
        for db_name, rows in rows_by_db.items()
    }
def list(self):
    """Always fail: listing is not supported by this provider.

    Raises:
        NCBIFileException: unconditionally.
    """
    message = "The NCBI Remote Provider does not currently support list-based operations like glob_wildcards()."
    raise NCBIFileException(message)
def upload(self):
    """Always fail: uploading is not permitted for this provider.

    Raises:
        NCBIFileException: unconditionally.
    """
    message = "Upload is not permitted for the NCBI remote provider. Is an output set to NCBI.RemoteProvider.remote()?"
    raise NCBIFileException(message)
def result_ids(json):
    """Return the ID list from a parsed ESearch JSON response.

    Args:
        json: mapping that should contain ``json["esearchresult"]["idlist"]``.
            The parameter name is kept for backward compatibility; inside
            this function it hides any outer name ``json``.

    Returns:
        The value stored at ``json["esearchresult"]["idlist"]``.

    Raises:
        NCBIFileException: if either key is missing.
    """
    # Read from the argument itself. The body used to reference a different
    # name (``json_results``) and therefore ignored what the caller passed.
    if "esearchresult" in json and "idlist" in json["esearchresult"]:
        return json["esearchresult"]["idlist"]
    raise NCBIFileException("ESearch error")