def get_NCBItaxid(accession, email=None, api_key=None, engine=None):
    """
    Retrieve the taxid associated with an NCBI accession number.

    When ``engine`` is given, the taxid is read from the ``taxas`` table of
    the local database. Otherwise NCBI is queried through the eutils
    ``elink`` service (``nuccore`` -> ``taxonomy``).

    Command-line counterpart noted by the original author:
    esearch -db nuccore -query LC315594.1|elink -target taxonomy |efetch -format uid

    :param accession: NCBI accession number
    :param email: contact email address; required when no engine is given
    :param api_key: API key for 10 requests per second
    :param engine: connection to local database
    :return: taxid
    :raises ValueError: if neither an engine nor an email address is provided
    """
    print('Get taxid with engine', engine)

    if engine is not None:
        # Double embedded single quotes so the accession cannot terminate the
        # SQL string literal and inject extra SQL.
        # NOTE(review): bound parameters would be preferable once the
        # engine's parameter style is confirmed.
        safe_accession = str(accession).replace("'", "''")
        query = "SELECT taxid FROM taxas WHERE accession='%s'" % safe_accession
        # NOTE(review): presumably first() yields None when no row matches,
        # in which case the subscript raises TypeError - confirm with callers.
        return engine.execute(query).first()[0]

    if not email:
        # ValueError (not BaseException) so ordinary `except Exception`
        # handlers see it; it is still a BaseException subclass.
        raise ValueError('Please provide a valid email address')

    try:
        query_service = eutils.QueryService(email=email, api_key=api_key)
        xml = query_service.elink({
            'dbfrom': 'nuccore',
            'db': 'taxonomy',
            'id': accession,
        }).decode('utf')
    except eutils.exceptions.EutilsRequestError:
        # Show which accession failed before propagating the error.
        print(accession)
        raise

    # The text of the last <Id> element in the response is used as the taxid.
    # NOTE(review): assumes the linked taxonomy id is always the last <Id>
    # and that at least one <Id> is present - confirm.
    return xml[xml.rfind('<Id>') + 4:xml.rfind('</Id>')]
def get_eutils_client(cache_path, cache=None):
    """
    Build an ``eutils.QueryService`` instance.

    The API key is not a parameter of this function: it is read from the
    module-level ``API_KEY`` (an NCBI API key, obtainable from
    https://www.ncbi.nlm.nih.gov).

    :param cache_path: passed as the first positional argument of
        ``eutils.QueryService``; described by the original author as a valid
        filesystem path to an SQLite cache file
    :param cache: forwarded unchanged as the ``cache`` keyword argument
    :return: the newly created ``eutils.QueryService`` object
    """
    import eutils

    service_factory = eutils.QueryService
    return service_factory(cache_path, api_key=API_KEY, cache=cache)
"/', 'void.ttl', '" + inchikey_uri + "');\n") # Elink if todo_Elink: run_as_test = config['ELINK'].getboolean('run_as_test') apiKey = config['ELINK'].get('api_key') pmid_cid_version = args.version pack_size = config['ELINK'].getint('pack_size') timeout = config['ELINK'].getint('timeout') max_triples_by_files = config['ELINK'].getint('max_triples_by_files') all_pmids = None # Building requests query_builder = eutils.QueryService(cache=False, default_args={ 'retmax': 10000000, 'retmode': 'xml', 'usehistory': 'n' }, api_key=apiKey, email="*****@*****.**") # Build Elink ressource creator: pmid_cid = Elink_ressource_creator( ressource_name="PMID_CID", version=pmid_cid_version, dbfrom="pubmed", db="pccompound", ns_linking_id=("reference", "PMID"), ns_linked_id=("compound", "CID"), ns_endpoint=("endpoint", ""), primary_predicate=("cito", "discusses"), secondary_predicate=("cito", "isCitedAsDataSourceBy"), namespaces=namespaces,
def qs():
    """Create an ``eutils.QueryService`` using only its default arguments."""
    service = eutils.QueryService()
    return service