def __init__(self, search_queries=None, search_term_sets=None, sleep_time=1, database_id="WOS", search_client="Lite"): """ Initialize search queries. Keyword arguments: search_queries (list) -- Provide fully created queries in a list. search_terms (list) -- Provide series of searches as key-value pairs in a list. """ self.search_queries = search_queries self.search_term_sets = search_term_sets if not search_queries and not search_term_sets: print( "No searches provided. Include either 'search_queries' or 'search_terms' in argument." ) self.search_client = search_client self.database_id = database_id self.wos = Wos(sleep_time=sleep_time, client=self.search_client) self.wos.authorize() self.wos.retrieve_parameters()
def _getpidclass(type, baseval): baseval = baseval.strip().replace('\n', '') if type.strip().lower() == "doi": return Doi(baseval) if type.strip().lower() == "urn" or type.strip().lower( ) == "urn:nbn" or type.strip().lower() == "urn:nbn:nl" or type.strip( ).lower() == "urnnbn" or type.strip().lower() == "nbn": return Nbn(baseval) if type.strip().lower() == "arxiv": return ArXiv(baseval) if type.strip().lower() == "href" or type.strip().lower( ) == "http" or type.strip().lower() == "url" or type.strip().lower( ) == "purl": # :mods:relatedItem/@xlink:href wordt met type 'url' opgezet... if ("doi.org/" in baseval.lower()): return Doi(baseval) return Purl(baseval) if type.strip().lower() == "issn": return Issn(baseval) if type.strip().lower() == "isbn": return Isbn(baseval) if type.strip().lower() == "pmid" or type.strip().lower() == "pubmed": return Pmid(baseval) if type.strip().lower() == "pure" or "pure" in type.lower(): return Pure(baseval) if type.strip().lower() == "wos": return Wos(baseval) if type.strip().lower() == "scopus": return Scopus(baseval) if type.strip().lower() == "uri": if ("issn" in baseval.lower()): return Issn(baseval) if ("isbn" in baseval.lower()): return Isbn(baseval) if ("doi" in baseval.lower()): return Doi(baseval) if ( "/handle/" in baseval.lower() or "hdl/" in baseval.lower() or "handle.net" in baseval.lower() ): #handle may resolve locally, prefixed with local url-resolver, so return url type if given so. return Handle(baseval) if ("urn:nbn:nl" in baseval.lower()): return Nbn(baseval) if ("http" in baseval.lower() or "ftp" in baseval.lower()): return Purl(baseval) if type.strip().lower() == "handle" or type.strip().lower( ) == "handle.net" or type.strip().lower() == "hdl": return Handle(baseval) return Unknown(type.strip().replace('\n', ''), baseval)
def __init__(self, search_queries=None, search_term_sets=None, sleep_time=1, database_id="WOS", search_client="Lite"): """ Initialize search queries. Keyword arguments: search_queries (list) -- Provide fully created queries in a list. search_terms (list) -- Provide series of searches as key-value pairs in a list. """ self.search_queries = search_queries self.search_term_sets = search_term_sets if not search_queries and not search_term_sets: print "No searches provided. Include either 'search_queries' or 'search_terms' in argument." self.search_client = search_client self.database_id = database_id self.wos = Wos(sleep_time=sleep_time, client=self.search_client) self.wos.authorize() self.wos.retrieve_parameters()
class WosCalls(): """Run searches against the WOS API using the Wos class.""" def __init__(self, search_queries=None, search_term_sets=None, sleep_time=1, database_id="WOS", search_client="Lite"): """ Initialize search queries. Keyword arguments: search_queries (list) -- Provide fully created queries in a list. search_terms (list) -- Provide series of searches as key-value pairs in a list. """ self.search_queries = search_queries self.search_term_sets = search_term_sets if not search_queries and not search_term_sets: print( "No searches provided. Include either 'search_queries' or 'search_terms' in argument." ) self.search_client = search_client self.database_id = database_id self.wos = Wos(sleep_time=sleep_time, client=self.search_client) self.wos.authorize() self.wos.retrieve_parameters() def get_all_search_results(self): """Return all results from queries defined in __init__.""" # Keep track of total result count across all searches. self.total_results = 0 for search_query in self.search_queries: self.__run_search(search_query) print("Process complete.") print("Returned {0} records".format(self.total_results)) def __run_search(self, query): """ Communicate with the WOS class to run a search. args: query (str): complete and well-formatted search query. """ self.wos.query_parameters(query, database_id=self.database_id) self.wos.search(self.wos.qp, self.wos.retrieve_parameters()) self.total_results += self.wos.records_found # WOS imposes a limit on number of searches per session -- check after each query # and restart session if necessary. self.check_session() def find_exact_match(self): """Search for known item. If more than one result returned, sift through results to find most appropriate match. If one record can't be isolated store all best guesses as matches for further manual editing. """ self.total_results = 0 self.article_data = {} count = 0 errors = 0 for search_data in self.search_term_sets: count += 1 print(count, ) all_results = [] self.search_data_update = search_data.copy() try: self.__run_search(search_data["query"]) # Return 1 result, assume with reasonable confidence this is the # 'correct' hit. if self.wos.records_found == 1: metalite = MetaWosLite( dict(self.wos.search_results.records[0])) wos_metadata = metalite.get_metadata() self.search_data_update.update(wos_metadata) # TODO Fix result count. It always comes out 1. self.search_data_update["wos_result_count"] = 1 all_results = [self.search_data_update] # With more than 1 results, attempt to sift to find 1 correct, or several # plausible results to store. elif self.wos.records_found > 1: result_count = 0 for search_record in self.wos.search_results.records: metalite = MetaWosLite(dict(search_record)) wos_metadata = metalite.get_metadata() sifter = SiftSearchResults(self.search_data_update, wos_metadata) sift_result = sifter.assess_match() print("----{0}".format(sift_result)) if sift_result == "exact_match": self.search_data_update.update(wos_metadata) result_count = 1 all_results = [self.search_data_update] break elif sift_result == "probable_match": result_count += 1 pmatch = self.search_data_update.copy() pmatch.update(wos_metadata) all_results.append(pmatch) print("----Storing {0} record(s)".format(len(all_results))) else: all_results = [self.search_data_update] self.article_data[search_data["id"]] = all_results except Exception as e: print(e) if "Throttle" in e or "throttle" in e: time.sleep(60) errors += 1 #print self.article_data print("Processed {0} records, Encountered {1} errors.".format( count, errors)) def run_phylo_process(self): """ Search algorithm designed specifically to work with Prof Ohlrogge's data. """ self.total_results = 0 for search_term_set in self.search_term_sets: query = self.wos.advanced_search(search_term_set, fields=["author", "source", ""]) def check_session(self): """If session has lasted too long, break and restart session.""" if self.wos.total_calls > 2000: self.wos.close_session() def get_citing_articles(self): for record in self.wos.metadata_collection["search_results"]: uid = record["accession_number"] self.wos.citing_articles(uid, self.wos.retrieve_parameters()) self.check_session() print("Process complete.") print("Searched {0} UIDs".format( len(self.wos.metadata_collection["search_results"]))) def get_cited_references(self, get_full_records=True, json_file=None): """ Get all citations mentioned in a given article. Keyword arguments: get_full_records (bool) -- if true, perform title search on references with full metadata. """ if json_file: search_returns = json.load(open(json_file, "r")) else: search_returns = self.wos.metadata_collection["search_results"] for index, record in enumerate(search_returns): print("Record", index) uid = record["accession_number"] self.wos.cited_references( uid, self.wos.retrieve_parameters(option={ "key": "Hot", "value": "On" }), database_id="WOS", get_full_records=get_full_records) self.check_session() print("Process complete.") print("Searched {0} UIDs".format( len(self.wos.metadata_collection["search_results"]))) def make_results_tsv(self, search_type, output_file=None): """Produce TSV output based on metadata gathered.""" if not output_file: output_file = os.path.join( ".", "{0}_results_{1}.tsv".format( search_type, datetime.now().strftime("%Y-%m-%d-%H%M"))) if not self.wos.metadata_collection[search_type]: print("No search results to process.") else: with open(output_file, "w") as fh: fh.write("\t".join(self.wos.metadata_collection[search_type] [0].keys()) + "\n") for record in self.wos.metadata_collection[search_type]: fh.write("\t".join([record[i] for i in record]).encode("utf8")) """ for item in record: fh.write((record[item] + "\t").encode("utf8")) """ fh.write("\n") def make_cited_records_tsv(self, output_file=None): pass
class WosCalls(): """Run searches against the WOS API using the Wos class.""" def __init__(self, search_queries=None, search_term_sets=None, sleep_time=1, database_id="WOS", search_client="Lite"): """ Initialize search queries. Keyword arguments: search_queries (list) -- Provide fully created queries in a list. search_terms (list) -- Provide series of searches as key-value pairs in a list. """ self.search_queries = search_queries self.search_term_sets = search_term_sets if not search_queries and not search_term_sets: print "No searches provided. Include either 'search_queries' or 'search_terms' in argument." self.search_client = search_client self.database_id = database_id self.wos = Wos(sleep_time=sleep_time, client=self.search_client) self.wos.authorize() self.wos.retrieve_parameters() def get_all_search_results(self): """Return all results from queries defined in __init__.""" # Keep track of total result count across all searches. self.total_results = 0 for search_query in self.search_queries: self.__run_search(search_query) print "Process complete." print "Returned {0} records".format(self.total_results) def __run_search(self, query): """ Communicate with the WOS class to run a search. args: query (str): complete and well-formatted search query. """ self.wos.query_parameters(query, database_id=self.database_id) self.wos.search(self.wos.qp, self.wos.retrieve_parameters()) self.total_results += self.wos.records_found # WOS imposes a limit on number of searches per session -- check after each query # and restart session if necessary. self.check_session() def find_exact_match(self): """Search for known item. If more than one result returned, sift through results to find most appropriate match. If one record can't be isolated store all best guesses as matches for further manual editing. """ self.total_results = 0 self.article_data = {} count = 0 errors = 0 for search_data in self.search_term_sets: count += 1 print count, all_results = [] self.search_data_update = search_data.copy() try: self.__run_search(search_data["query"]) # Return 1 result, assume with reasonable confidence this is the # 'correct' hit. if self.wos.records_found == 1: metalite = MetaWosLite(dict(self.wos.search_results.records[0])) wos_metadata = metalite.get_metadata() self.search_data_update.update(wos_metadata) # TODO Fix result count. It always comes out 1. self.search_data_update["wos_result_count"] = 1 all_results = [self.search_data_update] # With more than 1 results, attempt to sift to find 1 correct, or several # plausible results to store. elif self.wos.records_found > 1: result_count = 0 for search_record in self.wos.search_results.records: metalite = MetaWosLite(dict(search_record)) wos_metadata = metalite.get_metadata() sifter = SiftSearchResults(self.search_data_update, wos_metadata) sift_result = sifter.assess_match() print "----{0}".format(sift_result) if sift_result == "exact_match": self.search_data_update.update(wos_metadata) result_count = 1 all_results = [self.search_data_update] break elif sift_result == "probable_match": result_count += 1 pmatch = self.search_data_update.copy() pmatch.update(wos_metadata) all_results.append(pmatch) print "----Storing {0} record(s)".format(len(all_results)) else: all_results = [self.search_data_update] self.article_data[search_data["id"]] = all_results except Exception as e: print e if "Throttle" in e or "throttle" in e: time.sleep(60) errors += 1 #print self.article_data print "Processed {0} records, Encountered {1} errors.".format(count, errors) def run_phylo_process(self): """ Search algorithm designed specifically to work with Prof Ohlrogge's data. """ self.total_results = 0 for search_term_set in self.search_term_sets: query = self.wos.advanced_search(search_term_set, fields=["author", "source", ""]) def check_session(self): """If session has lasted too long, break and restart session.""" if self.wos.total_calls > 2000: self.wos.close_session() def get_citing_articles(self): for record in self.wos.metadata_collection["search_results"]: uid = record["accession_number"] self.wos.citing_articles(uid, self.wos.retrieve_parameters()) self.check_session() print "Process complete." print "Searched {0} UIDs".format(len(self.wos.metadata_collection["search_results"])) def get_cited_references(self, get_full_records=True, json_file=None): """ Get all citations mentioned in a given article. Keyword arguments: get_full_records (bool) -- if true, perform title search on references with full metadata. """ if json_file: search_returns = json.load(open(json_file, "r")) else: search_returns = self.wos.metadata_collection["search_results"] for index, record in enumerate(search_returns): print "Record", index uid = record["accession_number"] self.wos.cited_references(uid, self.wos.retrieve_parameters(option={"key": "Hot", "value": "On"}), database_id="WOS", get_full_records=get_full_records) self.check_session() print "Process complete." print "Searched {0} UIDs".format(len(self.wos.metadata_collection["search_results"])) def make_results_tsv(self, search_type, output_file=None): """Produce TSV output based on metadata gathered.""" if not output_file: output_file = os.path.join(".", "{0}_results_{1}.tsv".format(search_type, datetime.now() .strftime("%Y-%m-%d-%H%M"))) if not self.wos.metadata_collection[search_type]: print "No search results to process." else: with open(output_file, "w") as fh: fh.write("\t".join(self.wos.metadata_collection[search_type][0].keys()) + "\n") for record in self.wos.metadata_collection[search_type]: fh.write("\t".join([record[i] for i in record]).encode("utf8")) """ for item in record: fh.write((record[item] + "\t").encode("utf8")) """ fh.write("\n") def make_cited_records_tsv(self, output_file=None): pass