def _get_metadata(self, query, category, crossref=False):
    """
    Retrieve metadata from all search results and store it in
    self.metadata_collection[category].

    Positional arguments:
    query (str) -- the initial query string for the given search results.
    category (str) -- the container in which to store the gathered
        metadata. See self.metadata_collection template in __init__.

    Keyword arguments:
    crossref (bool) -- stored on self.crossref; when true and the 'Lite'
        client is in use, CrossRef.get_abstract is called for each record.

    Returns None. If the search results carry no records, nothing is
    appended (and self.tree is left untouched for the 'Search' client).
    An unrecognised self.client only prints a diagnostic message.
    """
    self.crossref = crossref

    # It seems results are returned in a totally different format depending
    # on search client used.
    if self.client == "Search":
        # The 'premium' client ("Search") returns XML of matching records.
        records_xml = getattr(self.search_results, "records", None)
        if not records_xml:
            # Nothing to parse; an empty payload would make the XML parser
            # raise, so treat it as "no records".
            return
        self.tree = etree.fromstring(records_xml)
        objectify.deannotate(self.tree, cleanup_namespaces=True)
        for record in self.tree:
            self.meta_record = MetaWos(record, query)
            article_metadata = self.meta_record.compile_metadata()
            if self.citing_metadata:
                article_metadata["source_id"] = self.uid
            self.metadata_collection[category].append(article_metadata)
    elif self.client == "Lite":
        # The 'lite' client returns a list of dictionary-like objects.
        # A missing or empty records attribute is treated as no results.
        lite_records = getattr(self.search_results, "records", None) or []
        for record in lite_records:
            article_metadata = dict(record)
            if self.crossref:
                # NOTE(review): the return value was previously bound to an
                # unused local and never stored. Presumably get_abstract
                # updates article_metadata in place -- confirm; otherwise
                # the abstract is being discarded here.
                CrossRef.get_abstract(article_metadata)
            self.metadata_collection[category].append(article_metadata)
    else:
        print("Inappropriate method for metadata retrieval: {0}".format(
            self.client))
def get_full_record(self):
    """
    Run title search on 'hot' records, that is, ones with WOS ids.

    Start with most stringent search, then progressively loosen to improve
    likelihood of a match:

    Search 1: journal_title AND pub_year AND title
    Search 2: journal_title AND title
    Search 3: title

    The title is taken from self.hot_item["citedTitle"], lower-cased,
    stripped of punctuation, and purged of boolean operator words so that
    they are not sent as part of the query text. Journal title and year
    come from self.hot_item["citedWork"] / ["year"], falling back to the
    literal string "NONE" when absent.

    When self.search_count is exactly 1 after the searches, the metadata of
    each record in the parsed result is tagged with self.uid as "source_id"
    and appended to self.metadata_collection["hot_records"].

    Returns None. Any exception raised while searching or compiling
    metadata is reported on stdout and otherwise suppressed (best effort).
    """
    booleans = {"and", "near", "or", "not"}
    edge_punctuation = "?;:.,-_()[]<>{}!`'"
    # Characters removed wherever they occur inside a word, not only at
    # its edges.
    drop_inner = str.maketrans("", "", "=()[]")

    title_words = []
    for raw_word in self.hot_item["citedTitle"].split():
        word = raw_word.lower().strip(edge_punctuation).translate(drop_inner)
        # Filter AFTER cleaning: a token such as "and," must not slip
        # through as a bare operator, and punctuation-only tokens must not
        # leave empty strings (doubled spaces) in the query.
        if word and word not in booleans:
            title_words.append(word)
    record_title = " ".join(title_words)

    # Membership test plus indexing (rather than .get) keeps this working
    # for any container that supports `in` and `[]`.
    journal_title = (self.hot_item["citedWork"]
                     if "citedWork" in self.hot_item else "NONE")
    pub_year = self.hot_item["year"] if "year" in self.hot_item else "NONE"

    self.rp_title_search = self.retrieve_parameters(count="1")

    try:
        # Searches 1 and 2: stop at the first one reporting a hit.
        narrowed_searches = (
            {"pub_year": pub_year, "journal_title": journal_title},
            {"journal_title": journal_title},
        )
        for narrowing in narrowed_searches:
            if self._run_full_record_search(record_title, **narrowing) >= 1:
                break
        else:
            # Search 3: title only, used when neither narrowed search hit.
            self._run_full_record_search(record_title)

        # Whichever search ran last, its results are parsed.
        self.tree = etree.fromstring(self.title_search_results.records)

        # Only an unambiguous single match is recorded.
        # NOTE(review): self.search_count is presumably refreshed by
        # _run_full_record_search -- confirm.
        if self.search_count == 1:
            for record in self.tree:
                self.title_meta_record = MetaWos(record, record_title)
                self.title_metadata = (
                    self.title_meta_record.compile_metadata())
                self.title_metadata["source_id"] = self.uid
                self.metadata_collection["hot_records"].append(
                    self.title_metadata)
    except Exception as e:
        # Deliberate best effort: one failing record is reported and
        # skipped instead of aborting the caller.
        print("*******************ERROR*************************")
        print(record_title)
        print(e)
        print("*************************************************")