def metrics(self, aliases,
        provider_url_template=None,  # ignore this because multiple url steps
        cache_enabled=True):
    """Return metrics+drilldown for this item's pmid, using the module-level
    batch_data cache.

    Lazily builds the batch cache on first call via build_batch_data_dict().
    Returns {} when the item has no pmid alias or no entry in the batch.
    """
    # if haven't loaded batch_data yet, build it once and cache it globally
    global batch_data
    if not batch_data:
        batch_data = self.build_batch_data_dict()

    metrics_and_drilldown = {}

    # Only lookup metrics for items with appropriate ids
    from totalimpact import item
    aliases_dict = item.alias_dict_from_tuples(aliases)
    try:
        pmid = aliases_dict["pmid"][0]
    except KeyError:
        return {}

    pmid_alias = ("pmid", pmid)
    if pmid_alias in batch_data:
        pages = [page["raw"] for page in batch_data[pmid_alias]]
        # BUGFIX: test the collected list directly.  The old code initialized
        # page = "" and checked "if page:", which only worked because the
        # comprehension variable leaks out of scope in Python 2.
        if pages:
            metrics_and_drilldown = self._get_metrics_and_drilldown(pages, pmid)
    return metrics_and_drilldown
def metrics(self, aliases,
        provider_url_template=None,  # ignore this because multiple url steps
        cache_enabled=True):
    """Return metrics+drilldown for this item's pmid from the module-level
    batch_data cache.

    Assumes batch_data has already been populated elsewhere; returns {} when
    the item has no pmid alias or no entry in the batch.
    """
    metrics_and_drilldown = {}

    # Only lookup metrics for items with appropriate ids
    from totalimpact import item
    aliases_dict = item.alias_dict_from_tuples(aliases)
    try:
        pmid = aliases_dict["pmid"][0]
    except KeyError:
        return {}

    pmid_alias = ("pmid", pmid)
    global batch_data
    if pmid_alias in batch_data:
        pages = [page["raw"] for page in batch_data[pmid_alias]]
        # BUGFIX: test the collected list directly.  The old code initialized
        # page = "" and checked "if page:", which only worked because the
        # comprehension variable leaks out of scope in Python 2.
        if pages:
            metrics_and_drilldown = self._get_metrics_and_drilldown(pages, pmid)
    return metrics_and_drilldown
def provider_method_wrapper(tiid, input_aliases_dict, provider, method_name):
    """Invoke one provider method ("aliases"/"biblio"/"metrics") for an item.

    tiid: item id, used here only for logging.
    input_aliases_dict: alias dict, or a list of (namespace, id) tuples which
        is converted to a dict first.
    provider: provider object; method_name is resolved on it via getattr.

    On ProviderError the exception is logged and method_response is left None.
    NOTE(review): this block appears truncated here -- no return statement or
    later use of method_response is visible past the except clause; confirm
    against the full file.
    """
    # logger.info(u"{:20}: in provider_method_wrapper with {tiid} {provider_name} {method_name} with {aliases}".format(
    #     "wrapper", tiid=tiid, provider_name=provider.provider_name, method_name=method_name, aliases=input_aliases_dict))

    provider_name = provider.provider_name
    worker_name = provider_name+"_worker"

    # accept either a list of alias tuples or an already-built alias dict
    if isinstance(input_aliases_dict, list):
        input_aliases_dict = item_module.alias_dict_from_tuples(input_aliases_dict)

    input_alias_tuples = item_module.alias_tuples_from_dict(input_aliases_dict)
    method = getattr(provider, method_name)

    try:
        method_response = method(input_alias_tuples)
    except ProviderError, e:  # Python 2 except syntax; binds the exception as e
        method_response = None
        logger.info(u"{:20}: **ProviderError {tiid} {method_name} {provider_name}, Exception type {exception_type} {exception_arguments}".format(
            worker_name, tiid=tiid, provider_name=provider_name.upper(), method_name=method_name.upper(), exception_type=type(e).__name__, exception_arguments=e.args))
def add_to_database_if_nonzero(tiid, new_content, method_name, provider_name):
    """Apply a provider response to the stored Item, if there is anything to save.

    method_name selects how new_content is applied: "aliases" (dict, or list of
    (namespace, id) tuples which is converted), "biblio", or "metrics" (dict of
    metric_name -> value).  Any other method_name is logged and ignored.
    No-op when new_content is empty or the item is not found.

    NOTE(review): nothing here commits a db session, so persistence presumably
    happens inside the item_module helpers or in a caller -- confirm.
    """
    if new_content:
        # don't need item with metrics for this purpose, so don't bother getting metrics from db
        item_obj = item_module.Item.query.get(tiid)
        if item_obj:
            if method_name=="aliases":
                # aliases may arrive as a list of (namespace, id) tuples
                if isinstance(new_content, list):
                    new_content = item_module.alias_dict_from_tuples(new_content)
                item_obj = item_module.add_aliases_to_item_object(new_content, item_obj)
            elif method_name=="biblio":
                # NOTE(review): updated_item_doc is never used afterwards
                updated_item_doc = item_module.update_item_with_new_biblio(new_content, item_obj, provider_name)
            elif method_name=="metrics":
                for metric_name in new_content:
                    item_obj = item_module.add_metric_to_item_object(metric_name, new_content[metric_name], item_obj)
            else:
                logger.warning(u"ack, supposed to save something i don't know about: " + str(new_content))
    return
def wrapper(cls, tiid, input_aliases_dict, provider, method_name, aliases_providers_run, callback):
    """Run one provider method for an item and hand the result to callback.

    For method_name "aliases" the provider's response is canonicalized and
    merged into input_aliases_dict (so old aliases are kept), and
    provider_name is appended to aliases_providers_run (mutating the caller's
    list, if it is one).  For other methods the raw response is passed through.
    A ProviderError is logged and treated as a None response.

    Finally removes this thread's entry from the module-level thread_count
    registry; the KeyError there is expected in unit tests.
    Returns the response that was given to callback.
    """
    #logger.info(u"{:20}: **Starting {tiid} {provider_name} {method_name} with {aliases}".format(
    #    "wrapper", tiid=tiid, provider_name=provider.provider_name, method_name=method_name, aliases=aliases))

    provider_name = provider.provider_name
    worker_name = provider_name + "_worker"

    input_alias_tuples = item_module.alias_tuples_from_dict(input_aliases_dict)
    method = getattr(provider, method_name)

    try:
        method_response = method(input_alias_tuples)
    except ProviderError:
        method_response = None
        logger.info(u"{:20}: **ProviderError {tiid} {method_name} {provider_name} "
            .format(worker_name, tiid=tiid, provider_name=provider_name.upper(), method_name=method_name.upper()))

    if method_name == "aliases":
        # update aliases to include the old ones too
        aliases_providers_run += [provider_name]
        if method_response:
            new_aliases_dict = item_module.alias_dict_from_tuples(method_response)
            new_canonical_aliases_dict = item_module.canonical_aliases(new_aliases_dict)
            response = item_module.merge_alias_dicts(new_canonical_aliases_dict, input_aliases_dict)
        else:
            # provider found nothing new; keep what we started with
            response = input_aliases_dict
    else:
        response = method_response

    logger.info(u"{:20}: /biblio_print, RETURNED {tiid} {method_name} {provider_name} : {response}"
        .format(worker_name, tiid=tiid, method_name=method_name.upper(), provider_name=provider_name.upper(), response=response))

    callback(tiid, response, method_name, aliases_providers_run)

    try:
        del thread_count[provider_name][tiid + method_name]
    except KeyError:  # thread isn't there when we call wrapper in unit tests
        pass

    return response
def metrics(self, aliases,
        provider_url_template=None,  # ignore this because multiple url steps
        cache_enabled=True):
    """Collect Mendeley metrics for an item, trying doi, then pmid, then title.

    Each lookup strategy is attempted only if the previous one produced no
    metrics page; a missing alias namespace simply skips that strategy
    (KeyError is swallowed).  Returns {} when every strategy fails.
    """
    # Only lookup metrics for items with appropriate ids
    from totalimpact import item
    aliases_dict = item.alias_dict_from_tuples(aliases)

    metrics_page = None

    # try lookup by doi
    try:
        metrics_page = self._get_metrics_lookup_page(
            self.metrics_from_doi_template, aliases_dict["doi"][0], cache_enabled)
    except KeyError:
        pass

    # try lookup by pmid
    if not metrics_page:
        try:
            metrics_page = self._get_metrics_lookup_page(
                self.metrics_from_pmid_template, aliases_dict["pmid"][0], cache_enabled)
        except KeyError:
            pass

    # try lookup by title: resolve the title to a Mendeley uuid first
    if not metrics_page:
        try:
            page = self._get_uuid_lookup_page(
                aliases_dict["biblio"][0]["title"], cache_enabled)
            if page:
                uuid = self._get_uuid_from_title(aliases_dict, page)["uuid"]
                if uuid:
                    logger.debug(u"Mendeley: uuid is %s for %s" % (
                        uuid, aliases_dict["biblio"][0]["title"]))
                    # BUGFIX: pass cache_enabled here too, consistent with the
                    # doi and pmid lookups above (it was previously omitted, so
                    # the uuid lookup always used the cache default).
                    metrics_page = self._get_metrics_lookup_page(
                        self.metrics_from_uuid_template, uuid, cache_enabled)
                else:
                    logger.debug(u"Mendeley: couldn't find uuid for %s" % (
                        aliases_dict["biblio"][0]["title"]))
        except (KeyError, TypeError):
            pass

    # give up!
    if not metrics_page:
        return {}

    metrics_and_drilldown = self._get_metrics_and_drilldown_from_metrics_page(metrics_page)
    return metrics_and_drilldown
def wrapper(cls, tiid, input_aliases_dict, provider, method_name, aliases_providers_run, callback):
    """Run one provider method for an item and hand the result to callback.

    For method_name "aliases" the provider's response is canonicalized and
    merged into input_aliases_dict (keeping the old aliases), and
    provider_name is appended to aliases_providers_run (mutating the caller's
    list, if it is one).  For other methods the raw response is passed through.
    A ProviderError is logged and treated as a None response.

    Finally removes this thread's entry from the module-level thread_count
    registry; the KeyError there is expected in unit tests.
    Returns the response that was given to callback.
    """
    # logger.info(u"{:20}: **Starting {tiid} {provider_name} {method_name} with {aliases}".format(
    #     "wrapper", tiid=tiid, provider_name=provider.provider_name, method_name=method_name, aliases=aliases))

    provider_name = provider.provider_name
    worker_name = provider_name + "_worker"

    input_alias_tuples = item_module.alias_tuples_from_dict(input_aliases_dict)
    method = getattr(provider, method_name)

    try:
        method_response = method(input_alias_tuples)
    except ProviderError:
        method_response = None
        logger.info(
            u"{:20}: **ProviderError {tiid} {method_name} {provider_name} ".format(
                worker_name,
                tiid=tiid,
                provider_name=provider_name.upper(),
                method_name=method_name.upper()
            )
        )

    if method_name == "aliases":
        # update aliases to include the old ones too
        aliases_providers_run += [provider_name]
        if method_response:
            new_aliases_dict = item_module.alias_dict_from_tuples(method_response)
            new_canonical_aliases_dict = item_module.canonical_aliases(new_aliases_dict)
            response = item_module.merge_alias_dicts(new_canonical_aliases_dict, input_aliases_dict)
        else:
            # provider found nothing new; keep what we started with
            response = input_aliases_dict
    else:
        response = method_response

    logger.info(
        u"{:20}: /biblio_print, RETURNED {tiid} {method_name} {provider_name} : {response}".format(
            worker_name,
            tiid=tiid,
            method_name=method_name.upper(),
            provider_name=provider_name.upper(),
            response=response,
        )
    )

    callback(tiid, response, method_name, aliases_providers_run)

    try:
        del thread_count[provider_name][tiid + method_name]
    except KeyError:  # thread isn't there when we call wrapper in unit tests
        pass

    return response
def get_best_url(self, aliases):
    """Return the most canonical public URL for the item, or None.

    Preference order: doi, pmid, pmc, then any plain url alias.  Returns None
    when this provider has no relevant aliases or none of those namespaces
    are present.
    """
    # bail out early if this provider has nothing relevant to say
    if not self.relevant_aliases(aliases):
        return None

    from totalimpact import item
    aliases_dict = item.alias_dict_from_tuples(aliases)

    if "doi" in aliases_dict:
        return u"http://doi.org/" + aliases_dict["doi"][0]
    if "pmid" in aliases_dict:
        return u"http://www.ncbi.nlm.nih.gov/pubmed/" + aliases_dict["pmid"][0]
    if "pmc" in aliases_dict:
        return u"http://www.ncbi.nlm.nih.gov/pmc/articles/" + aliases_dict["pmc"][0]
    if "url" in aliases_dict:
        return aliases_dict["url"][0]
    return None
def _get_doc(self, aliases):
    """Fetch the provider document for these aliases, or None.

    Prefers a direct identifier lookup (doi, pmid, or arxiv -- whichever
    namespace appears first in the alias dict); falls back to a title search
    when an identifier lookup finds nothing and biblio data is available.
    Lazily opens the session on first use.
    """
    if not self.session:
        self.session = self._connect()

    from totalimpact import item
    aliases_dict = item.alias_dict_from_tuples(aliases)

    # namespaces we can look up directly, in the dict's iteration order
    id_namespaces = [ns for ns in aliases_dict.keys() if ns in ("doi", "pmid", "arxiv")]

    doc = None
    if id_namespaces:
        doc = self._get_doc_by_id(id_namespaces[0], aliases_dict)
    if not doc and ("biblio" in aliases_dict):
        doc = self._get_doc_by_title(aliases_dict)
    return doc
def get_best_url(self, aliases):
    """Return the most canonical public URL for the item, or None.

    Checks namespaces in preference order (doi, pmid, pmc, url) and returns
    the first match; None when this provider has no relevant aliases or no
    usable namespace is present.
    """
    if self.relevant_aliases(aliases):
        from totalimpact import item
        aliases_dict = item.alias_dict_from_tuples(aliases)

        # identifier namespaces that map to a well-known URL prefix
        prefixed = (
            ("doi", u"http://doi.org/"),
            ("pmid", u"http://www.ncbi.nlm.nih.gov/pubmed/"),
            ("pmc", u"http://www.ncbi.nlm.nih.gov/pmc/articles/"),
        )
        for namespace, prefix in prefixed:
            if namespace in aliases_dict:
                return prefix + aliases_dict[namespace][0]

        # fall back to a raw url alias, returned verbatim
        if "url" in aliases_dict:
            return aliases_dict["url"][0]
    return None
def metrics(self, aliases,
        provider_url_template=None,
        cache_enabled=True  # ignore this because multiple url steps
        ):
    """Collect Mendeley metrics for an item, trying doi, then pmid, then title.

    Each lookup strategy is attempted only if the previous one produced no
    metrics page; a missing alias namespace simply skips that strategy
    (KeyError is swallowed).  Returns {} when every strategy fails.
    """
    # Only lookup metrics for items with appropriate ids
    from totalimpact import item
    aliases_dict = item.alias_dict_from_tuples(aliases)

    metrics_page = None

    # try lookup by doi
    try:
        metrics_page = self._get_metrics_lookup_page(
            self.metrics_from_doi_template, aliases_dict["doi"][0], cache_enabled
        )
    except KeyError:
        pass

    # try lookup by pmid
    if not metrics_page:
        try:
            metrics_page = self._get_metrics_lookup_page(
                self.metrics_from_pmid_template, aliases_dict["pmid"][0], cache_enabled
            )
        except KeyError:
            pass

    # try lookup by title: resolve the title to a Mendeley uuid first
    if not metrics_page:
        try:
            page = self._get_uuid_lookup_page(aliases_dict["biblio"][0]["title"], cache_enabled)
            if page:
                uuid = self._get_uuid_from_title(aliases_dict, page)["uuid"]
                if uuid:
                    logger.debug(u"Mendeley: uuid is %s for %s" % (uuid, aliases_dict["biblio"][0]["title"]))
                    # BUGFIX: pass cache_enabled here too, consistent with the
                    # doi and pmid lookups above (it was previously omitted, so
                    # the uuid lookup always used the cache default).
                    metrics_page = self._get_metrics_lookup_page(self.metrics_from_uuid_template, uuid, cache_enabled)
                else:
                    logger.debug(u"Mendeley: couldn't find uuid for %s" % (aliases_dict["biblio"][0]["title"]))
        except (KeyError, TypeError):
            pass

    # give up!
    if not metrics_page:
        return {}

    metrics_and_drilldown = self._get_metrics_and_drilldown_from_metrics_page(metrics_page)
    return metrics_and_drilldown
def refresh_from_tiids(tiids, analytics_credentials, priority, myredis):
    """Mark each item as starting a refresh and enqueue its metrics update.

    Loads the Item rows for the given tiids, records the refresh start on
    each, commits, then hands the (tiid, aliases) dicts to
    item_module.start_item_update.  Items that blow up with AttributeError are
    logged and skipped.  Returns the tiids unchanged.

    NOTE(review): analytics_credentials is unused here; presumably kept for
    call-signature compatibility -- confirm against callers.
    """
    item_objects = item_module.Item.query.filter(item_module.Item.tiid.in_(tiids)).all()
    dicts_to_refresh = []
    for item_obj in item_objects:
        # BUGFIX: bind tiid defensively before the try block.  Previously, if
        # item_obj.tiid itself raised AttributeError, tiid was unbound and the
        # except-branch log line raised NameError instead.
        tiid = getattr(item_obj, "tiid", None)
        try:
            item_obj.set_last_refresh_start()
            db.session.add(item_obj)
            alias_dict = item_module.alias_dict_from_tuples(item_obj.alias_tuples)
            dicts_to_refresh += [{"tiid": tiid, "aliases_dict": alias_dict}]
        except AttributeError:
            logger.debug(u"couldn't find tiid {tiid} so not refreshing its metrics".format(
                tiid=tiid))
    db.session.commit()
    item_module.start_item_update(dicts_to_refresh, priority, myredis)
    return tiids
def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
    """Return new alias tuples discovered from the provider's document.

    Looks up the item's document; harvests doi/arxiv/pmid/scopus identifiers
    the item doesn't already have, plus the document's url and mendeley_uuid.
    Aliases already present in the input are filtered out.  Returns [] when
    no document is found.
    """
    doc = self._get_doc(aliases)
    new_aliases = []
    if doc:
        from totalimpact import item
        aliases_dict = item.alias_dict_from_tuples(aliases)
        # BUGFIX: removed leftover Python 2 debug statement
        # "print doc.identifiers" that wrote to stdout on every call.
        for namespace in doc.identifiers:
            if namespace in ["doi", "arxiv", "pmid", "scopus"] and (namespace not in aliases_dict):
                new_aliases += [(namespace, doc.identifiers[namespace])]
        new_aliases += [("url", doc.link)]
        new_aliases += [("mendeley_uuid", doc.id)]
        # drop anything the caller already passed in
        new_aliases = [alias for alias in new_aliases if alias not in aliases]
    return new_aliases
def refresh_from_tiids(tiids, analytics_credentials, priority, myredis):
    """Mark each item as starting a refresh and enqueue its metrics update.

    Loads the Item rows for the given tiids, records the refresh start on
    each, commits, then hands the (tiid, aliases) dicts to
    item_module.start_item_update.  Items that blow up with AttributeError are
    logged and skipped.  Returns the tiids unchanged.

    NOTE(review): analytics_credentials is unused here; presumably kept for
    call-signature compatibility -- confirm against callers.
    """
    item_objects = item_module.Item.query.filter(
        item_module.Item.tiid.in_(tiids)).all()
    dicts_to_refresh = []
    for item_obj in item_objects:
        # BUGFIX: bind tiid defensively before the try block.  Previously, if
        # item_obj.tiid itself raised AttributeError, tiid was unbound and the
        # except-branch log line raised NameError instead.
        tiid = getattr(item_obj, "tiid", None)
        try:
            item_obj.set_last_refresh_start()
            db.session.add(item_obj)
            alias_dict = item_module.alias_dict_from_tuples(
                item_obj.alias_tuples)
            dicts_to_refresh += [{"tiid": tiid, "aliases_dict": alias_dict}]
        except AttributeError:
            logger.debug(
                u"couldn't find tiid {tiid} so not refreshing its metrics".
                format(tiid=tiid))
    db.session.commit()
    item_module.start_item_update(dicts_to_refresh, priority, myredis)
    return tiids
def test_alias_dict_from_tuples(self):
    # even an unrecognized namespace should round-trip into the dict,
    # with its value wrapped in a list
    assert_equals(
        item_module.alias_dict_from_tuples([('unknown_namespace', 'myname')]),
        {'unknown_namespace': ['myname']})
method_response = None logger.info(u"{:20}: **ProviderError {tiid} {method_name} {provider_name}, Exception type {exception_type} {exception_arguments}".format( worker_name, tiid=tiid, provider_name=provider_name.upper(), method_name=method_name.upper(), exception_type=type(e).__name__, exception_arguments=e.args)) logger.info(u"{:20}: /biblio_print, RETURNED {tiid} {method_name} {provider_name} : {method_response}".format( worker_name, tiid=tiid, method_name=method_name.upper(), provider_name=provider_name.upper(), method_response=method_response)) if method_name == "aliases" and method_response: initial_alias_dict = item_module.alias_dict_from_tuples(method_response) new_canonical_aliases_dict = item_module.canonical_aliases(initial_alias_dict) full_aliases_dict = item_module.merge_alias_dicts(new_canonical_aliases_dict, input_aliases_dict) else: full_aliases_dict = input_aliases_dict add_to_database_if_nonzero(tiid, method_response, method_name, provider_name) return full_aliases_dict # last variable is an artifact so it has same call signature as other callbacks def add_to_database_if_nonzero( tiid,