def biblio(self, aliases, provider_url_template=None, cache_enabled=True):
    """Build a biblio dict for the post named by the "blog_post" alias.

    The post page is handed to ``webpage.Webpage().biblio``; the dict it
    returns is then annotated with the post url, the blog account, a
    split title / blog title, and (when the wordpress.com lookup answers)
    a hosting-platform marker.

    Args:
        aliases: alias tuples; the first "blog_post" entry is used.
        provider_url_template: forwarded unchanged to ``Webpage.biblio``.
        cache_enabled: forwarded to ``Webpage.biblio`` and ``http_get``.

    Returns:
        The dict returned by ``Webpage.biblio`` with the extra keys added.

    Raises:
        KeyError: presumably, when there is no "blog_post" alias (assumes
            ``alias_dict_from_tuples`` returns a plain dict -- confirm).
    """
    logger.info(u"calling webpage to handle aliases")

    aliases_dict = provider.alias_dict_from_tuples(aliases)
    # The "blog_post" alias is always the one used here; the earlier
    # get_best_id() result was overwritten immediately and has been dropped.
    nid = aliases_dict["blog_post"][0]
    post_url = self.post_url_from_nid(nid)
    blog_url = self.blog_url_from_nid(nid)

    biblio_dict = webpage.Webpage().biblio([("url", post_url)], provider_url_template, cache_enabled)
    biblio_dict["url"] = post_url
    # Reuse blog_url instead of deriving it from nid a second time.
    biblio_dict["account"] = provider.strip_leading_http(blog_url)

    # Titles of the form "post title | blog title" are split on the last "|".
    if ("title" in biblio_dict) and ("|" in biblio_dict["title"]):
        (title, blog_title) = biblio_dict["title"].rsplit("|", 1)
        biblio_dict["title"] = title.strip()
        biblio_dict["blog_title"] = blog_title.strip()

    # try to get a response from wordpress.com
    url = self._get_templated_url(self.biblio_url_template, blog_url, "biblio")
    response = self.http_get(url, cache_enabled=cache_enabled)
    if (response.status_code == 200) and ("name" in response.text):
        biblio_dict["hosting_platform"] = "wordpress.com"
        # in the future could get date posted from these sorts of calls:
        # https://public-api.wordpress.com/rest/v1/sites/blog.impactstory.org/posts/slug:link-your-figshare-and-impactstory-strips

    return biblio_dict
def provenance_url(self, metric_name, aliases):
    """Return the drilldown url for the first "altmetric_com" alias.

    ``metric_name`` is accepted but not used. An empty string is returned
    when a KeyError is raised while building the url.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)
    try:
        return self._get_templated_url(
            self.provenance_url_template,
            alias_lookup["altmetric_com"][0])
    except KeyError:
        return ""
def biblio(self, aliases, provider_url_template=None, cache_enabled=True):
    """Collect biblio fields for the first "pmid" alias.

    The efetch page is fetched and extracted first, then the elink page;
    both results are merged into one dict, so elink keys overwrite efetch
    keys on collision. A "free_fulltext_url" is added when a "pmc" alias
    exists, or otherwise when the extracted "issn" passes
    ``provider.is_issn_in_doaj``.

    Returns None when there is no "pmid" alias.
    ``provider_url_template`` is accepted but not used.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)
    if "pmid" not in alias_lookup:
        return None

    pmid = alias_lookup["pmid"][0]
    self.logger.debug(u"%s getting biblio for %s" % (self.provider_name, pmid))

    merged = {}

    # efetch lookup
    fetch_url = self._get_templated_url(self.biblio_url_efetch_template, pmid, "biblio")
    fetch_page = self._get_eutils_page(fetch_url, pmid, cache_enabled=cache_enabled)
    merged.update(self._extract_biblio_efetch(fetch_page, pmid))

    # elink lookup
    link_url = self._get_templated_url(self.biblio_url_elink_template, pmid, "biblio")
    link_page = self._get_eutils_page(link_url, pmid, cache_enabled=cache_enabled)
    merged.update(self._extract_biblio_elink(link_page, pmid))

    if "pmc" in alias_lookup:
        merged["free_fulltext_url"] = self.pmc_article_template % alias_lookup["pmc"][0]
    elif ("issn" in merged) and provider.is_issn_in_doaj(merged["issn"]):
        merged["free_fulltext_url"] = self.aliases_pubmed_url_template % pmid

    return merged
def biblio(self, aliases, provider_url_template=None, cache_enabled=True):
    """Build a biblio dict for the post named by the "blog_post" alias.

    The post page is handed to ``webpage.Webpage().biblio``; the dict it
    returns is then annotated with the post url, the blog account, a
    split title / blog title, and (when the wordpress.com lookup answers)
    a hosting-platform marker.

    Args:
        aliases: alias tuples; the first "blog_post" entry is used.
        provider_url_template: forwarded unchanged to ``Webpage.biblio``.
        cache_enabled: forwarded to ``Webpage.biblio`` and ``http_get``.

    Returns:
        The dict returned by ``Webpage.biblio`` with the extra keys added.

    Raises:
        KeyError: presumably, when there is no "blog_post" alias (assumes
            ``alias_dict_from_tuples`` returns a plain dict -- confirm).
    """
    logger.info(u"calling webpage to handle aliases")

    aliases_dict = provider.alias_dict_from_tuples(aliases)
    # The "blog_post" alias is always the one used here; the earlier
    # get_best_id() result was overwritten immediately and has been dropped.
    nid = aliases_dict["blog_post"][0]
    post_url = self.post_url_from_nid(nid)
    blog_url = self.blog_url_from_nid(nid)

    biblio_dict = webpage.Webpage().biblio([("url", post_url)], provider_url_template, cache_enabled)
    biblio_dict["url"] = post_url
    # Reuse blog_url instead of deriving it from nid a second time.
    biblio_dict["account"] = provider.strip_leading_http(blog_url)

    # Titles of the form "post title | blog title" are split on the last "|".
    if ("title" in biblio_dict) and ("|" in biblio_dict["title"]):
        (title, blog_title) = biblio_dict["title"].rsplit("|", 1)
        biblio_dict["title"] = title.strip()
        biblio_dict["blog_title"] = blog_title.strip()

    # try to get a response from wordpress.com
    url = self._get_templated_url(self.biblio_url_template, blog_url, "biblio")
    response = self.http_get(url, cache_enabled=cache_enabled)
    if (response.status_code == 200) and ("name" in response.text):
        biblio_dict["hosting_platform"] = "wordpress.com"
        # in the future could get date posted from these sorts of calls:
        # https://public-api.wordpress.com/rest/v1/sites/blog.impactstory.org/posts/slug:link-your-figshare-and-impactstory-strips

    return biblio_dict
def get_best_id(self, aliases):
    """Return the first "altmetric_com" alias value, or None if absent.

    The value is returned as stored; no namespace prefix is added here.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)
    return alias_lookup["altmetric_com"][0] if "altmetric_com" in alias_lookup else None
def provenance_url(self, metric_name, aliases):
    """Return the drilldown url for the first "altmetric_com" alias.

    ``metric_name`` is accepted but not used. An empty string is returned
    when a KeyError is raised while building the url.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)
    try:
        return self._get_templated_url(
            self.provenance_url_template,
            alias_lookup["altmetric_com"][0])
    except KeyError:
        return ""
def get_best_id(self, aliases):
    """Return the first "altmetric_com" alias value, or None if absent.

    The value is returned as stored; no namespace prefix is added here.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)
    return alias_lookup["altmetric_com"][0] if "altmetric_com" in alias_lookup else None
def get_best_id(self, aliases):
    """Return the first id of the most preferred namespace that is present.

    Namespaces are tried in the order "wordpress_blog_post", "blog_post",
    "url". None is returned when none of them is present.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)
    preferred_namespaces = ("wordpress_blog_post", "blog_post", "url")
    # Lazy generator: only the first matching namespace is ever indexed.
    candidates = (
        alias_lookup[namespace][0]
        for namespace in preferred_namespaces
        if namespace in alias_lookup
    )
    return next(candidates, None)
def metrics(self, aliases, provider_url_template=None, cache_enabled=True):
    """Fetch metrics (with drilldown) for the best available alias.

    ``get_best_alias`` picks the (namespace, id) pair; an empty dict is
    returned when it yields no id. ``cache_enabled`` is accepted but not
    forwarded.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)
    namespace, nid = self.get_best_alias(alias_lookup)
    if nid:
        return self._get_metrics_and_drilldown_from_metrics_page(
            provider_url_template, namespace=namespace, id=nid)
    # no relevant alias, so nothing to look up
    return {}
def get_id_for_aliases(self, aliases):
    """Return "<id type>/<id>" for the most preferred alias, else None.

    The id type goes in front of the "/" because (per the original
    author's note) that is how the altmetric.com api expects it.
    Preference order: doi, pmid, arxiv, altmetric_com. Note that an
    "arxiv" alias is emitted with the "arxiv_id" prefix.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)
    # (alias namespace, prefix used in the returned id), most preferred first
    namespace_prefixes = (
        ("doi", "doi"),
        ("pmid", "pmid"),
        ("arxiv", "arxiv_id"),
        ("altmetric_com", "altmetric_com"),
    )
    for namespace, prefix in namespace_prefixes:
        if namespace in alias_lookup:
            return "{prefix}/{id}".format(prefix=prefix, id=alias_lookup[namespace][0])
    return None
def biblio(self, aliases, provider_url_template=None, cache_enabled=True):
    """Return biblio from a "biblio" alias, or look it up from a "url" alias.

    A "biblio" alias wins and its first value is returned as-is.
    Otherwise the first "url" alias is passed to ``get_biblio_for_id``,
    using ``self.biblio_url_template`` when no template is supplied.
    An empty dict is returned when neither alias is present.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)

    if "biblio" in alias_lookup:
        return alias_lookup["biblio"][0]

    if "url" not in alias_lookup:
        return {}

    url = alias_lookup["url"][0]
    template = provider_url_template or self.biblio_url_template
    return self.get_biblio_for_id(url, template, cache_enabled)
def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
    """Look up new aliases unless an "altmetric_com" alias already exists.

    Returns an empty list when an "altmetric_com" alias is already
    present or when ``get_id_for_aliases`` finds no usable id; otherwise
    returns whatever ``_get_aliases_for_id`` produces.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)
    if "altmetric_com" in alias_lookup:
        # nothing new to add
        return []

    lookup_id = self.get_id_for_aliases(aliases)
    if lookup_id:
        return self._get_aliases_for_id(lookup_id, provider_url_template, cache_enabled)
    return []
def get_id_for_aliases(self, aliases):
    """Return "<id type>/<id>" for the most preferred alias, else None.

    The id type goes in front of the "/" because (per the original
    author's note) that is how the altmetric.com api expects it.
    Preference order: doi, pmid, arxiv, altmetric_com. Note that an
    "arxiv" alias is emitted with the "arxiv_id" prefix.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)
    # (alias namespace, prefix used in the returned id), most preferred first
    namespace_prefixes = (
        ("doi", "doi"),
        ("pmid", "pmid"),
        ("arxiv", "arxiv_id"),
        ("altmetric_com", "altmetric_com"),
    )
    for namespace, prefix in namespace_prefixes:
        if namespace in alias_lookup:
            return "{prefix}/{id}".format(prefix=prefix, id=alias_lookup[namespace][0])
    return None
def metrics(self, aliases, provider_url_template=None, cache_enabled=True):
    """Fetch metrics by "doi", falling back to "biblio" if that is empty.

    Each namespace is queried with its first alias value. The "biblio"
    lookup only runs when the "doi" lookup was skipped or returned a
    falsy result. ``cache_enabled`` is accepted but not forwarded.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)

    result = {}
    for namespace in ("doi", "biblio"):
        if result:
            # an earlier namespace already produced metrics
            break
        if namespace in alias_lookup:
            result = self._get_metrics_and_drilldown_from_metrics_page(
                provider_url_template,
                namespace=namespace,
                id=alias_lookup[namespace][0])
    return result
def metrics(self, aliases, provider_url_template=None, cache_enabled=True):
    """Fetch metrics (with drilldown) for the best available alias.

    ``get_best_alias`` picks the (namespace, id) pair; an empty dict is
    returned when it yields no id. ``cache_enabled`` is accepted but not
    forwarded.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)
    namespace, nid = self.get_best_alias(alias_lookup)
    if nid:
        return self._get_metrics_and_drilldown_from_metrics_page(
            provider_url_template, namespace=namespace, id=nid)
    # no relevant alias, so nothing to look up
    return {}
def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
    """Look up aliases from "biblio" entries when no doi/pmid is known.

    Returns an empty list when there is no "biblio" alias, or when a
    "doi" or "pmid" alias exists (better sources are left to handle
    those). Otherwise every "biblio" value is passed to
    ``_get_aliases_for_id`` and the combined results are returned sorted
    and de-duplicated.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)

    has_better_source = ("doi" in alias_lookup) or ("pmid" in alias_lookup)
    if ("biblio" not in alias_lookup) or has_better_source:
        return []

    found = []
    for biblio_alias in alias_lookup["biblio"]:
        found.extend(self._get_aliases_for_id(biblio_alias, provider_url_template, cache_enabled))

    # sort + groupby gives uniques even for values that are unhashable
    return [alias for alias, _group in itertools.groupby(sorted(found))]
def biblio(self, aliases, provider_url_template=None, cache_enabled=True):
    """Build a biblio dict for the blog named by the first "blog" alias.

    Args:
        aliases: alias tuples; only the "blog" namespace is consulted.
        provider_url_template: url template for the lookup; defaults to
            ``self.biblio_url_template`` when falsy.
        cache_enabled: forwarded to ``http_get``.

    Returns:
        None when there is no (non-empty) "blog" alias. Otherwise a dict
        with "url", "account" and "is_account", plus "hosting_platform"
        and whatever ``_extract_biblio`` yields when the lookup responds
        with status 200 and a body containing "name".
    """
    aliases_dict = provider.alias_dict_from_tuples(aliases)

    # Bind unconditionally: previously the name was only assigned when a
    # "blog" alias existed, so the check below raised UnboundLocalError
    # instead of returning None.
    blog_id = aliases_dict["blog"][0] if "blog" in aliases_dict else None

    # Only lookup biblio for items with appropriate ids
    if not blog_id:
        return None

    if not provider_url_template:
        provider_url_template = self.biblio_url_template

    self.logger.debug(u"%s getting biblio for %s" % (self.provider_name, blog_id))

    # set up stuff that is true for all blogs, wordpress and not
    biblio_dict = {}
    biblio_dict["url"] = blog_id
    biblio_dict["account"] = provider.strip_leading_http(blog_id)
    biblio_dict["is_account"] = True  # special key to tell webapp to render as genre heading

    # now add things that are true just for wordpress blogs
    url = self._get_templated_url(provider_url_template, blog_id, "biblio")

    # try to get a response from the data provider
    response = self.http_get(url, cache_enabled=cache_enabled)

    if (response.status_code == 200) and ("name" in response.text):
        biblio_dict["hosting_platform"] = "wordpress.com"
        try:
            biblio_dict.update(self._extract_biblio(response.text, blog_id))
        except (AttributeError, TypeError):
            # Best effort: keep the generic fields, but leave a trace
            # rather than failing silently.
            self.logger.debug(u"%s could not extract biblio for %s" % (self.provider_name, blog_id))

    return biblio_dict
def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
    """Derive doi and url aliases from the aliases already known.

    A doi is taken, in order, from: the first "doi" alias; any "url"
    alias starting with "http://dx.doi.org/" or "http://doi.org/" (each
    match is also added as a new doi alias, and the last match wins); a
    biblio lookup via ``_lookup_doi_from_biblio``. When the biblio lookup
    finds nothing, the biblio's own "url" (if any) is added instead.

    Without a doi the aliases gathered so far are returned unchanged.
    With one, the urls from ``_lookup_urls_from_doi`` are appended and
    the result is returned sorted and de-duplicated.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)
    doi = None
    found = []
    doi_url_prefixes = ("http://dx.doi.org/", "http://doi.org/")

    if "doi" in alias_lookup:
        doi = alias_lookup["doi"][0]
    elif "url" in alias_lookup:
        for url in alias_lookup["url"]:
            for prefix in doi_url_prefixes:
                if url.startswith(prefix):
                    doi = url.replace(prefix, "")
                    found.append(("doi", doi))
                    break

    if (not doi) and ("biblio" in alias_lookup):
        first_biblio = alias_lookup["biblio"][0]
        doi = self._lookup_doi_from_biblio(first_biblio, cache_enabled)
        if doi:
            found.append(("doi", doi))
        elif "url" in first_biblio:
            found.append(("url", first_biblio["url"]))

    if not doi:
        # nothing else we can do:
        # urls if we have them, otherwise empty list
        return found

    found += self._lookup_urls_from_doi(doi, provider_url_template, cache_enabled)

    # sort + groupby gives uniques even for values that are unhashable
    return [alias for alias, _group in itertools.groupby(sorted(found))]
def metrics(self, aliases, provider_url_template=None, cache_enabled=True):
    """Fetch metrics by "doi", falling back to "biblio" if that is empty.

    Each namespace is queried with its first alias value. The "biblio"
    lookup only runs when the "doi" lookup was skipped or returned a
    falsy result. ``cache_enabled`` is accepted but not forwarded.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)

    result = {}
    for namespace in ("doi", "biblio"):
        if result:
            # an earlier namespace already produced metrics
            break
        if namespace in alias_lookup:
            result = self._get_metrics_and_drilldown_from_metrics_page(
                provider_url_template,
                namespace=namespace,
                id=alias_lookup[namespace][0])
    return result
def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
    """Derive doi and url aliases from the aliases already known.

    A doi is taken, in order, from: the first "doi" alias; any "url"
    alias starting with "http://dx.doi.org/" or "http://doi.org/" (each
    match is also added as a new doi alias, and the last match wins); a
    biblio lookup via ``_lookup_doi_from_biblio``. When the biblio lookup
    finds nothing, the biblio's own "url" (if any) is added instead.

    Without a doi the aliases gathered so far are returned unchanged.
    With one, the urls from ``_lookup_urls_from_doi`` are appended and
    the result is returned sorted and de-duplicated.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)
    doi = None
    found = []
    doi_url_prefixes = ("http://dx.doi.org/", "http://doi.org/")

    if "doi" in alias_lookup:
        doi = alias_lookup["doi"][0]
    elif "url" in alias_lookup:
        for url in alias_lookup["url"]:
            for prefix in doi_url_prefixes:
                if url.startswith(prefix):
                    doi = url.replace(prefix, "")
                    found.append(("doi", doi))
                    break

    if (not doi) and ("biblio" in alias_lookup):
        first_biblio = alias_lookup["biblio"][0]
        doi = self._lookup_doi_from_biblio(first_biblio, cache_enabled)
        if doi:
            found.append(("doi", doi))
        elif "url" in first_biblio:
            found.append(("url", first_biblio["url"]))

    if not doi:
        # nothing else we can do:
        # urls if we have them, otherwise empty list
        return found

    found += self._lookup_urls_from_doi(doi, provider_url_template, cache_enabled)

    # sort + groupby gives uniques even for values that are unhashable
    return [alias for alias, _group in itertools.groupby(sorted(found))]
def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
    """Look up aliases from "biblio" entries when no doi/pmid is known.

    Returns an empty list when there is no "biblio" alias, or when a
    "doi" or "pmid" alias exists (better sources are left to handle
    those). Otherwise every "biblio" value is passed to
    ``_get_aliases_for_id`` and the combined results are returned sorted
    and de-duplicated.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)

    has_better_source = ("doi" in alias_lookup) or ("pmid" in alias_lookup)
    if ("biblio" not in alias_lookup) or has_better_source:
        return []

    found = []
    for biblio_alias in alias_lookup["biblio"]:
        found.extend(self._get_aliases_for_id(biblio_alias, provider_url_template, cache_enabled))

    # sort + groupby gives uniques even for values that are unhashable
    return [alias for alias, _group in itertools.groupby(sorted(found))]
def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
    """Derive "url" and "wordpress_blog_post" aliases from a "blog_post".

    Args:
        aliases: alias tuples; only acted on when a "blog_post" alias
            exists. Its first value is used and must be a JSON string,
            since it is parsed with ``json.loads`` below.
        provider_url_template: accepted but not used.
        cache_enabled: forwarded to both ``http_get`` calls.

    Returns:
        A list of new alias tuples: the post url when it is not already
        among ``aliases``, plus a "wordpress_blog_post" alias (the
        original JSON id extended with "wordpress_post_id") when the
        lookups succeed. Empty when there is no "blog_post" alias.
    """
    aliases_dict = provider.alias_dict_from_tuples(aliases)
    new_aliases = []

    if "blog_post" not in aliases_dict:
        return new_aliases

    nid = aliases_dict["blog_post"][0]
    post_url = self.post_url_from_nid(nid)

    # add url as alias if not already there
    new_alias = ("url", post_url)
    if new_alias not in aliases:
        new_aliases.append(new_alias)

    # now add the wordpress alias info if it isn't already there
    if "wordpress_blog_post" not in aliases_dict:
        blog_url = provider.strip_leading_http(self.blog_url_from_nid(nid))
        wordpress_blog_api_url = self.metrics_url_template_wordpress_site % blog_url

        # cache_enabled used to be ignored here; forward it so callers
        # that disable caching get fresh responses.
        response = self.http_get(wordpress_blog_api_url, cache_enabled=cache_enabled)
        if "name" in response.text:
            # it is a wordpress blog, so now get its wordpress post ID
            if post_url.endswith("/"):
                post_url = post_url[:-1]
            post_end_slug = post_url.rsplit("/", 1)[1]

            wordpress_post_api_url = self.metrics_url_template_wordpress_post % (blog_url, post_end_slug)
            response = self.http_get(wordpress_post_api_url, cache_enabled=cache_enabled)
            if "ID" in response.text:
                wordpress_post_id = json.loads(response.text)["ID"]
                nid_as_dict = json.loads(nid)
                nid_as_dict.update({"wordpress_post_id": wordpress_post_id})
                new_aliases.append(("wordpress_blog_post", json.dumps(nid_as_dict)))

    return new_aliases
def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
    """Derive "url" and "wordpress_blog_post" aliases from a "blog_post".

    Args:
        aliases: alias tuples; only acted on when a "blog_post" alias
            exists. Its first value is used and must be a JSON string,
            since it is parsed with ``json.loads`` below.
        provider_url_template: accepted but not used.
        cache_enabled: forwarded to both ``http_get`` calls.

    Returns:
        A list of new alias tuples: the post url when it is not already
        among ``aliases``, plus a "wordpress_blog_post" alias (the
        original JSON id extended with "wordpress_post_id") when the
        lookups succeed. Empty when there is no "blog_post" alias.
    """
    aliases_dict = provider.alias_dict_from_tuples(aliases)
    new_aliases = []

    if "blog_post" not in aliases_dict:
        return new_aliases

    nid = aliases_dict["blog_post"][0]
    post_url = self.post_url_from_nid(nid)

    # add url as alias if not already there
    new_alias = ("url", post_url)
    if new_alias not in aliases:
        new_aliases.append(new_alias)

    # now add the wordpress alias info if it isn't already there
    if "wordpress_blog_post" not in aliases_dict:
        blog_url = provider.strip_leading_http(self.blog_url_from_nid(nid))
        wordpress_blog_api_url = self.metrics_url_template_wordpress_site % blog_url

        # cache_enabled used to be ignored here; forward it so callers
        # that disable caching get fresh responses.
        response = self.http_get(wordpress_blog_api_url, cache_enabled=cache_enabled)
        if "name" in response.text:
            # it is a wordpress blog, so now get its wordpress post ID
            if post_url.endswith("/"):
                post_url = post_url[:-1]
            post_end_slug = post_url.rsplit("/", 1)[1]

            wordpress_post_api_url = self.metrics_url_template_wordpress_post % (blog_url, post_end_slug)
            response = self.http_get(wordpress_post_api_url, cache_enabled=cache_enabled)
            if "ID" in response.text:
                wordpress_post_id = json.loads(response.text)["ID"]
                nid_as_dict = json.loads(nid)
                nid_as_dict.update({"wordpress_post_id": wordpress_post_id})
                new_aliases.append(("wordpress_blog_post", json.dumps(nid_as_dict)))

    return new_aliases
def biblio(self, aliases, provider_url_template=None, cache_enabled=True):
    """Look up biblio for the first "url" alias unless it is excluded.

    Returns an empty dict when there is no "url" alias, when the url is
    empty, or when it contains one of the excluded fragments. Otherwise
    returns the result of ``get_biblio_for_id``, using
    ``self.biblio_url_template`` when no template is supplied.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)
    if "url" not in alias_lookup:
        return {}

    url = alias_lookup["url"][0]
    if not url:
        return {}

    excluded_fragments = (
        "scopus.com/inward",
        "ncbi.nlm.nih.gov/pubmed",
        "doi.org/",
        "mendeley.com/",
    )
    for fragment in excluded_fragments:
        if fragment in url:
            return {}

    template = provider_url_template or self.biblio_url_template
    return self.get_biblio_for_id(url, template, cache_enabled)
def metrics(self, aliases, provider_url_template=None, cache_enabled=True, analytics_credentials=None):
    """Gather blog- and post-level metrics and pair each with a drilldown url.

    Up to five lookups are made through ``get_metrics_for_id``, each with
    its own ``url_override``:

    * "blog" alias: subscribers.
    * "wordpress_blog_id" alias: blog comments.
    * "blog" alias plus credentials: blog views.
    * "wordpress_blog_post" alias: post comments, and with credentials
      also post views.

    Args:
        aliases: alias tuples.
        provider_url_template: accepted but not used.
        cache_enabled: forwarded to every ``get_metrics_for_id`` call.
        analytics_credentials: optional mapping; when truthy its
            "wordpress_api_key" entry is read.

    Returns:
        A dict mapping metric name to ``(value, drilldown_url)``, with the
        drilldown url coming from ``self.provenance_url``.
    """
    metrics = {}
    aliases_dict = provider.alias_dict_from_tuples(aliases)

    # Resolve the blog alias once. The "wordpress_blog_id" branch used to
    # read a blog_url that was bound only inside the "blog" branch, which
    # raised UnboundLocalError when that alias was missing.
    has_blog = "blog" in aliases_dict
    blog_url = aliases_dict["blog"][0] if has_blog else None

    if has_blog:
        url_override = self.metrics_url_template_public % (provider.strip_leading_http(blog_url).lower())
        new_metrics = self.get_metrics_for_id(blog_url,
                cache_enabled=cache_enabled,
                extract_metrics_method=self._extract_metrics_subscribers,
                url_override=url_override)
        metrics.update(new_metrics)

    if "wordpress_blog_id" in aliases_dict:
        wordpress_blog_id = aliases_dict["wordpress_blog_id"][0]
        url_override = self.metrics_url_template_comments % wordpress_blog_id
        # NOTE(review): the request url comes from url_override, so the
        # first argument looks like an identifier only -- confirm against
        # get_metrics_for_id. Fall back to the blog id when no blog url is known.
        lookup_id = blog_url if has_blog else wordpress_blog_id
        new_metrics = self.get_metrics_for_id(lookup_id,
                cache_enabled=cache_enabled,
                extract_metrics_method=self._extract_metrics_blog_comments,
                url_override=url_override)
        metrics.update(new_metrics)

    if has_blog and analytics_credentials:
        api_key = analytics_credentials["wordpress_api_key"]
        url_override = self.metrics_url_template_wordpress_blog_views % (api_key, provider.strip_leading_http(blog_url).lower())
        new_metrics = self.get_metrics_for_id(blog_url,
                cache_enabled=cache_enabled,
                extract_metrics_method=self._extract_metrics_blog_views,
                url_override=url_override)
        metrics.update(new_metrics)

    if "wordpress_blog_post" in aliases_dict:
        nid = aliases_dict["wordpress_blog_post"][0]
        post_id = self.wordpress_post_id_from_nid(nid)
        # The post carries its own blog url, independent of any "blog" alias.
        post_blog_url = self.blog_url_from_nid(nid)

        url_override = self.metrics_url_template_wordpress_post_comments % (provider.strip_leading_http(post_blog_url).lower(), post_id)
        new_metrics = self.get_metrics_for_id(post_id,
                cache_enabled=cache_enabled,
                extract_metrics_method=self._extract_metrics_post_comments,
                url_override=url_override)
        metrics.update(new_metrics)

        if analytics_credentials:
            api_key = analytics_credentials["wordpress_api_key"]
            url_override = self.metrics_url_template_wordpress_post_views % (api_key, provider.strip_leading_http(post_blog_url).lower(), post_id)
            # Post views are extracted with the blog-views extractor, as in
            # the original code.
            new_metrics = self.get_metrics_for_id(post_blog_url,
                    cache_enabled=cache_enabled,
                    extract_metrics_method=self._extract_metrics_blog_views,
                    url_override=url_override)
            metrics.update(new_metrics)

    metrics_and_drilldown = {}
    for metric_name in metrics:
        drilldown_url = self.provenance_url(metric_name, aliases)
        metrics_and_drilldown[metric_name] = (metrics[metric_name], drilldown_url)

    return metrics_and_drilldown
def provenance_url(self, metric_name, aliases):
    """Return the first "url" alias, or the best id when there is none.

    ``metric_name`` is accepted but not used.
    """
    alias_lookup = provider.alias_dict_from_tuples(aliases)
    if "url" not in alias_lookup:
        return self.get_best_id(aliases)
    return alias_lookup["url"][0]