def get_site_id_for_template(self, aliases):
    """Return the blog address used to fill in a URL template.

    ``aliases`` is an iterable of ``(namespace, nid)`` pairs.  The nid of the
    first pair whose namespace is ``"blog"`` is passed through
    ``provider.strip_leading_http`` and lower-cased, and that value is
    returned.  ``None`` is returned when there is no ``"blog"`` alias.
    """
    for namespace, nid in aliases:
        if namespace != "blog":
            continue
        # First blog alias wins; later ones are never inspected.
        return provider.strip_leading_http(nid).lower()
    return None
def biblio(self, aliases, provider_url_template=None, cache_enabled=True):
    """Collect bibliographic information for a blog post.

    The post's page is handed to ``webpage.Webpage().biblio`` and the
    resulting dict is then augmented with:

    * ``"url"``      -- the post URL derived from the ``blog_post`` nid
    * ``"account"``  -- the blog URL with its leading scheme stripped
    * ``"title"`` / ``"blog_title"`` -- when the page title contains ``"|"``
      it is split on the last ``"|"`` into post title and blog title
    * ``"hosting_platform"`` -- set to ``"wordpress.com"`` when the lookup
      built from ``self.biblio_url_template`` answers 200 with ``"name"``
      in the body

    Args:
        aliases: iterable of ``(namespace, nid)`` tuples.
        provider_url_template: forwarded unchanged to the webpage provider.
        cache_enabled: forwarded to the webpage provider and ``http_get``.

    Returns:
        The biblio dict, or ``None`` when ``aliases`` holds no ``blog_post``
        entry (previously this case raised ``KeyError``).
    """
    logger.info(u"calling webpage to handle aliases")

    aliases_dict = provider.alias_dict_from_tuples(aliases)
    if "blog_post" not in aliases_dict:
        # Nothing to describe without a blog_post alias.
        return None

    # The earlier ``self.get_best_id(aliases)`` lookup was dropped: its
    # result was always overwritten by this assignment before being read.
    nid = aliases_dict["blog_post"][0]
    post_url = self.post_url_from_nid(nid)
    blog_url = self.blog_url_from_nid(nid)

    biblio_dict = webpage.Webpage().biblio(
        [("url", post_url)], provider_url_template, cache_enabled)
    biblio_dict["url"] = post_url
    biblio_dict["account"] = provider.strip_leading_http(blog_url)

    # Page titles of the form "Post title | Blog title" are split on the
    # last "|" so a "|" inside the post title itself is preserved.
    if ("title" in biblio_dict) and ("|" in biblio_dict["title"]):
        (title, blog_title) = biblio_dict["title"].rsplit("|", 1)
        biblio_dict["title"] = title.strip()
        biblio_dict["blog_title"] = blog_title.strip()

    # try to get a response from wordpress.com
    url = self._get_templated_url(self.biblio_url_template, blog_url, "biblio")
    response = self.http_get(url, cache_enabled=cache_enabled)
    if (response.status_code == 200) and ("name" in response.text):
        biblio_dict["hosting_platform"] = "wordpress.com"

    # in the future could get date posted from these sorts of calls:
    # https://public-api.wordpress.com/rest/v1/sites/blog.impactstory.org/posts/slug:link-your-figshare-and-impactstory-strips
    return biblio_dict
def biblio(self, aliases, provider_url_template=None, cache_enabled=True):
    """Collect bibliographic information for a blog post.

    Delegates the page scrape to ``webpage.Webpage().biblio`` and then adds
    the post ``"url"``, the scheme-less blog ``"account"``, a split
    ``"title"`` / ``"blog_title"`` when the page title contains ``"|"``, and
    ``"hosting_platform" = "wordpress.com"`` when the lookup built from
    ``self.biblio_url_template`` answers 200 with ``"name"`` in the body.

    Args:
        aliases: iterable of ``(namespace, nid)`` tuples.
        provider_url_template: forwarded unchanged to the webpage provider.
        cache_enabled: forwarded to the webpage provider and ``http_get``.

    Returns:
        The biblio dict, or ``None`` when ``aliases`` holds no ``blog_post``
        entry (previously this case raised ``KeyError``).
    """
    logger.info(u"calling webpage to handle aliases")

    aliases_dict = provider.alias_dict_from_tuples(aliases)
    if "blog_post" not in aliases_dict:
        # No blog_post alias means there is no post to describe.
        return None

    # A preceding ``self.get_best_id(aliases)`` call was removed because its
    # result was unconditionally replaced by this value.
    nid = aliases_dict["blog_post"][0]
    post_url = self.post_url_from_nid(nid)
    # Resolved once and reused for both the account name and the API lookup.
    blog_url = self.blog_url_from_nid(nid)

    biblio_dict = webpage.Webpage().biblio(
        [("url", post_url)], provider_url_template, cache_enabled)
    biblio_dict["url"] = post_url
    biblio_dict["account"] = provider.strip_leading_http(blog_url)

    if ("title" in biblio_dict) and ("|" in biblio_dict["title"]):
        # Split on the LAST "|": text before it is the post title, text
        # after it is the blog title.
        (title, blog_title) = biblio_dict["title"].rsplit("|", 1)
        biblio_dict["title"] = title.strip()
        biblio_dict["blog_title"] = blog_title.strip()

    # try to get a response from wordpress.com
    url = self._get_templated_url(self.biblio_url_template, blog_url, "biblio")
    response = self.http_get(url, cache_enabled=cache_enabled)
    if (response.status_code == 200) and ("name" in response.text):
        biblio_dict["hosting_platform"] = "wordpress.com"

    # in the future could get date posted from these sorts of calls:
    # https://public-api.wordpress.com/rest/v1/sites/blog.impactstory.org/posts/slug:link-your-figshare-and-impactstory-strips
    return biblio_dict
def get_site_id_for_template(self, aliases):
    """Pick the blog identifier to substitute into a URL template.

    Walks ``aliases`` (``(namespace, nid)`` pairs) in order.  For the first
    pair in the ``"blog"`` namespace, the nid is run through
    ``provider.strip_leading_http`` and lower-cased.  The result is
    ``None`` if no such pair exists.
    """
    site_id = None
    for pair in aliases:
        namespace, nid = pair
        if namespace == "blog":
            site_id = provider.strip_leading_http(nid).lower()
            # Only the first blog alias is used.
            break
    return site_id
def _get_templated_url(self, template, nid, method=None): url = None if method == "biblio": nid = provider.strip_leading_http(nid).lower() url = template % (nid) elif method == "provenance": url = self.post_url_from_nid(nid) else: url = template % (nid) return (url)
def _get_templated_url(self, template, nid, method=None): url = None if method=="biblio": nid = provider.strip_leading_http(nid).lower() url = template % (nid) elif method=="provenance": url = self.post_url_from_nid(nid) else: url = template % (nid) return(url)
def biblio(self, aliases, provider_url_template=None, cache_enabled=True):
    """Collect bibliographic information for a blog (account-level item).

    Args:
        aliases: iterable of ``(namespace, nid)`` tuples; only the first
            ``"blog"`` alias is used.
        provider_url_template: URL template for the lookup; defaults to
            ``self.biblio_url_template`` when falsy.
        cache_enabled: forwarded to ``self.http_get``.

    Returns:
        ``None`` when there is no usable ``"blog"`` alias; otherwise a dict
        with ``"url"``, ``"account"`` and ``"is_account"``, plus
        ``"hosting_platform"`` and whatever ``self._extract_biblio`` yields
        when the lookup answers 200 with ``"name"`` in the body.
    """
    aliases_dict = provider.alias_dict_from_tuples(aliases)

    # Start from None so a missing "blog" alias is detected below.  The
    # original left its local (named ``id``) unassigned in that case, so the
    # emptiness check silently tested the builtin ``id`` function instead,
    # which is always truthy.
    blog_url = None
    if "blog" in aliases_dict:
        blog_url = aliases_dict["blog"][0]

    # Only lookup biblio for items with appropriate ids
    if not blog_url:
        return None

    if not provider_url_template:
        provider_url_template = self.biblio_url_template

    self.logger.debug(u"%s getting biblio for %s" % (self.provider_name, blog_url))

    # set up stuff that is true for all blogs, wordpress and not
    biblio_dict = {}
    biblio_dict["url"] = blog_url
    biblio_dict["account"] = provider.strip_leading_http(blog_url)
    biblio_dict["is_account"] = True  # special key to tell webapp to render as genre heading

    # now add things that are true just for wordpress blogs
    url = self._get_templated_url(provider_url_template, blog_url, "biblio")

    # try to get a response from the data provider
    response = self.http_get(url, cache_enabled=cache_enabled)
    if (response.status_code == 200) and ("name" in response.text):
        biblio_dict["hosting_platform"] = "wordpress.com"
        try:
            biblio_dict.update(self._extract_biblio(response.text, blog_url))
        except (AttributeError, TypeError):
            # Best effort: keep the generic fields if extraction fails.
            pass

    return biblio_dict
def member_items(self, input_dict, provider_url_template=None, cache_enabled=True):
    """Expand user input into a list of ``(namespace, nid)`` member tuples.

    * A truthy ``input_dict["blogUrl"]`` yields a ``("blog", url)`` member
      plus one ``"blog_post"`` member for each URL returned by
      ``topsy.Topsy().top_tweeted_urls(url, number_to_return=10)``.
    * ``input_dict["blog_post_urls"]``, when present, is expanded through
      ``webpage.Webpage().member_items`` and each resulting URL becomes a
      ``"blog_post"`` member.  Its blog URL is the supplied ``blogUrl`` or,
      failing that, ``"http://"`` plus the host part of the post URL.

    Each ``"blog_post"`` nid is a JSON object with ``"post_url"`` and
    ``"blog_url"`` keys.  ``provider_url_template`` and ``cache_enabled``
    are accepted but not used here.
    """
    found = []
    blog_url = input_dict["blogUrl"] if "blogUrl" in input_dict else None

    if blog_url:
        found.append(("blog", blog_url))

        # import top blog posts
        top_urls = topsy.Topsy().top_tweeted_urls(blog_url, number_to_return=10)
        for top_url in top_urls:
            nid_json = json.dumps({"post_url": top_url, "blog_url": blog_url})
            found.append(("blog_post", nid_json))

    # handle individual blog posts
    if "blog_post_urls" in input_dict:
        as_webpages = webpage.Webpage().member_items(input_dict["blog_post_urls"])
        for (_url_namespace, post_url) in as_webpages:
            if blog_url:
                owning_blog = blog_url
            else:
                # No blog given: fall back to scheme + host of the post URL.
                host = provider.strip_leading_http(post_url).split("/", 1)[0]
                owning_blog = "http://" + host
            nid_json = json.dumps({"post_url": post_url, "blog_url": owning_blog})
            found.append(("blog_post", nid_json))

    return found
def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
    """Derive additional aliases for a blog post.

    For the first ``"blog_post"`` alias this adds:

    * ``("url", post_url)`` if that tuple is not already in ``aliases``;
    * ``("wordpress_blog_post", nid_json)`` -- the original nid JSON with a
      ``"wordpress_post_id"`` key added -- when no such alias exists yet,
      the site lookup response contains ``"name"`` and the post lookup
      response contains ``"ID"``.

    Args:
        aliases: iterable of ``(namespace, nid)`` tuples.
        provider_url_template: accepted but not used here.
        cache_enabled: forwarded to both ``http_get`` calls (it was
            previously accepted but ignored).

    Returns:
        List of new alias tuples; empty when there is no ``"blog_post"``
        alias.
    """
    aliases_dict = provider.alias_dict_from_tuples(aliases)
    new_aliases = []
    if "blog_post" not in aliases_dict:
        return new_aliases

    nid = aliases_dict["blog_post"][0]
    post_url = self.post_url_from_nid(nid)

    # add url as alias if not already there
    url_alias = ("url", post_url)
    if url_alias not in aliases:
        new_aliases.append(url_alias)

    # now add the wordpress alias info if it isn't already there
    if "wordpress_blog_post" in aliases_dict:
        return new_aliases

    blog_url = provider.strip_leading_http(self.blog_url_from_nid(nid))
    site_api_url = self.metrics_url_template_wordpress_site % blog_url
    response = self.http_get(site_api_url, cache_enabled=cache_enabled)
    if "name" not in response.text:
        return new_aliases

    # it is a wordpress blog, so now get its wordpress post ID
    if post_url.endswith("/"):
        # Drop one trailing slash so the slug is the last path segment.
        post_url = post_url[:-1]
    post_end_slug = post_url.rsplit("/", 1)[1]
    post_api_url = self.metrics_url_template_wordpress_post % (blog_url, post_end_slug)
    response = self.http_get(post_api_url, cache_enabled=cache_enabled)
    if "ID" in response.text:
        wordpress_post_id = json.loads(response.text)["ID"]
        nid_as_dict = json.loads(nid)
        nid_as_dict.update({"wordpress_post_id": wordpress_post_id})
        new_aliases.append(("wordpress_blog_post", json.dumps(nid_as_dict)))

    return new_aliases
def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
    """Derive additional aliases for a blog post.

    Looks at the first ``"blog_post"`` alias and returns a list that may
    contain ``("url", post_url)`` (if not already present in ``aliases``)
    and ``("wordpress_blog_post", nid_json)``, where ``nid_json`` is the
    original nid JSON extended with ``"wordpress_post_id"``.  The second
    alias is only produced when it does not exist yet, the site lookup
    response contains ``"name"`` and the post lookup response contains
    ``"ID"``.

    Args:
        aliases: iterable of ``(namespace, nid)`` tuples.
        provider_url_template: accepted but not used here.
        cache_enabled: forwarded to both ``http_get`` calls (it was
            previously accepted but ignored).

    Returns:
        List of new alias tuples (possibly empty).
    """
    aliases_dict = provider.alias_dict_from_tuples(aliases)
    new_aliases = []

    if "blog_post" in aliases_dict:
        nid = aliases_dict["blog_post"][0]
        post_url = self.post_url_from_nid(nid)

        # add url as alias if not already there
        new_alias = ("url", post_url)
        if new_alias not in aliases:
            new_aliases += [new_alias]

        # now add the wordpress alias info if it isn't already there
        if "wordpress_blog_post" not in aliases_dict:
            blog_url = provider.strip_leading_http(self.blog_url_from_nid(nid))
            wordpress_blog_api_url = self.metrics_url_template_wordpress_site % blog_url
            response = self.http_get(wordpress_blog_api_url,
                                     cache_enabled=cache_enabled)

            if "name" in response.text:
                # it is a wordpress blog, so now get its wordpress post ID
                if post_url.endswith("/"):
                    # Drop one trailing slash so the slug is the last segment.
                    post_url = post_url[:-1]
                post_end_slug = post_url.rsplit("/", 1)[1]

                wordpress_post_api_url = self.metrics_url_template_wordpress_post % (
                    blog_url, post_end_slug)
                response = self.http_get(wordpress_post_api_url,
                                         cache_enabled=cache_enabled)

                if "ID" in response.text:
                    wordpress_post_id = json.loads(response.text)["ID"]
                    nid_as_dict = json.loads(nid)
                    nid_as_dict.update({"wordpress_post_id": wordpress_post_id})
                    new_aliases += [("wordpress_blog_post", json.dumps(nid_as_dict))]

    return new_aliases
def _get_templated_url(self, template, nid, method=None): if method in ["metrics", "biblio", "aliases"]: nid = provider.strip_leading_http(nid).lower() url = template % (nid) return(url)
def metrics(self, aliases, provider_url_template=None, cache_enabled=True, analytics_credentials=None):
    """Gather every metric available for the given aliases.

    Depending on which aliases are present, up to five lookups are made via
    ``self.get_metrics_for_id``, each with an explicit ``url_override``:

    * ``"blog"``                       -> subscribers
    * ``"wordpress_blog_id"``          -> blog comments
    * ``"blog"`` + credentials         -> blog views
    * ``"wordpress_blog_post"``        -> post comments
    * ``"wordpress_blog_post"`` + credentials -> post views

    Args:
        aliases: iterable of ``(namespace, nid)`` tuples.
        provider_url_template: accepted but not used here.
        cache_enabled: forwarded to every ``get_metrics_for_id`` call.
        analytics_credentials: optional mapping; when truthy its
            ``"wordpress_api_key"`` entry is used for the view lookups.

    Returns:
        Dict mapping each metric name to ``(value, drilldown_url)``, where
        the drilldown URL comes from ``self.provenance_url``.
    """
    collected = {}
    aliases_dict = provider.alias_dict_from_tuples(aliases)

    def collect(item_id, extractor, url_override):
        # Run one lookup and merge whatever it returns into ``collected``.
        collected.update(self.get_metrics_for_id(
            item_id,
            cache_enabled=cache_enabled,
            extract_metrics_method=extractor,
            url_override=url_override))

    has_blog = "blog" in aliases_dict
    blog_url = None

    if has_blog:
        blog_url = aliases_dict["blog"][0]
        url_override = self.metrics_url_template_public % (
            provider.strip_leading_http(blog_url).lower())
        collect(blog_url, self._extract_metrics_subscribers, url_override)

    if "wordpress_blog_id" in aliases_dict:
        wordpress_blog_id = aliases_dict["wordpress_blog_id"][0]
        url_override = self.metrics_url_template_comments % wordpress_blog_id
        # ``blog_url`` only exists when a "blog" alias was supplied; the
        # original read it unconditionally and raised UnboundLocalError
        # otherwise.  Fall back to the blog id so the lookup still runs.
        item_id = blog_url if has_blog else wordpress_blog_id
        collect(item_id, self._extract_metrics_blog_comments, url_override)

    if has_blog and analytics_credentials:
        blog_url = aliases_dict["blog"][0]
        api_key = analytics_credentials["wordpress_api_key"]
        url_override = self.metrics_url_template_wordpress_blog_views % (
            api_key, provider.strip_leading_http(blog_url).lower())
        collect(blog_url, self._extract_metrics_blog_views, url_override)

    if "wordpress_blog_post" in aliases_dict:
        nid = aliases_dict["wordpress_blog_post"][0]
        post_id = self.wordpress_post_id_from_nid(nid)
        blog_url = self.blog_url_from_nid(nid)
        blog_host = provider.strip_leading_http(blog_url).lower()

        url_override = self.metrics_url_template_wordpress_post_comments % (
            blog_host, post_id)
        collect(post_id, self._extract_metrics_post_comments, url_override)

        if analytics_credentials:
            api_key = analytics_credentials["wordpress_api_key"]
            url_override = self.metrics_url_template_wordpress_post_views % (
                api_key, blog_host, post_id)
            # NOTE(review): post views reuse the blog-views extractor, as in
            # the original -- confirm this is intended.
            collect(blog_url, self._extract_metrics_blog_views, url_override)

    # Pair every metric value with the URL that documents where it came from.
    metrics_and_drilldown = {}
    for metric_name, metric_value in collected.items():
        drilldown_url = self.provenance_url(metric_name, aliases)
        metrics_and_drilldown[metric_name] = (metric_value, drilldown_url)

    return metrics_and_drilldown