Esempio n. 1
0
 def get_site_id_for_template(self, aliases):
     """Return the site id to plug into a url template, or None.

     ``aliases`` is iterated as ``(namespace, nid)`` pairs.  The nid of the
     first pair whose namespace is "blog" is passed through
     ``provider.strip_leading_http`` and lowercased.  None is returned when
     no pair has the "blog" namespace.
     """
     for (namespace, nid) in aliases:
         if namespace != "blog":
             continue
         stripped = provider.strip_leading_http(nid)
         return stripped.lower()
     return None
Esempio n. 2
0
    def biblio(self, aliases, provider_url_template=None, cache_enabled=True):
        """Return a biblio dict for the blog post named by the "blog_post" alias.

        The biblio is first fetched through ``webpage.Webpage().biblio`` for
        the post url, then adjusted here:

        * "url" is set to the post url and "account" to the blog url after
          ``provider.strip_leading_http``;
        * a title containing "|" is split on its last "|" into "title" and
          "blog_title" (both stripped);
        * "hosting_platform" is set to "wordpress.com" when the templated
          biblio url answers with status 200 and "name" in the response text.

        Raises KeyError when ``aliases`` holds no "blog_post" entry.
        """
        logger.info(u"calling webpage to handle aliases")

        # NOTE(review): the result of get_best_id is never used; the call is
        # kept as-is in case it has side effects -- confirm and remove.
        self.get_best_id(aliases)
        nid = provider.alias_dict_from_tuples(aliases)["blog_post"][0]
        post_url = self.post_url_from_nid(nid)
        blog_url = self.blog_url_from_nid(nid)

        biblio_dict = webpage.Webpage().biblio(
            [("url", post_url)], provider_url_template, cache_enabled)
        biblio_dict["url"] = post_url
        account_url = self.blog_url_from_nid(nid)
        biblio_dict["account"] = provider.strip_leading_http(account_url)

        if "title" in biblio_dict:
            full_title = biblio_dict["title"]
            if "|" in full_title:
                # the text after the last "|" is stored as the blog's title
                (post_title, blog_title) = full_title.rsplit("|", 1)
                biblio_dict["title"] = post_title.strip()
                biblio_dict["blog_title"] = blog_title.strip()

        # try to get a response from wordpress.com
        api_url = self._get_templated_url(
            self.biblio_url_template, blog_url, "biblio")
        response = self.http_get(api_url, cache_enabled=cache_enabled)
        if response.status_code == 200:
            if "name" in response.text:
                biblio_dict["hosting_platform"] = "wordpress.com"

        # in the future could get date posted from these sorts of calls:
        # https://public-api.wordpress.com/rest/v1/sites/blog.impactstory.org/posts/slug:link-your-figshare-and-impactstory-strips

        return biblio_dict
Esempio n. 3
0
    def biblio(self,
            aliases,
            provider_url_template=None,
            cache_enabled=True):
        """Assemble biblio information for a blog post.

        Looks up the first "blog_post" alias, asks the generic webpage
        provider for the biblio of the post url, and then overrides "url" and
        "account", splits a "post | blog" style title on its last "|", and
        marks the post as hosted on wordpress.com when the templated biblio
        url responds with status 200 and a body containing "name".

        Returns the resulting dict.  A missing "blog_post" alias raises
        KeyError.
        """
        logger.info(u"calling webpage to handle aliases")

        # NOTE(review): get_best_id's return value is discarded below; the
        # call is preserved because its side effects are not visible here.
        self.get_best_id(aliases)
        blog_post_nids = provider.alias_dict_from_tuples(aliases)["blog_post"]
        nid = blog_post_nids[0]
        post_url = self.post_url_from_nid(nid)
        blog_url = self.blog_url_from_nid(nid)

        webpage_provider = webpage.Webpage()
        biblio_dict = webpage_provider.biblio(
            [("url", post_url)],
            provider_url_template,
            cache_enabled)
        biblio_dict["url"] = post_url
        biblio_dict["account"] = provider.strip_leading_http(
            self.blog_url_from_nid(nid))

        has_compound_title = ("title" in biblio_dict) and ("|" in biblio_dict["title"])
        if has_compound_title:
            pieces = biblio_dict["title"].rsplit("|", 1)
            biblio_dict["title"] = pieces[0].strip()
            biblio_dict["blog_title"] = pieces[1].strip()

        # try to get a response from wordpress.com
        wordpress_url = self._get_templated_url(self.biblio_url_template, blog_url, "biblio")
        response = self.http_get(wordpress_url, cache_enabled=cache_enabled)
        answered_ok = (response.status_code == 200)
        if answered_ok and ("name" in response.text):
            biblio_dict["hosting_platform"] = "wordpress.com"

        # in the future could get date posted from these sorts of calls:
        # https://public-api.wordpress.com/rest/v1/sites/blog.impactstory.org/posts/slug:link-your-figshare-and-impactstory-strips

        return biblio_dict
Esempio n. 4
0
 def get_site_id_for_template(self, aliases):
     """Pick the blog nid out of ``aliases`` and normalise it for templating.

     Each alias is unpacked as ``(namespace, nid)``.  For the first alias in
     the "blog" namespace the nid is run through
     ``provider.strip_leading_http`` and lowercased; that value is returned.
     If no alias is in the "blog" namespace the result is None.
     """
     for (namespace, nid) in aliases:
         is_blog = ("blog" == namespace)
         if is_blog:
             return provider.strip_leading_http(nid).lower()
     return None
Esempio n. 5
0
 def _get_templated_url(self, template, nid, method=None):
     url = None
     if method == "biblio":
         nid = provider.strip_leading_http(nid).lower()
         url = template % (nid)
     elif method == "provenance":
         url = self.post_url_from_nid(nid)
     else:
         url = template % (nid)
     return (url)
Esempio n. 6
0
 def _get_templated_url(self, template, nid, method=None):
     url = None
     if method=="biblio":
         nid = provider.strip_leading_http(nid).lower()
         url = template % (nid)
     elif method=="provenance":
         url = self.post_url_from_nid(nid)
     else:
         url = template % (nid)
     return(url)
    def biblio(self,
            aliases,
            provider_url_template=None,
            cache_enabled=True):
        """Return biblio information for the blog named by the "blog" alias.

        Returns None when ``aliases`` contains no usable (truthy) "blog"
        entry.  Otherwise returns a dict holding:

        * "url": the blog alias value,
        * "account": that value after ``provider.strip_leading_http``,
        * "is_account": True,
        * "hosting_platform": "wordpress.com", plus whatever
          ``self._extract_biblio`` returns, when the templated lookup answers
          with status 200 and "name" in the response text.

        ``provider_url_template`` falls back to ``self.biblio_url_template``
        when it is empty; ``cache_enabled`` is forwarded to ``self.http_get``.
        """
        aliases_dict = provider.alias_dict_from_tuples(aliases)

        # Bind the name unconditionally: it used to be assigned only inside
        # the "blog" check, so a missing alias raised UnboundLocalError
        # instead of reaching the "return None" guard below.
        blog_url = None
        if "blog" in aliases_dict:
            blog_url = aliases_dict["blog"][0]

        # Only lookup biblio for items with appropriate ids
        if not blog_url:
            return None

        if not provider_url_template:
            provider_url_template = self.biblio_url_template

        self.logger.debug(u"%s getting biblio for %s" % (self.provider_name, blog_url))

        # set up stuff that is true for all blogs, wordpress and not
        biblio_dict = {}
        biblio_dict["url"] = blog_url
        biblio_dict["account"] = provider.strip_leading_http(blog_url)
        biblio_dict["is_account"] = True  # special key to tell webapp to render as genre heading

        # now add things that are true just for wordpress blogs
        url = self._get_templated_url(provider_url_template, blog_url, "biblio")

        # try to get a response from the data provider
        response = self.http_get(url, cache_enabled=cache_enabled)

        if (response.status_code == 200) and ("name" in response.text):
            biblio_dict["hosting_platform"] = "wordpress.com"
            try:
                biblio_dict.update(self._extract_biblio(response.text, blog_url))
            except (AttributeError, TypeError):
                # extraction is best-effort: keep the generic fields, but
                # leave a trace instead of failing silently
                self.logger.debug(u"%s could not extract biblio for %s" % (self.provider_name, blog_url))

        return biblio_dict
    def member_items(self,
            input_dict,
            provider_url_template=None,
            cache_enabled=True):
        """Turn an import request into a list of ``(namespace, nid)`` members.

        ``input_dict`` may carry:

        * "blogUrl": when truthy, yields a ("blog", url) member followed by
          one "blog_post" member per url returned by
          ``topsy.Topsy().top_tweeted_urls(url, number_to_return=10)``;
        * "blog_post_urls": handed to ``webpage.Webpage().member_items``;
          each resulting post url yields a "blog_post" member.

        Every "blog_post" nid is a JSON string with "post_url" and
        "blog_url".  For individually listed posts the blog url is the given
        blogUrl, or, without one, "http://" plus the part of the post url
        (after ``provider.strip_leading_http``) before its first "/".
        """
        blog_url = input_dict["blogUrl"] if "blogUrl" in input_dict else None
        members = []

        if blog_url:
            members.append(("blog", blog_url))

            # import top blog posts
            top_post_urls = topsy.Topsy().top_tweeted_urls(blog_url, number_to_return=10)
            for post_url in top_post_urls:
                nid_fields = {"post_url": post_url, "blog_url": blog_url}
                members.append(("blog_post", json.dumps(nid_fields)))

        # handle individual blog posts
        if "blog_post_urls" in input_dict:
            webpage_members = webpage.Webpage().member_items(input_dict["blog_post_urls"])
            for (_namespace, post_url) in webpage_members:
                # without an explicit blog url, fall back to the post's host
                owning_blog_url = blog_url or (
                    "http://" + provider.strip_leading_http(post_url).split("/", 1)[0])
                nid_fields = {"post_url": post_url, "blog_url": owning_blog_url}
                members.append(("blog_post", json.dumps(nid_fields)))

        return members
Esempio n. 9
0
    def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
        """Return new aliases discovered for a "blog_post" item.

        Without a "blog_post" alias the result is an empty list.  Otherwise:

        * ("url", post_url) is added unless that tuple is already in
          ``aliases``;
        * unless a "wordpress_blog_post" alias already exists, the wordpress
          site url is requested; if its response text contains "name", the
          post is looked up by the last path segment of its url, and when
          that response text contains "ID" a "wordpress_blog_post" alias is
          added whose nid is the original nid JSON extended with
          "wordpress_post_id".
        """
        aliases_dict = provider.alias_dict_from_tuples(aliases)
        if "blog_post" not in aliases_dict:
            return []

        discovered = []
        nid = aliases_dict["blog_post"][0]
        post_url = self.post_url_from_nid(nid)

        # add url as alias if not already there
        url_alias = ("url", post_url)
        if url_alias not in aliases:
            discovered.append(url_alias)

        # now add the wordpress alias info if it isn't already there
        if "wordpress_blog_post" in aliases_dict:
            return discovered

        blog_url = provider.strip_leading_http(self.blog_url_from_nid(nid))
        site_api_url = self.metrics_url_template_wordpress_site % blog_url
        site_response = self.http_get(site_api_url)
        if "name" not in site_response.text:
            return discovered

        # it is a wordpress blog, so now get its wordpress post ID
        trimmed_post_url = post_url[:-1] if post_url.endswith("/") else post_url
        post_end_slug = trimmed_post_url.rsplit("/", 1)[1]

        post_api_url = self.metrics_url_template_wordpress_post % (
            blog_url, post_end_slug)
        post_response = self.http_get(post_api_url)
        if "ID" in post_response.text:
            wordpress_post_id = json.loads(post_response.text)["ID"]
            nid_as_dict = json.loads(nid)
            nid_as_dict["wordpress_post_id"] = wordpress_post_id
            discovered.append(("wordpress_blog_post", json.dumps(nid_as_dict)))

        return discovered
Esempio n. 10
0
    def aliases(self,
            aliases,
            provider_url_template=None,
            cache_enabled=True):
        """Discover extra aliases for the item's "blog_post" alias.

        Returns a list of ``(namespace, nid)`` tuples, empty when there is no
        "blog_post" alias.  It can contain the post's ("url", ...) alias, if
        not already present in ``aliases``, and a "wordpress_blog_post" alias
        whose nid is the blog_post nid JSON with "wordpress_post_id" added.
        The latter is only looked up when no "wordpress_blog_post" alias
        exists yet, the wordpress site response text contains "name", and the
        wordpress post response text contains "ID".
        """
        found = []
        by_namespace = provider.alias_dict_from_tuples(aliases)

        if "blog_post" in by_namespace:
            nid = by_namespace["blog_post"][0]
            post_url = self.post_url_from_nid(nid)

            # add url as alias if not already there
            if ("url", post_url) not in aliases:
                found.append(("url", post_url))

            # now add the wordpress alias info if it isn't already there
            needs_wordpress_lookup = "wordpress_blog_post" not in by_namespace
            if needs_wordpress_lookup:
                blog_url = provider.strip_leading_http(self.blog_url_from_nid(nid))
                site_api_url = self.metrics_url_template_wordpress_site % blog_url

                site_response = self.http_get(site_api_url)
                if "name" in site_response.text:
                    # it is a wordpress blog, so now get its wordpress post ID
                    if post_url.endswith("/"):
                        slug_source = post_url[:-1]
                    else:
                        slug_source = post_url
                    post_end_slug = slug_source.rsplit("/", 1)[1]

                    post_api_url = self.metrics_url_template_wordpress_post % (blog_url, post_end_slug)
                    post_response = self.http_get(post_api_url)
                    if "ID" in post_response.text:
                        wordpress_post_id = json.loads(post_response.text)["ID"]
                        enriched_nid = json.loads(nid)
                        enriched_nid.update({"wordpress_post_id": wordpress_post_id})
                        found.append(("wordpress_blog_post", json.dumps(enriched_nid)))

        return found
 def _get_templated_url(self, template, nid, method=None):
     if method in ["metrics", "biblio", "aliases"]:
         nid = provider.strip_leading_http(nid).lower()
     url = template % (nid)
     return(url)
    def metrics(self,
            aliases,
            provider_url_template=None,
            cache_enabled=True,
            analytics_credentials=None):
        """Collect metrics for the blog and/or blog post named in ``aliases``.

        Each lookup goes through ``self.get_metrics_for_id`` with an explicit
        ``url_override``; results are merged in this order, later lookups
        overwriting earlier ones on equal metric names:

        1. "blog" alias: subscribers, from ``metrics_url_template_public``.
        2. "wordpress_blog_id" alias: blog comments, from
           ``metrics_url_template_comments``.
        3. "blog" alias plus ``analytics_credentials``: blog views, using
           ``analytics_credentials["wordpress_api_key"]``.
        4. "wordpress_blog_post" alias: post comments and, with
           ``analytics_credentials``, post views.

        Returns a dict mapping each metric name to a
        ``(value, drilldown_url)`` tuple, where the drilldown url comes from
        ``self.provenance_url(metric_name, aliases)``.
        """
        metrics = {}

        aliases_dict = provider.alias_dict_from_tuples(aliases)

        # Bound up front: the "wordpress_blog_id" lookup below reads it even
        # when there is no "blog" alias, which used to raise
        # UnboundLocalError.
        blog_url = None

        if "blog" in aliases_dict:
            blog_url = aliases_dict["blog"][0]

            url_override = self.metrics_url_template_public % (provider.strip_leading_http(blog_url).lower())

            new_metrics = self.get_metrics_for_id(blog_url,
                                cache_enabled=cache_enabled,
                                extract_metrics_method=self._extract_metrics_subscribers,
                                url_override=url_override)
            metrics.update(new_metrics)

        if "wordpress_blog_id" in aliases_dict:
            wordpress_blog_id = aliases_dict["wordpress_blog_id"][0]

            url_override = self.metrics_url_template_comments % wordpress_blog_id

            # Keep passing the blog url when there is one (unchanged
            # behaviour); otherwise identify the lookup by the wordpress
            # blog id rather than an unbound name.
            if blog_url is not None:
                comments_lookup_id = blog_url
            else:
                comments_lookup_id = wordpress_blog_id

            new_metrics = self.get_metrics_for_id(comments_lookup_id,
                                cache_enabled=cache_enabled,
                                extract_metrics_method=self._extract_metrics_blog_comments,
                                url_override=url_override)
            metrics.update(new_metrics)

        if ("blog" in aliases_dict) and analytics_credentials:
            blog_url = aliases_dict["blog"][0]
            api_key = analytics_credentials["wordpress_api_key"]

            url_override = self.metrics_url_template_wordpress_blog_views % (api_key, provider.strip_leading_http(blog_url).lower())

            new_metrics = self.get_metrics_for_id(blog_url,
                                cache_enabled=cache_enabled,
                                extract_metrics_method=self._extract_metrics_blog_views,
                                url_override=url_override)

            metrics.update(new_metrics)

        if "wordpress_blog_post" in aliases_dict:
            nid = aliases_dict["wordpress_blog_post"][0]
            post_id = self.wordpress_post_id_from_nid(nid)
            # from here on blog_url is the one recorded in the post's nid
            blog_url = self.blog_url_from_nid(nid)

            url_override = self.metrics_url_template_wordpress_post_comments % (provider.strip_leading_http(blog_url).lower(), post_id)
            new_metrics = self.get_metrics_for_id(post_id,
                                cache_enabled=cache_enabled,
                                extract_metrics_method=self._extract_metrics_post_comments,
                                url_override=url_override)
            metrics.update(new_metrics)

            if analytics_credentials:
                api_key = analytics_credentials["wordpress_api_key"]

                url_override = self.metrics_url_template_wordpress_post_views % (api_key, provider.strip_leading_http(blog_url).lower(), post_id)
                # NOTE(review): post views are parsed with the blog-views
                # extractor, as in the original -- confirm this is intended.
                new_metrics = self.get_metrics_for_id(blog_url,
                                    cache_enabled=cache_enabled,
                                    extract_metrics_method=self._extract_metrics_blog_views,
                                    url_override=url_override)
                metrics.update(new_metrics)

        # pair every metric value with the url it can be drilled down to
        metrics_and_drilldown = {}
        for metric_name in metrics:
            drilldown_url = self.provenance_url(metric_name, aliases)
            metrics_and_drilldown[metric_name] = (metrics[metric_name], drilldown_url)

        return metrics_and_drilldown