コード例 #1
0
    def biblio(self, aliases, provider_url_template=None, cache_enabled=True):
        """Build the biblio dict for a blog post.

        The page scrape is delegated to ``webpage.Webpage().biblio`` using the
        post URL. The post URL and the blog account are then written onto the
        result, and a title of the form "post | blog" is split into "title"
        and "blog_title". Finally a request built from
        ``self.biblio_url_template`` marks the blog as hosted on wordpress.com
        when it answers 200 with "name" in the body.

        Args:
            aliases: alias tuples; the "blog_post" entry is indexed directly,
                so it is expected to be present.
            provider_url_template: passed through to the webpage provider.
            cache_enabled: passed through to the webpage provider and to
                ``self.http_get``.

        Returns:
            The biblio dict returned by the webpage provider, with the extra
            keys described above.
        """
        logger.info(u"calling webpage to handle aliases")

        aliases_dict = provider.alias_dict_from_tuples(aliases)
        nid = aliases_dict["blog_post"][0]
        post_url = self.post_url_from_nid(nid)
        blog_url = self.blog_url_from_nid(nid)

        biblio_dict = webpage.Webpage().biblio([("url", post_url)],
                                               provider_url_template,
                                               cache_enabled)
        biblio_dict["url"] = post_url
        # reuse blog_url rather than deriving it from nid a second time
        biblio_dict["account"] = provider.strip_leading_http(blog_url)
        if ("title" in biblio_dict) and ("|" in biblio_dict["title"]):
            # split on the last "|" only, so a "|" inside the post title survives
            (title, blog_title) = biblio_dict["title"].rsplit("|", 1)
            biblio_dict["title"] = title.strip()
            biblio_dict["blog_title"] = blog_title.strip()

        # try to get a response from wordpress.com
        url = self._get_templated_url(self.biblio_url_template, blog_url,
                                      "biblio")
        response = self.http_get(url, cache_enabled=cache_enabled)
        if (response.status_code == 200) and ("name" in response.text):
            biblio_dict["hosting_platform"] = "wordpress.com"

        # in the future could get date posted from these sorts of calls:
        # https://public-api.wordpress.com/rest/v1/sites/blog.impactstory.org/posts/slug:link-your-figshare-and-impactstory-strips

        return biblio_dict
コード例 #2
0
 def provenance_url(self, metric_name, aliases):
     """Return the drill-down URL for the first "altmetric_com" alias.

     The URL is built from ``self.provenance_url_template``. An empty string
     is returned when a KeyError is raised while building it, e.g. because
     no "altmetric_com" alias is present. ``metric_name`` is not used.
     """
     by_namespace = provider.alias_dict_from_tuples(aliases)
     try:
         return self._get_templated_url(
             self.provenance_url_template, by_namespace["altmetric_com"][0])
     except KeyError:
         return ""
コード例 #3
0
    def biblio(self, aliases, provider_url_template=None, cache_enabled=True):
        """Collect biblio fields for the first "pmid" alias.

        Returns None when there is no "pmid" alias. Otherwise the fields
        extracted from the efetch page are merged with those from the elink
        page (elink values win on key clashes). "free_fulltext_url" is added
        when a "pmc" alias exists, or else when the extracted "issn" is
        accepted by ``provider.is_issn_in_doaj``.
        """
        by_namespace = provider.alias_dict_from_tuples(aliases)
        if "pmid" not in by_namespace:
            return None
        pmid = by_namespace["pmid"][0]

        self.logger.debug(u"%s getting biblio for %s" % (self.provider_name, pmid))
        fields = {}

        # efetch first ...
        page_url = self._get_templated_url(self.biblio_url_efetch_template, pmid, "biblio")
        page = self._get_eutils_page(page_url, pmid, cache_enabled=cache_enabled)
        fields.update(self._extract_biblio_efetch(page, pmid))

        # ... then elink, whose fields overwrite any efetch fields of the same name
        page_url = self._get_templated_url(self.biblio_url_elink_template, pmid, "biblio")
        page = self._get_eutils_page(page_url, pmid, cache_enabled=cache_enabled)
        fields.update(self._extract_biblio_elink(page, pmid))

        if "pmc" in by_namespace:
            fields["free_fulltext_url"] = self.pmc_article_template % by_namespace["pmc"][0]
        elif "issn" in fields and provider.is_issn_in_doaj(fields["issn"]):
            fields["free_fulltext_url"] = self.aliases_pubmed_url_template % pmid

        return fields
コード例 #4
0
    def biblio(self,
            aliases,
            provider_url_template=None,
            cache_enabled=True):
        """Build the biblio dict for a blog post.

        ``webpage.Webpage().biblio`` scrapes the post URL; the result is then
        given the post URL, the blog account, and -- when the scraped title
        contains "|" -- separate "title" and "blog_title" values. A request
        built from ``self.biblio_url_template`` sets "hosting_platform" to
        "wordpress.com" when it answers 200 with "name" in the body.

        Args:
            aliases: alias tuples; the "blog_post" entry is indexed directly,
                so it is expected to be present.
            provider_url_template: passed through to the webpage provider.
            cache_enabled: passed through to the webpage provider and to
                ``self.http_get``.

        Returns:
            The biblio dict returned by the webpage provider, with the extra
            keys described above.
        """
        logger.info(u"calling webpage to handle aliases")

        aliases_dict = provider.alias_dict_from_tuples(aliases)
        nid = aliases_dict["blog_post"][0]
        post_url = self.post_url_from_nid(nid)
        blog_url = self.blog_url_from_nid(nid)

        biblio_dict = webpage.Webpage().biblio([("url", post_url)], provider_url_template, cache_enabled)
        biblio_dict["url"] = post_url
        # blog_url is already known; no need to derive it from nid again
        biblio_dict["account"] = provider.strip_leading_http(blog_url)
        if ("title" in biblio_dict) and ("|" in biblio_dict["title"]):
            # only the last "|" separates the post title from the blog title
            (title, blog_title) = biblio_dict["title"].rsplit("|", 1)
            biblio_dict["title"] = title.strip()
            biblio_dict["blog_title"] = blog_title.strip()

        # try to get a response from wordpress.com
        url = self._get_templated_url(self.biblio_url_template, blog_url, "biblio")
        response = self.http_get(url, cache_enabled=cache_enabled)
        if (response.status_code == 200) and ("name" in response.text):
            biblio_dict["hosting_platform"] = "wordpress.com"

        # in the future could get date posted from these sorts of calls:
        # https://public-api.wordpress.com/rest/v1/sites/blog.impactstory.org/posts/slug:link-your-figshare-and-impactstory-strips

        return biblio_dict
コード例 #5
0
 def get_best_id(self, aliases):
     """Return the first "altmetric_com" alias value, or None if there is none.

     The value is returned as stored; no namespace prefix is added here.
     """
     by_namespace = provider.alias_dict_from_tuples(aliases)
     if "altmetric_com" not in by_namespace:
         return None
     return by_namespace["altmetric_com"][0]
コード例 #6
0
 def provenance_url(self, metric_name, aliases):
     """Build the provenance URL from the first "altmetric_com" alias.

     Falls back to an empty string when building the URL raises KeyError
     (for instance when there is no "altmetric_com" alias). ``metric_name``
     is accepted but not used.
     """
     alias_lookup = provider.alias_dict_from_tuples(aliases)
     try:
         return self._get_templated_url(
             self.provenance_url_template, alias_lookup["altmetric_com"][0])
     except KeyError:
         return ""
コード例 #7
0
 def get_best_id(self, aliases):
     """Pick the id to use for lookups: the first "altmetric_com" alias.

     Returns None when no "altmetric_com" alias is present. The id is
     returned bare, without any namespace prefix.
     """
     alias_lookup = provider.alias_dict_from_tuples(aliases)
     has_altmetric_id = "altmetric_com" in alias_lookup
     return alias_lookup["altmetric_com"][0] if has_altmetric_id else None
コード例 #8
0
    def get_best_id(self, aliases):
        """Return the first value of the most preferred alias namespace.

        Namespaces are tried in the order "wordpress_blog_post", "blog_post",
        "url"; None is returned when none of them is present.
        """
        by_namespace = provider.alias_dict_from_tuples(aliases)

        # listed from most to least preferred
        preferred = ("wordpress_blog_post", "blog_post", "url")
        return next(
            (by_namespace[ns][0] for ns in preferred if ns in by_namespace),
            None)
コード例 #9
0
    def get_best_id(self, aliases):
        """Choose the id to work with from the available aliases.

        The first value under "wordpress_blog_post" wins, then "blog_post",
        then "url". Returns None if no such alias exists.
        """
        alias_lookup = provider.alias_dict_from_tuples(aliases)

        # preference order, best first
        candidates = [
            alias_lookup[namespace]
            for namespace in ("wordpress_blog_post", "blog_post", "url")
            if namespace in alias_lookup
        ]
        if not candidates:
            return None
        return candidates[0][0]
コード例 #10
0
    def metrics(self, aliases, provider_url_template=None, cache_enabled=True):
        """Return metrics with drill-down info for the best available alias.

        ``self.get_best_alias`` picks a (namespace, id) pair from the aliases.
        A falsy id yields an empty dict without querying the metrics page.
        ``cache_enabled`` is accepted but not forwarded.
        """
        by_namespace = provider.alias_dict_from_tuples(aliases)
        best_namespace, best_id = self.get_best_alias(by_namespace)
        if best_id:
            return self._get_metrics_and_drilldown_from_metrics_page(
                provider_url_template, namespace=best_namespace, id=best_id)

        # no relevant alias, so nothing to look up
        return {}
コード例 #11
0
 def get_id_for_aliases(self, aliases):
     """Return the best id as "<type>/<id>", or None if no alias qualifies.

     The id type goes in front of the slash because that is how the
     altmetric.com API expects it. Preference order: doi, pmid, arxiv
     (emitted as "arxiv_id"), altmetric_com.
     """
     by_namespace = provider.alias_dict_from_tuples(aliases)

     # (alias namespace, id template), most preferred first
     templates = (
         ("doi", "doi/{id}"),
         ("pmid", "pmid/{id}"),
         ("arxiv", "arxiv_id/{id}"),
         ("altmetric_com", "altmetric_com/{id}"),
     )
     for namespace, template in templates:
         if namespace in by_namespace:
             return template.format(id=by_namespace[namespace][0])
     return None
コード例 #12
0
ファイル: webpage.py プロジェクト: dbeucke/total-impact-core
    def biblio(self, aliases, provider_url_template=None, cache_enabled=True):
        """Return biblio info from an existing "biblio" alias or from a URL.

        A "biblio" alias, when present, is returned as-is (first value).
        Otherwise the first "url" alias is looked up through
        ``self.get_biblio_for_id``, using ``self.biblio_url_template`` when no
        template is supplied. With neither alias an empty dict is returned.
        """
        by_namespace = provider.alias_dict_from_tuples(aliases)

        if "biblio" in by_namespace:
            return by_namespace["biblio"][0]
        if "url" not in by_namespace:
            return {}

        page_url = by_namespace["url"][0]
        template = provider_url_template or self.biblio_url_template
        return self.get_biblio_for_id(page_url, template, cache_enabled)
コード例 #13
0
    def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
        """Look up new aliases unless an "altmetric_com" alias already exists.

        Returns an empty list when an "altmetric_com" alias is present or
        when ``self.get_id_for_aliases`` finds no usable id. Otherwise returns
        whatever ``self._get_aliases_for_id`` produces for that id.
        """
        by_namespace = provider.alias_dict_from_tuples(aliases)

        lookup_id = None
        if "altmetric_com" not in by_namespace:
            lookup_id = self.get_id_for_aliases(aliases)

        # either nothing new to add, or nothing to look up with
        if not lookup_id:
            return []

        return self._get_aliases_for_id(lookup_id, provider_url_template,
                                        cache_enabled)
コード例 #14
0
 def get_id_for_aliases(self, aliases):
     """Format the preferred alias as "<type>/<id>" for the altmetric.com API.

     The API expects the id type as a prefix before the slash. Aliases are
     tried in the order doi, pmid, arxiv (sent as "arxiv_id") and
     altmetric_com; None is returned when none is available.
     """
     alias_lookup = provider.alias_dict_from_tuples(aliases)

     # (alias namespace, id template), best first
     formats_by_preference = [
         ("doi", "doi/{id}"),
         ("pmid", "pmid/{id}"),
         ("arxiv", "arxiv_id/{id}"),
         ("altmetric_com", "altmetric_com/{id}"),
     ]
     best_id = None
     for namespace, id_format in formats_by_preference:
         if namespace in alias_lookup:
             best_id = id_format.format(id=alias_lookup[namespace][0])
             break
     return best_id
コード例 #15
0
    def metrics(self, aliases, provider_url_template=None, cache_enabled=True):
        """Fetch metrics by "doi", falling back to "biblio".

        The metrics page is queried with the first "doi" alias if present.
        When that gives a falsy result (or there is no doi) and a "biblio"
        alias exists, the first "biblio" alias is tried instead. Returns an
        empty dict when neither alias is present. ``cache_enabled`` is
        accepted but not forwarded.
        """
        by_namespace = provider.alias_dict_from_tuples(aliases)

        result = {}
        for namespace in ("doi", "biblio"):
            if namespace not in by_namespace:
                continue
            result = self._get_metrics_and_drilldown_from_metrics_page(
                provider_url_template,
                namespace=namespace,
                id=by_namespace[namespace][0])
            if result:
                # got something; no need to try the fallback namespace
                break

        return result
コード例 #16
0
    def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
        """Return aliases discovered via the id from ``get_id_for_aliases``.

        An existing "altmetric_com" alias means there is nothing new to add,
        so an empty list is returned. The same happens when no usable id can
        be derived from ``aliases``.
        """
        alias_lookup = provider.alias_dict_from_tuples(aliases)
        if "altmetric_com" in alias_lookup:
            # nothing new to add
            return []

        nid = self.get_id_for_aliases(aliases)
        if nid:
            return self._get_aliases_for_id(nid, provider_url_template, cache_enabled)
        return []
コード例 #17
0
ファイル: scopus.py プロジェクト: imclab/total-impact-core
    def metrics(self, aliases, provider_url_template=None, cache_enabled=True):
        """Look up metrics for the alias chosen by ``self.get_best_alias``.

        Returns an empty dict when the chosen id is falsy. Otherwise returns
        the result of ``_get_metrics_and_drilldown_from_metrics_page`` for
        that namespace and id. ``cache_enabled`` is accepted but not forwarded.
        """
        alias_lookup = provider.alias_dict_from_tuples(aliases)
        chosen = self.get_best_alias(alias_lookup)
        (namespace, nid) = chosen

        # no relevant alias: skip the metrics page entirely
        if not nid:
            return {}

        return self._get_metrics_and_drilldown_from_metrics_page(
            provider_url_template,
            namespace=namespace,
            id=nid)
コード例 #18
0
ファイル: webpage.py プロジェクト: imclab/total-impact-core
    def biblio(self, aliases, provider_url_template=None, cache_enabled=True):
        """Produce biblio info, preferring an existing "biblio" alias.

        With a "biblio" alias its first value is returned unchanged. Failing
        that, the first "url" alias is resolved via ``self.get_biblio_for_id``
        (defaulting the template to ``self.biblio_url_template``). An empty
        dict is returned when neither alias exists.
        """
        alias_lookup = provider.alias_dict_from_tuples(aliases)

        has_biblio = "biblio" in alias_lookup
        if not has_biblio and "url" not in alias_lookup:
            return {}
        if has_biblio:
            return alias_lookup["biblio"][0]

        target_url = alias_lookup["url"][0]
        if not provider_url_template:
            provider_url_template = self.biblio_url_template
        return self.get_biblio_for_id(target_url, provider_url_template, cache_enabled)
コード例 #19
0
    def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
        """Look up aliases from "biblio" entries when no doi/pmid is known.

        Returns an empty list when there is no "biblio" alias, or when a
        "doi" or "pmid" alias exists (better sources handle those). Otherwise
        every "biblio" alias is passed to ``self._get_aliases_for_id`` and the
        combined results are returned sorted with duplicates removed.
        """
        by_namespace = provider.alias_dict_from_tuples(aliases)

        if "biblio" not in by_namespace:
            return []
        if "doi" in by_namespace or "pmid" in by_namespace:
            # have better sources, leave them to it.
            return []

        found = []
        for biblio_alias in by_namespace["biblio"]:
            found.extend(self._get_aliases_for_id(biblio_alias, provider_url_template, cache_enabled))

        # sort + groupby de-duplicates without requiring hashable items
        found.sort()
        return [unique for unique, _group in itertools.groupby(found)]
コード例 #20
0
    def biblio(self,
            aliases,
            provider_url_template=None,
            cache_enabled=True):
        """Build the biblio dict for a blog, adding WordPress details if found.

        Args:
            aliases: alias tuples; only the first "blog" alias is used.
            provider_url_template: template for the lookup URL; defaults to
                ``self.biblio_url_template`` when falsy.
            cache_enabled: forwarded to ``self.http_get``.

        Returns:
            None when there is no usable "blog" alias. Otherwise a dict with
            "url", "account" and "is_account", plus "hosting_platform" and
            whatever ``self._extract_biblio`` yields when the lookup answers
            200 with "name" in the body.
        """
        aliases_dict = provider.alias_dict_from_tuples(aliases)

        # Bind the id unconditionally: leaving it unassigned when there is no
        # "blog" alias makes the check below raise UnboundLocalError.
        blog_url = aliases_dict["blog"][0] if "blog" in aliases_dict else None

        # Only lookup biblio for items with appropriate ids
        if not blog_url:
            return None

        if not provider_url_template:
            provider_url_template = self.biblio_url_template

        self.logger.debug(u"%s getting biblio for %s" % (self.provider_name, blog_url))

        # set up stuff that is true for all blogs, wordpress and not
        biblio_dict = {}
        biblio_dict["url"] = blog_url
        biblio_dict["account"] = provider.strip_leading_http(blog_url)
        biblio_dict["is_account"] = True  # special key to tell webapp to render as genre heading

        # now add things that are true just for wordpress blogs
        url = self._get_templated_url(provider_url_template, blog_url, "biblio")

        # try to get a response from the data provider
        response = self.http_get(url, cache_enabled=cache_enabled)

        if (response.status_code == 200) and ("name" in response.text):
            biblio_dict["hosting_platform"] = "wordpress.com"
            try:
                biblio_dict.update(self._extract_biblio(response.text, blog_url))
            except (AttributeError, TypeError):
                # best effort: keep the generic fields if extraction fails
                pass

        return biblio_dict
コード例 #21
0
    def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
        """Derive a DOI from the aliases and return the aliases found for it.

        The DOI comes from, in order: the first "doi" alias; any "url" alias
        starting with "http://dx.doi.org/" or "http://doi.org/"; a lookup of
        the first "biblio" alias. DOIs discovered on the way are added as new
        "doi" aliases. If the biblio lookup finds no DOI but the biblio has a
        "url", that URL is added instead. Without a DOI the aliases gathered
        so far are returned as-is; with one, the URLs from
        ``self._lookup_urls_from_doi`` are appended and the result is
        returned sorted and de-duplicated.
        """
        by_namespace = provider.alias_dict_from_tuples(aliases)

        doi = None
        found = []

        if "doi" in by_namespace:
            doi = by_namespace["doi"][0]
        elif "url" in by_namespace:
            doi_url_prefixes = ("http://dx.doi.org/", "http://doi.org/")
            for url in by_namespace["url"]:
                for prefix in doi_url_prefixes:
                    if url.startswith(prefix):
                        doi = url.replace(prefix, "")
                        found.append(("doi", doi))
                        break

        if not doi and "biblio" in by_namespace:
            first_biblio = by_namespace["biblio"][0]
            doi = self._lookup_doi_from_biblio(first_biblio, cache_enabled)
            if doi:
                found.append(("doi", doi))
            elif "url" in first_biblio:
                found.append(("url", first_biblio["url"]))

        if not doi:
            # nothing else we can do: urls if we have them, otherwise empty list
            return found

        found += self._lookup_urls_from_doi(doi, provider_url_template,
                                            cache_enabled)

        # sort + groupby de-duplicates without requiring hashable items
        return [unique for unique, _group in itertools.groupby(sorted(found))]
コード例 #22
0
    def metrics(self, aliases, provider_url_template=None, cache_enabled=True):
        """Return metrics for the first "doi" alias, else the first "biblio".

        The "biblio" alias is consulted only when the "doi" lookup was not
        possible or came back falsy. With neither alias the result is an
        empty dict. ``cache_enabled`` is accepted but not forwarded.
        """
        alias_lookup = provider.alias_dict_from_tuples(aliases)
        fetch = self._get_metrics_and_drilldown_from_metrics_page

        found = {}
        if "doi" in alias_lookup:
            found = fetch(provider_url_template,
                          namespace="doi",
                          id=alias_lookup["doi"][0])
        if found or "biblio" not in alias_lookup:
            return found

        # doi gave nothing usable; fall back to the biblio alias
        return fetch(provider_url_template,
                     namespace="biblio",
                     id=alias_lookup["biblio"][0])
コード例 #23
0
    def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
        """Find a DOI among the aliases and expand it into further aliases.

        Sources for the DOI, in order: a "doi" alias; "url" aliases that start
        with "http://dx.doi.org/" or "http://doi.org/" (each match is also
        recorded as a new "doi" alias, and the last match is the one used);
        a lookup based on the first "biblio" alias. When that lookup fails,
        the biblio's own "url", if any, is recorded instead. With no DOI the
        recorded aliases are returned unchanged; otherwise the results of
        ``self._lookup_urls_from_doi`` are added and the list is returned
        sorted with duplicates removed.
        """
        alias_lookup = provider.alias_dict_from_tuples(aliases)

        doi = None
        collected = []

        if "doi" in alias_lookup:
            doi = alias_lookup["doi"][0]
        else:
            for url in alias_lookup["url"] if "url" in alias_lookup else []:
                if url.startswith("http://dx.doi.org/"):
                    prefix = "http://dx.doi.org/"
                elif url.startswith("http://doi.org/"):
                    prefix = "http://doi.org/"
                else:
                    continue
                doi = url.replace(prefix, "")
                collected.append(("doi", doi))

        if (not doi) and ("biblio" in alias_lookup):
            doi = self._lookup_doi_from_biblio(alias_lookup["biblio"][0], cache_enabled)
            if doi:
                collected.append(("doi", doi))
            elif "url" in alias_lookup["biblio"][0]:
                collected.append(("url", alias_lookup["biblio"][0]["url"]))

        if doi:
            collected += self._lookup_urls_from_doi(doi, provider_url_template, cache_enabled)

            # sort + groupby de-duplicates without requiring hashable items
            return [unique for unique, _group in itertools.groupby(sorted(collected))]

        # nothing else we can do: urls if we have them, otherwise empty list
        return collected
コード例 #24
0
ファイル: mendeley.py プロジェクト: dbeucke/total-impact-core
    def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
        """Return aliases found from the "biblio" aliases, de-duplicated.

        Nothing is looked up (empty list) when no "biblio" alias exists or
        when a "doi" or "pmid" alias is present, since better sources cover
        those. Otherwise the results of ``self._get_aliases_for_id`` for each
        "biblio" alias are combined, sorted and stripped of duplicates.
        """
        alias_lookup = provider.alias_dict_from_tuples(aliases)

        lacks_biblio = "biblio" not in alias_lookup
        # have better sources for doi/pmid, leave them to it.
        has_better_source = ("doi" in alias_lookup) or ("pmid" in alias_lookup)
        if lacks_biblio or has_better_source:
            return []

        collected = []
        for biblio_alias in alias_lookup["biblio"]:
            collected += self._get_aliases_for_id(biblio_alias,
                                                  provider_url_template,
                                                  cache_enabled)

        # get uniques for things that are unhashable
        deduplicated = []
        for unique, _group in itertools.groupby(sorted(collected)):
            deduplicated.append(unique)
        return deduplicated
コード例 #25
0
    def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
        """Derive extra aliases for a blog post.

        Without a "blog_post" alias nothing is added. Otherwise the post URL
        is offered as a "url" alias unless that exact tuple is already in
        ``aliases``. When no "wordpress_blog_post" alias exists yet, the
        WordPress site and post endpoints are queried; if both answer as
        expected, a "wordpress_blog_post" alias is built from the original
        nid JSON plus the post's "wordpress_post_id".
        ``provider_url_template`` and ``cache_enabled`` are not used.
        """
        by_namespace = provider.alias_dict_from_tuples(aliases)
        found = []

        if "blog_post" not in by_namespace:
            return found

        nid = by_namespace["blog_post"][0]
        post_url = self.post_url_from_nid(nid)

        # add url as alias if not already there
        url_alias = ("url", post_url)
        if url_alias not in aliases:
            found.append(url_alias)

        # the wordpress alias info is only added if it isn't already there
        if "wordpress_blog_post" in by_namespace:
            return found

        blog_host = provider.strip_leading_http(self.blog_url_from_nid(nid))
        site_api_url = self.metrics_url_template_wordpress_site % blog_host
        site_response = self.http_get(site_api_url)
        if "name" not in site_response.text:
            return found

        # it is a wordpress blog, so now get its wordpress post ID;
        # the slug is the last path segment once one trailing "/" is dropped
        trimmed_url = post_url[:-1] if post_url.endswith("/") else post_url
        slug = trimmed_url.rsplit("/", 1)[1]

        post_api_url = self.metrics_url_template_wordpress_post % (
            blog_host, slug)
        post_response = self.http_get(post_api_url)
        if "ID" in post_response.text:
            wordpress_post_id = json.loads(post_response.text)["ID"]
            enriched_nid = json.loads(nid)
            enriched_nid.update({"wordpress_post_id": wordpress_post_id})
            found.append(("wordpress_blog_post", json.dumps(enriched_nid)))

        return found
コード例 #26
0
    def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
        """Return new aliases for the first "blog_post" alias, if any.

        Adds ("url", post_url) when that tuple is not already in ``aliases``.
        If there is no "wordpress_blog_post" alias yet, the WordPress site
        endpoint is checked for "name" and the post endpoint for "ID"; on
        success a "wordpress_blog_post" alias is added whose value is the nid
        JSON extended with "wordpress_post_id".
        ``provider_url_template`` and ``cache_enabled`` are not used.
        """
        alias_lookup = provider.alias_dict_from_tuples(aliases)
        collected = []

        if "blog_post" in alias_lookup:
            nid = alias_lookup["blog_post"][0]
            post_url = self.post_url_from_nid(nid)

            # add url as alias if not already there
            if ("url", post_url) not in aliases:
                collected.append(("url", post_url))

            # now add the wordpress alias info if it isn't already there
            if "wordpress_blog_post" not in alias_lookup:
                blog_url = provider.strip_leading_http(self.blog_url_from_nid(nid))
                site_api_url = self.metrics_url_template_wordpress_site % blog_url

                is_wordpress_blog = "name" in self.http_get(site_api_url).text
                if is_wordpress_blog:
                    # drop a single trailing slash so the slug is the last segment
                    if post_url.endswith("/"):
                        post_url = post_url[:-1]
                    slug = post_url.rsplit("/", 1)[1]

                    post_api_url = self.metrics_url_template_wordpress_post % (blog_url, slug)
                    post_body = self.http_get(post_api_url).text
                    if "ID" in post_body:
                        post_id = json.loads(post_body)["ID"]
                        nid_fields = json.loads(nid)
                        nid_fields.update({"wordpress_post_id": post_id})
                        collected.append(("wordpress_blog_post", json.dumps(nid_fields)))

        return collected
コード例 #27
0
    def biblio(self, aliases, provider_url_template=None, cache_enabled=True):
        """Return biblio info for the first "url" alias, skipping some hosts.

        An empty dict is returned when there is no "url" alias, when the URL
        is falsy, or when it contains one of the excluded fragments (scopus
        inward links, pubmed, doi.org, mendeley). Otherwise the URL is
        resolved via ``self.get_biblio_for_id``, with the template defaulting
        to ``self.biblio_url_template``.
        """
        by_namespace = provider.alias_dict_from_tuples(aliases)
        if "url" not in by_namespace:
            return {}

        url = by_namespace["url"][0]
        if not url:
            return {}

        excluded_fragments = (
            "scopus.com/inward",
            "ncbi.nlm.nih.gov/pubmed",
            "doi.org/",
            "mendeley.com/",
        )
        for fragment in excluded_fragments:
            if fragment in url:
                return {}

        template = provider_url_template or self.biblio_url_template
        return self.get_biblio_for_id(url, template, cache_enabled)
コード例 #28
0
    def metrics(self,
            aliases,
            provider_url_template=None,
            cache_enabled=True,
            analytics_credentials=None):
        """Gather WordPress metrics for the blog- and post-level aliases.

        Each alias kind that is present triggers one or more calls to
        ``self.get_metrics_for_id`` with a purpose-built ``url_override``:

        * "blog": subscribers, plus blog views when credentials are given
        * "wordpress_blog_id": blog comments
        * "wordpress_blog_post": post comments, plus post views when
          credentials are given

        Args:
            aliases: alias tuples.
            provider_url_template: accepted but not used here.
            cache_enabled: forwarded to ``self.get_metrics_for_id``.
            analytics_credentials: optional mapping; when truthy its
                "wordpress_api_key" entry is used for the views lookups.

        Returns:
            Dict mapping each metric name to a (value, drilldown_url) tuple,
            where the URL comes from ``self.provenance_url``.
        """
        metrics = {}

        aliases_dict = provider.alias_dict_from_tuples(aliases)

        # Bound up front so that the "wordpress_blog_id" branch never reads
        # an unassigned local when there is no "blog" alias.
        blog_url = None

        if "blog" in aliases_dict:
            blog_url = aliases_dict["blog"][0]

            url_override = self.metrics_url_template_public % (provider.strip_leading_http(blog_url).lower())

            new_metrics = self.get_metrics_for_id(blog_url,
                                cache_enabled=cache_enabled,
                                extract_metrics_method=self._extract_metrics_subscribers,
                                url_override=url_override)
            metrics.update(new_metrics)

        if "wordpress_blog_id" in aliases_dict:
            wordpress_blog_id = aliases_dict["wordpress_blog_id"][0]

            url_override = self.metrics_url_template_comments % wordpress_blog_id

            # Without a "blog" alias there is no blog URL to pass along, so
            # fall back to the blog id.
            # NOTE(review): assumes the first argument only identifies the
            # item because url_override supplies the URL -- confirm against
            # get_metrics_for_id.
            metrics_id = blog_url if blog_url is not None else wordpress_blog_id

            new_metrics = self.get_metrics_for_id(metrics_id,
                                cache_enabled=cache_enabled,
                                extract_metrics_method=self._extract_metrics_blog_comments,
                                url_override=url_override)
            metrics.update(new_metrics)

        if ("blog" in aliases_dict) and analytics_credentials:
            blog_url = aliases_dict["blog"][0]
            api_key = analytics_credentials["wordpress_api_key"]

            url_override = self.metrics_url_template_wordpress_blog_views % (api_key, provider.strip_leading_http(blog_url).lower())

            new_metrics = self.get_metrics_for_id(blog_url,
                                cache_enabled=cache_enabled,
                                extract_metrics_method=self._extract_metrics_blog_views,
                                url_override=url_override)

            metrics.update(new_metrics)

        if ("wordpress_blog_post" in aliases_dict):
            nid = aliases_dict["wordpress_blog_post"][0]
            post_id = self.wordpress_post_id_from_nid(nid)
            blog_url = self.blog_url_from_nid(nid)

            url_override = self.metrics_url_template_wordpress_post_comments % (provider.strip_leading_http(blog_url).lower(), post_id)
            new_metrics = self.get_metrics_for_id(post_id,
                                cache_enabled=cache_enabled,
                                extract_metrics_method=self._extract_metrics_post_comments,
                                url_override=url_override)
            metrics.update(new_metrics)

            if analytics_credentials:
                api_key = analytics_credentials["wordpress_api_key"]

                url_override = self.metrics_url_template_wordpress_post_views % (api_key, provider.strip_leading_http(blog_url).lower(), post_id)
                new_metrics = self.get_metrics_for_id(blog_url,
                                    cache_enabled=cache_enabled,
                                    extract_metrics_method=self._extract_metrics_blog_views,
                                    url_override=url_override)
                metrics.update(new_metrics)

        # pair every metric with its provenance (drill-down) URL
        metrics_and_drilldown = {}
        for metric_name in metrics:
            drilldown_url = self.provenance_url(metric_name, aliases)
            metrics_and_drilldown[metric_name] = (metrics[metric_name], drilldown_url)

        return metrics_and_drilldown
コード例 #29
0
 def provenance_url(self, metric_name, aliases):
     """Return the first "url" alias, or the best id when there is no URL.

     The fallback value comes from ``self.get_best_id``. ``metric_name`` is
     not used.
     """
     by_namespace = provider.alias_dict_from_tuples(aliases)
     if "url" not in by_namespace:
         return self.get_best_id(aliases)
     return by_namespace["url"][0]