def _extract_metrics(self, page, status_code=200, id=None):
    if status_code != 200:
        if status_code == 404:
            return {}
        else:
            raise self._get_error(status_code)
    if "kind" not in page:
        raise ProviderContentMalformedError
    json_response = provider._load_json(page)
    this_video_json = json_response["items"][0]
    dict_of_keylists = {
        'youtube:views': ['statistics', 'viewCount'],
        'youtube:likes': ['statistics', 'likeCount'],
        'youtube:dislikes': ['statistics', 'dislikeCount'],
        'youtube:favorites': ['statistics', 'favoriteCount'],
        'youtube:comments': ['statistics', 'commentCount'],
    }
    metrics_dict = provider._extract_from_data_dict(this_video_json, dict_of_keylists)
    metrics_dict = provider._metrics_dict_as_ints(metrics_dict)
    return metrics_dict

def biblio(self, aliases, provider_url_template=None, cache_enabled=True):
    tweet_url = self.get_best_id(aliases)
    biblio_embed_url = self.biblio_template_url % (self.tweet_id(tweet_url))
    response = self.http_get(biblio_embed_url)
    data = provider._load_json(response.text)

    biblio_dict = {}
    biblio_dict["repository"] = "Twitter"
    biblio_dict["url"] = tweet_url
    if not data:
        return biblio_dict

    biblio_dict["title"] = u"@{screen_name}".format(screen_name=self.screen_name(tweet_url))
    biblio_dict["authors"] = data["author_name"]
    biblio_dict["embed"] = data["html"]
    biblio_dict["embed_url"] = biblio_embed_url
    biblio_dict["account"] = u"@{screen_name}".format(screen_name=self.screen_name(tweet_url))
    try:
        tweet_match = re.findall(u'<p>(.*?)</p>.*statuses/\d+">(.*?)</a></blockquote>', biblio_dict["embed"])
        biblio_dict["tweet_text"] = tweet_match[0][0]
        biblio_dict["date"] = datetime.datetime.strptime(tweet_match[0][1], "%B %d, %Y").isoformat()
        biblio_dict["year"] = biblio_dict["date"][0:4]
    except (AttributeError, IndexError):
        # re.findall returns an empty list on no match, so catch IndexError too
        logger.debug("couldn't parse tweet embed {embed}".format(embed=biblio_dict["embed"]))
    return biblio_dict

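# Hedged sketch of the regex step above: given a Twitter oEmbed "html"
# payload (the sample string below is illustrative, not a real API response),
# the pattern pulls out the tweet text and the human-readable date.
import re
import datetime

sample_embed = (u'<blockquote class="twitter-tweet"><p>Hello world</p>'
                u'&mdash; Someone (@someone) '
                u'<a href="https://twitter.com/someone/statuses/12345">'
                u'November 8, 2013</a></blockquote>')
tweet_match = re.findall(u'<p>(.*?)</p>.*statuses/\d+">(.*?)</a></blockquote>', sample_embed)
tweet_text = tweet_match[0][0]  # u'Hello world'
date = datetime.datetime.strptime(tweet_match[0][1], "%B %d, %Y").isoformat()  # '2013-11-08T00:00:00'
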
def _extract_biblio(self, page, id=None): if not "snippet" in page: raise ProviderContentMalformedError json_response = provider._load_json(page) this_video_json = json_response["items"][0] dict_of_keylists = { 'title': ['snippet', 'title'], 'channel_title': ['snippet', 'channelTitle'], 'published_date': ['snippet', 'publishedAt'] } biblio_dict = provider._extract_from_data_dict(this_video_json, dict_of_keylists) try: biblio_dict["year"] = biblio_dict["published_date"][0:4] except KeyError: pass biblio_dict["url"] = id biblio_dict["repository"] = "YouTube" return biblio_dict
def _get_uuid_from_title(self, aliases_dict, page):
    data = provider._load_json(page)
    doi = aliases_dict["doi"][0]
    biblio = aliases_dict["biblio"][0]
    for mendeley_record in data["documents"]:
        if mendeley_record["doi"] == doi:
            uuid = mendeley_record["uuid"]
            return uuid
        else:
            # more complicated. Try to match title and year.
            try:
                mendeley_title = self.remove_punctuation(mendeley_record["title"]).lower()
                aliases_title = self.remove_punctuation(biblio["title"]).lower()
            except (TypeError, KeyError, AttributeError):
                continue  # nothing to see here. Skip to next record

            if mendeley_title == aliases_title:
                if mendeley_record["year"] == biblio["year"]:
                    # check if author name in common. if not, yell, but continue anyway
                    first_mendeley_surname = mendeley_record["authors"][0]["surname"]
                    has_matching_authors = first_mendeley_surname.lower() in biblio["authors"].lower()
                    if not has_matching_authors:
                        logger.warning("Mendeley: NO MATCHING AUTHORS between %s and %s" % (
                            first_mendeley_surname, biblio["authors"]))
                    # but return it anyway
                    uuid = mendeley_record["uuid"]
                    return uuid
                else:
                    logger.debug("Mendeley: years don't match %s and %s" % (
                        str(mendeley_record["year"]), str(biblio["year"])))
            else:
                logger.debug("Mendeley: titles don't match %s and %s" % (
                    self.remove_punctuation(mendeley_record["title"]),
                    self.remove_punctuation(biblio["title"])))
    # no joy
    return None

def _extract_provenance_url(self, page, status_code=200, id=None):
    data = provider._load_json(page)
    try:
        provenance_url = data['mendeley_url']
    except KeyError:
        provenance_url = ""
    return provenance_url

def _extract_metrics(self, page, status_code=200, id=None):
    if status_code != 200:
        if status_code == 404:
            return {}
        else:
            raise self._get_error(status_code)
    if "snippet" not in page:
        raise ProviderContentMalformedError
    json_response = provider._load_json(page)
    this_video_json = json_response["items"][0]
    dict_of_keylists = {
        'youtube:views': ['statistics', 'viewCount'],
        'youtube:likes': ['statistics', 'likeCount'],
        'youtube:dislikes': ['statistics', 'dislikeCount'],
        'youtube:favorites': ['statistics', 'favoriteCount'],
        'youtube:comments': ['statistics', 'commentCount'],
    }
    metrics_dict = provider._extract_from_data_dict(this_video_json, dict_of_keylists)
    metrics_dict = provider._metrics_dict_as_ints(metrics_dict)
    return metrics_dict

def top_tweeted_urls(self, query, query_type="site", number_to_return=10, pages=5):
    if query_type == "site":
        query = re.sub("http(s?)://", "", query.lower())
    elif query_type in ["twitter", "tweets_about"]:
        query = query.replace("@", "")
    template_url = self.top_tweeted_url_templates[query_type]
    urls = [template_url % (query, i) for i in range(1, pages + 1)]
    responses = self.http_get_multiple(urls)

    tweeted_entries = []
    for url in responses:
        tweeted_entries += provider._load_json(responses[url].text)["response"]["list"]
    sorted_list = sorted(tweeted_entries, key=itemgetter('hits'), reverse=True)

    top_tweeted_urls = []  # needs to be ordered
    for entry in sorted_list:
        url = self.get_url_from_entry(query, entry, query_type)
        if url and (url not in top_tweeted_urls):
            top_tweeted_urls.append(url)
    return top_tweeted_urls[0:number_to_return]

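# Hedged sketch of the rank-and-dedupe step above, with made-up entries
# standing in for Topsy API responses:
from operator import itemgetter

entries = [{"url": "http://a", "hits": 5}, {"url": "http://b", "hits": 9},
           {"url": "http://a", "hits": 2}]
ranked = sorted(entries, key=itemgetter("hits"), reverse=True)
deduped = []
for entry in ranked:
    if entry["url"] not in deduped:
        deduped.append(entry["url"])
# deduped == ['http://b', 'http://a']
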
def _extract_members(self, page, query_string=None):
    if 'orcid-profile' not in page:
        raise ProviderContentMalformedError("Content does not contain expected text")
    data = provider._load_json(page)
    dois = []
    try:
        orcid_works = data["orcid-profile"]["orcid-activities"]["orcid-works"]["orcid-work"]
    except KeyError:
        return []
    for work in orcid_works:
        try:
            ids = work["work-external-identifiers"]["work-external-identifier"]
            for myid in ids:
                if myid['work-external-identifier-type'] == "DOI":
                    doi = myid['work-external-identifier-id']['value']
                    dois += [doi]
        except KeyError:
            logger.info("no external identifiers for %s, so skipping" % (str(work)))
    if not dois:
        raise ProviderItemNotFoundError
    members = [("doi", doi) for doi in list(set(dois))]
    return members

def _extract_members(self, page, query_string=None):
    if 'orcid-profile' not in page:
        raise ProviderContentMalformedError("Content does not contain expected text")
    data = provider._load_json(page)
    members = []
    try:
        orcid_works = data["orcid-profile"]["orcid-activities"]["orcid-works"]["orcid-work"]
    except KeyError:
        return []
    for work in orcid_works:
        new_member = None
        try:
            ids = work["work-external-identifiers"]["work-external-identifier"]
            for myid in ids:
                if myid['work-external-identifier-type'] == "DOI":
                    new_member = ("doi", myid['work-external-identifier-id']['value'])
                if myid['work-external-identifier-type'] == "PMID":
                    new_member = ("pmid", myid['work-external-identifier-id']['value'])
        except KeyError:
            logger.info(u"no external identifiers, try saving whole citation")
            biblio = self._parse_orcid_work(work)
            new_member = ("biblio", biblio)
        if new_member:
            members += [new_member]
    if not members:
        raise ProviderItemNotFoundError
    return members

def _extract_members(self, page, query_string=None):
    data = provider._load_json(page)
    dois = [item["DOI"].replace("http://dx.doi.org/", "") for item in data["items"]]
    doi_aliases = [("doi", doi) for doi in dois]
    return doi_aliases

def _extract_item(self, page, id):
    data = provider._load_json(page)
    if not data:
        return {}
    item = data["items"][0]
    if item["doi"] == self._get_templated_url(self.provenance_url_template, id, "provenance"):
        return item
    else:
        return {}

def _extract_relevant_record(self, fullpage, id):
    data = provider._load_json(fullpage)
    response = None
    try:
        response = data["search-results"]["entry"][0]
    except (KeyError, ValueError):
        # not in Scopus database
        return None
    return response

def _extract_figshare_record(self, page, id):
    data = provider._load_json(page)
    if not data:
        return {}
    item = data["items"][0]
    if str(item["article_id"]) in id:
        return item
    else:
        return {}

def _extract_members(self, page, account_name):
    members = []
    # add review urls from account
    data = provider._load_json(page)
    review_urls = [review["_id"]["url"] for review in data["reviews"]
                   if review["title"] != "An undisclosed article"]
    members += [("url", url) for url in review_urls]
    return members

def _get_json(self, fullpage):
    try:
        # extract json from inside the first and last parens
        # from http://codereview.stackexchange.com/questions/2561/converting-jsonp-to-json-is-this-regex-correct
        page = fullpage[fullpage.index("(") + 1:fullpage.rindex(")")]
    except (AttributeError, ValueError):
        raise ProviderContentMalformedError()
    data = provider._load_json(page)
    return data

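# Hedged standalone sketch of the JSONP-stripping step above, using the
# stdlib json module in place of the internal provider._load_json helper:
import json

def strip_jsonp(fullpage):
    # keep everything between the first "(" and the last ")"
    return json.loads(fullpage[fullpage.index("(") + 1:fullpage.rindex(")")])

# e.g. strip_jsonp('callback({"hits": 3})') == {"hits": 3}
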
def _extract_members(self, page, account_name):
    members = []
    # add repositories from account
    data = provider._load_json(page)
    repos = [repo["name"] for repo in data]
    members += [("url", self.repo_url_template % (account_name, repo)) for repo in list(set(repos))]
    # also add account product!
    members += [("url", self.account_url_template % (account_name))]
    return members

def _extract_metrics(self, page, status_code=200, id=None):
    metrics_dict = {}
    if status_code != 200:
        if status_code == 404:
            return {}
        else:
            raise self._get_error(status_code)
    data = provider._load_json(page)
    number_of_bookmarks = len(data)
    if number_of_bookmarks:
        metrics_dict = {'delicious:bookmarks': number_of_bookmarks}
    return metrics_dict

def member_items(self, account_name, provider_url_template=None, cache_enabled=True):
    if not self.provides_members:
        raise NotImplementedError()
    self.logger.debug(u"%s getting member_items for %s" % (self.provider_name, account_name))
    if not provider_url_template:
        provider_url_template = self.member_items_url_template

    members = []
    figshare_userid = self.get_figshare_userid_from_author_url(account_name)
    if not figshare_userid:
        raise ProviderContentMalformedError("no figshare user id found")

    next_page = 1
    while next_page:
        url = provider_url_template % (figshare_userid, next_page)

        # try to get a response from the data provider
        response = self.http_get(url, cache_enabled=cache_enabled)
        if response.status_code != 200:
            self.logger.info(u"%s status_code=%i" % (self.provider_name, response.status_code))
            if response.status_code == 404:
                raise ProviderItemNotFoundError
            elif response.status_code == 303:  # redirect
                pass
            else:
                self._get_error(response.status_code, response)

        # extract the member ids
        number_of_items_per_page = 10  # figshare default
        try:
            page = response.text
            data = provider._load_json(page)
            if data["items_found"] > next_page * number_of_items_per_page:
                next_page += 1
            else:
                next_page = None
            members += self._extract_members(page, account_name)
        except (AttributeError, TypeError):
            next_page = None
    return members

def member_items(self, account_name, provider_url_template=None, cache_enabled=True):
    if not self.provides_members:
        raise NotImplementedError()
    self.logger.debug(u"%s getting member_items for %s" % (self.provider_name, account_name))
    if not provider_url_template:
        provider_url_template = self.member_items_url_template

    figshare_userid = self.get_figshare_userid_from_author_url(account_name)

    next_page = 1
    members = []
    while next_page:
        url = provider_url_template % (figshare_userid, next_page)

        # try to get a response from the data provider
        response = self.http_get(url, cache_enabled=cache_enabled)
        if response.status_code != 200:
            self.logger.info(u"%s status_code=%i" % (self.provider_name, response.status_code))
            if response.status_code == 404:
                raise ProviderItemNotFoundError
            elif response.status_code == 303:  # redirect
                pass
            else:
                self._get_error(response.status_code, response)

        # extract the member ids
        number_of_items_per_page = 10  # figshare default
        try:
            page = response.text
            data = provider._load_json(page)
            if data["items_found"] > next_page * number_of_items_per_page:
                next_page += 1
            else:
                next_page = None
            members += self._extract_members(page, account_name)
        except (AttributeError, TypeError):
            next_page = None
    return members

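# Hedged sketch of the pagination arithmetic used above: with figshare's
# assumed default of 10 items per page, fetching continues while more items
# remain than have been paged through. (next_page_after is a hypothetical
# helper for illustration, not part of this codebase.)
def next_page_after(items_found, current_page, per_page=10):
    # another page exists only if we've seen fewer items than were found
    if items_found > current_page * per_page:
        return current_page + 1
    return None

# e.g. next_page_after(25, 1) == 2; next_page_after(25, 3) is None
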
def _extract_members(self, page, query_string=None):
    if 'orcid-profile' not in page:
        raise ProviderContentMalformedError("Content does not contain expected text")
    data = provider._load_json(page)
    members = []
    try:
        orcid_works = data["orcid-profile"]["orcid-activities"]["orcid-works"]["orcid-work"]
    except KeyError:
        return []
    for work in orcid_works:
        new_member = None
        try:
            ids = work["work-external-identifiers"]["work-external-identifier"]
            for myid in ids:
                if myid['work-external-identifier-type'] == "DOI":
                    doi = myid['work-external-identifier-id']['value']
                    doi = crossref.clean_doi(doi)
                    if doi:
                        new_member = ("doi", doi)
                if myid['work-external-identifier-type'] == "PMID":
                    new_member = ("pmid", myid['work-external-identifier-id']['value'])
        except KeyError:
            pass
        if not new_member:
            logger.info(u"no external identifiers, try saving whole citation for {orcid}".format(
                orcid=query_string))
            biblio = self._parse_orcid_work(work)
            new_member = ("biblio", biblio)
        if new_member:
            members += [new_member]
    if not members:
        raise ProviderItemNotFoundError
    logger.info(u"returning {n} members for {orcid}".format(n=len(members), orcid=query_string))
    return members

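# Hedged sketch of the identifier walk above, against a hand-built fragment
# in the shape of an ORCID work record (illustrative, not real API output):
work = {"work-external-identifiers": {"work-external-identifier": [
    {"work-external-identifier-type": "DOI",
     "work-external-identifier-id": {"value": "10.1234/example"}},
    {"work-external-identifier-type": "PMID",
     "work-external-identifier-id": {"value": "123456"}},
]}}
new_member = None
for myid in work["work-external-identifiers"]["work-external-identifier"]:
    if myid["work-external-identifier-type"] == "DOI":
        new_member = ("doi", myid["work-external-identifier-id"]["value"])
# new_member == ("doi", "10.1234/example")
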
def _extract_metrics(self, page, status_code=200, id=None):
    if status_code != 200:
        if status_code == 404:
            return {}
        else:
            raise self._get_error(status_code)
    data = provider._load_json(page)
    metrics_dict = {}
    for section in data["article"]["source"]:
        source = provider._lookup_json(section, ["source"])
        if source == "Counter":
            #drilldown_url = provider._lookup_json(section["citations"][0], ["citation", "uri"])
            html_sum = self._aggregate_monthly_stats("html_views", section)
            metrics_dict["html_views"] = html_sum
            pdf_sum = self._aggregate_monthly_stats("pdf_views", section)
            metrics_dict["pdf_views"] = pdf_sum
        elif source == "PubMed Central Usage Stats":
            #drilldown_url = provider._lookup_json(section["citations"][0], ["citation", "uri"])
            try:
                first_month_stats = section["events"][0]
            except KeyError:
                logger.debug("%20s no first_month_stats for %s" % (self.provider_name, id))
                first_month_stats = []
            for metric_name in first_month_stats:
                normalized_metric_name = "pmc_" + self._normalize_source(metric_name)
                if normalized_metric_name in self.static_meta_dict.keys():
                    total = self._aggregate_monthly_stats(metric_name, section)
                    if total:
                        metrics_dict[normalized_metric_name] = total
        elif self._normalize_source(source) in self.static_meta_dict.keys():
            total = provider._lookup_json(section, ["count"])
            if total:
                #drilldown_url = provider._lookup_json(section, ["public_url"])
                #if not drilldown_url:
                #    drilldown_url = ""
                metrics_dict[source] = total
    rekeyed_dict = dict(("plosalm:" + self._normalize_source(k), v)
                        for (k, v) in metrics_dict.iteritems())
    return rekeyed_dict

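# _aggregate_monthly_stats is internal; assuming each section carries a list
# of monthly event dicts, a stand-in for the summation might look like this
# (an assumption about the helper's behavior, not its actual code):
def aggregate_monthly_stats(metric_name, section):
    # sum one named metric across all monthly event dicts, skipping gaps
    return sum(month.get(metric_name, 0) for month in section.get("events", []))

# e.g. aggregate_monthly_stats("html_views",
#          {"events": [{"html_views": 10}, {"html_views": 5}, {}]}) == 15
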
def biblio(self, aliases, provider_url_template=None, cache_enabled=True):
    nid = self.get_best_id(aliases)
    url = self.biblio_template_url % (self.tweet_id(nid))
    response = self.http_get(url)
    data = provider._load_json(response.text)

    biblio_dict = {}
    biblio_dict["repository"] = "Twitter"
    if not data:
        return biblio_dict

    biblio_dict["title"] = u"@{screen_name}".format(screen_name=self.screen_name(nid))
    biblio_dict["authors"] = data["author_name"]
    biblio_dict["embed"] = data["html"]
    return biblio_dict

def _extract_metrics(self, page, status_code=200, id=None):
    if status_code != 200:
        if status_code == 404:
            return {}
        else:
            raise self._get_error(status_code)
    if "sources" not in page:
        raise ProviderContentMalformedError
    json_response = provider._load_json(page)
    this_article = json_response[0]["sources"][0]["metrics"]
    dict_of_keylists = {
        'plosalm:html_views': ['html'],
        'plosalm:pdf_views': ['pdf']
    }
    metrics_dict = provider._extract_from_data_dict(this_article, dict_of_keylists)
    return metrics_dict

def _extract_metrics(self, page, status_code=200, id=None):
    if status_code != 200:
        if status_code == 404:
            return {}
        else:
            raise self._get_error(status_code)
    if "user_id" not in page:
        raise ProviderContentMalformedError
    json_response = provider._load_json(page)
    this_video_json = json_response[0]
    dict_of_keylists = {
        "vimeo:plays": ["stats_number_of_plays"],
        "vimeo:likes": ["stats_number_of_likes"],
        "vimeo:comments": ["stats_number_of_comments"],
    }
    metrics_dict = provider._extract_from_data_dict(this_video_json, dict_of_keylists)
    return metrics_dict

def _extract_biblio(self, page, id=None):
    json_response = provider._load_json(page)
    this_video_json = json_response[0]
    dict_of_keylists = {
        'title': ['title'],
        'authors': ['user_name'],
        'published_date': ['upload_date'],
        'url': ['url']
    }
    biblio_dict = provider._extract_from_data_dict(this_video_json, dict_of_keylists)
    try:
        biblio_dict["year"] = biblio_dict["published_date"][0:4]
    except KeyError:
        pass
    biblio_dict["repository"] = "Vimeo"
    return biblio_dict

def _extract_metrics(self, page, status_code=200, id=None):
    if status_code != 200:
        if status_code == 404:
            return {}
        else:
            raise self._get_error(status_code)
    metrics_dict = {}
    if "hits" in page:
        data = provider._load_json(page)
        hits = [post["hits"] for post in data["response"]["list"]]
        if hits:
            sum_of_hits = sum(hits)
            metrics_dict["topsy:tweets"] = sum_of_hits
    else:
        dict_of_keylists = {
            'topsy:tweets': ['response', 'all'],
            'topsy:influential_tweets': ['response', 'influential']
        }
        metrics_dict = provider._extract_from_json(page, dict_of_keylists)
    return metrics_dict

def _get_uuid_from_title(self, aliases_dict, page):
    data = provider._load_json(page)
    try:
        doi = aliases_dict["doi"][0]
    except KeyError:
        doi = None
    try:
        biblio = aliases_dict["biblio"][0]
    except KeyError:
        biblio = None

    for mendeley_record in data["documents"]:
        if doi and (mendeley_record["doi"] == doi):
            uuid = mendeley_record["uuid"]
            return {"uuid": uuid}
        else:
            # more complicated. Try to match title and year.
            try:
                mendeley_title = self.remove_punctuation(mendeley_record["title"]).lower()
                aliases_title = self.remove_punctuation(biblio["title"]).lower()
            except (TypeError, KeyError, AttributeError):
                logger.warning(u"Mendeley: NO TITLES for aliases, skipping")
                continue  # nothing to see here. Skip to next record

            try:
                if len(str(biblio["year"])) != 4:
                    logger.warning(u"Mendeley: NO YEAR for aliases, skipping")
                    continue
            except (TypeError, KeyError, AttributeError):
                logger.warning(u"Mendeley: NO YEAR for aliases, skipping")
                continue  # nothing to see here. Skip to next record

            if mendeley_title == aliases_title:
                if str(mendeley_record["year"]) == str(biblio["year"]):
                    # check if author name in common. if not, yell, but continue anyway
                    first_mendeley_surname = mendeley_record["authors"][0]["surname"]
                    has_matching_authors = first_mendeley_surname.lower() in biblio["authors"].lower()
                    if not has_matching_authors:
                        logger.warning(u"Mendeley: NO MATCHING AUTHORS between %s and %s" % (
                            first_mendeley_surname, biblio["authors"]))
                    # but return it anyway
                    response = {}
                    for id_type in ["uuid", "mendeley_url", "doi", "pmid"]:
                        try:
                            if mendeley_record[id_type]:
                                if id_type == "mendeley_url":
                                    response["url"] = mendeley_record[id_type]
                                else:
                                    response[id_type] = mendeley_record[id_type]
                        except KeyError:
                            pass
                    return response
                else:
                    logger.debug(u"Mendeley: years don't match %s and %s" % (
                        str(mendeley_record["year"]), str(biblio["year"])))
            else:
                logger.debug(u"Mendeley: titles don't match /biblio_print %s and %s" % (
                    self.remove_punctuation(mendeley_record["title"]),
                    self.remove_punctuation(biblio["title"])))
    # no joy
    return None

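# Hedged sketch of the title-matching idea above; remove_punctuation is an
# internal helper, so this stand-in uses string.punctuation (an assumption
# about its behavior, not the project's actual code):
import string

def normalize_title(title):
    # drop punctuation and case so near-identical titles compare equal
    return "".join(ch for ch in title if ch not in string.punctuation).lower()

# e.g. normalize_title("Altmetrics: A Manifesto!") == normalize_title("altmetrics a manifesto")
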
def _extract_members(self, page, query_string):
    data = provider._load_json(page)
    hits = [hit["name"] for hit in data]
    members = [("github", (query_string, hit)) for hit in list(set(hits))]
    return members

def _extract_members(self, page, query_string):
    data = provider._load_json(page)
    hits = [hit["name"] for hit in data]
    members = [("url", self.repo_url_template % (query_string, hit)) for hit in list(set(hits))]
    return members