def finddoi(self, num, prefix='', issn=''):
    title = self.gettitle(num)
    doi = DOI(self.getdoi(num))
    if not prefix:
        prefix = doi.split('/', 1)[0] if doi else ""
    volume = self.getvolume(num)
    journal = self.getjournalfull(num)
    year = self.getyear(num)
    pages = self.getpages(num)
    self.cr = CRrecord()
    try:
        # The original DOI may already be correct; look it up in Crossref first.
        if doi and self.cr.getfromdoi(doi, fullparse=False) and self.cr.doi:
            # Accept it if both the DOI strings and the titles are similar enough.
            if (strdiff(doi, self.cr.doi) >= 0.85 and
                    strsimilarity(normalizeString(title),
                                  normalizeString(self.cr.title)) > 0.75):
                return doi
            # Otherwise accept it if the first page matches together with the
            # volume, or with the year.
            if volume and pages:
                ops = pages.split('-')
                crps = self.cr.pages.split('-')
                if (len(ops) > 0 and len(crps) > 0 and ops[0] == crps[0]
                        and volume == self.cr.volume):
                    return doi
            if year and pages:
                ops = pages.split('-')
                crps = self.cr.pages.split('-')
                if (len(ops) > 0 and len(crps) > 0 and ops[0] == crps[0]
                        and year == self.cr.year):
                    return doi
            print "Original DOI:", doi, "may be correct but the record looks odd; trying a title search."
        # Fall back to a keyword search built from the bibliographic fields.
        keyword = title + " " + journal + " " + year + " " + pages + " " + volume
        if self.cr.getfromtitledoi(keyword, doi, year=year, limit=10,
                                   fullparse=False, prefix=prefix):
            if doi:
                if (prefix == self.cr.doi.split('/')[0]
                        and strdiff(doi, self.cr.doi) >= 0.85):
                    return self.cr.doi
                else:
                    print "Error for original DOI: " + doi + "; found: " + self.cr.doi
                    return ""
            return self.cr.doi
        if doi:
            if strdiff(doi, self.cr.doi) >= 0.85:
                return self.cr.doi
            else:
                print "Error2 for original DOI: " + doi + "; found: " + self.cr.doi
                return ""
        else:
            return ""
    except Exception as e:
        # Network or parsing hiccup: retry the same record.
        print "Error while finding DOI:", e, "\nRetrying..."
        return self.finddoi(num, prefix=prefix, issn=issn)
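# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original class): a standalone version
# of the acceptance heuristic finddoi() applies to a Crossref record, i.e.
# "first page matches and volume (or year) matches". The helper name
# first_page_and_field_match and its arguments are assumptions made for this
# example only.
def first_page_and_field_match(pages, cr_pages, field, cr_field):
    # Compare only the first page because end pages are often missing or
    # abbreviated differently (e.g. "117-125" vs "117-25").
    ops = pages.split('-')
    crps = cr_pages.split('-')
    return (len(ops) > 0 and len(crps) > 0
            and ops[0] == crps[0] and field == cr_field)

# Example: first_page_and_field_match("117-125", "117-25", "12", "12") -> True
# ---------------------------------------------------------------------------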
def getfromtitledoi(self, title, doi, year="", volume="", issue="", pages="",
                    limit=3, offset=0, cutoff=0.1, fullparse=True,
                    ignorecheminfo=True, prefix="", issn=""):
    """Get record information from the journal title and DOI; results are more
    reliable when year, volume, issue, and pages are also supplied."""
    # Stop once the maximum number of attempts has been exceeded.
    if offset > limit:
        return False
    # The ISSN check was cancelled because it proved unreliable; the ISSN URL
    # below is only used when an ISSN is passed in explicitly.
    # Build the search URL.
    if issn and len(issn.strip()) == 9:
        url = ("http://api.crossref.org/journals/" + issn + "/works?query="
               + normalizeString(title) + "&rows=1&offset=" + str(offset))
    elif prefix:
        url = ("http://api.crossref.org/prefixes/" + prefix + "/works?query="
               + normalizeString(title) + "&rows=1&offset=" + str(offset))
    else:
        url = ("http://api.crossref.org/works?query=" + normalizeString(title)
               + "&rows=1&offset=" + str(offset))
    if year:
        # The year is sometimes off by one, so widen the publication-date filter.
        url += ("&filter=from-pub-date:" + str(int(year) - 1)
                + "-06,until-pub-date:" + str(int(year) + 1) + "-06")
    # Query Crossref.
    r = requests.get(url, timeout=timeout_setting)
    if r.status_code == 200:
        try:
            items = r.json()["message"]["items"]
            for data in items:
                # The match score must exceed the cutoff.
                if float(data["score"]) > cutoff:
                    self.title = data.get("title", [""])[0]
                    self.year = str(data["issued"]["date-parts"][0][0])
                    self.volume = data.get("volume", "")
                    self.issue = data.get("issue", "")
                    self.pages = data.get("page", "")
                    self.doi = data.get("DOI", "")
                    if fullparse:
                        self.journals = data.get("container-title", [""])
                        self.issns = data.get("ISSN", [""])
                        self.journal = self.journals[0] if self.journals else ""
                        self.issn = self.issns[0] if self.issns else ""
                        self.authors = self._getauthor(data.get("author", []))
                        self.urls = [data.get("URL", "")]
                    if doi.strip():
                        if strdiff(doi.strip(), self.doi) >= 0.85:
                            return True
                        # Otherwise fall through to the field checks below.
                    # Check whether the record matches the supplied fields.
                    if year and year.strip() != self.year.strip():
                        # A one-year mismatch is tolerated if the volume agrees.
                        if not (abs(int(year) - int(self.year)) == 1
                                and volume.strip() == self.volume.strip()):
                            continue
                    if volume and volume.strip() != self.volume.strip():
                        continue
                    if pages and pages.strip().split("-")[0] != self.pages.strip().split("-")[0]:
                        continue
                    if (ignorecheminfo
                            and data.get("container-title", [""])[0].lower() == "cheminform"):
                        continue
                    return True
                else:
                    # Score too low; try the next candidate.
                    continue
            return False
        except Exception:
            print "Something went wrong while searching for " + title.encode("utf-8")
            return False
    else:
        print "Journal title can't be found: " + title.encode("utf-8")
        return False
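# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original class): a minimal standalone
# version of the Crossref "works" query that getfromtitledoi() builds. The
# function name crossref_title_query, the hard-coded timeout, and letting
# requests URL-encode the query (instead of calling normalizeString) are
# assumptions made for this example only.
import requests

def crossref_title_query(title, prefix="", offset=0, timeout=30):
    base = "http://api.crossref.org"
    url = base + ("/prefixes/" + prefix + "/works" if prefix else "/works")
    r = requests.get(url,
                     params={"query": title, "rows": 1, "offset": offset},
                     timeout=timeout)
    if r.status_code != 200:
        return None
    items = r.json()["message"]["items"]
    if not items:
        return None
    # Return the first candidate's DOI and its relevance score.
    return items[0].get("DOI", ""), float(items[0].get("score", 0.0))

# Example: crossref_title_query("some article title here", prefix="10.1021")
# ---------------------------------------------------------------------------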