def generateUniqueID(paper):
    """
    Returns a simple string id that is the mashup of the title and authors

    The id has the form ``<title>_<authors>`` and is lower-cased. The title
    part is ``normalizeTitle(paper.title)`` with all whitespace removed. The
    author part concatenates one piece per author: the whole value for
    string entries, or the first letter of the family name followed by the
    first letter of the given name for dict entries. Dict entries without a
    family name contribute nothing.

    :param paper: object exposing ``title``, ``authors`` and an
        ``extra_data`` mapping; a truthy ``extra_data['xref_author']`` is
        used in preference to parsing ``paper.authors``
    :return: the lower-cased identifier string
    """
    authors = paper.extra_data.get('xref_author')
    if not authors:
        try:
            authors = parseBibAuthors(paper.authors)
        except Exception:
            # Best effort: an unparsable author string must not prevent an
            # id from being generated, so fall back to a blank author.
            print("Failed to parse authors string", paper.authors)
            authors = [{'given': '', 'family': ''}]

    author_bits = []
    for author in authors:
        if isinstance(author, str):
            author_bits.append(author)
        elif author.get('family'):
            # `or '_'` instead of a .get() default: the default only covers
            # a missing key, while an empty given name would make [0] raise.
            given = author.get('given') or '_'
            author_bits.append(author['family'][0] + given[0])

    title_bit = re.sub(r"\s+", "", normalizeTitle(paper.title))
    return (title_bit + "_" + "".join(author_bits)).lower()
def exportBibToRIS(entries):
    """
    Serialise bibtex-style entries into RIS-style text.

    For every entry the output contains, in order: a ``TY`` line (looked up
    in the module-level ``type_mapping``, defaulting to ``JOUR``), one ``AU``
    line per parsed author, a ``PY`` line with the year, optional ``SP`` /
    ``EP`` lines taken from the first and last dash-separated piece of
    ``pages``, one line per populated field listed in the module-level
    ``mapping``, and a closing ``ER`` line.

    :param entries: iterable of dicts with at least ``author`` and
        ``ENTRYTYPE`` keys
    :return: all generated lines joined with newlines
    """
    output = []
    for record in entries:
        parsed_authors = parseBibAuthors(record['author'])

        kind = record['ENTRYTYPE'].lower()
        ris_type = type_mapping[kind] if kind in type_mapping else 'JOUR'
        output.append('TY - ' + ris_type)

        for person in parsed_authors:
            author_line = 'AU - %s, %s' % (person['family'], person['given'])
            middle = person.get('middle')
            if middle:
                author_line = author_line + ' ' + middle
            output.append(author_line)

        # Only the year is exported; month and day are not written.
        output.append('PY - %s' % (record.get('year', ''), ))

        page_range = record.get('pages')
        if page_range:
            pieces = page_range.split('-')
            output.extend(['SP - ' + pieces[0], 'EP - ' + pieces[-1]])

        for pair in mapping:
            value = record.get(pair[0])
            if value:
                output.append(str(pair[1]) + ' - ' + str(value))

        output.append('ER - ')

    return '\n'.join(output)
def _authorsForDistance(paper):
    """Return the author list to compare for *paper*, parsing only if needed."""
    cached = paper.extra_data.get('x_authors')
    if cached is not None:
        return cached
    return parseBibAuthors(paper.bib.get('author'))


def _familyKey(author):
    """Return the lower-cased family name of *author*, or '' if it has none."""
    return (author.get('family') or '').lower()


def computeAuthorDistance(paper1, paper2):
    """
    Returns a measure of how much the authors of papers overlap

    Authors are compared position by position over the shorter of the two
    author lists, case-insensitively on the family name. Position ``index``
    carries the weight ``(len(longer_list) - index) ** 2``, so earlier
    authors count more than later ones. Authors of the longer list beyond
    the length of the shorter one are not compared.

    ``extra_data['x_authors']`` is used when present; the bib author string
    is only parsed when it is absent.

    :param paper1: object exposing a ``bib`` mapping and an ``extra_data`` mapping
    :param paper2: object exposing a ``bib`` mapping and an ``extra_data`` mapping
    :return: 0 when every compared position matches, up to 1 when none do;
        1 as well when either paper has no ``author`` in its bib data or
        there is nothing to compare
    """
    if not paper1.bib.get('author') or not paper2.bib.get('author'):
        return 1

    authors1 = _authorsForDistance(paper1)
    authors2 = _authorsForDistance(paper2)

    if len(authors1) >= len(authors2):
        a_short, a_long = authors2, authors1
    else:
        a_short, a_long = authors1, authors2

    score = 0
    max_score = 0
    for index, author in enumerate(a_short):
        factor = (len(a_long) - index) ** 2
        if _familyKey(author) == _familyKey(a_long[index]):
            score += factor
        max_score += factor

    # An empty author list leaves nothing to compare: treat as fully distant.
    if max_score == 0:
        return 1

    return 1 - (score / max_score)
def generateFilename(paper):
    """
    Build a display filename of the form ``Family (year) - Title``.

    The family name of the first parsed author and the year are each
    included only when available; the `` - `` separator is added only when
    at least one of them was. The title part is ``paper.norm_title``
    title-cased. When no authors could be parsed, the raw author string and
    a blank line are printed.

    :param paper: object exposing ``authors``, ``year`` and ``norm_title``
    :return: the assembled filename string (no extension)
    """
    parsed = parseBibAuthors(paper.authors)
    if not parsed:
        # Surface the raw value that yielded no authors.
        print(paper.authors)
        print()

    prefix = ''
    if parsed and parsed[0].get('family'):
        prefix = parsed[0]['family'] + ' '

    if paper.year:
        prefix += '(%s)' % paper.year

    if prefix:
        prefix += ' - '

    return prefix + paper.norm_title.title()
def fixBibData(bib, index):
    """
    Add mandatory missing fields to bibtex data

    ``ENTRYTYPE`` defaults to ``"ARTICLE"``. A missing ``ID`` is built as
    ``<prefix><year><first title word, lower-cased>``, where the prefix is
    the family name of the first parsed author, or ``id`` plus a random
    number between 1000 and 9000 when no family name is available. A missing
    year becomes the literal ``YEAR`` and a missing or blank title
    contributes nothing.

    :param bib: dict of bibtex fields; modified in place
    :param index: not used by this function; kept so existing callers work
    :return: the same ``bib`` dict
    """
    if "ENTRYTYPE" not in bib:
        bib["ENTRYTYPE"] = "ARTICLE"

    if "ID" not in bib:
        # This function exists to repair incomplete records, so a missing
        # author or title must not raise here.
        raw_authors = bib.get("author")
        authors = parseBibAuthors(raw_authors) if raw_authors else []
        family = authors[0].get("family") if authors else None
        if family:
            prefix = family
        else:
            prefix = 'id' + str(random.randint(1000, 9000))

        title_words = (bib.get("title") or "").split()
        first_word = title_words[0].lower() if title_words else ""

        bib["ID"] = prefix + str(bib.get("year", "YEAR")) + first_word

    return bib