Beispiel #1
0
def generateUniqueID(paper):
    """
    Returns a simple string id that is the mashup of the title and authors

    The id has the form ``<title>_<authors>``, lower-cased, where ``<title>``
    is the normalised title with all whitespace removed and ``<authors>`` is
    built from the author list: string entries are appended verbatim, dict
    entries contribute the first letter of the family name followed by the
    first letter of the given name (``_`` when the given name is missing or
    empty). Dict entries without a family name are skipped.

    :param paper: object exposing ``extra_data`` (a dict), ``authors`` and
        ``title``
    :return: the generated id string
    """
    if paper.extra_data.get('xref_author'):
        authors = paper.extra_data['xref_author']
    else:
        try:
            authors = parseBibAuthors(paper.authors)
        except Exception:
            # Best effort: an unparseable author string must not prevent an
            # id from being generated, but Ctrl-C / SystemExit still
            # propagate (a bare ``except:`` would swallow those too).
            print("Failed to parse authors string", paper.authors)
            authors = [{'given': '', 'family': ''}]

    author_parts = []
    for author in authors:
        if isinstance(author, str):
            author_parts.append(author)
        elif author.get('family'):
            # ``get('given', '_')`` only covers a missing key; an empty or
            # None given name would make ``[0]`` raise, hence the ``or``.
            given = author.get('given') or '_'
            author_parts.append(author['family'][0] + given[0])

    title_bit = re.sub(r"\s+", "", normalizeTitle(paper.title))
    full_id = title_bit + "_" + ''.join(author_parts)

    return full_id.lower()
Beispiel #2
0
def exportBibToRIS(entries):
    """
    Serialise bibliography entries into one RIS-formatted string

    For every entry the following lines are emitted, in order: ``TY`` (the
    entry type looked up in the module-level ``type_mapping``, ``JOUR`` when
    unknown), one ``AU`` line per parsed author, ``PY`` with the year,
    ``SP``/``EP`` when a ``pages`` value is present, one line per pair of the
    module-level ``mapping`` whose source field is set on the entry, and a
    closing ``ER``.

    :param entries: iterable of dict-like entries; ``ENTRYTYPE`` is required,
        ``author``, ``year``, ``pages`` and the mapped fields are optional
    :return: the RIS lines joined with newlines (empty string for no entries)
    """
    lines = []
    for entry in entries:
        # Entries without an author field are exported without AU lines
        # instead of aborting the whole export with a KeyError.
        raw_authors = entry.get('author')
        authors = parseBibAuthors(raw_authors) if raw_authors else []

        entry_type = entry['ENTRYTYPE'].lower()
        if entry_type in type_mapping:
            ris_type = type_mapping[entry_type]
        else:
            ris_type = 'JOUR'

        lines.append('TY  - ' + ris_type)

        for author in authors:
            au_line = 'AU  - %s, %s' % (author['family'], author['given'])
            if author.get('middle'):
                au_line += ' ' + author['middle']
            lines.append(au_line)

        lines.append('PY  - %s' % (entry.get('year', ''), ))

        pages = entry.get('pages')
        if pages:
            # Covers both "12-34" and the BibTeX "12--34" form: the first and
            # last pieces are the start and end page. Strip stray blanks so
            # "12 - 34" does not leak whitespace into the output.
            bits = pages.split('-')

            lines.append('SP  - ' + bits[0].strip())
            lines.append('EP  - ' + bits[-1].strip())

        # Each item of ``mapping`` is indexed as (source field, RIS tag).
        for eq in mapping:
            if entry.get(eq[0]):
                lines.append(str(eq[1]) + '  - ' + str(entry[eq[0]]))

        lines.append('ER  - ')

    return '\n'.join(lines)
Beispiel #3
0
def computeAuthorDistance(paper1, paper2):
    """
    Returns a measure of how much the authors of papers overlap

    Authors are compared position by position over the length of the shorter
    list, by case-insensitive family name. Position ``i`` carries the weight
    ``(len(longer_list) - i) ** 2``, so agreement on the leading authors
    counts the most. The result is ``1 - matched_weight / total_weight``:
    0 when every compared position matches, 1 when none does.

    :param paper1: object exposing ``bib`` and ``extra_data`` dicts
    :param paper2: object exposing ``bib`` and ``extra_data`` dicts
    :return: 1 if either paper has no author information or there is nothing
        to compare, otherwise a float distance between 0 and 1
    """
    if not paper1.bib.get('author') or not paper2.bib.get('author'):
        return 1

    author_lists = []
    for paper in (paper1, paper2):
        # Only parse the raw author string when no cached ``x_authors`` list
        # exists; passing the parse as a ``dict.get`` default would evaluate
        # it (and could fail) even when the cached value is used.
        if 'x_authors' in paper.extra_data:
            author_lists.append(paper.extra_data['x_authors'])
        else:
            author_lists.append(parseBibAuthors(paper.bib.get('author')))
    authors1, authors2 = author_lists

    if len(authors1) >= len(authors2):
        a_short, a_long = authors2, authors1
    else:
        a_short, a_long = authors1, authors2

    score = 0
    max_score = 0

    for index, (author, counterpart) in enumerate(zip(a_short, a_long)):
        factor = (len(a_long) - index)**2
        if author['family'].lower() == counterpart['family'].lower():
            score += factor

        max_score += factor

    # Nothing was compared (an empty author list): treat as maximally distant
    # and avoid dividing by zero.
    if max_score == 0:
        return 1

    return 1 - (score / max_score)
Beispiel #4
0
def generateFilename(paper):
    """
    Builds a display/file name of the form ``Family (year) - Title``

    The family name of the first parsed author and the year are each
    included only when available; the `` - `` separator is added only when
    at least one of them is. The title part is ``paper.norm_title`` in title
    case.

    :param paper: object exposing ``authors``, ``year`` and ``norm_title``
    :return: the assembled name string
    """
    parsed = parseBibAuthors(paper.authors)
    if not parsed:
        # Diagnostic output: show the author string that yielded no authors.
        print(paper.authors)
        print()

    prefix = ''
    lead_family = parsed[0].get('family') if parsed else None
    if lead_family:
        prefix = lead_family + ' '
    if paper.year:
        prefix = prefix + '(%s)' % paper.year

    separator = ' - ' if prefix else ''
    return prefix + separator + paper.norm_title.title()
Beispiel #5
0
def fixBibData(bib, index):
    """
    Add mandatory missing fields to bibtex data

    ``ENTRYTYPE`` defaults to ``ARTICLE``. A missing ``ID`` is generated as
    ``<prefix><year><first title word, lower-cased>`` where the prefix is the
    first author's family name, or ``id`` plus a random number between 1000
    and 9000 when no family name is available. A missing year is rendered as
    ``YEAR``; a missing or blank title contributes nothing.

    :param bib: dict of bibtex fields; modified in place
    :param index: not used by this function
    :return: the same ``bib`` dict
    """
    if "ENTRYTYPE" not in bib:
        bib["ENTRYTYPE"] = "ARTICLE"
    if "ID" not in bib:
        # This function exists to repair incomplete records, so a missing
        # author or title must not raise here.
        raw_authors = bib.get("author")
        authors = parseBibAuthors(raw_authors) if raw_authors else []
        family = authors[0].get("family") if authors else None

        if family:
            prefix = family
        else:
            prefix = 'id' + str(random.randint(1000, 9000))

        title_words = (bib.get("title") or "").split()
        first_word = title_words[0].lower() if title_words else ""

        bib["ID"] = prefix + str(bib.get("year", "YEAR")) + first_word

    return bib