Esempio n. 1
0
def parse_book(item):
    """Create or update the Book() described by a Zotero item and return it.

    The book is fetched through ``get_or_default`` using a slug built from the
    title, publication year, volume and series, plus the year itself; a fresh
    ``Book()`` is handed over as second argument (presumably the value returned
    when nothing matches -- confirm against ``get_or_default``).

    The bibliographic fields are then overwritten from the item, the book is
    saved, and the item's editors are linked to it.
    """
    data = item['data']

    book_slug = nslugify(
        data['bookTitle'],
        _parse_date(item).year,
        data.get('volume'),
        data.get('series'),
    )
    book = get_or_default(
        Book,
        Book(),
        slug=book_slug,
        year=_parse_date(item).year,
    )

    book.title = data['bookTitle']

    # (model attribute, Zotero field) pairs filled through _assign_if_exists.
    copied_fields = (
        ('isbn', 'ISBN'),
        ('volume', 'volume'),
        ('series', 'series'),
        ('publisher', 'publisher'),
        ('place', 'place'),
    )
    for attribute, zotero_field in copied_fields:
        setattr(book, attribute, _assign_if_exists(item, zotero_field))

    book.published = _parse_date(item)
    book.year = book.published.year
    book.save()

    # Editors can only be linked once the book itself has been saved.
    _save_publication_editors(_extract_editors(item), book)

    return book
Esempio n. 2
0
def parse_proceedings(item):
    """Create or update the Proceedings() a conference paper belongs to.

    The title is taken from the first non-empty candidate, in this order:
    ``proceedingsTitle``, then a title derived from ``conferenceName``, then a
    title derived from the paper's own ``title``. The instance is fetched
    through ``get_or_default`` by slug and year, refreshed from the item, saved
    and returned.
    """
    data = item['data']

    if data['proceedingsTitle'] != '':
        proceedings_title = data['proceedingsTitle']
    elif data['conferenceName'] != '':
        proceedings_title = 'Proceedings of conference: %s' % data['conferenceName']
    else:
        proceedings_title = 'Proceedings for article: %s' % data['title']

    proceedings_slug = nslugify(
        proceedings_title,
        _parse_date(item).year,
        data.get('volume'),
    )
    proceedings = get_or_default(
        Proceedings,
        Proceedings(),
        slug=proceedings_slug,
        year=_parse_date(item).year,
    )

    proceedings.title = proceedings_title

    # (model attribute, Zotero field) pairs filled through _assign_if_exists.
    copied_fields = (
        ('isbn', 'ISBN'),
        ('volume', 'volume'),
        ('series', 'series'),
        ('publisher', 'publisher'),
        ('place', 'place'),
    )
    for attribute, zotero_field in copied_fields:
        setattr(proceedings, attribute, _assign_if_exists(item, zotero_field))

    proceedings.published = _parse_date(item)
    proceedings.year = proceedings.published.year
    proceedings.save()

    return proceedings
Esempio n. 3
0
def _extract_editors(item):
    editors = []

    if 'creators' in item['data'] and len(item['data']['creators']) > 0:
        for creator_item in item['data']['creators']:
            creator_type = creator_item['creatorType']

            if creator_type == 'editor':
                if 'name' in creator_item and creator_item['name'] != '':
                    editor_name = str(creator_item['name'].encode('utf-8'))
                    editor_first_surname = editor_name.split(' ')[-1]
                    editor_first_name = editor_name.replace(
                        ' ' + editor_first_surname, '')

                else:
                    editor_first_name = creator_item['firstName'].encode(
                        'utf-8')
                    editor_first_surname = creator_item['lastName'].encode(
                        'utf-8')

                editor_slug = slugify(
                    '%s %s' % (editor_first_name, editor_first_surname))

                editor = get_or_default(
                    Person,
                    slug=editor_slug,
                )

                if not editor:
                    nick = get_or_default(
                        Nickname,
                        slug=editor_slug,
                    )

                    if nick:
                        editor, created = Person.objects.get_or_create(
                            id=nick.person.id,
                            defaults={
                                'first_name': editor_first_name,
                                'first_surname': editor_first_surname,
                            },
                        )

                editors.append(editor)

    return editors
Esempio n. 4
0
def get_zotero_variables():
    """Return the Zotero credentials as ``(api_key, library_id, library_type)``.

    The values are read from the object returned by
    ``get_or_default(ZoteroConfiguration)``. When that call returns a falsy
    value, a warning is logged and three empty strings are returned instead.
    """
    zot = get_or_default(ZoteroConfiguration)

    if zot:
        return zot.api_key, zot.library_id, zot.library_type

    # Logger.warn() is a deprecated alias of Logger.warning().
    logger.warning(u"ZoteroConfiguration() object not configured in admin panel")

    return '', '', ''
Esempio n. 5
0
def _extract_editors(item):
    editors = []

    if 'creators' in item['data'] and len(item['data']['creators']) > 0:
        for creator_item in item['data']['creators']:
            creator_type = creator_item['creatorType']

            if creator_type == 'editor':
                if 'name' in creator_item and creator_item['name'] != '':
                    editor_name = str(creator_item['name'].encode('utf-8'))
                    editor_first_surname = editor_name.split(' ')[-1]
                    editor_first_name = editor_name.replace(' ' + editor_first_surname, '')

                else:
                    editor_first_name = creator_item['firstName'].encode('utf-8')
                    editor_first_surname = creator_item['lastName'].encode('utf-8')

                editor_slug = slugify('%s %s' % (editor_first_name, editor_first_surname))

                editor = get_or_default(
                    Person,
                    slug=editor_slug,
                )

                if not editor:
                    nick = get_or_default(
                        Nickname,
                        slug=editor_slug,
                    )

                    if nick:
                        editor, created = Person.objects.get_or_create(
                            id=nick.person.id,
                            defaults={
                                'first_name': editor_first_name,
                                'first_surname': editor_first_surname,
                            },
                        )

                editors.append(editor)

    return editors
Esempio n. 6
0
def parse_thesis(item):
    """Check that the thesis described by a Zotero item is already registered.

    The thesis is looked up by the slug of the item's title. When the lookup
    returns a falsy value, a warning naming the first author is logged.
    Nothing is created, and no value is returned on any path visible here.

    Raises IndexError when ``_extract_authors`` yields no entries, because
    the first one is taken unconditionally.
    """
    author = _extract_authors(item)[0]

    thesis = get_or_default(
        Thesis,
        slug=slugify(item['data']['title']),
    )

    if not thesis:
        # Logger.warn() is a deprecated alias of Logger.warning().
        logger.warning(u"")
        logger.warning(u"%s should register his/her thesis using labman's admin page" % author)
Esempio n. 7
0
def get_zotero_variables():
    """Return the Zotero credentials as ``(api_key, library_id, library_type)``.

    The values are read from the object returned by
    ``get_or_default(ZoteroConfiguration)``. When that call returns a falsy
    value, a warning is logged and three empty strings are returned instead.
    """
    zot = get_or_default(ZoteroConfiguration)

    if zot:
        return zot.api_key, zot.library_id, zot.library_type

    # Logger.warn() is a deprecated alias of Logger.warning().
    logger.warning(u"ZoteroConfiguration() object not configured in admin panel")

    return '', '', ''
def _extract_authors(item):
    authors = []

    if 'creators' in item['data'] and len(item['data']['creators']) > 0:
        for creator_item in item['data']['creators']:
            creator_type = creator_item['creatorType']

            if creator_type == 'author':
                if 'name' in creator_item and creator_item['name'] != '':
                    author_name = str(creator_item['name'].encode('utf-8'))
                    author_first_surname = author_name.split(' ')[-1]
                    author_first_name = author_name.replace(' ' + author_first_surname, '')

                else:
                    author_first_name = creator_item['firstName'].encode('utf-8')
                    author_first_surname = creator_item['lastName'].encode('utf-8')

                author_slug = slugify('%s %s' % (author_first_name, author_first_surname))

                author = get_or_default(
                    Person,
                    slug=author_slug,
                )

                if not author:
                    try:
                        nick = Nickname.objects.filter(slug=author_slug).first()

                    except:
                        nick = None

                    if nick:
                        author, created = Person.objects.get_or_create(
                            id=nick.person.id,
                            defaults={
                                'first_name': author_first_name,
                                'first_surname': author_first_surname,
                            },
                        )

                authors.append(author)

    return authors
Esempio n. 9
0
def parse_journal(item):
    """Create or update the Journal() issue described by a Zotero item.

    The journal is fetched through ``get_or_default`` using a slug built from
    the publication title, year, volume and issue; a fresh ``Journal()`` is
    handed over as second argument (presumably the value returned when nothing
    matches -- confirm against ``get_or_default``). Its fields are then
    overwritten from the item, and the instance is saved and returned.
    """
    data = item['data']

    journal_slug = nslugify(
        data['publicationTitle'],
        _parse_date(item).year,
        data.get('volume'),
        data.get('issue'),
    )

    journal = get_or_default(Journal, Journal(), slug=journal_slug)

    journal.title = data['publicationTitle']

    journal.issn = _assign_if_exists(item, 'ISSN')
    journal.volume = _assign_if_exists(item, 'volume')
    journal.publisher = _assign_if_exists(item, 'publisher')
    journal.place = _assign_if_exists(item, 'place')
    # Zotero names this field 'journalAbbreviation'. The key used before,
    # 'journalAbbrevation', was misspelt and is not a field Zotero emits.
    journal.journal_abbreviation = _assign_if_exists(item, 'journalAbbreviation')
    journal.issue = _assign_if_exists(item, 'issue')

    journal.published = _parse_date(item)
    journal.year = journal.published.year

    journal.save()

    return journal
Esempio n. 10
0
def parse_magazine(item):
    """Create or update the Magazine() issue described by a Zotero item.

    The magazine is fetched through ``get_or_default`` using a slug built from
    the publication title, year, volume and issue, plus the year itself; a
    fresh ``Magazine()`` is handed over as second argument (presumably the
    value returned when nothing matches -- confirm). Its fields are then
    overwritten from the item, and the instance is saved and returned.
    """
    data = item['data']

    magazine_slug = nslugify(
        data['publicationTitle'],
        _parse_date(item).year,
        data.get('volume'),
        data.get('issue'),
    )
    magazine = get_or_default(
        Magazine,
        Magazine(),
        slug=magazine_slug,
        year=_parse_date(item).year,
    )

    magazine.title = data['publicationTitle']

    # (model attribute, Zotero field) pairs filled through _assign_if_exists.
    copied_fields = (
        ('issn', 'ISSN'),
        ('volume', 'volume'),
        ('issue', 'issue'),
    )
    for attribute, zotero_field in copied_fields:
        setattr(magazine, attribute, _assign_if_exists(item, zotero_field))

    magazine.published = _parse_date(item)
    magazine.year = magazine.published.year
    magazine.save()

    return magazine
Esempio n. 11
0
def extract_publications_from_zotero(from_version):
    """Synchronise local publications with the Zotero items changed since *from_version*.

    Steps, in order:

    1. Return ``[]`` immediately when ``from_version`` equals the latest
       Zotero version.
    2. When ``from_version`` is 0 (a reset), remember the (news title,
       publication title) pair of every PublicationRelatedToNews, then delete
       all Publication and ZoteroExtractorLog rows.
    3. Page through ``zot.items(since=...)`` 100 items at a time until an
       empty page comes back.
    4. Separate attachments from regular items. Attachments whose parent is
       part of this batch are stored on the parent item; the others are saved
       directly against the already existing publications of their parent.
    5. Call ``generate_publication`` for every regular item.
    6. Create a ``VERSION_FIX`` ZoteroExtractorLog when the highest version
       seen differs from the highest version parsed.
    7. Recreate the remembered news/publication links, matching by title.

    :param from_version: Zotero library version to sync from; anything
        ``int()`` accepts.
    :return: ``[]`` when already up to date; otherwise nothing is returned
        explicitly (``None``).
    """

    from_version = int(from_version)
    last_zotero_version = get_last_zotero_version()

    if from_version == last_zotero_version:
        logger.info(u"Labman is updated to the last version in Zotero (%d)" % last_zotero_version)

        return []

    else:
        if from_version > last_zotero_version:

            # This should never happen, but just in case, we solve the error by syncing the penultimate version in Zotero
            from_version = last_zotero_version - 1
            # Logger.warn() is a deprecated alias of Logger.warning().
            logger.warning(u"Asked 'from_version' was greater than 'last_zotero_version'. Strange...")

        # In case of a reset, save relationships between publications and news
        publications_related_to_news = []

        if from_version == 0:
            logger.info(u"A publication DB reset is ordered")

            # Only titles are kept: the rows themselves are about to be deleted.
            for item_to_be_saved in PublicationRelatedToNews.objects.all():
                publications_related_to_news.append((
                    item_to_be_saved.news.title,
                    item_to_be_saved.publication.title
                ))

            Publication.objects.all().delete()
            # Just in case any legacy ZoteroExtractorLog() instances are left behind
            ZoteroExtractorLog.objects.all().delete()

        # Retrieve publications since required zotero version
        logger.info(u"Getting items since version %d" % from_version)
        logger.info(u"Last version in Zotero is %d" % last_zotero_version)

        zot = get_zotero_connection()

        total_items = []
        start = 0
        limit = 100

        items = zot.items(since=from_version, limit=limit, start=start)
        total_items.extend(items)

        # Keep requesting the next page until one comes back empty.
        while len(items) > 0:
            start += limit

            logger.debug(u"%s results found. Trying with ?start=%s" % (len(items), start))

            items = zot.items(since=from_version, limit=limit, start=start)

            if items:
                logger.debug(u"Last paper added: %s" % (items[-1]['data']['dateAdded']))

            total_items.extend(items)

        items = total_items

        logger.info(u"")
        logger.info(u"%d new items (includes attachments as items)" % len(items))

        # Regular items indexed by Zotero key; attachments are kept apart.
        items_ordered = {}
        attachments = []

        # Versioning control
        highest_detected_version = 0
        highest_parsed_version = 0

        for item in items:
            # Every item counts here, including attachments that get rejected.
            if item['version'] > highest_detected_version:
                highest_detected_version = item['version']

            if item['data']['itemType'] == 'attachment':
                if 'filename' in item['data']:
                    if item['data']['filename'].lower().endswith(ACCEPTED_ATTACHMENT_FORMATS):
                        attachments.append(item)

                    else:
                        logger.warning(u"\tInvalid attachment >> %s" % item['data']['filename'])

                else:
                    logger.warning(u"Could not identify attachment's filename")

            else:
                item_id = item['key']
                items_ordered[item_id] = item

        attachment_number = 0

        for a in attachments:
            if 'parentItem' in a['data']:
                attachment_number += 1
                parent_id = a['data']['parentItem']

                if parent_id in items_ordered:
                    # The parent is part of this batch: hand the attachment
                    # over together with the parent item.
                    items_ordered[parent_id]['attachment'] = a

                else:
                    # Only the attachment has been modified
                    parent_publication = zot.item(parent_id)
                    publications = Publication.objects.filter(zotero_key=parent_publication['key'])

                    for publication in publications:
                        _save_attachment(a['key'], publication.slug, a['data']['filename'])
            else:
                logger.warning(u"%s" % a['data'].get('title', 'The user did not even added a title'))

        number_of_items = len(items_ordered)

        logger.info(u"")
        logger.info(u"%d items to be parsed (%d attachments)" % (number_of_items, attachment_number))

        for pos, i_id in enumerate(items_ordered):
            item = items_ordered[i_id]
            publication_type = item['data']['itemType']

            logger.info(u"[%s/%s][%s] > %s" % (
                pos + 1,
                number_of_items,
                publication_type,
                item['data'].get('title', 'No title')
            ))

            generate_publication(item)

            # Only regular (non-attachment) items are counted here.
            if item['version'] > highest_parsed_version:
                highest_parsed_version = item['version']

        if highest_parsed_version == highest_detected_version:
            logger.info(u"Latest parsed item was a valid one")

        else:
            logger.info(u"Latest parsed item was not a valid one, versioning must be fixed")

            # Record the highest version seen under a dedicated key.
            ZoteroExtractorLog.objects.create(
                item_key=u'VERSION_FIX',
                version=highest_detected_version,
            )

        # In case of a reset, reset saved relationships between publications and news
        if len(publications_related_to_news) > 0:
            logger.info(u"")
            logger.info(u"Restoring removed PublicationRelatedToNews() instances")

        for index, saved_link in enumerate(publications_related_to_news):
            # Links are re-matched by title, the only data kept across the reset.
            news = get_or_default(News, title=saved_link[0])
            publication = get_or_default(Publication, title=saved_link[1])

            if news and publication:
                PublicationRelatedToNews.objects.create(
                    publication=publication,
                    news=news,
                )

                logger.info(u"[%d/%d] Link created" % (index + 1, len(publications_related_to_news)))

            else:
                logger.info(u"[%d/%d] Link NOT created: %s" % (index + 1, len(publications_related_to_news), str(saved_link)))
Esempio n. 12
0
def parse_conference(item, proceedings):
    """Create or update the Event() for the conference named in a Zotero item.

    Returns ``None`` without touching the database when the item has no
    ``conferenceName`` or an empty one. Otherwise the event is fetched through
    ``get_or_default`` by a slug built from the conference name and year,
    refreshed from the item, linked to *proceedings*, saved and returned.

    Host city, host country and location are only set when ``place`` has
    exactly the form ``"<city>, <country>"``. The country is only resolved
    when a city could be resolved, and the city is then linked to it.
    """
    data = item['data']
    conference_name = data.get('conferenceName', '')

    if conference_name == '':
        return None

    event = get_or_default(
        Event,
        Event(),
        slug=nslugify(conference_name, _parse_date(item).year),
    )

    event.event_type = 'Academic event'
    event.full_name = conference_name

    place = data.get('place', '')

    if place != '':
        places_list = place.split(', ')

        if len(places_list) == 2:
            city_name, country_name = places_list

            if city_name:
                city, _ = City.objects.get_or_create(
                    slug=slugify(city_name),
                    defaults={
                        'full_name': city_name,
                    },
                )
            else:
                city = None

            if country_name and city:
                country, _ = Country.objects.get_or_create(
                    slug=slugify(country_name),
                    defaults={
                        'full_name': country_name,
                    },
                )

                city.country = country
                city.save()
            else:
                country = None

            event.host_city = city
            event.host_country = country

            # Both names come from the same str.split() call, so this
            # formatting cannot fail; the former bare ``except:`` is gone.
            event.location = '%s (%s)' % (city_name, country_name)

    event.start_date = _parse_date(item)
    event.year = event.start_date.year

    event.proceedings = proceedings

    event.save()

    return event
def _update_related_fields(nickname_object):
    """Merge the Person() named like a nickname into the nickname's real person.

    A Person whose ``full_name`` equals the nickname is looked up. If one is
    found and it differs from ``nickname_object.person``, every related row
    listed below is re-pointed to ``nickname_object.person`` and saved, and
    the duplicate person is then deleted. Otherwise nothing happens.
    """
    wrong_person = get_or_default(
        Person,
        full_name=nickname_object.nickname,
    )

    is_duplicate = bool(wrong_person) and wrong_person != nickname_object.person

    if not is_duplicate:
        return

    # (model, label used in the log line, filter lookup, attribute to overwrite),
    # processed strictly in this order.
    # NOTE(review): for AssignedPersonTag the attribute ``assigned_person``
    # receives a Person, although the lookup ``assigned_person__person``
    # suggests that field relates to AssignedPerson. Kept as it was -- confirm
    # against the model.
    relinks = (
        (PersonRelatedToEvent, 'PersonRelatedToEvent', 'person', 'person'),
        (PersonRelatedToNews, 'PersonRelatedToNews', 'person', 'person'),
        (PersonSeeAlso, 'PersonSeeAlso', 'person', 'person'),
        (AccountProfile, 'AccountProfile', 'person', 'person'),
        (Job, 'Job', 'person', 'person'),
        (PhDProgramFollowedByPerson, 'PhDProgramFollowedByPerson', 'person', 'person'),
        (ThesisRegisteredByPerson, 'ThesisRegisteredByPerson', 'person', 'person'),
        (AssignedPerson, 'AssignedPerson', 'person', 'person'),
        (AssignedPersonTag, 'AssignedPersonTag', 'assigned_person__person', 'assigned_person'),
        (PublicationAuthor, 'PublicationAuthor', 'author', 'author'),
        (PublicationEditor, 'PublicationEditor', 'editor', 'editor'),
        (Thesis, 'Thesis', 'author', 'author'),
        (Thesis, 'Thesis', 'advisor', 'advisor'),
        (CoAdvisor, 'CoAdvisor', 'co_advisor', 'co_advisor'),
        (PersonRelatedToContribution, 'PersonRelatedToContribution', 'person', 'person'),
        (PersonRelatedToTalkOrCourse, 'PersonRelatedToTalkOrCourse', 'person', 'person'),
        (PersonRelatedToAward, 'PersonRelatedToAward', 'person', 'person'),
    )

    for model, label, lookup, attribute in relinks:
        for row in model.objects.filter(**{lookup: wrong_person}):
            logger.info(u'\t\tUpdating %s() instances' % label)
            setattr(row, attribute, nickname_object.person)
            row.save()

    wrong_person.delete()
    logger.info(u'')
    logger.info(u'\t<%s> instance removed' % wrong_person.full_name)
Esempio n. 14
0
def _update_related_fields(nickname_object):
    """Merge the Person() named like a nickname into the nickname's real person.

    A Person whose ``full_name`` equals the nickname is looked up. If one is
    found and it differs from ``nickname_object.person``, every related row
    listed below is re-pointed to ``nickname_object.person`` and saved, and
    the duplicate person is then deleted. Otherwise nothing happens.
    """
    wrong_person = get_or_default(
        Person,
        full_name=nickname_object.nickname,
    )

    is_duplicate = bool(wrong_person) and wrong_person != nickname_object.person

    if not is_duplicate:
        return

    # (model, label used in the log line, filter lookup, attribute to overwrite),
    # processed strictly in this order.
    # NOTE(review): for AssignedPersonTag the attribute ``assigned_person``
    # receives a Person, although the lookup ``assigned_person__person``
    # suggests that field relates to AssignedPerson. Kept as it was -- confirm
    # against the model.
    relinks = (
        (PersonRelatedToEvent, 'PersonRelatedToEvent', 'person', 'person'),
        (PersonRelatedToNews, 'PersonRelatedToNews', 'person', 'person'),
        (PersonSeeAlso, 'PersonSeeAlso', 'person', 'person'),
        (AccountProfile, 'AccountProfile', 'person', 'person'),
        (Job, 'Job', 'person', 'person'),
        (PhDProgramFollowedByPerson, 'PhDProgramFollowedByPerson', 'person', 'person'),
        (ThesisRegisteredByPerson, 'ThesisRegisteredByPerson', 'person', 'person'),
        (AssignedPerson, 'AssignedPerson', 'person', 'person'),
        (AssignedPersonTag, 'AssignedPersonTag', 'assigned_person__person', 'assigned_person'),
        (PublicationAuthor, 'PublicationAuthor', 'author', 'author'),
        (PublicationEditor, 'PublicationEditor', 'editor', 'editor'),
        (Thesis, 'Thesis', 'author', 'author'),
        (Thesis, 'Thesis', 'advisor', 'advisor'),
        (CoAdvisor, 'CoAdvisor', 'co_advisor', 'co_advisor'),
        (PersonRelatedToContribution, 'PersonRelatedToContribution', 'person', 'person'),
        (PersonRelatedToTalkOrCourse, 'PersonRelatedToTalkOrCourse', 'person', 'person'),
        (PersonRelatedToAward, 'PersonRelatedToAward', 'person', 'person'),
    )

    for model, label, lookup, attribute in relinks:
        for row in model.objects.filter(**{lookup: wrong_person}):
            logger.info(u'\t\tUpdating %s() instances' % label)
            setattr(row, attribute, nickname_object.person)
            row.save()

    wrong_person.delete()
    logger.info(u'')
    logger.info(u'\t<%s> instance removed' % wrong_person.full_name)