def parse_book(item):
    """Create or update the ``Book`` described by a Zotero item and return it.

    The book is looked up with ``get_or_default`` using a slug built from the
    title, publication year, volume and series, together with the year; a
    fresh ``Book()`` is handed over as the fallback instance. Its fields are
    then overwritten from the item, the book is saved and the item's editors
    are stored through ``_save_publication_editors``.

    ``item['data']['bookTitle']`` is required; a missing key raises
    ``KeyError``.
    """
    data = item['data']
    title = data['bookTitle']

    # Parse the date a single time so that the slug, the lookup year and the
    # stored publication date are all derived from the same value.
    published = _parse_date(item)

    book = get_or_default(
        Book,
        Book(),
        slug=nslugify(
            title,
            published.year,
            data.get('volume'),
            data.get('series'),
        ),
        year=published.year,
    )

    book.title = title
    book.isbn = _assign_if_exists(item, 'ISBN')
    book.volume = _assign_if_exists(item, 'volume')
    book.series = _assign_if_exists(item, 'series')
    book.publisher = _assign_if_exists(item, 'publisher')
    book.place = _assign_if_exists(item, 'place')
    book.published = published
    book.year = published.year
    book.save()

    _save_publication_editors(_extract_editors(item), book)

    return book
def parse_proceedings(item):
    """Create or update the ``Proceedings`` for a Zotero item and return it.

    The title is taken from ``proceedingsTitle`` when it is not empty. If it
    is empty, the title is derived from ``conferenceName``, and as a last
    resort from the item's own ``title``. The instance is looked up with
    ``get_or_default`` by slug (title, year, volume) and year, with a fresh
    ``Proceedings()`` as fallback, then filled from the item and saved.

    ``item['data']['title']`` is only read, and therefore only required, when
    both other titles are empty or absent.
    """
    data = item['data']

    # 'proceedingsTitle' and 'conferenceName' are read with .get so that an
    # item lacking either field falls through to the next title source rather
    # than raising KeyError (parse_conference already treats 'conferenceName'
    # as optional).
    proceedings_title = data.get('proceedingsTitle', '')
    if proceedings_title == '':
        conference_name = data.get('conferenceName', '')
        if conference_name != '':
            proceedings_title = 'Proceedings of conference: %s' % conference_name
        else:
            proceedings_title = 'Proceedings for article: %s' % data['title']

    # Parse the date once; slug, lookup year and stored date share it.
    published = _parse_date(item)

    proceedings = get_or_default(
        Proceedings,
        Proceedings(),
        slug=nslugify(proceedings_title, published.year, data.get('volume')),
        year=published.year,
    )

    proceedings.title = proceedings_title
    proceedings.isbn = _assign_if_exists(item, 'ISBN')
    proceedings.volume = _assign_if_exists(item, 'volume')
    proceedings.series = _assign_if_exists(item, 'series')
    proceedings.publisher = _assign_if_exists(item, 'publisher')
    proceedings.place = _assign_if_exists(item, 'place')
    proceedings.published = published
    proceedings.year = published.year
    proceedings.save()

    return proceedings
def _extract_editors(item):
    """Return the persons listed as editors in ``item['data']['creators']``.

    For every creator whose ``creatorType`` is ``'editor'`` a slug is built
    from the first name and surname and used to look up a ``Person``. When no
    person matches, a ``Nickname`` with the same slug is tried and its person
    is fetched (or created) by id.

    The result has one entry per editor creator, in input order. If neither
    lookup succeeds, the falsy value returned by the ``Person`` lookup is
    appended unchanged, so callers may receive empty entries.
    """
    editors = []

    # ``or []`` also covers a missing or null 'creators' entry.
    for creator_item in item['data'].get('creators') or []:
        if creator_item['creatorType'] != 'editor':
            continue

        if 'name' in creator_item and creator_item['name'] != '':
            # NOTE(review): the encode()/str() round trip looks written for
            # Python 2 byte strings - confirm before running on Python 3.
            editor_name = str(creator_item['name'].encode('utf-8'))

            # Split on the LAST space only. The previous
            # ``replace(' ' + surname, '')`` removed every occurrence of the
            # surname, mangling names such as "Maria Garcia Lopez Garcia".
            # A single-word name still yields the same first name and surname.
            name_parts = editor_name.rsplit(' ', 1)
            editor_first_name = name_parts[0]
            editor_first_surname = name_parts[-1]
        else:
            editor_first_name = creator_item['firstName'].encode('utf-8')
            editor_first_surname = creator_item['lastName'].encode('utf-8')

        editor_slug = slugify(
            '%s %s' % (editor_first_name, editor_first_surname))

        editor = get_or_default(
            Person,
            slug=editor_slug,
        )

        if not editor:
            nick = get_or_default(
                Nickname,
                slug=editor_slug,
            )

            if nick:
                editor, _ = Person.objects.get_or_create(
                    id=nick.person.id,
                    defaults={
                        'first_name': editor_first_name,
                        'first_surname': editor_first_surname,
                    },
                )

        editors.append(editor)

    return editors
def get_zotero_variables():
    """Return ``(api_key, library_id, library_type)`` for the Zotero library.

    The values are read from the object returned by
    ``get_or_default(ZoteroConfiguration)``. When that lookup yields nothing
    truthy, a warning is logged and three empty strings are returned instead.
    """
    zot = get_or_default(ZoteroConfiguration)

    if not zot:
        # Logger.warn() is a deprecated alias of Logger.warning().
        logger.warning(u"ZoteroConfiguration() object not configured in admin panel")
        return '', '', ''

    return zot.api_key, zot.library_id, zot.library_type
def _extract_editors(item):
    """Collect the ``Person`` objects for the editors of a Zotero item.

    Only creators with ``creatorType == 'editor'`` are considered. Each one is
    resolved through a slug of "<first name> <surname>": first against
    ``Person``, then against ``Nickname`` (whose person is fetched or created
    by id).

    Returns a list with one entry per editor creator, in input order. An
    editor that cannot be resolved contributes the falsy result of the
    ``Person`` lookup rather than being skipped.
    """
    editors = []

    # A missing or null 'creators' entry is treated as "no creators".
    creators = item['data'].get('creators') or []

    for creator_item in creators:
        if creator_item['creatorType'] != 'editor':
            continue

        if 'name' in creator_item and creator_item['name'] != '':
            # NOTE(review): str(...encode('utf-8')) looks like Python 2 byte
            # string handling - confirm before running on Python 3.
            editor_name = str(creator_item['name'].encode('utf-8'))

            # Everything before the last space is the first name and the last
            # token is the surname. Replacing ' ' + surname throughout the
            # string (the former approach) also stripped earlier occurrences
            # of the surname from the first name.
            name_parts = editor_name.rsplit(' ', 1)
            editor_first_name = name_parts[0]
            editor_first_surname = name_parts[-1]
        else:
            editor_first_name = creator_item['firstName'].encode('utf-8')
            editor_first_surname = creator_item['lastName'].encode('utf-8')

        editor_slug = slugify('%s %s' % (editor_first_name, editor_first_surname))

        editor = get_or_default(
            Person,
            slug=editor_slug,
        )

        if not editor:
            nick = get_or_default(
                Nickname,
                slug=editor_slug,
            )

            if nick:
                editor, _ = Person.objects.get_or_create(
                    id=nick.person.id,
                    defaults={
                        'first_name': editor_first_name,
                        'first_surname': editor_first_surname,
                    },
                )

        editors.append(editor)

    return editors
def parse_thesis(item):
    """Warn when the thesis described by a Zotero item is not registered.

    The thesis is looked up by the slug of ``item['data']['title']``. Nothing
    is created here: if no ``Thesis`` matches, a warning naming the first
    author is logged. The function always returns ``None``.
    """
    # Items without any author creator used to raise IndexError on ``[0]``;
    # fall back to None so the lookup and the warning still run.
    authors = _extract_authors(item)
    author = authors[0] if authors else None

    thesis = get_or_default(
        Thesis,
        slug=slugify(item['data']['title']),
    )

    if not thesis:
        # Logger.warn() is a deprecated alias of Logger.warning().
        logger.warning(u"")
        logger.warning(u"%s should register his/her thesis using labman's admin page" % author)
def _extract_authors(item):
    """Return the persons listed as authors in ``item['data']['creators']``.

    For every creator whose ``creatorType`` is ``'author'`` a slug is built
    from the first name and surname and used to look up a ``Person``. When no
    person matches, the first ``Nickname`` with that slug is tried and its
    person is fetched (or created) by id.

    The result has one entry per author creator, in input order. If neither
    lookup succeeds, the falsy value returned by the ``Person`` lookup is
    appended unchanged, so callers may receive empty entries.
    """
    authors = []

    # ``or []`` also covers a missing or null 'creators' entry.
    for creator_item in item['data'].get('creators') or []:
        if creator_item['creatorType'] != 'author':
            continue

        if 'name' in creator_item and creator_item['name'] != '':
            # NOTE(review): the encode()/str() round trip looks written for
            # Python 2 byte strings - confirm before running on Python 3.
            author_name = str(creator_item['name'].encode('utf-8'))

            # Split on the LAST space only. The previous
            # ``replace(' ' + surname, '')`` removed every occurrence of the
            # surname, mangling names such as "Maria Garcia Lopez Garcia".
            # A single-word name still yields the same first name and surname.
            name_parts = author_name.rsplit(' ', 1)
            author_first_name = name_parts[0]
            author_first_surname = name_parts[-1]
        else:
            author_first_name = creator_item['firstName'].encode('utf-8')
            author_first_surname = creator_item['lastName'].encode('utf-8')

        author_slug = slugify('%s %s' % (author_first_name, author_first_surname))

        author = get_or_default(
            Person,
            slug=author_slug,
        )

        if not author:
            # Best effort: a failing nickname lookup means "no nickname".
            # ``except Exception`` (rather than a bare ``except``) keeps
            # KeyboardInterrupt and SystemExit propagating.
            try:
                nick = Nickname.objects.filter(slug=author_slug).first()
            except Exception:
                nick = None

            if nick:
                author, _ = Person.objects.get_or_create(
                    id=nick.person.id,
                    defaults={
                        'first_name': author_first_name,
                        'first_surname': author_first_surname,
                    },
                )

        authors.append(author)

    return authors
def parse_journal(item):
    """Create or update the ``Journal`` described by a Zotero item and return it.

    The journal is looked up with ``get_or_default`` by a slug built from the
    publication title, year, volume and issue, with a fresh ``Journal()`` as
    fallback. Its fields are then overwritten from the item and it is saved.

    ``item['data']['publicationTitle']`` is required; a missing key raises
    ``KeyError``.
    """
    data = item['data']
    title = data['publicationTitle']

    # Parse the date once; the slug and the stored date share it.
    published = _parse_date(item)

    journal_slug = nslugify(
        title,
        published.year,
        data.get('volume'),
        data.get('issue'),
    )

    journal = get_or_default(Journal, Journal(), slug=journal_slug)

    journal.title = title
    journal.issn = _assign_if_exists(item, 'ISSN')
    journal.volume = _assign_if_exists(item, 'volume')
    journal.publisher = _assign_if_exists(item, 'publisher')
    journal.place = _assign_if_exists(item, 'place')
    # Zotero names this field 'journalAbbreviation'; the former misspelled key
    # ('journalAbbrevation') could never match, so the abbreviation was lost.
    journal.journal_abbreviation = _assign_if_exists(item, 'journalAbbreviation')
    journal.issue = _assign_if_exists(item, 'issue')
    journal.published = published
    journal.year = published.year
    journal.save()

    return journal
def parse_magazine(item):
    """Create or update the ``Magazine`` described by a Zotero item and return it.

    The magazine is looked up with ``get_or_default`` by a slug built from the
    publication title, year, volume and issue, together with the year; a fresh
    ``Magazine()`` is passed as fallback. Its fields are then overwritten from
    the item and it is saved.

    ``item['data']['publicationTitle']`` is required; a missing key raises
    ``KeyError``.
    """
    data = item['data']
    title = data['publicationTitle']

    # Parse the date a single time so that the slug, the lookup year and the
    # stored publication date are all derived from the same value.
    published = _parse_date(item)

    magazine = get_or_default(
        Magazine,
        Magazine(),
        slug=nslugify(
            title,
            published.year,
            data.get('volume'),
            data.get('issue'),
        ),
        year=published.year,
    )

    magazine.title = title
    magazine.issn = _assign_if_exists(item, 'ISSN')
    magazine.volume = _assign_if_exists(item, 'volume')
    magazine.issue = _assign_if_exists(item, 'issue')
    magazine.published = published
    magazine.year = published.year
    magazine.save()

    return magazine
def extract_publications_from_zotero(from_version):
    """Synchronise local publications with every Zotero item newer than a version.

    ``from_version`` is coerced with ``int()``. The steps are:

    1. If it equals the last Zotero version there is nothing to do and ``[]``
       is returned.
    2. ``from_version == 0`` is treated as a full reset: the news/publication
       links are remembered by title, then all ``Publication`` and
       ``ZoteroExtractorLog`` rows are deleted.
    3. Items are downloaded in pages of 100 and split into attachments and
       regular items. Attachments are hooked onto their parent item, or saved
       directly when only the attachment changed.
    4. Every regular item is passed to ``generate_publication``.
    5. If the highest version seen belongs to an item that was not parsed, a
       ``VERSION_FIX`` log entry records it.
    6. After a reset, the remembered news links are recreated where both ends
       can still be found by title.

    Only the "already up to date" path returns a value (``[]``); the main path
    falls off the end and returns ``None``.
    """
    from_version = int(from_version)
    last_zotero_version = get_last_zotero_version()

    if from_version == last_zotero_version:
        logger.info(u"Labman is updated to the last version in Zotero (%d)" % last_zotero_version)
        return []

    if from_version > last_zotero_version:
        # This should never happen, but just in case, we solve the error by
        # syncing the penultimate version in Zotero
        from_version = last_zotero_version - 1
        # Logger.warn() is a deprecated alias of Logger.warning().
        logger.warning(u"Asked 'from_version' was greater than 'last_zotero_version'. Strange...")

    # In case of a reset, save relationships between publications and news
    publications_related_to_news = []

    if from_version == 0:
        logger.info(u"A publication DB reset is ordered")

        for item_to_be_saved in PublicationRelatedToNews.objects.all():
            publications_related_to_news.append((
                item_to_be_saved.news.title,
                item_to_be_saved.publication.title
            ))

        Publication.objects.all().delete()

        # Just in case any legacy ZoteroExtractorLog() instances are left behind
        ZoteroExtractorLog.objects.all().delete()

    # Retrieve publications since required zotero version
    logger.info(u"Getting items since version %d" % from_version)
    logger.info(u"Last version in Zotero is %d" % last_zotero_version)

    zot = get_zotero_connection()

    # Page through the results until an empty page is returned.
    total_items = []
    start = 0
    limit = 100

    items = zot.items(since=from_version, limit=limit, start=start)
    total_items.extend(items)

    while items:
        start += limit
        logger.debug(u"%s results found. Trying with ?start=%s" % (len(items), start))

        items = zot.items(since=from_version, limit=limit, start=start)

        if items:
            logger.debug(u"Last paper added: %s" % (items[-1]['data']['dateAdded']))

        total_items.extend(items)

    items = total_items

    logger.info(u"")
    logger.info(u"%d new items (includes attachments as items)" % len(items))

    items_ordered = {}
    attachments = []

    # Versioning control
    highest_detected_version = 0
    highest_parsed_version = 0

    for item in items:
        if item['version'] > highest_detected_version:
            highest_detected_version = item['version']

        if item['data']['itemType'] == 'attachment':
            if 'filename' in item['data']:
                if item['data']['filename'].lower().endswith(ACCEPTED_ATTACHMENT_FORMATS):
                    attachments.append(item)
                else:
                    logger.warning(u"\tInvalid attachment >> %s" % item['data']['filename'])
            else:
                logger.warning(u"Could not identify attachment's filename")
        else:
            item_id = item['key']
            items_ordered[item_id] = item

    attachment_number = 0

    for a in attachments:
        if 'parentItem' in a['data']:
            attachment_number += 1
            parent_id = a['data']['parentItem']

            # Test membership on the dict itself: ``.keys()`` builds a list on
            # Python 2 and turned this into a linear scan per attachment.
            if parent_id in items_ordered:
                items_ordered[parent_id]['attachment'] = a
            else:
                # Only the attachment has been modified
                parent_publication = zot.item(parent_id)
                publications = Publication.objects.filter(zotero_key=parent_publication['key'])

                for publication in publications:
                    _save_attachment(a['key'], publication.slug, a['data']['filename'])
        else:
            logger.warning(u"%s" % a['data'].get('title', 'The user did not even added a title'))

    number_of_items = len(items_ordered)

    logger.info(u"")
    logger.info(u"%d items to be parsed (%d attachments)" % (number_of_items, attachment_number))

    for pos, i_id in enumerate(items_ordered):
        item = items_ordered[i_id]
        publication_type = item['data']['itemType']

        logger.info(u"[%s/%s][%s] > %s" % (
            pos + 1,
            number_of_items,
            publication_type,
            item['data'].get('title', 'No title')
        ))

        generate_publication(item)

        if item['version'] > highest_parsed_version:
            highest_parsed_version = item['version']

    if highest_parsed_version == highest_detected_version:
        logger.info(u"Latest parsed item was a valid one")
    else:
        # The newest version belongs to an item that was not parsed (e.g. a
        # rejected attachment); record it explicitly in the log.
        logger.info(u"Latest parsed item was not a valid one, versioning must be fixed")

        ZoteroExtractorLog.objects.create(
            item_key=u'VERSION_FIX',
            version=highest_detected_version,
        )

    # In case of a reset, reset saved relationships between publications and news
    if publications_related_to_news:
        total_links = len(publications_related_to_news)

        logger.info(u"")
        logger.info(u"Restoring removed PublicationRelatedToNews() instances")

        for index, saved_link in enumerate(publications_related_to_news):
            news = get_or_default(News, title=saved_link[0])
            publication = get_or_default(Publication, title=saved_link[1])

            if news and publication:
                PublicationRelatedToNews.objects.create(
                    publication=publication,
                    news=news,
                )

                logger.info(u"[%d/%d] Link created" % (index + 1, total_links))
            else:
                logger.info(u"[%d/%d] Link NOT created: %s" % (index + 1, total_links, str(saved_link)))
def parse_conference(item, proceedings):
    """Create or update the academic ``Event`` of a conference item.

    Returns ``None`` when the item has no (or an empty) ``conferenceName``.
    Otherwise the event is looked up with ``get_or_default`` by a slug of the
    conference name and year, with a fresh ``Event()`` as fallback. It is then
    filled in, linked to ``proceedings``, saved and returned.

    The host city, country and location are only set when ``place`` has
    exactly the form "<city>, <country>". Any other place value leaves those
    three event fields untouched.
    """
    data = item['data']

    if 'conferenceName' not in data or data['conferenceName'] == '':
        return None

    conference_name = data['conferenceName']

    # Parse the date once; the slug year and the start date share it.
    start_date = _parse_date(item)

    event = get_or_default(
        Event,
        Event(),
        slug=nslugify(conference_name, start_date.year),
    )

    event.event_type = 'Academic event'
    event.full_name = conference_name

    if 'place' in data and data['place'] != '':
        places_list = data['place'].split(', ')

        if len(places_list) == 2:
            city_name, country_name = places_list

            if city_name:
                city, _ = City.objects.get_or_create(
                    slug=slugify(city_name),
                    defaults={
                        'full_name': city_name,
                    },
                )
            else:
                city = None

            # A country is only recorded together with a city, and the city
            # is (re)linked to that country.
            if country_name and city:
                country, _ = Country.objects.get_or_create(
                    slug=slugify(country_name),
                    defaults={
                        'full_name': country_name,
                    },
                )

                city.country = country
                city.save()
            else:
                country = None

            # Best effort: fall back to an empty location if formatting
            # fails. ``except Exception`` (rather than a bare ``except``)
            # keeps KeyboardInterrupt and SystemExit propagating.
            try:
                event_location = '%s (%s)' % (city_name, country_name)
            except Exception:
                event_location = ''

            event.host_city = city
            event.host_country = country
            event.location = event_location

    event.start_date = start_date
    event.year = start_date.year
    event.proceedings = proceedings
    event.save()

    return event
def _update_related_fields(nickname_object):
    """Merge a duplicated ``Person`` into the person owning a nickname.

    A ``Person`` whose ``full_name`` equals ``nickname_object.nickname`` is
    looked up. If one exists and it is not ``nickname_object.person``, every
    related record listed below is re-pointed to ``nickname_object.person``
    and saved one by one. The duplicate is then deleted and the removal is
    logged. Otherwise nothing happens.
    """
    wrong_person = get_or_default(
        Person,
        full_name=nickname_object.nickname,
    )

    if not (wrong_person and wrong_person != nickname_object.person):
        return

    # (model, filter lookup, attribute to overwrite, log message), processed
    # strictly in this order.
    # NOTE(review): the AssignedPersonTag entry assigns a Person to
    # ``assigned_person``, while its lookup ``assigned_person__person``
    # suggests that field refers to an AssignedPerson - confirm.
    reassignments = (
        (PersonRelatedToEvent, 'person', 'person',
         u'\t\tUpdating PersonRelatedToEvent() instances'),
        (PersonRelatedToNews, 'person', 'person',
         u'\t\tUpdating PersonRelatedToNews() instances'),
        (PersonSeeAlso, 'person', 'person',
         u'\t\tUpdating PersonSeeAlso() instances'),
        (AccountProfile, 'person', 'person',
         u'\t\tUpdating AccountProfile() instances'),
        (Job, 'person', 'person',
         u'\t\tUpdating Job() instances'),
        (PhDProgramFollowedByPerson, 'person', 'person',
         u'\t\tUpdating PhDProgramFollowedByPerson() instances'),
        (ThesisRegisteredByPerson, 'person', 'person',
         u'\t\tUpdating ThesisRegisteredByPerson() instances'),
        (AssignedPerson, 'person', 'person',
         u'\t\tUpdating AssignedPerson() instances'),
        (AssignedPersonTag, 'assigned_person__person', 'assigned_person',
         u'\t\tUpdating AssignedPersonTag() instances'),
        (PublicationAuthor, 'author', 'author',
         u'\t\tUpdating PublicationAuthor() instances'),
        (PublicationEditor, 'editor', 'editor',
         u'\t\tUpdating PublicationEditor() instances'),
        (Thesis, 'author', 'author',
         u'\t\tUpdating Thesis() instances'),
        (Thesis, 'advisor', 'advisor',
         u'\t\tUpdating Thesis() instances'),
        (CoAdvisor, 'co_advisor', 'co_advisor',
         u'\t\tUpdating CoAdvisor() instances'),
        (PersonRelatedToContribution, 'person', 'person',
         u'\t\tUpdating PersonRelatedToContribution() instances'),
        (PersonRelatedToTalkOrCourse, 'person', 'person',
         u'\t\tUpdating PersonRelatedToTalkOrCourse() instances'),
        (PersonRelatedToAward, 'person', 'person',
         u'\t\tUpdating PersonRelatedToAward() instances'),
    )

    for model, lookup, attribute, message in reassignments:
        # The queryset is built here, inside the loop, so earlier
        # reassignments are already saved when it is evaluated.
        for related in model.objects.filter(**{lookup: wrong_person}):
            logger.info(message)
            setattr(related, attribute, nickname_object.person)
            related.save()

    wrong_person.delete()

    logger.info(u'')
    logger.info(u'\t<%s> instance removed' % wrong_person.full_name)
def _update_related_fields(nickname_object):
    """Re-point everything owned by a duplicate person, then delete it.

    The duplicate is the ``Person`` whose ``full_name`` matches
    ``nickname_object.nickname``. When it exists and differs from
    ``nickname_object.person``, each related row is moved over to
    ``nickname_object.person`` (one ``save()`` and one log line per row). The
    duplicate is deleted afterwards and its removal is logged. In any other
    case the function does nothing.
    """
    wrong_person = get_or_default(
        Person,
        full_name=nickname_object.nickname,
    )

    if not (wrong_person and wrong_person != nickname_object.person):
        return

    def _repoint(queryset, attribute, message):
        # Move every row of ``queryset`` over to the nickname's person.
        for related in queryset:
            logger.info(message)
            setattr(related, attribute, nickname_object.person)
            related.save()

    _repoint(PersonRelatedToEvent.objects.filter(person=wrong_person),
             'person', u'\t\tUpdating PersonRelatedToEvent() instances')
    _repoint(PersonRelatedToNews.objects.filter(person=wrong_person),
             'person', u'\t\tUpdating PersonRelatedToNews() instances')
    _repoint(PersonSeeAlso.objects.filter(person=wrong_person),
             'person', u'\t\tUpdating PersonSeeAlso() instances')
    _repoint(AccountProfile.objects.filter(person=wrong_person),
             'person', u'\t\tUpdating AccountProfile() instances')
    _repoint(Job.objects.filter(person=wrong_person),
             'person', u'\t\tUpdating Job() instances')
    _repoint(PhDProgramFollowedByPerson.objects.filter(person=wrong_person),
             'person', u'\t\tUpdating PhDProgramFollowedByPerson() instances')
    _repoint(ThesisRegisteredByPerson.objects.filter(person=wrong_person),
             'person', u'\t\tUpdating ThesisRegisteredByPerson() instances')
    _repoint(AssignedPerson.objects.filter(person=wrong_person),
             'person', u'\t\tUpdating AssignedPerson() instances')
    # NOTE(review): a Person is assigned to ``assigned_person`` here, while
    # the lookup ``assigned_person__person`` suggests that field refers to an
    # AssignedPerson - confirm.
    _repoint(AssignedPersonTag.objects.filter(assigned_person__person=wrong_person),
             'assigned_person', u'\t\tUpdating AssignedPersonTag() instances')
    _repoint(PublicationAuthor.objects.filter(author=wrong_person),
             'author', u'\t\tUpdating PublicationAuthor() instances')
    _repoint(PublicationEditor.objects.filter(editor=wrong_person),
             'editor', u'\t\tUpdating PublicationEditor() instances')
    _repoint(Thesis.objects.filter(author=wrong_person),
             'author', u'\t\tUpdating Thesis() instances')
    _repoint(Thesis.objects.filter(advisor=wrong_person),
             'advisor', u'\t\tUpdating Thesis() instances')
    _repoint(CoAdvisor.objects.filter(co_advisor=wrong_person),
             'co_advisor', u'\t\tUpdating CoAdvisor() instances')
    _repoint(PersonRelatedToContribution.objects.filter(person=wrong_person),
             'person', u'\t\tUpdating PersonRelatedToContribution() instances')
    _repoint(PersonRelatedToTalkOrCourse.objects.filter(person=wrong_person),
             'person', u'\t\tUpdating PersonRelatedToTalkOrCourse() instances')
    _repoint(PersonRelatedToAward.objects.filter(person=wrong_person),
             'person', u'\t\tUpdating PersonRelatedToAward() instances')

    wrong_person.delete()

    logger.info(u'')
    logger.info(u'\t<%s> instance removed' % wrong_person.full_name)