def formdata_to_model(obj, formdata):
    """Manipulate form data to match literature data model.

    Args:
        obj: object the submission is attached to. This function reads
            ``obj.id`` and ``obj.id_user`` and may set
            ``obj.extra_data['submission_pdf']``.
        formdata (dict): submitted form fields. A deep copy is made before
            any processing, so the caller's dict is not modified here.

    Returns:
        The ``record`` attribute of the ``LiteratureBuilder`` after
        ``validate_record()`` has been called on it.

    Raises:
        Whatever ``builder.validate_record()`` raises for an invalid record
        (its behaviour is defined outside this function).
    """
    def _is_arxiv_url(url):
        return 'arxiv.org' in url

    def _affiliations_of(person):
        # Each person's own affiliation decides whether affiliations are set.
        affiliation = person['affiliation']
        return force_list(affiliation) if affiliation else None

    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(
        form_fields, ['authors', 'supervisors', 'report_numbers']
    )

    builder = LiteratureBuilder(source='submitter')

    for author in form_fields.get('authors', []):
        builder.add_author(builder.make_author(
            author['full_name'],
            affiliations=_affiliations_of(author),
            roles=['author'],
        ))

    for supervisor in form_fields.get('supervisors', []):
        # The condition used to look at ``author['affiliation']``, which
        # raised NameError with no authors and otherwise used the last
        # author's affiliation to gate the supervisor's.
        builder.add_author(builder.make_author(
            supervisor['full_name'],
            affiliations=_affiliations_of(supervisor),
            roles=['supervisor'],
        ))

    builder.add_title(title=form_fields.get('title'))

    # A conference name takes precedence over the selected document type.
    if form_fields.get('conf_name'):
        document_type = 'conference paper'
    else:
        document_type = form_fields.get('type_of_doc', [])
    if document_type == 'chapter':
        document_type = 'book chapter'

    builder.add_document_type(document_type=document_type)

    builder.add_abstract(
        abstract=form_fields.get('abstract'),
        source='arXiv' if form_fields.get('categories') else None,
    )

    if form_fields.get('arxiv_id') and form_fields.get('categories'):
        builder.add_arxiv_eprint(
            arxiv_id=form_fields.get('arxiv_id'),
            arxiv_categories=form_fields.get('categories').split(),
        )

    builder.add_doi(doi=form_fields.get('doi'))

    builder.add_inspire_categories(
        subject_terms=form_fields.get('subject_term'),
        source='user',
    )

    for key in ('extra_comments', 'nonpublic_note',
                'hidden_notes', 'conf_name', 'references'):
        builder.add_private_note(private_notes=form_fields.get(key))

    # A missing or non-numeric year is passed on as None.
    year = form_fields.get('year')
    try:
        year = int(year)
    except (TypeError, ValueError):
        year = None

    builder.add_preprint_date(
        preprint_date=form_fields.get('preprint_created')
    )

    if form_fields.get('type_of_doc') == 'thesis':
        builder.add_thesis(
            defense_date=form_fields.get('defense_date'),
            degree_type=form_fields.get('degree_type'),
            institution=form_fields.get('institution'),
            date=form_fields.get('thesis_date'),
        )

    if form_fields.get('type_of_doc') == 'chapter':
        if not form_fields.get('journal_title'):
            builder.add_book_series(title=form_fields.get('series_title'))

    if form_fields.get('type_of_doc') == 'book':
        if form_fields.get('journal_title'):
            # With a journal title the series volume is reused as the
            # journal volume in the publication info added further down.
            form_fields['volume'] = form_fields.get('series_volume')
        else:
            builder.add_book_series(
                title=form_fields.get('series_title'),
                volume=form_fields.get('series_volume'),
            )
        # NOTE(review): add_book is assumed to run for every book, not only
        # in the else branch -- confirm against the original layout.
        builder.add_book(
            publisher=form_fields.get('publisher_name'),
            place=form_fields.get('publication_place'),
            date=form_fields.get('publication_date'),
        )

    builder.add_publication_info(
        year=year,
        cnum=form_fields.get('conference_id'),
        journal_issue=form_fields.get('issue'),
        journal_title=form_fields.get('journal_title'),
        journal_volume=form_fields.get('volume'),
        page_start=form_fields.get('start_page'),
        page_end=form_fields.get('end_page'),
        artid=form_fields.get('artid'),
        parent_record=form_fields.get('parent_book'),
    )

    builder.add_accelerator_experiments_legacy_name(
        legacy_name=form_fields.get('experiment')
    )

    # 'oth' means the language was typed into the free-text field.
    if form_fields.get('language') == 'oth':
        language = form_fields.get('other_language')
    else:
        language = form_fields.get('language')
    builder.add_language(language=language)

    if form_fields.get('title_translation'):
        builder.add_title_translation(
            title=form_fields['title_translation'],
            language='en',
        )

    builder.add_title(
        title=form_fields.get('title_arXiv'),
        source='arXiv',
    )
    builder.add_title(
        title=form_fields.get('title_crossref'),
        source='crossref',
    )

    builder.add_license(url=form_fields.get('license_url'))

    builder.add_public_note(public_note=form_fields.get('public_notes'))
    builder.add_public_note(
        public_note=form_fields.get('note'),
        source='arXiv' if form_fields.get('categories') else 'CrossRef',
    )

    form_url = form_fields.get('url')
    form_additional_url = form_fields.get('additional_url')

    if form_url and not _is_arxiv_url(form_url):
        obj.extra_data['submission_pdf'] = form_url
        # NOTE(review): assumed nested under the check above (the main URL
        # is only added as a record URL when no additional URL was given).
        if not form_additional_url:
            builder.add_url(url=form_url)

    if form_additional_url and not _is_arxiv_url(form_additional_url):
        builder.add_url(url=form_additional_url)

    for report_number in form_fields.get('report_numbers', []):
        builder.add_report_number(
            report_number=report_number.get('report_number')
        )

    builder.add_collaboration(collaboration=form_fields.get('collaboration'))

    builder.add_acquisition_source(
        datetime=datetime.datetime.utcnow().isoformat(),
        submission_number=obj.id,
        internal_uid=int(obj.id_user),
        email=form_fields.get('email'),
        orcid=form_fields.get('orcid'),
        method='submitter',
    )

    builder.validate_record()

    return builder.record
def formdata_to_model(obj, formdata):
    """Manipulate form data to match literature data model.

    Args:
        obj: object the submission is attached to. This function reads
            ``obj.id`` and ``obj.id_user`` and may set
            ``obj.extra_data['submission_pdf']``.
        formdata (dict): submitted form fields. A deep copy is made before
            any processing, so the caller's dict is not modified here.

    Returns:
        The ``record`` attribute of the ``LiteratureBuilder`` after
        ``validate_record()`` has been called on it.

    Raises:
        Whatever ``builder.validate_record()`` raises for an invalid record
        (its behaviour is defined outside this function).
    """
    def _is_arxiv_url(url):
        return 'arxiv.org' in url

    def _affiliations_of(person):
        # Each person's own affiliation decides whether affiliations are set.
        affiliation = person['affiliation']
        return force_list(affiliation) if affiliation else None

    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(
        form_fields, ['authors', 'supervisors', 'report_numbers']
    )

    builder = LiteratureBuilder(source='submitter')

    for author in form_fields.get('authors', []):
        builder.add_author(builder.make_author(
            author['full_name'],
            affiliations=_affiliations_of(author),
            roles=['author'],
        ))

    for supervisor in form_fields.get('supervisors', []):
        # The condition used to look at ``author['affiliation']``, which
        # raised NameError with no authors and otherwise used the last
        # author's affiliation to gate the supervisor's.
        builder.add_author(builder.make_author(
            supervisor['full_name'],
            affiliations=_affiliations_of(supervisor),
            roles=['supervisor'],
        ))

    builder.add_title(title=form_fields.get('title'))

    # A conference name takes precedence over the selected document type.
    if form_fields.get('conf_name'):
        document_type = 'conference paper'
    else:
        document_type = form_fields.get('type_of_doc', [])

    builder.add_document_type(document_type=document_type)

    builder.add_abstract(
        abstract=form_fields.get('abstract'),
        source='arXiv' if form_fields.get('categories') else None,
    )

    if form_fields.get('arxiv_id') and form_fields.get('categories'):
        builder.add_arxiv_eprint(
            arxiv_id=form_fields.get('arxiv_id'),
            arxiv_categories=form_fields.get('categories').split(),
        )

    builder.add_doi(doi=form_fields.get('doi'))

    builder.add_inspire_categories(
        subject_terms=form_fields.get('subject_term'),
        source='user',
    )

    for key in ('extra_comments', 'nonpublic_note',
                'hidden_notes', 'conf_name', 'references'):
        builder.add_private_note(private_notes=form_fields.get(key))

    # A missing or non-numeric year is passed on as None.
    year = form_fields.get('year')
    try:
        year = int(year)
    except (TypeError, ValueError):
        year = None

    builder.add_publication_info(
        year=year,
        cnum=form_fields.get('conference_id'),
        journal_issue=form_fields.get('issue'),
        journal_title=form_fields.get('journal_title'),
        journal_volume=form_fields.get('volume'),
        page_start=form_fields.get('page_start'),
        page_end=form_fields.get('page_end'),
        artid=form_fields.get('artid'),
    )

    builder.add_preprint_date(
        preprint_date=form_fields.get('preprint_created')
    )

    if form_fields.get('type_of_doc') == 'thesis':
        builder.add_thesis(
            defense_date=form_fields.get('defense_date'),
            degree_type=form_fields.get('degree_type'),
            institution=form_fields.get('institution'),
            date=form_fields.get('thesis_date'),
        )

    builder.add_accelerator_experiments_legacy_name(
        legacy_name=form_fields.get('experiment')
    )

    # 'oth' means the language was typed into the free-text field.
    if form_fields.get('language') == 'oth':
        language = form_fields.get('other_language')
    else:
        language = form_fields.get('language')
    builder.add_language(language=language)

    builder.add_title_translation(title=form_fields.get('title_translation'))

    builder.add_title(
        title=form_fields.get('title_arXiv'),
        source='arXiv',
    )
    builder.add_title(
        title=form_fields.get('title_crossref'),
        source='crossref',
    )

    builder.add_license(url=form_fields.get('license_url'))

    builder.add_public_note(public_note=form_fields.get('public_notes'))
    builder.add_public_note(
        public_note=form_fields.get('note'),
        source='arXiv' if form_fields.get('categories') else 'CrossRef',
    )

    form_url = form_fields.get('url')
    form_additional_url = form_fields.get('additional_url')

    if form_url and not _is_arxiv_url(form_url):
        obj.extra_data['submission_pdf'] = form_url
        # NOTE(review): assumed nested under the check above (the main URL
        # is only added as a record URL when no additional URL was given).
        if not form_additional_url:
            builder.add_url(url=form_url)

    if form_additional_url and not _is_arxiv_url(form_additional_url):
        builder.add_url(url=form_additional_url)

    for report_number in form_fields.get('report_numbers', []):
        builder.add_report_number(
            report_number=report_number.get('report_number')
        )

    builder.add_collaboration(collaboration=form_fields.get('collaboration'))

    builder.add_acquisition_source(
        datetime=datetime.datetime.utcnow().isoformat(),
        submission_number=obj.id,
        internal_uid=int(obj.id_user),
        email=form_fields.get('email'),
        orcid=form_fields.get('orcid'),
        method='submitter',
    )

    builder.validate_record()

    return builder.record
def hepcrawl_to_hep(crawler_record):
    """Convert a hepcrawl formatted record into a hep formatted record.

    Args:
        crawler_record(dict): dictionary representing the hepcrawl formatted
            record. ``crawler_record['acquisition_source']`` must be present
            and contain ``source``, ``method``, ``datetime`` and
            ``submission_number``.

    Returns:
        dict: The hep formatted (and validated) record.

    Raises:
        Exception: if there was a validation error (the exact class depends
            on :class:`inspire_schemas.api.validate`).
    """

    def _filter_affiliation(affiliations):
        # Keep only affiliations that carry a non-empty 'value'.
        return [
            affiliation.get('value')
            for affiliation in affiliations
            if affiliation.get('value')
        ]

    builder = LiteratureBuilder(
        source=crawler_record['acquisition_source']['source']
    )

    for author in crawler_record.get('authors', []):
        builder.add_author(builder.make_author(
            author['full_name'],
            affiliations=_filter_affiliation(author['affiliations']),
        ))

    for title in crawler_record.get('titles', []):
        builder.add_title(
            title=title.get('title'),
            source=title.get('source'),
        )

    for abstract in crawler_record.get('abstracts', []):
        builder.add_abstract(
            abstract=abstract.get('value'),
            source=abstract.get('source'),
        )

    for arxiv_eprint in crawler_record.get('arxiv_eprints', []):
        builder.add_arxiv_eprint(
            arxiv_id=arxiv_eprint.get('value'),
            arxiv_categories=arxiv_eprint.get('categories'),
        )

    for doi in crawler_record.get('dois', []):
        builder.add_doi(
            doi=doi.get('value'),
            material=doi.get('material'),
        )

    for public_note in crawler_record.get('public_notes', []):
        builder.add_public_note(
            public_note=public_note.get('value'),
            source=public_note.get('source'),
        )

    # Loop variables are named so they do not shadow the ``license`` and
    # ``copyright`` builtins.
    for license_entry in crawler_record.get('license', []):
        builder.add_license(
            url=license_entry.get('url'),
            license=license_entry.get('license'),
            material=license_entry.get('material'),
        )

    for collaboration in crawler_record.get('collaborations', []):
        builder.add_collaboration(collaboration=collaboration.get('value'))

    for imprint in crawler_record.get('imprints', []):
        builder.add_imprint_date(imprint_date=imprint.get('date'))

    for copyright_entry in crawler_record.get('copyright', []):
        builder.add_copyright(
            holder=copyright_entry.get('holder'),
            material=copyright_entry.get('material'),
            statement=copyright_entry.get('statement'),
        )

    builder.add_preprint_date(
        preprint_date=crawler_record.get('preprint_date')
    )

    acquisition_source = crawler_record.get('acquisition_source', {})
    builder.add_acquisition_source(
        method=acquisition_source['method'],
        date=acquisition_source['datetime'],
        source=acquisition_source['source'],
        submission_number=acquisition_source['submission_number'],
    )

    # Best effort: a missing, empty or non-numeric 'page_nr' is skipped.
    try:
        builder.add_number_of_pages(
            number_of_pages=int(crawler_record.get('page_nr', [])[0])
        )
    except (TypeError, ValueError, IndexError):
        pass

    publication_types = [
        'introductory',
        'lectures',
        'review',
    ]

    special_collections = [
        'cdf-internal-note',
        'cdf-note',
        'cds',
        'd0-internal-note',
        'd0-preliminary-note',
        'h1-internal-note',
        'h1-preliminary-note',
        'halhidden',
        'hephidden',
        'hermes-internal-note',
        'larsoft-internal-note',
        'larsoft-note',
        'zeus-internal-note',
        'zeus-preliminary-note',
    ]

    document_types = [
        'book',
        'note',
        'report',
        'proceedings',
        'thesis',
    ]

    added_doc_type = False

    for collection in crawler_record.get('collections', []):
        collection = collection['primary'].strip().lower()

        if collection == 'arxiv':
            continue  # ignored
        elif collection == 'citeable':
            builder.set_citeable(True)
        elif collection == 'core':
            builder.set_core(True)
        elif collection == 'noncore':
            builder.set_core(False)
        elif collection == 'published':
            builder.set_refereed(True)
        elif collection == 'withdrawn':
            builder.set_withdrawn(True)
        elif collection in publication_types:
            builder.add_publication_type(collection)
        elif collection in special_collections:
            builder.add_special_collection(collection.upper())
        elif collection == 'bookchapter':
            added_doc_type = True
            builder.add_document_type('book chapter')
        elif collection == 'conferencepaper':
            added_doc_type = True
            builder.add_document_type('conference paper')
        elif collection in document_types:
            added_doc_type = True
            builder.add_document_type(collection)

    # Fall back to 'article' when no collection supplied a document type.
    if not added_doc_type:
        builder.add_document_type('article')

    # Only the first publication_info entry is used. A missing, None or
    # empty list falls back to an empty dict; indexing an empty list used to
    # raise an uncaught IndexError here.
    _pub_info = (crawler_record.get('publication_info') or [{}])[0]
    builder.add_publication_info(
        year=_pub_info.get('year'),
        artid=_pub_info.get('artid'),
        page_end=_pub_info.get('page_end'),
        page_start=_pub_info.get('page_start'),
        journal_issue=_pub_info.get('journal_issue'),
        journal_title=_pub_info.get('journal_title'),
        journal_volume=_pub_info.get('journal_volume'),
        pubinfo_freetext=_pub_info.get('pubinfo_freetext'),
        material=_pub_info.get('pubinfo_material'),
    )

    for report_number in crawler_record.get('report_numbers', []):
        builder.add_report_number(
            report_number=report_number.get('value'),
            source=report_number.get('source'),
        )

    builder.validate_record()

    return builder.record