def arxiv_id(self, key, value): from invenio.utils.persistentid import is_arxiv_post_2007 if is_arxiv_post_2007(value): arxiv_rep_number = {'primary': 'arXiv:' + value, 'source': 'arXiv'} else: arxiv_rep_number = {'primary': value, 'source': 'arXiv'} if len(value.split('/')) == 2: arxiv_rep_number['arxiv_category'] = value.split('/')[0] if 'report_number' in self: self['report_number'].append(arxiv_rep_number) else: self['report_number'] = [arxiv_rep_number]
def get_curation_body(template, metadata, email, extra_data): """ Get ticket content. Ticket used by curators to curate the given record. """ from invenio.utils.persistentid import is_arxiv_post_2007 recid = extra_data.get('recid') record_url = extra_data.get('url') arxiv_id = metadata.get('arxiv_id') if arxiv_id and is_arxiv_post_2007(arxiv_id): arxiv_id = ''.join(['arXiv:', arxiv_id]) report_number = metadata.get('report_numbers') if report_number: report_number = report_number[0].get('value') link_to_pdf = extra_data.get('submission_data').get('pdf') subject = ' '.join(filter(lambda x: x is not None, [arxiv_id, " ".join(["doi:{0}".format(d) for d in metadata.get('dois')]), report_number, '(#{0})'.format(recid)])) references = extra_data.get('submission_data').get('references') user_comment = extra_data.get('submission_data').get('extra_comments') body = render_template( template, recid=recid, record_url=record_url, link_to_pdf=link_to_pdf, email=email, references=references, user_comment=user_comment, ).strip() return subject, body
def process_sip_metadata(cls, deposition, metadata): """Map fields to match jsonalchemy configuration.""" delete_keys = [] field_list = ['abstract', 'title'] # maps from a form field to the corresponding MarcXML field field_map = {'abstract': "summary", 'title': "title", 'subject_term': "term", 'institution': "university", 'degree_type': 'degree_type', 'thesis_date': "date", 'journal_title': "journal_title", 'page_range_article_id': "page_artid", 'volume': "journal_volume", 'year': "year", 'issue': "journal_issue", 'conference_id': "cnum"} # exclusive fields for each type of document doc_exclusive_fields = {'article': ['journal_title', 'page_range', 'article_id', 'volume', 'year', 'issue', 'conference_id'], 'thesis': ['supervisors', 'institution', 'degree_type', 'thesis_date', 'defense_date'], } del doc_exclusive_fields[metadata['type_of_doc']] def remove_exclusive_fields(fieldlist): for field in fieldlist: if field in metadata and metadata[field]: del metadata[field] map(remove_exclusive_fields, doc_exclusive_fields.values()) filter_empty_elements(metadata) # ============================ # Abstract, Title and Subjects # ============================ for field in field_list: if field in metadata: tmp_field = metadata[field] metadata[field] = {field_map[field]: tmp_field} if "subject_term" in metadata: tmp_field = metadata["subject_term"] metadata["subject_term"] = [{"term": t, "scheme": "INSPIRE", "source": "submitter"} for t in tmp_field] # ======= # Authors # ======= metadata['authors'] = filter(None, metadata['authors']) if 'authors' in metadata and metadata['authors']: first_author = metadata['authors'][0].get('full_name').split(',') if len(first_author) > 1 and \ literature.match_authors_initials(first_author[1]): first_author[1] = first_author[1].replace(' ', '') metadata['authors'][0]['full_name'] = ", ".join(first_author) metadata['_first_author'] = metadata['authors'][0] if metadata['authors'][1:]: metadata['_additional_authors'] = metadata['authors'][1:] for k in metadata['_additional_authors']: try: additional_author = k.get('full_name').split(',') if len(additional_author) > 1 and \ literature.match_authors_initials(additional_author[1]): additional_author[1] = additional_author[1].replace(' ', '') k['full_name'] = ", ".join(additional_author) except AttributeError: pass delete_keys.append('authors') # =========== # Supervisors # =========== if 'supervisors' in metadata and metadata['supervisors']: metadata['thesis_supervisor'] = metadata['supervisors'] delete_keys.append('supervisors') # ==== # Note # ==== if metadata.get('note', None): metadata['note'] = [{'value': metadata['note']}] # ============== # Thesis related # ============== thesis_fields = filter(lambda field: field in metadata, ['institution', 'degree_type', 'thesis_date']) if thesis_fields: metadata['thesis'] = {} for field in thesis_fields: metadata['thesis'][field_map[field]] = metadata[field] delete_keys.extend(thesis_fields) if 'defense_date' in metadata and metadata['defense_date']: defense_note = { 'value': 'Presented on ' + metadata['defense_date'] } if metadata.get('note', None): metadata['note'].append(defense_note) else: metadata['note'] = [defense_note] # ======== # Category # ======== metadata['collections'] = [{'primary': "HEP"}] if metadata['type_of_doc'] == 'thesis': metadata['collections'].append({'primary': "THESIS"}) # ============ # Title source # ============ if 'title_source' in metadata and metadata['title_source']: metadata['title']['source'] = metadata['title_source'] delete_keys.append('title_source') # ============= # Report number # ============= if 'report_numbers' in metadata and metadata['report_numbers']: user_report_number = metadata['report_numbers'] metadata['report_number'] = [{'primary': v['report_number']} for v in user_report_number] delete_keys.append('report_numbers') # ======== # arXiv ID # ======== imported_from_arXiv = filter(lambda field: field in metadata, ['categories', 'title_arXiv']) if imported_from_arXiv or metadata.get('title_source') == 'arXiv': if is_arxiv_post_2007(metadata['arxiv_id']): arxiv_rep_number = {'primary': 'arXiv:' + metadata['arxiv_id'], 'source': 'arXiv'} else: arxiv_rep_number = {'primary': metadata['arxiv_id'], 'source': 'arXiv'} if len(metadata['arxiv_id'].split('/')) == 2: arxiv_rep_number['arxiv_category'] = metadata['arxiv_id'].split('/')[0] if metadata.get('report_numbers'): metadata['report_number'].append(arxiv_rep_number) else: metadata['report_number'] = [arxiv_rep_number] if 'abstract' in metadata: metadata['abstract']['source'] = 'arXiv' if 'title_arXiv' in metadata: title_arXiv = metadata['title_arXiv'] metadata['title_arXiv'] = {} metadata['title_arXiv']['value'] = title_arXiv metadata['title_arXiv']['source'] = 'arXiv' if 'categories' in metadata and metadata['categories']: # arXiv subject categories subject_list = [{"term": c, "scheme": "arXiv"} for c in metadata['categories'].split()] # INSPIRE subject categories if 'subject_term' in metadata and metadata['subject_term']: metadata['subject_term'].extend(subject_list) else: metadata['subject_term'] = subject_list metadata['system_number_external'] = {'value': 'oai:arXiv.org:' + metadata['arxiv_id'], 'institute': 'arXiv'} metadata['collections'].extend([{'primary': "arXiv"}, {'primary': "Citeable"}]) # ======== # Language # ======== if metadata['language'] not in ('en', 'oth'): metadata['language'] = unicode(dict(LiteratureForm.languages).get(metadata['language'])) elif metadata['language'] == 'oth': if metadata['other_language']: metadata['language'] = metadata['other_language'] else: delete_keys.append('language') # ========== # Experiment # ========== if 'experiment' in metadata: metadata['accelerator_experiment'] = {'experiment': metadata['experiment']} delete_keys.append('experiment') # =============== # Conference Info # =============== if 'conf_name' in metadata: if 'nonpublic_note' in metadata: field = [metadata['nonpublic_note'], metadata['conf_name']] metadata['nonpublic_note'] = field else: metadata['nonpublic_note'] = [metadata['conf_name']] metadata['collections'].extend([{'primary': "ConferencePaper"}]) delete_keys.append('conf_name') # ======= # License # ======= licenses_kb = dict([(x['key'], x['value']) for x in get_kb_mappings(cfg["DEPOSIT_INSPIRE_LICENSE_KB"])]) if 'license' in metadata and metadata['license']: metadata['license'] = {'license': metadata['license']} if 'license_url' in metadata: metadata['license']['url'] = metadata['license_url'] else: metadata['license']['url'] = licenses_kb.get( metadata['license']['license']) elif 'license_url' in metadata: metadata['license'] = {'url': metadata['license_url']} license_key = {v: k for k, v in licenses_kb.items()}.get( metadata['license_url']) if license_key: metadata['license']['license'] = license_key delete_keys.append('license_url') # =========== # Files (FFT) # =========== if 'fft' in metadata and metadata['fft']: def restructure_ffts(fft): fft['url'] = fft['path'] fft['description'] = fft['name'] fft['docfile_type'] = "INSPIRE-PUBLIC" del fft['path'], fft['name'] map(restructure_ffts, metadata['fft']) # ==== # URLs # ==== if metadata.get('url'): metadata['pdf'] = metadata['url'] if isinstance(metadata['url'], string_types): metadata['url'] = [{'url': metadata['url']}] if 'additional_url' in metadata and metadata['additional_url']: if metadata.get('url'): metadata['url'].append({'url': metadata['additional_url']}) else: metadata['url'] = [{'url': metadata['additional_url']}] delete_keys.append('additional_url') # ================ # Publication Info # ================ publication_fields = filter(lambda field: field in metadata, ['journal_title', 'page_range_article_id', 'volume', 'year', 'issue', 'conference_id']) if publication_fields: metadata['publication_info'] = {} for field in publication_fields: metadata['publication_info'][field_map[field]] = metadata[field] if 'page_nr' not in metadata and 'page_range_article_id' in publication_fields: pages = metadata['page_range_article_id'].split('-') if len(pages) == 2: try: metadata['page_nr'] = int(pages[1]) - int(pages[0]) + 1 except ValueError: pass if {'primary': "ConferencePaper"} not in metadata['collections']: metadata['collections'].append({'primary': "Published"}) delete_keys.extend(publication_fields) if 'journal_title' in metadata: journals_kb = dict([(x['key'].lower(), x['value']) for x in get_kb_mappings(cfg.get("DEPOSIT_INSPIRE_JOURNALS_KB"))]) metadata['publication_info']['journal_title'] = journals_kb.get(metadata['journal_title'].lower(), metadata['journal_title']) if 'nonpublic_note' in metadata: if (isinstance(metadata['nonpublic_note'], list) and len(metadata['nonpublic_note']) > 1): del metadata['nonpublic_note'][0] else: delete_keys.append('nonpublic_note') # ============= # Preprint Info # ============= if 'created' in metadata and metadata['created']: metadata['preprint_info'] = {'date': metadata['created']} delete_keys.append('created') # ========== # Owner Info # ========== userid = deposition.user_id user = UserInfo(userid) email = user.info.get('email', '') external_ids = UserEXT.query.filter_by(id_user=userid).all() sources = ["{0}{1}".format('inspire:uid:', userid)] sources.extend(["{0}:{1}".format(e_id.method, e_id.id) for e_id in external_ids]) metadata['acquisition_source'] = dict( source=sources, email=email, method="submission", submission_number=deposition.id, ) # ============== # Extra comments # ============== if 'extra_comments' in metadata and metadata['extra_comments']: metadata['hidden_note'] = [{'value': metadata['extra_comments'], 'source': 'submitter'}] # =================== # Delete useless data # =================== for key in delete_keys: del metadata[key]