def page_range_article_id(self, key, value):
    """Record the page range / article id parsed out of ``value``.

    ``value`` is handed to ``split_page_artid`` and the three pieces it
    returns are merged into the first entry of
    ``self['publication_info']``; the key is initialised to ``[{}]`` when
    it is not present yet.

    Raises:
        IgnoreKey: always, once the update has been applied.
    """
    first_page, last_page, article_id = split_page_artid(value)

    pub_infos = self.setdefault('publication_info', [{}])
    pub_infos[0].update({
        'page_start': first_page,
        'page_end': last_page,
        'artid': article_id,
    })

    raise IgnoreKey
def split_page_range_article_id(obj, formdata):
    """Expand ``formdata['page_range_article_id']`` into separate fields.

    When the combined field is present and truthy it is split with
    ``split_page_artid`` and the pieces are stored under ``page_start``,
    ``page_end`` and ``artid``. Otherwise ``formdata`` is left untouched.

    Args:
        obj: not used by this function.
        formdata: mapping to read from and update in place.

    Returns:
        The same ``formdata`` object that was passed in.
    """
    combined = formdata.get('page_range_article_id')
    if not combined:
        # Nothing to split: hand the form data back as-is.
        return formdata

    first_page, last_page, article_id = split_page_artid(combined)
    formdata['page_start'] = first_page
    formdata['page_end'] = last_page
    formdata['artid'] = article_id
    return formdata
def extract_journal_info(obj, eng):
    """Fill structured journal fields from free-text publication notes.

    For every non-empty entry of ``obj.data["publication_info"]`` that has
    a ``pubinfo_freetext``, the text is passed to
    ``extract_journal_reference`` and any ``volume``, ``title``, ``year``
    and ``page`` found are copied onto the entry. Entries whose values are
    all falsy are discarded; the filtered list is written back to
    ``obj.data["publication_info"]``.

    Args:
        obj: object exposing a ``data`` mapping.
        eng: not used by this function.

    Returns:
        None. Returns early, without touching ``obj.data``, when there is
        no publication info.
    """
    notes = get_value(obj.data, "publication_info")
    if not notes:
        return

    kept = []
    for note in notes:
        if not note:
            continue

        parsed = None
        text = note.get("pubinfo_freetext")
        if text:
            # A list/tuple of fragments is flattened into one sentence-like
            # string before extraction.
            if isinstance(text, (list, tuple)):
                text = ". ".join(text)
            parsed = extract_journal_reference(
                text,
                # override_kbs_files={
                #     'journals': get_mappings_from_kbname(['REFEXTRACT_KB_NAME'])
                # }
            )

        if parsed:
            for source_key, target_key in (
                ("volume", "journal_volume"),
                ("title", "journal_title"),
            ):
                if source_key in parsed:
                    note[target_key] = parsed.get(source_key)

            if "year" in parsed:
                # Only store the year when it converts to an integer.
                numeric_year = maybe_int(parsed.get('year'))
                if numeric_year is not None:
                    note['year'] = numeric_year

            if "page" in parsed:
                first_page, last_page, article_id = split_page_artid(
                    parsed.get("page"))
                page_fields = (
                    ("page_start", first_page),
                    ("page_end", last_page),
                    ("artid", article_id),
                )
                for field, piece in page_fields:
                    if piece:
                        note[field] = piece

        # Keep the entry only if it carries at least one truthy value.
        if any(note.values()):
            kept.append(note)

    obj.data["publication_info"] = kept
def extract_journal_info(obj, eng):
    """Extract journal, volume etc. from any freetext publication info.

    Each non-empty entry of ``obj.data["publication_info"]`` that carries a
    ``pubinfo_freetext`` is passed to ``extract_journal_reference``; the
    ``volume``, ``title``, ``year`` and ``page`` it finds are written back
    onto the entry. Entries without freetext are kept unchanged instead of
    being dropped, and a year that cannot be converted to ``int`` is
    skipped instead of raising.

    Args:
        obj: object exposing a ``data`` mapping.
        eng: not used by this function.

    Returns:
        None. ``obj.data["publication_info"]`` is replaced by the list of
        entries that have at least one truthy value; nothing is written
        when there is no publication info to begin with.
    """
    publication_info = get_value(obj.data, "publication_info")
    if not publication_info:
        return

    new_publication_info = []
    for pubnote in publication_info:
        if not pubnote:
            continue

        freetext = pubnote.get("pubinfo_freetext")
        extracted_publication_info = None
        if freetext:
            if isinstance(freetext, (list, tuple)):
                freetext = ". ".join(freetext)
            extracted_publication_info = extract_journal_reference(
                freetext,
                # override_kbs_files={
                #     'journals': get_mappings_from_kbname(['REFEXTRACT_KB_NAME'])
                # }
            )

        if extracted_publication_info:
            if "volume" in extracted_publication_info:
                pubnote["journal_volume"] = extracted_publication_info.get(
                    "volume")

            if "title" in extracted_publication_info:
                pubnote["journal_title"] = extracted_publication_info.get(
                    "title")

            if "year" in extracted_publication_info:
                # A non-numeric (or missing) year must not abort the whole
                # task; simply leave ``year`` unset in that case.
                try:
                    pubnote["year"] = int(
                        extracted_publication_info.get("year"))
                except (TypeError, ValueError):
                    pass

            if "page" in extracted_publication_info:
                page_start, page_end, artid = split_page_artid(
                    extracted_publication_info.get("page"))
                if page_start:
                    pubnote["page_start"] = page_start
                if page_end:
                    pubnote["page_end"] = page_end
                if artid:
                    pubnote["artid"] = artid

        # Keep every entry that holds real data, including those that had
        # no freetext to parse, so structured publication info is not lost
        # when the list is overwritten below.
        if any(pubnote.values()):
            new_publication_info.append(pubnote)

    obj.data["publication_info"] = new_publication_info
def publication_info(self, key, value):
    """Build a publication-info dict from the entries of ``value``.

    Integer-like entries (``y``, ``0``, ``1``, ``2``) go through
    ``maybe_int``; the three record ids are turned into references with
    ``get_record_ref``; ``c`` is split into page start, page end and
    article id; the remaining entries are reduced with
    ``force_single_element``. ``hidden`` is ``True`` when ``key`` starts
    with ``'7731'`` and ``None`` otherwise.
    """

    def _get_material(value):
        """Return the schema material equal to lower-cased ``m``, if any."""
        allowed = load_schema('elements/material')['enum']
        candidate = force_single_element(value.get('m', ''))
        return next(
            (name for name in allowed if candidate.lower() == name),
            None,
        )

    def _single(code):
        return force_single_element(value.get(code))

    publication_year = maybe_int(_single('y'))
    parent_id = maybe_int(_single('0'))
    journal_id = maybe_int(_single('1'))
    conference_id = maybe_int(_single('2'))

    parent_ref = get_record_ref(parent_id, 'literature')
    conference_ref = get_record_ref(conference_id, 'conferences')
    journal_ref = get_record_ref(journal_id, 'journals')

    first_page, last_page, article_id = split_page_artid(value.get('c'))

    return {
        'parent_record': parent_ref,
        'conference_record': conference_ref,
        'journal_record': journal_ref,
        'page_start': first_page,
        'page_end': last_page,
        'artid': article_id,
        'journal_issue': _single('n'),
        'conf_acronym': _single('o'),
        'journal_title': _single('p'),
        'parent_report_number': _single('r'),
        'journal_volume': _single('v'),
        'cnum': _single('w'),
        'pubinfo_freetext': _single('x'),
        'year': publication_year,
        'parent_isbn': _single('z'),
        'material': _get_material(value),
        'hidden': True if key.startswith('7731') else None,
    }
def publication_info(self, key, value):
    """Publication info about record.

    Reads the entries of ``value`` and returns them as a dict: ``y``,
    ``0``, ``1`` and ``2`` are converted to ``int`` when they are all
    digits, the three record ids are resolved with ``get_record_ref``,
    ``c`` is split into page start, page end and article id, ``w`` is
    wrapped with ``force_force_list`` and the other entries are copied
    as they are.
    """

    def get_int_value(val):
        """Return the first element of ``val`` as ``int`` if all digits.

        Falsy input gives ``None``; a first element that is not purely
        digits is returned unchanged.
        """
        if not val:
            return None
        first = force_force_list(val)[0]
        return int(first) if first.isdigit() else first

    publication_year = get_int_value(value.get('y'))
    parent_id = get_int_value(value.get('0'))
    journal_id = get_int_value(value.get('1'))
    conference_id = get_int_value(value.get('2'))

    parent_ref = get_record_ref(parent_id, 'literature')
    conference_ref = get_record_ref(conference_id, 'conferences')
    journal_ref = get_record_ref(journal_id, 'journals')

    first_page, last_page, article_id = split_page_artid(value.get('c'))

    return {
        'parent_record': parent_ref,
        'conference_record': conference_ref,
        'journal_record': journal_ref,
        'page_start': first_page,
        'page_end': last_page,
        'artid': article_id,
        'journal_issue': value.get('n'),
        'conf_acronym': value.get('o'),
        'journal_title': value.get('p'),
        'reportnumber': value.get('r'),
        'confpaper_info': value.get('t'),
        'journal_volume': value.get('v'),
        'cnum': force_force_list(value.get('w')),
        'pubinfo_freetext': value.get('x'),
        'year': publication_year,
        'isbn': value.get('z'),
        'note': value.get('m'),
    }
def publication_info(self, key, value):
    """Publication info about record.

    Returns a dict assembled from the entries of ``value``: ``y``, ``0``,
    ``1`` and ``2`` become ``int`` when purely numeric, record ids are
    resolved through ``get_record_ref``, ``c`` is split by
    ``split_page_artid``, ``w`` is forced into a list and ``m`` is forced
    into a list and de-duplicated under ``notes``. Other entries are
    copied unchanged.
    """

    def get_int_value(val):
        """Return the first element of ``val``, as ``int`` if all digits."""
        if not val:
            return None
        head = force_force_list(val)[0]
        if head.isdigit():
            return int(head)
        return head

    pub_year = get_int_value(value.get('y'))
    literature_id = get_int_value(value.get('0'))
    journals_id = get_int_value(value.get('1'))
    conferences_id = get_int_value(value.get('2'))

    literature_ref = get_record_ref(literature_id, 'literature')
    conferences_ref = get_record_ref(conferences_id, 'conferences')
    journals_ref = get_record_ref(journals_id, 'journals')

    start, end, article = split_page_artid(value.get('c'))

    result = {
        'parent_record': literature_ref,
        'conference_record': conferences_ref,
        'journal_record': journals_ref,
        'page_start': start,
        'page_end': end,
        'artid': article,
        'journal_issue': value.get('n'),
        'conf_acronym': value.get('o'),
        'journal_title': value.get('p'),
        'reportnumber': value.get('r'),
        'confpaper_info': value.get('t'),
        'journal_volume': value.get('v'),
        'cnum': force_force_list(value.get('w')),
        'pubinfo_freetext': value.get('x'),
        'year': pub_year,
        'isbn': value.get('z'),
        'notes': dedupe_list(force_force_list(value.get('m'))),
    }
    return result