def replace_references(recid, uid=None, txt=None, url=None):
    """Build the MARC XML of a record with freshly extracted references

    References are parsed from a single source, then the 999 fields of
    the record obtained from get_cache_contents are swapped for the
    extracted 999C5 and 999C6 fields. No save call is made here: only
    the resulting MARC XML is handed back to the caller.

    Parameters:
    * recid: the id of the record
    * uid: passed together with recid to get_cache_contents
    * txt: references in text mode; takes precedence over url
    * url: location handed to extract_references_from_url_xml, only
           used when txt is None

    When both txt and url are None, the references are extracted from
    the record itself (extract_references_from_record_xml).

    Returns the output of record_xml_output, or None when the
    extraction produced no 999C5 field (the 999 fields of the record
    are then left as they were).
    """
    # Pick the source of the references: txt, then url, then the record
    if txt is not None:
        parsed_xml = extract_references_from_string_xml(
            txt, is_only_references=True)
    elif url is not None:
        parsed_xml = extract_references_from_url_xml(url)
    else:
        parsed_xml = extract_references_from_record_xml(recid)
    parsed = create_record(parsed_xml)

    # Only the third item of the cache contents is needed here
    _, _, record, _, _, _, _ = get_cache_contents(recid, uid)

    extracted = parsed[0]
    new_references = record_get_field_instances(extracted, tag="999",
                                                ind1="C", ind2="5")
    status_fields = record_get_field_instances(extracted, tag="999",
                                               ind1="C", ind2="6")

    # Nothing extracted: leave the record alone and report no output
    if not new_references:
        return None

    # The delete is called with the tag only (no indicators), then the
    # extracted 999C5 and 999C6 fields are added back
    record_delete_fields(record, "999")
    record_add_fields(record, "999", new_references)
    record_add_fields(record, "999", status_fields)

    return record_xml_output(record)
def extract_references_txt(self, req, form):
    """Extract references from an uploaded plain text

    check_login(req) is called before anything else. The text is read
    from the stream of the 'txt' form field and handed to
    extract_references_from_string_xml with is_only_references=False.

    Returns the result of that extraction, or the plain message
    'No text specified' when the form has no 'txt' field.
    """
    check_login(req)

    if 'txt' in form:
        uploaded_text = form['txt'].stream.read()
        return extract_references_from_string_xml(
            uploaded_text, is_only_references=False)

    return 'No text specified'
def replace_references(recid, uid=None, txt=None, url=None):
    """Return the MARC XML of a record with its references replaced

    The references come from txt when given, otherwise from url when
    given, otherwise from the record itself. The 999 fields of the
    record returned by get_cache_contents are then replaced by the
    999C5 and 999C6 fields found in the extraction result. This
    function only returns XML; it makes no call to save the record.

    Parameters:
    * recid: the id of the record
    * uid: forwarded with recid to get_cache_contents
    * txt: references in text mode
    * url: where to fetch the document from, handed to
           extract_references_from_url_xml

    Returns None when no 999C5 field was extracted, otherwise the
    value of record_xml_output for the modified record.
    """
    # Parse the references from the first available source
    if txt is not None:
        source_xml = extract_references_from_string_xml(
            txt, is_only_references=True)
    elif url is not None:
        source_xml = extract_references_from_url_xml(url)
    else:
        source_xml = extract_references_from_record_xml(recid)
    creation_result = create_record(source_xml)

    # get_cache_contents yields seven items; only the record is used
    _, _, record, _, _, _, _ = get_cache_contents(recid, uid)

    result_xml = None
    fresh_references = record_get_field_instances(
        creation_result[0], tag='999', ind1='C', ind2='5')
    fresh_status = record_get_field_instances(
        creation_result[0], tag='999', ind1='C', ind2='6')

    if fresh_references:
        # Drop the current 999 fields (the delete is given the tag
        # only) and put the extracted fields in their place
        record_delete_fields(record, '999')
        for new_fields in (fresh_references, fresh_status):
            record_add_fields(record, '999', new_fields)
        result_xml = record_xml_output(record)

    return result_xml
def extract_references(self, req, form):
    """References extraction page

    This page can be used by authors to test their pdfs against our
    references extraction process.

    The first non-empty form field among 'pdf', 'arxiv', 'url' and
    'txt' (checked in that order) selects what the references are
    extracted from. When none is set, or the extraction gives back
    nothing, the upload form is shown instead of the results.

    Returns the full page built by page(), titled
    'References Extractor'.
    """
    user_info = collect_user_info(req)

    # Handle the four POST parameters; the first non-empty one wins
    refs_xml = None
    if 'pdf' in form and form['pdf']:
        refs_xml = extract_from_pdf_string(form['pdf'])
    elif 'arxiv' in form and form['arxiv']:
        arxiv_url = make_arxiv_url(arxiv_id=form['arxiv'])
        refs_xml = extract_references_from_url_xml(arxiv_url)
    elif 'url' in form and form['url']:
        refs_xml = extract_references_from_url_xml(form['url'])
    elif 'txt' in form and form['txt']:
        refs_xml = extract_references_from_string_xml(form['txt'])

    if refs_xml:
        # The formatted references are preceded by a style rule hiding
        # the #referenceinp_link element
        body = """ <style type="text/css"> #referenceinp_link { display: none; } </style> """
        body += format_record(0,
                              'hdref',
                              xml_record=refs_xml.encode('utf-8'),
                              user_info=user_info)
    else:
        # Nothing uploaded yet: display the form that allows to do so
        body = self.extract_references_template()

    # Render the page (including header, footer)
    return page(title='References Extractor',
                body=body,
                uid=user_info['uid'],
                req=req)
def extract(self, req, form):
    """References extraction page

    This page can be used by authors to test their pdfs against our
    references extraction process.

    The first non-empty input among the 'pdf', 'arxiv', 'url' and 'txt'
    form fields (checked in that order) selects what the references
    are extracted from. The 'txt' field is read through its .value
    attribute and decoded as UTF-8, ignoring undecodable bytes. When
    no input is set, or the extraction gives back nothing, the upload
    form is shown instead of the results.

    Returns the full page built by page(), titled
    'References Extractor'.
    """
    user_info = collect_user_info(req)

    # Handle the four POST parameters; the first non-empty one wins
    refs_xml = None
    if 'pdf' in form and form['pdf']:
        refs_xml = extract_from_pdf_string(form['pdf'])
    elif 'arxiv' in form and form['arxiv']:
        arxiv_url = make_arxiv_url(arxiv_id=form['arxiv'])
        refs_xml = extract_references_from_url_xml(arxiv_url)
    elif 'url' in form and form['url']:
        refs_xml = extract_references_from_url_xml(form['url'])
    elif 'txt' in form and form['txt'].value:
        decoded_text = form['txt'].value.decode('utf-8', 'ignore')
        refs_xml = extract_references_from_string_xml(decoded_text)

    if refs_xml:
        formatted_refs = format_record(0,
                                       'hdref',
                                       xml_record=refs_xml,
                                       user_info=user_info)
        body = docextract_templates.tmpl_web_result(formatted_refs)
    else:
        # Nothing uploaded yet: display the form that allows to do so
        body = docextract_templates.tmpl_web_form()

    # Render the page (including header, footer)
    return page(title='References Extractor',
                body=body,
                uid=user_info['uid'],
                req=req)