def formatter(bwo, **kwargs):
    """Return a formatted version of the data held by ``bwo``.

    The steps below are tried in order and the first one that applies wins:

    1. Falsy data yields an empty string.
    2. If a ``formatter`` callable is supplied, its result is returned as is.
    3. A mapping exposing ``dumps()`` is converted with
       ``Record(...).legacy_export_as_marc()``; any other mapping (or one
       whose conversion raises ``TypeError``/``KeyError``) is returned
       unchanged.
    4. A string is passed through ``format_record`` for the requested
       ``format``, or pretty-printed with ``minidom`` when the format is
       ``'xm'``. If that fails, or no format was given, the raw string is
       returned.
    5. A set is returned as a list; anything else is returned untouched.

    :param bwo: object exposing a ``get_data()`` method.
    :param kwargs: optional ``formatter`` (callable taking the data) and
        ``format`` (output format code).
    :return: the formatted data, or the data itself when nothing applies.
    """
    data = bwo.get_data()
    if not data:
        return ''

    # Local names chosen so that neither this function nor the ``format``
    # builtin is shadowed.
    custom_formatter = kwargs.get("formatter", None)
    output_format = kwargs.get("format", None)
    if custom_formatter:
        # A separate formatter is supplied
        return custom_formatter(data)

    # ``collections.Mapping`` no longer exists on Python >= 3.10.
    try:
        from collections.abc import Mapping
    except ImportError:  # Python 2
        from collections import Mapping

    if isinstance(data, Mapping):
        # Plain dicts are fine on their own; only objects offering
        # ``dumps()`` can be fed to Record. Without this check a plain dict
        # raised an uncaught AttributeError.
        if not hasattr(data, "dumps"):
            return data
        from invenio.modules.records.api import Record
        try:
            data = Record(data.dumps()).legacy_export_as_marc()
        except (TypeError, KeyError):
            # Maybe not a record after all (a submission?)
            return data

    if isinstance(data, string_types):
        if output_format == 'xm':
            # Already XML: format_record does not like it, so only
            # pretty-print.
            from xml.dom.minidom import parseString
            try:
                return parseString(data).toprettyxml()
            except TypeError:
                # Probably not a proper XML string then
                return "Data cannot be parsed: %s" % (data,)
            except Exception:
                # Deliberate best effort: on any other parsing error fall
                # back to the raw string below.
                pass
        elif output_format:
            from invenio.modules.formatter.engine import format_record
            try:
                return format_record(recID=None, of=output_format,
                                     xml_record=data)
            except TypeError:
                # Wrong kind of type; fall back to the raw string below.
                pass
        # Just return the raw string
        return data

    if isinstance(data, set):
        return list(data)
    # Not any of the above types: hand it back untouched.
    return data
def formatter(bwo, **kwargs):
    """Return a formatted version of the data held by ``bwo``.

    The steps below are tried in order and the first one that applies wins:

    1. Falsy data yields an empty string.
    2. If a ``formatter`` callable is supplied, its result is returned as is.
    3. A mapping exposing ``dumps()`` is converted with
       ``Record(...).legacy_export_as_marc()``; any other mapping (or one
       whose conversion raises ``TypeError``/``KeyError``) is returned
       unchanged.
    4. A string is passed through ``format_record`` for the requested
       ``format``, or pretty-printed with ``minidom`` when the format is
       ``'xm'``. If that fails, or no format was given, the raw string is
       returned.
    5. A set is returned as a list; anything else is returned untouched.

    :param bwo: object exposing a ``get_data()`` method.
    :param kwargs: optional ``formatter`` (callable taking the data) and
        ``format`` (output format code).
    :return: the formatted data, or the data itself when nothing applies.
    """
    data = bwo.get_data()
    if not data:
        return ''

    # Local names chosen so that neither this function nor the ``format``
    # builtin is shadowed.
    custom_formatter = kwargs.get("formatter", None)
    output_format = kwargs.get("format", None)
    if custom_formatter:
        # A separate formatter is supplied
        return custom_formatter(data)

    # ``collections.Mapping`` no longer exists on Python >= 3.10.
    try:
        from collections.abc import Mapping
    except ImportError:  # Python 2
        from collections import Mapping

    if isinstance(data, Mapping):
        # Plain dicts are fine on their own; only objects offering
        # ``dumps()`` can be fed to Record. Without this check a plain dict
        # raised an uncaught AttributeError.
        if not hasattr(data, "dumps"):
            return data
        from invenio.modules.records.api import Record
        try:
            data = Record(data.dumps()).legacy_export_as_marc()
        except (TypeError, KeyError):
            # Maybe not a record after all (a submission?)
            return data

    if isinstance(data, string_types):
        if output_format == 'xm':
            # Already XML: format_record does not like it, so only
            # pretty-print.
            from xml.dom.minidom import parseString
            try:
                return parseString(data).toprettyxml()
            except TypeError:
                # Probably not a proper XML string then
                return "Data cannot be parsed: %s" % (data,)
            except Exception:
                # Deliberate best effort: on any other parsing error fall
                # back to the raw string below.
                pass
        elif output_format:
            from invenio.modules.formatter.engine import format_record
            try:
                return format_record(recID=None, of=output_format,
                                     xml_record=data)
            except TypeError:
                # Wrong kind of type; fall back to the raw string below.
                pass
        # Just return the raw string
        return data

    if isinstance(data, set):
        return list(data)
    # Not any of the above types: hand it back untouched.
    return data
def formatter(bwo, **kwargs):
    """Return the latest submission package of ``bwo`` in the given format.

    ``bwo`` is wrapped in a ``Deposition`` and the ``package`` of its latest
    SIP is used as the record XML.

    :param bwo: object handed to the ``Deposition`` constructor.
    :param kwargs: optional ``format`` output format code (default ``"hd"``).
    :return: the package itself when the format is ``"xm"``, otherwise the
        result of ``format_record`` applied to it.
    """
    from invenio.modules.formatter.engine import format_record

    output_format = kwargs.get("format", "hd")
    marcxml = Deposition(bwo).get_latest_sip().package

    # "xm" asks for the XML itself, so no formatting is needed.
    if output_format == "xm":
        return marcxml
    return format_record(recID=None, of=output_format, xml_record=marcxml)
def formatter(bwo, **kwargs):
    """Format the most recent SIP package of a deposition object.

    :param bwo: object from which a ``Deposition`` is built.
    :param kwargs: may carry ``format``, the output format code; ``"hd"``
        is used when it is absent.
    :return: the raw package for format ``"xm"``; for every other format
        the output of ``format_record`` run on that package.
    """
    from invenio.modules.formatter.engine import format_record

    requested = kwargs.get("format", "hd")
    latest_sip = Deposition(bwo).get_latest_sip()
    package_xml = latest_sip.package

    if requested != "xm":
        return format_record(
            recID=None,
            of=requested,
            xml_record=package_xml,
        )
    # The package is returned untouched when "xm" is requested.
    return package_xml
def extract_references(self, req, form):
    """References extraction page.

    This page lets authors test their PDFs against our references
    extraction process.

    The first non-empty form field among ``pdf``, ``arxiv``, ``url`` and
    ``txt`` (checked in that order) selects the input. When none is
    provided, or extraction yields nothing, the upload form is shown.
    """
    user_info = collect_user_info(req)

    # Pick the input source from the four supported form fields.
    references_xml = None
    if 'pdf' in form and form['pdf']:
        references_xml = extract_from_pdf_string(form['pdf'])
    elif 'arxiv' in form and form['arxiv']:
        arxiv_url = make_arxiv_url(arxiv_id=form['arxiv'])
        references_xml = extract_references_from_url_xml(arxiv_url)
    elif 'url' in form and form['url']:
        references_xml = extract_references_from_url_xml(form['url'])
    elif 'txt' in form and form['txt']:
        references_xml = extract_references_from_string_xml(form['txt'])

    if references_xml:
        # Results page: inline CSS hides #referenceinp_link, followed by
        # the formatted references.
        hide_link_css = """ <style type="text/css"> #referenceinp_link { display: none; } </style> """
        body = hide_link_css + format_record(
            0,
            'hdref',
            xml_record=references_xml.encode('utf-8'),
            user_info=user_info,
        )
    else:
        # Nothing uploaded yet: display the form that allows us to do so.
        body = self.extract_references_template()

    # Render the page (including header, footer)
    return page(title='References Extractor',
                body=body,
                uid=user_info['uid'],
                req=req)
def extract(self, req, form):
    """References extraction page.

    This page lets authors test their PDFs against our references
    extraction process.

    The first non-empty form field among ``pdf``, ``arxiv``, ``url`` and
    ``txt`` (checked in that order) selects the input. The ``txt`` field is
    read through its ``value`` attribute and decoded as UTF-8, ignoring
    undecodable bytes. When none is provided, or extraction yields nothing,
    the upload form is shown.
    """
    user_info = collect_user_info(req)

    # Pick the input source from the four supported form fields.
    references_xml = None
    if 'pdf' in form and form['pdf']:
        references_xml = extract_from_pdf_string(form['pdf'])
    elif 'arxiv' in form and form['arxiv']:
        arxiv_url = make_arxiv_url(arxiv_id=form['arxiv'])
        references_xml = extract_references_from_url_xml(arxiv_url)
    elif 'url' in form and form['url']:
        references_xml = extract_references_from_url_xml(form['url'])
    elif 'txt' in form and form['txt'].value:
        decoded_text = form['txt'].value.decode('utf-8', 'ignore')
        references_xml = extract_references_from_string_xml(decoded_text)

    if references_xml:
        references_html = format_record(
            0,
            'hdref',
            xml_record=references_xml,
            user_info=user_info,
        )
        body = docextract_templates.tmpl_web_result(references_html)
    else:
        # Nothing uploaded yet: display the form that allows us to do so.
        body = docextract_templates.tmpl_web_form()

    # Render the page (including header, footer)
    return page(title='References Extractor',
                body=body,
                uid=user_info['uid'],
                req=req)
def format_records(recIDs, of, ln=None, verbose=0, search_pattern=None,
                   xml_records=None, user_info=None, record_prefix=None,
                   record_separator=None, record_suffix=None, prologue="",
                   epilogue="", req=None, on_the_fly=False):
    """Format records given by a list of record IDs or a list of XML records.

    Add a prefix before each record, a suffix after each record, plus a
    separator between records. Also add optional prologue and epilogue to
    the complete formatted list.

    You can either specify a list of record IDs to format, or a list of XML
    records, but not both (if both are specified, recIDs is ignored).

    'record_separator' is either a string used between all formatted
    records, or a function returning such a string. The function must take
    an integer as its only parameter: the index in recIDs (or xml_records)
    of the record that has just been formatted. For example separator(i)
    must return the separator between recID[i] and recID[i+1]. The same
    applies to 'record_prefix' and 'record_suffix'. Any callable value is
    called with the index; any other value is used as the text itself.

    'req' is an optional parameter on which the result of the function is
    written as it is produced (record after record). Note that you should
    set the 'req' content-type yourself and send the HTTP header before
    calling this function, as it will not do it.

    This function takes the same parameters as :meth:`format_record`
    except for:

    :param recIDs: a list of record IDs
    :type recIDs: list(int)
    :param of: an output format code (or short identifier for the output
        format)
    :type of: string
    :param ln: the language to use to format the record; defaults to
        ``cfg['CFG_SITE_LANG']``
    :type ln: string
    :param verbose: the level of verbosity from 0 to 9.

        - 0: silent
        - 5: errors
        - 7: errors and warnings, stop if error in format elements
        - 9: errors and warnings, stop if error (debug mode)
    :type verbose: int
    :param search_pattern: list of strings representing the user request
        in web interface
    :type search_pattern: list(string)
    :param user_info: the information of the user who will view the
        formatted page (if applicable)
    :param xml_records: a list of XML string representations of the
        records to format
    :type xml_records: list(string)
    :param record_prefix: a string (or function returning one) printed
        before B{each} formatted record (n times)
    :type record_prefix: string or function
    :param record_suffix: a string (or function returning one) printed
        after B{each} formatted record (n times)
    :type record_suffix: string or function
    :param prologue: a string printed at the beginning of the complete
        formatted records (1x)
    :type prologue: string
    :param epilogue: a string printed at the end of the complete formatted
        output (1x)
    :type epilogue: string
    :param record_separator: either a string or a function that returns a
        string to join formatted records
    :type record_separator: string or function
    :param req: an optional request object where to print records
    :param on_the_fly: if False, try to return an already preformatted
        version of the record in the database
    :type on_the_fly: boolean
    :rtype: string
    """
    if req is not None:
        req.write(prologue)

    # Pad whichever input was not supplied with Nones. Real lists are
    # required here: on Python 3 ``map`` returns an iterator, which supports
    # neither ``len()`` nor indexing.
    if xml_records is not None:
        xml_records = list(xml_records)
        recIDs = [None] * len(xml_records)
    else:
        recIDs = list(recIDs)
        xml_records = [None] * len(recIDs)

    # Resolve the default language once, and only when there is something
    # to format.
    if recIDs and not ln:
        ln = cfg['CFG_SITE_LANG']

    pieces = []

    def _emit(text):
        """Record ``text`` for the return value and stream it to ``req``."""
        pieces.append(text)
        if req is not None:
            req.write(text)

    def _decoration(value, index):
        """Return ``value(index)`` for callables, else ``value`` itself."""
        return value(index) if callable(value) else value

    last_index = len(recIDs) - 1
    for index, (recid, xml_record) in enumerate(zip(recIDs, xml_records)):
        if record_prefix is not None:
            _emit(_decoration(record_prefix, index))

        _emit(format_record(recid, of, ln, verbose, search_pattern,
                            xml_record, user_info, on_the_fly))

        if record_suffix is not None:
            _emit(_decoration(record_suffix, index))

        # No separator after the final record.
        if record_separator is not None and index != last_index:
            _emit(_decoration(record_separator, index))

    if req is not None:
        req.write(epilogue)

    return prologue + "".join(pieces) + epilogue