def _bibconvert_escape(dummy_ctx, value):
    """Bridge to lxml to escape the provided value.

    The value is reduced to a single string and passed to
    ``encode_for_xml``:

    * a ``str`` is used as is;
    * an integer is converted with ``str()``;
    * for a ``list`` only the first element is considered, following the
      same rules;
    * anything else must expose a ``text`` attribute, which is used
      (presumably an lxml element -- confirm against the callers).

    :param dummy_ctx: unused context argument
    :param value: the value to escape
    :return: the escaped string, or ``''`` when anything goes wrong (the
        error is reported on stderr instead of being raised)
    """

    def _scalar_text(candidate):
        # Text of a plain string or integer; None for any other type.
        if isinstance(candidate, str):
            return candidate
        if isinstance(candidate, (int, long)):
            return str(candidate)
        return None

    try:
        text = _scalar_text(value)
        if text is None:
            if isinstance(value, list):
                # Only the first element of a list is taken into account.
                value = value[0]
                text = _scalar_text(value)
            if text is None:
                # Neither a string nor an integer: rely on ``.text``.
                text = value.text
        return encode_for_xml(text)
    except Exception as err:
        # Best effort: never propagate a failure to the caller.
        print("Error during formatting function evaluation: {0}".format(err),
              file=sys.stderr)
        return ''
def __new__(cls, original_string='', escape_quotes=False):
    """Build the escaped string object.

    :param original_string: the value to escape. An ``EscapedString``
        is not escaped a second time: its string form is reused.
    :param escape_quotes: forwarded to ``encode_for_xml`` as ``quote``
    :return: a new instance whose content is the escaped text and which
        keeps ``original_string`` and ``escape_quotes`` as attributes
    """
    if isinstance(original_string, EscapedString):
        # Already escaped: take its text verbatim.
        escaped_text = str(original_string)
    elif original_string and not str(original_string).strip():
        # A non-empty value made only of whitespace becomes one space.
        escaped_text = ' '
    else:
        escaped_text = encode_for_xml(str(original_string),
                                      wash=True,
                                      quote=escape_quotes)

    instance = str.__new__(cls, escaped_text)
    instance.original_string = original_string
    instance.escape_quotes = escape_quotes
    return instance
def _output_marc(output_complete, categories, kw_field=None,
                 auth_field=None, acro_field=None, provenience='Classifier'):
    """Output the keywords in the MARCXML format.

    One ``<datafield>`` element is produced per keyword, with subfields
    ``2`` (provenience), ``a`` (keyword), ``n`` (the value stored for the
    keyword, or empty) and ``9`` (category of the keyword).

    :param output_complete: mapping with the keys "Single keywords",
        "Core keywords", "Author keywords" and "Acronyms". The first two
        must map each keyword to the value written in subfield ``n``; the
        last two may be empty or None, in which case they are skipped.
    :param categories: mapping of keyword to its category; it must have
        an entry for every keyword that is output
    :param kw_field: MARC code for single and core keywords; defaults to
        ``cfg["CLASSIFIER_RECORD_KEYWORD_FIELD"]``
    :param auth_field: MARC code for author keywords; defaults to
        ``cfg["CLASSIFIER_RECORD_KEYWORD_AUTHOR_FIELD"]``. An empty value
        means the author keywords are not output.
    :param acro_field: MARC code for acronyms; defaults to
        ``cfg["CLASSIFIER_RECORD_KEYWORD_ACRONYM_FIELD"]``. An empty value
        means the acronyms are not output.
    :keyword provenience: string that identifies source (authority) that
        assigned the contents of the field
    :return: string, formatted MARC
    """
    if kw_field is None:
        kw_field = cfg["CLASSIFIER_RECORD_KEYWORD_FIELD"]
    if auth_field is None:
        auth_field = cfg["CLASSIFIER_RECORD_KEYWORD_AUTHOR_FIELD"]
    if acro_field is None:
        acro_field = cfg["CLASSIFIER_RECORD_KEYWORD_ACRONYM_FIELD"]

    kw_template = ('<datafield tag="%s" ind1="%s" ind2="%s">\n'
                   ' <subfield code="2">%s</subfield>\n'
                   ' <subfield code="a">%s</subfield>\n'
                   ' <subfield code="n">%s</subfield>\n'
                   ' <subfield code="9">%s</subfield>\n'
                   '</datafield>\n')

    def _datafields(marc_code, keywords, with_number):
        # Render one datafield per keyword for an already parsed MARC code.
        tag, ind1, ind2 = marc_code
        return [
            kw_template % (tag, ind1, ind2,
                           encode_for_xml(provenience),
                           encode_for_xml(kw),
                           keywords[kw] if with_number else '',
                           encode_for_xml(categories[kw]))
            for kw in keywords
        ]

    output = []

    # Single and core keywords share the same field and carry a number.
    kw_code = _parse_marc_code(kw_field)
    for keywords in (output_complete["Single keywords"],
                     output_complete["Core keywords"]):
        output.extend(_datafields(kw_code, keywords, with_number=True))

    for field, keywords in ((auth_field, output_complete["Author keywords"]),
                            (acro_field, output_complete["Acronyms"])):
        # field='' we shall not save the keywords
        if keywords and field:
            output.extend(_datafields(_parse_marc_code(field), keywords,
                                      with_number=False))

    return "".join(output)
def _output_marc(output_complete, categories, kw_field=None,
                 auth_field=None, acro_field=None, provenience='Classifier'):
    """Output the keywords in the MARCXML format.

    One ``<datafield>`` element is produced per keyword, with subfields
    ``2`` (provenience), ``a`` (keyword), ``n`` (the value stored for the
    keyword, or empty) and ``9`` (category of the keyword).

    :param output_complete: mapping with the keys "Single keywords",
        "Core keywords", "Author keywords" and "Acronyms". The first two
        must map each keyword to the value written in subfield ``n``; the
        last two may be empty or None, in which case they are skipped.
    :param categories: mapping of keyword to its category; it must have
        an entry for every keyword that is output
    :param kw_field: MARC code for single and core keywords; defaults to
        ``cfg["CLASSIFIER_RECORD_KEYWORD_FIELD"]``
    :param auth_field: MARC code for author keywords; defaults to
        ``cfg["CLASSIFIER_RECORD_KEYWORD_AUTHOR_FIELD"]``. An empty value
        means the author keywords are not output.
    :param acro_field: MARC code for acronyms; defaults to
        ``cfg["CLASSIFIER_RECORD_KEYWORD_ACRONYM_FIELD"]``. An empty value
        means the acronyms are not output.
    :keyword provenience: string that identifies source (authority) that
        assigned the contents of the field
    :return: string, formatted MARC
    """
    if kw_field is None:
        kw_field = cfg["CLASSIFIER_RECORD_KEYWORD_FIELD"]
    if auth_field is None:
        auth_field = cfg["CLASSIFIER_RECORD_KEYWORD_AUTHOR_FIELD"]
    if acro_field is None:
        acro_field = cfg["CLASSIFIER_RECORD_KEYWORD_ACRONYM_FIELD"]

    kw_template = ('<datafield tag="%s" ind1="%s" ind2="%s">\n'
                   ' <subfield code="2">%s</subfield>\n'
                   ' <subfield code="a">%s</subfield>\n'
                   ' <subfield code="n">%s</subfield>\n'
                   ' <subfield code="9">%s</subfield>\n'
                   '</datafield>\n')

    def _datafields(marc_code, keywords, with_number):
        # Render one datafield per keyword for an already parsed MARC code.
        tag, ind1, ind2 = marc_code
        return [
            kw_template % (tag, ind1, ind2,
                           encode_for_xml(provenience),
                           encode_for_xml(kw),
                           keywords[kw] if with_number else '',
                           encode_for_xml(categories[kw]))
            for kw in keywords
        ]

    output = []

    # Single and core keywords share the same field and carry a number.
    kw_code = _parse_marc_code(kw_field)
    for keywords in (output_complete["Single keywords"],
                     output_complete["Core keywords"]):
        output.extend(_datafields(kw_code, keywords, with_number=True))

    for field, keywords in ((auth_field, output_complete["Author keywords"]),
                            (acro_field, output_complete["Acronyms"])):
        # field='' we shall not save the keywords
        if keywords and field:
            output.extend(_datafields(_parse_marc_code(field), keywords,
                                      with_number=False))

    return "".join(output)
def encode_for_marcxml(value):
    """Return ``value`` escaped for inclusion in MARCXML.

    A ``unicode`` value is first encoded to UTF-8; the result is then
    converted with ``str()`` and passed to ``encode_for_xml`` with
    ``wash=True``.

    :param value: the value to escape
    :return: whatever ``encode_for_xml`` returns for the converted value
    """
    # Imported at call time, as in the original implementation.
    from invenio_utils.text import encode_for_xml

    raw = value.encode('utf8') if isinstance(value, unicode) else value
    return encode_for_xml(str(raw), wash=True)