Beispiel #1
0
def _bibconvert_escape(dummy_ctx, value):
    """Bridge to lxml to escape the provided value.

    The value is reduced to a string and passed through ``encode_for_xml``:

    * a ``str`` is used as is;
    * an integer is converted with ``str()``;
    * a list is represented by its first item only, which is then
      converted by the two rules above or the one below;
    * anything else is expected to expose its content as a ``.text``
      attribute.

    :param dummy_ctx: unused by this function.
    :param value: the string, integer, list or ``.text``-bearing object
        to escape.
    :return: the escaped string, or ``''`` when the conversion fails for
        any reason (the error is reported on ``sys.stderr``).
    """
    # Local import: keeps the block self-contained without touching the
    # module's import list.
    import numbers

    try:
        # Only the first item of a list is considered.  An empty list
        # raises IndexError, which is reported by the handler below.
        if isinstance(value, list):
            value = value[0]

        if isinstance(value, str):
            string_value = value
        elif isinstance(value, numbers.Integral):
            # numbers.Integral covers ``int`` and Python 2's ``long``
            # without naming ``long``, which does not exist on Python 3
            # and would raise NameError for every non-str value there.
            string_value = str(value)
        else:
            string_value = value.text

        return encode_for_xml(string_value)

    except Exception as err:
        # Best effort: a formatting failure must not abort the caller,
        # so report it and fall through to the empty result.
        print("Error during formatting function evaluation: {0}".format(err),
              file=sys.stderr)

    return ''
Beispiel #2
0
 def __new__(cls, original_string='', escape_quotes=False):
     """Build the string instance holding the escaped form of a value.

     :param original_string: the value to escape. An ``EscapedString``
         is taken over through ``str()`` without being escaped again; a
         truthy value whose string form is only whitespace becomes a
         single space; anything else goes through ``encode_for_xml``
         with ``wash=True``.
     :param escape_quotes: forwarded to ``encode_for_xml`` as ``quote``.
     :return: the new instance, with ``original_string`` and
         ``escape_quotes`` stored on it as attributes.
     """
     if isinstance(original_string, EscapedString):
         # Already escaped: reuse its text as is.
         text = str(original_string)
     elif original_string and not str(original_string).strip():
         # Whitespace-only input collapses to exactly one space.
         text = ' '
     else:
         text = encode_for_xml(str(original_string), wash=True, quote=escape_quotes)

     instance = str.__new__(cls, text)
     # Keep the inputs on the instance alongside the escaped text.
     instance.original_string = original_string
     instance.escape_quotes = escape_quotes
     return instance
Beispiel #3
0
def _output_marc(output_complete,
                 categories,
                 kw_field=None,
                 auth_field=None,
                 acro_field=None,
                 provenience='Classifier'):
    """Output the keywords in the MARCXML format.

    One ``<datafield>`` is produced per keyword, with subfields ``2``
    (provenience), ``a`` (keyword), ``n`` (the value stored for the
    keyword, left empty for author keywords and acronyms) and ``9``
    (category).

    :param output_complete: mapping read under the keys
        ``"Single keywords"``, ``"Core keywords"``, ``"Author keywords"``
        and ``"Acronyms"``; each value is iterated for its keywords, and
        the first two are also indexed by keyword for subfield ``n``.
    :param categories: mapping from keyword to the category written in
        subfield ``9``.
    :keyword kw_field: MARC code for single and core keywords; defaults to
        ``cfg["CLASSIFIER_RECORD_KEYWORD_FIELD"]``.
    :keyword auth_field: MARC code for author keywords; defaults to
        ``cfg["CLASSIFIER_RECORD_KEYWORD_AUTHOR_FIELD"]``. A falsy value
        means the author keywords are not output.
    :keyword acro_field: MARC code for acronyms; defaults to
        ``cfg["CLASSIFIER_RECORD_KEYWORD_ACRONYM_FIELD"]``. A falsy value
        means the acronyms are not output.
    :keyword provenience: string that identifies source (authority) that
        assigned the contents of the field
    :return: string, formatted MARC
    """
    if kw_field is None:
        kw_field = cfg["CLASSIFIER_RECORD_KEYWORD_FIELD"]

    if auth_field is None:
        auth_field = cfg["CLASSIFIER_RECORD_KEYWORD_AUTHOR_FIELD"]

    if acro_field is None:
        acro_field = cfg["CLASSIFIER_RECORD_KEYWORD_ACRONYM_FIELD"]

    kw_template = ('<datafield tag="%s" ind1="%s" ind2="%s">\n'
                   '    <subfield code="2">%s</subfield>\n'
                   '    <subfield code="a">%s</subfield>\n'
                   '    <subfield code="n">%s</subfield>\n'
                   '    <subfield code="9">%s</subfield>\n'
                   '</datafield>\n')

    output = []

    # The provenience is the same for every datafield: escape it once.
    escaped_provenience = encode_for_xml(provenience)

    tag, ind1, ind2 = _parse_marc_code(kw_field)
    for keywords in (output_complete["Single keywords"],
                     output_complete["Core keywords"]):
        for kw in keywords:
            # NOTE(review): keywords[kw] is inserted without XML escaping;
            # presumably it is a number - confirm.
            output.append(kw_template %
                          (tag, ind1, ind2, escaped_provenience,
                           encode_for_xml(kw), keywords[kw],
                           encode_for_xml(categories[kw])))

    for field, keywords in ((auth_field, output_complete["Author keywords"]),
                            (acro_field, output_complete["Acronyms"])):
        # field='' we shall not save the keywords
        if not (keywords and field):
            continue
        tag, ind1, ind2 = _parse_marc_code(field)
        for kw in keywords:
            # NOTE(review): categories[kw] requires every author keyword
            # and acronym to have a category entry - confirm with caller.
            output.append(
                kw_template %
                (tag, ind1, ind2, escaped_provenience,
                 encode_for_xml(kw), '', encode_for_xml(categories[kw])))

    return "".join(output)
Beispiel #4
0
def _output_marc(output_complete, categories,
                 kw_field=None,
                 auth_field=None,
                 acro_field=None,
                 provenience='Classifier'):
    """Output the keywords in the MARCXML format.

    Every keyword becomes one ``<datafield>`` whose subfields carry the
    provenience (``2``), the keyword (``a``), the value stored for the
    keyword (``n``; empty for author keywords and acronyms) and the
    keyword's category (``9``).

    :param output_complete: mapping providing the entries
        ``"Single keywords"``, ``"Core keywords"``, ``"Author keywords"``
        and ``"Acronyms"``. Each entry is iterated for its keywords; the
        single and core entries are also indexed by keyword.
    :param categories: mapping indexed by keyword, giving the category
        text for subfield ``9``.
    :keyword kw_field: MARC code of the field for single and core
        keywords (default: ``cfg["CLASSIFIER_RECORD_KEYWORD_FIELD"]``).
    :keyword auth_field: MARC code of the field for author keywords
        (default: ``cfg["CLASSIFIER_RECORD_KEYWORD_AUTHOR_FIELD"]``);
        when falsy, author keywords are skipped.
    :keyword acro_field: MARC code of the field for acronyms
        (default: ``cfg["CLASSIFIER_RECORD_KEYWORD_ACRONYM_FIELD"]``);
        when falsy, acronyms are skipped.
    :keyword provenience: string that identifies source (authority) that
        assigned the contents of the field
    :return: string, formatted MARC
    """
    if kw_field is None:
        kw_field = cfg["CLASSIFIER_RECORD_KEYWORD_FIELD"]

    if auth_field is None:
        auth_field = cfg["CLASSIFIER_RECORD_KEYWORD_AUTHOR_FIELD"]

    if acro_field is None:
        acro_field = cfg["CLASSIFIER_RECORD_KEYWORD_ACRONYM_FIELD"]

    kw_template = ('<datafield tag="%s" ind1="%s" ind2="%s">\n'
                   '    <subfield code="2">%s</subfield>\n'
                   '    <subfield code="a">%s</subfield>\n'
                   '    <subfield code="n">%s</subfield>\n'
                   '    <subfield code="9">%s</subfield>\n'
                   '</datafield>\n')

    datafields = []

    # Loop-invariant: the escaped provenience is shared by all datafields.
    source = encode_for_xml(provenience)

    tag, ind1, ind2 = _parse_marc_code(kw_field)
    for keywords in (output_complete["Single keywords"],
                     output_complete["Core keywords"]):
        # NOTE(review): keywords[kw] goes into the XML unescaped;
        # presumably a numeric value - confirm.
        datafields.extend(
            kw_template % (tag, ind1, ind2, source,
                           encode_for_xml(kw), keywords[kw],
                           encode_for_xml(categories[kw]))
            for kw in keywords)

    for field, keywords in ((auth_field, output_complete["Author keywords"]),
                            (acro_field, output_complete["Acronyms"])):
        # field='' we shall not save the keywords
        if keywords and field:
            tag, ind1, ind2 = _parse_marc_code(field)
            # NOTE(review): categories must hold an entry for each of
            # these keywords too - confirm with caller.
            datafields.extend(
                kw_template % (tag, ind1, ind2, source,
                               encode_for_xml(kw), '',
                               encode_for_xml(categories[kw]))
                for kw in keywords)

    return "".join(datafields)
Beispiel #5
0
 def encode_for_marcxml(value):
     """Return *value* as a washed, XML-escaped string.

     :param value: the value to encode; it is converted with ``str()``
         before escaping. On Python 2 a ``unicode`` value is first encoded
         to UTF-8 bytes.
     :return: the result of ``encode_for_xml(str(value), wash=True)``.
     """
     from invenio_utils.text import encode_for_xml
     try:
         text_type = unicode  # Python 2 only
     except NameError:
         # Python 3: ``str`` is already text, and encoding it would make
         # ``str(value)`` yield the ``b'...'`` representation.
         text_type = None
     if text_type is not None and isinstance(value, text_type):
         value = value.encode('utf8')
     return encode_for_xml(str(value), wash=True)
Beispiel #6
0
 def encode_for_marcxml(value):
     """Encode *value* for inclusion in MARCXML.

     The value is turned into a string with ``str()`` and passed through
     ``encode_for_xml`` with ``wash=True``.

     :param value: the value to encode. A Python 2 ``unicode`` object is
         encoded to UTF-8 first; anything else is passed to ``str()``
         directly.
     :return: the escaped string returned by ``encode_for_xml``.
     """
     from invenio_utils.text import encode_for_xml
     # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3, so
     # this test is true only for Python 2 unicode objects.  It avoids
     # naming ``unicode`` directly, which raises NameError on Python 3.
     if isinstance(value, type(u'')) and not isinstance(value, str):
         value = value.encode('utf8')
     return encode_for_xml(str(value), wash=True)