Example #1
0
def format_element(bfo,
                   name,
                   tag_name='',
                   tag='',
                   kb='',
                   kb_default_output='',
                   var='',
                   protocol='googlescholar'):
    """Prints a custom field in a way suitable to be used in HTML META
    tags.  In particular conforms to Google Scholar harvesting protocol as
    defined http://scholar.google.com/intl/en/scholar/inclusion.html and
    Open Graph http://ogp.me/

    @param tag_name: the name, from tag table, of the field to be exported
    looks initially for names prefixed by "meta-"<tag_name>
    then looks for exact name, then falls through to "tag"
    @param tag: the MARC tag to be exported (only if not defined by tag_name)
    @param name: name to be displayed in the meta headers, labelling this value.
    @param kb: a knowledge base through which to process the retrieved value if necessary.
    @param kb_default_output: when a '<code>kb</code>' is specified and no match for value is found, what shall we
               return? Either return the given parameter or specify "{value}" to return the retrieved
               value before processing through kb.
    @param var: the name of a variable to output instead of field from metadata.
                Allowed values are those supported by bfe_server_info and
                bfe_client_info. Takes precedence over <code>tag_name</code> and <code>tag</code>
    @param protocol: the protocol this tag is aimed at. Can be used to switch on/off support for a given "protocol". Can take values among 'googlescholar', 'opengraph'
    @return: the generated meta tags, one per line; an empty string when the
             protocol is disabled, the variable is unknown, or no tag could
             be resolved.
    @see: bfe_server_info.py, bfe_client_info.py
    """
    if protocol == 'googlescholar' and not CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR:
        return ""
    elif protocol == 'opengraph' and not CFG_WEBSEARCH_ENABLE_OPENGRAPH:
        return ""

    if var:
        # delegate to bfe_server_info or bfe_client_info:
        value = server_info(bfo, var)
        if value.startswith("Unknown variable: "):
            # Oops variable was not defined there
            value = client_info(bfo, var)
        if value.startswith("Unknown variable: "):
            return ""
        return create_metatag(name=name,
                              content=cgi.escape(value, True)) or ""

    tags = []
    if tag_name:
        # First check for special meta named tags
        tags = get_field_tags("meta-" + tag_name)
        if not tags:
            # then check for regular tags
            tags = get_field_tags(tag_name)
    if not tags and tag:
        # fall back to explicit marc tag
        tags = [tag]
    if not tags:
        return ''

    # Flatten whatever bfo.fields() hands back (plain values, lists of
    # values, or dicts of subfields) into a single list of values.
    # NOTE(review): escape=9 is forwarded untouched to bfo.fields(); its
    # exact escaping mode is defined by BibFormat -- confirm there.
    out = []
    for marctag in tags:
        value = bfo.fields(marctag, escape=9)
        if isinstance(value, list):
            for val in value:
                if isinstance(val, dict):
                    out.extend(val.values())
                else:
                    out.append(val)
        elif isinstance(value, dict):
            out.extend(value.values())
        else:
            out.append(value)

    if name == 'citation_date':
        # Normalise before de-duplicating so that e.g. "2010-01-01" and
        # "2010/01/01" do not end up as two identical meta tags.
        out = [item.replace('-', '/') for item in out]

    # Remove duplicates while keeping the order in which the values were
    # read from the record: a dict based de-duplication scrambles the order
    # (which matters e.g. for author lists) and, on Python 3, returns a view
    # that cannot be indexed.
    seen = set()
    unique = []
    for item in out:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    out = unique

    if kb:
        if kb_default_output == "{value}":
            # "{value}" means: fall back to the unprocessed value itself
            out = [bfo.kb(kb, item, item) for item in out]
        else:
            out = [bfo.kb(kb, item, kb_default_output) for item in out]
    return '\n'.join(
        [create_metatag(name=name, content=item) for item in out])
Example #2
0
def format_element(bfo,
                   name,
                   tag_name='',
                   tag='',
                   kb='',
                   kb_default_output='',
                   var='',
                   protocol='googlescholar'):
    """Prints a custom field in a way suitable to be used in HTML META
    tags.  In particular conforms to Google Scholar harvesting protocol as
    defined http://scholar.google.com/intl/en/scholar/inclusion.html and
    Open Graph http://ogp.me/

    @param tag_name: the name, from tag table, of the field to be exported
    looks initially for names prefixed by "meta-"<tag_name>
    then looks for exact name, then falls through to "tag"
    @param tag: the MARC tag to be exported (only if not defined by tag_name). Comma-separated list of tags.
    @param name: name to be displayed in the meta headers, labelling this value.
    @param kb: a knowledge base through which to process the retrieved value if necessary.
    @param kb_default_output: when a '<code>kb</code>' is specified and no match for value is found, what shall we
               return? Either return the given parameter or specify "{value}" to return the retrieved
               value before processing through kb.
    @param var: the name of a variable to output instead of field from metadata.
                Allowed values are those supported by bfe_server_info and
                bfe_client_info. Takes precedence over <code>tag_name</code> and <code>tag</code>
    @param protocol: the protocol this tag is aimed at. Can be used to switch on/off support for a given "protocol". Can take values among 'googlescholar', 'opengraph'
    @return: the generated meta tags, one per line; an empty string when the
             protocol is disabled, the variable is unknown, or no tag or
             value could be resolved.
    @see: bfe_server_info.py, bfe_client_info.py
    """
    if protocol == 'googlescholar' and not CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR:
        return ""
    elif protocol == 'opengraph' and not CFG_WEBSEARCH_ENABLE_OPENGRAPH:
        return ""

    if var:
        # delegate to bfe_server_info or bfe_client_info:
        value = server_info(bfo, var)
        if value.startswith("Unknown variable: "):
            # Oops variable was not defined there
            value = client_info(bfo, var)
        if value.startswith("Unknown variable: "):
            return ""
        return create_metatag(name=name,
                              content=cgi.escape(value, True)) or ""

    matched_by_tag_name_p = False
    tags = []
    if tag_name:
        # First check for special meta named tags
        tags = get_field_tags("meta-" + tag_name)
        if not tags:
            # then check for regular tags
            tags = get_field_tags(tag_name)
        matched_by_tag_name_p = bool(tags)
    if not tags and tag:
        # fall back to explicit marc tag(s); split() already yields [tag]
        # when there is no comma.
        tags = tag.split(',')
    if not tags:
        return ''

    # NOTE(review): escape=9 is forwarded untouched to bfo.fields(); its
    # exact escaping mode is defined by BibFormat -- confirm there.
    if protocol == 'googlescholar' and \
       (tags == ['100__a'] or tags == ['700__a']):
        # Authors for Google Scholar: remove names that are not purely
        # author (thesis director, coordinator, etc). Assume that
        # existence of $e subfield is a sign. Since this assumption
        # might be wrong, put some strong conditions in order to get
        # into this branch, with easy way to bypass.
        field_tag = tags[0][:-1]
        subfield = tags[0][-1]
        values = [
            field_instance[subfield]
            for field_instance in bfo.fields(field_tag, escape=9)
            if 'e' not in field_instance and subfield in field_instance
        ]
    else:
        # Standard fetching of values
        values = [bfo.fields(marctag, escape=9) for marctag in tags]

    if name == 'citation_dissertation_institution':
        if CFG_CERN_SITE and 'THESIS' in bfo.fields('980__a'):
            # Build a fresh list instead of extending the one returned by
            # bfo.fields(), so that the caller-owned list is never mutated.
            authors = list(bfo.fields('100__', escape=9)) + \
                      list(bfo.fields('700__', escape=9))
            values = [field_instance['u'] for field_instance in authors
                      if 'e' not in field_instance and 'u' in field_instance]
        elif tag == '100__u' and not matched_by_tag_name_p:
            # TODO: find way to map correctly this tag
            values = []

    # Flatten plain values, lists of values and dicts of subfields into a
    # single list.
    out = []
    for value in values:
        if isinstance(value, list):
            for val in value:
                if isinstance(val, dict):
                    out.extend(val.values())
                else:
                    out.append(val)
        elif isinstance(value, dict):
            out.extend(value.values())
        else:
            out.append(value)

    if name == 'citation_title' and out:
        # Title parts are merged into one tag; without any part we emit
        # nothing rather than a meta tag with empty content.
        out = [' : '.join(out)]

    if name == 'citation_date':
        out = [item.replace('-', '/') for item in out]
    elif name == 'citation_publication_date':
        for candidate in out:
            # Stop at first match
            parsed_date = parse_date_for_googlescholar(candidate)
            if parsed_date:
                out = [parsed_date]
                break

    # Remove duplicates while keeping the order in which the values were
    # read from the record: a dict based de-duplication scrambles the order
    # (which matters e.g. for author lists).
    seen = set()
    unique = []
    for item in out:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    out = unique

    if kb:
        if kb_default_output == "{value}":
            # "{value}" means: fall back to the unprocessed value itself
            out = [bfo.kb(kb, item, item) for item in out]
        else:
            out = [bfo.kb(kb, item, kb_default_output) for item in out]
    return '\n'.join(
        [create_metatag(name=name, content=item) for item in out])
Example #3
0
def format_element(bfo, name, tag_name='', tag='', kb='', kb_default_output='', var='', protocol='googlescholar'):
    """Prints a custom field in a way suitable to be used in HTML META
    tags.  In particular conforms to Google Scholar harvesting protocol as
    defined http://scholar.google.com/intl/en/scholar/inclusion.html and
    Open Graph http://ogp.me/

    @param tag_name: the name, from tag table, of the field to be exported
    looks initially for names prefixed by "meta-"<tag_name>
    then looks for exact name, then falls through to "tag"
    @param tag: the MARC tag to be exported (only if not defined by tag_name). Comma-separated list of tags.
    @param name: name to be displayed in the meta headers, labelling this value.
    @param kb: a knowledge base through which to process the retrieved value if necessary.
    @param kb_default_output: when a '<code>kb</code>' is specified and no match for value is found, what shall we
               return? Either return the given parameter or specify "{value}" to return the retrieved
               value before processing through kb.
    @param var: the name of a variable to output instead of field from metadata.
                Allowed values are those supported by bfe_server_info and
                bfe_client_info. Takes precedence over <code>tag_name</code> and <code>tag</code>
    @param protocol: the protocol this tag is aimed at. Can be used to switch on/off support for a given "protocol". Can take values among 'googlescholar', 'opengraph'
    @return: the generated meta tags, one per line; an empty string when the
             protocol is disabled, the variable is unknown, or no tag could
             be resolved.
    @see: bfe_server_info.py, bfe_client_info.py
    """
    if protocol == 'googlescholar' and not CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR:
        return ""
    elif protocol == 'opengraph' and not CFG_WEBSEARCH_ENABLE_OPENGRAPH:
        return ""

    if var:
        # delegate to bfe_server_info or bfe_client_info:
        value = server_info(bfo, var)
        if value.startswith("Unknown variable: "):
            # Oops variable was not defined there
            value = client_info(bfo, var)
        if value.startswith("Unknown variable: "):
            return ""
        return create_metatag(name=name, content=cgi.escape(value, True)) or ""

    matched_by_tag_name_p = False
    tags = []
    if tag_name:
        # First check for special meta named tags
        tags = get_field_tags("meta-" + tag_name)
        if not tags:
            # then check for regular tags
            tags = get_field_tags(tag_name)
        matched_by_tag_name_p = bool(tags)
    if not tags and tag:
        # fall back to explicit marc tag(s); split() already yields [tag]
        # when there is no comma.
        tags = tag.split(',')
    if not tags:
        return ''

    # NOTE(review): escape=9 is forwarded untouched to bfo.fields(); its
    # exact escaping mode is defined by BibFormat -- confirm there.
    if protocol == 'googlescholar' and \
       (tags == ['100__a'] or tags == ['700__a']):
        # Authors for Google Scholar: remove names that are not purely
        # author (thesis director, coordinator, etc). Assume that
        # existence of $e subfield is a sign. Since this assumption
        # might be wrong, put some strong conditions in order to get
        # into this branch, with easy way to bypass.
        field_tag = tags[0][:-1]
        subfield = tags[0][-1]
        # Skip field instances lacking the requested subfield instead of
        # failing with a KeyError on them.
        values = [field_instance[subfield]
                  for field_instance in bfo.fields(field_tag, escape=9)
                  if 'e' not in field_instance and subfield in field_instance]
    else:
        # Standard fetching of values
        values = [bfo.fields(marctag, escape=9) for marctag in tags]

    if name == 'citation_dissertation_institution':
        if CFG_CERN_SITE and 'THESIS' in bfo.fields('980__a'):
            # Build a fresh list instead of extending the one returned by
            # bfo.fields(), so that the caller-owned list is never mutated.
            authors = list(bfo.fields('100__', escape=9)) + \
                      list(bfo.fields('700__', escape=9))
            values = [field_instance['u'] for field_instance in authors
                      if 'e' not in field_instance and 'u' in field_instance]
        elif tag == '100__u' and not matched_by_tag_name_p:
            # TODO: find way to map correctly this tag
            values = []

    # Flatten plain values, lists of values and dicts of subfields into a
    # single list.
    out = []
    for value in values:
        if isinstance(value, list):
            for val in value:
                if isinstance(val, dict):
                    out.extend(val.values())
                else:
                    out.append(val)
        elif isinstance(value, dict):
            out.extend(value.values())
        else:
            out.append(value)

    if name == 'citation_date':
        out = [item.replace('-', '/') for item in out]
    elif name == 'citation_publication_date':
        for candidate in out:
            # Stop at first match
            parsed_date = parse_date_for_googlescholar(candidate)
            if parsed_date:
                out = [parsed_date]
                break

    # Remove duplicates while keeping the order in which the values were
    # read from the record: a dict based de-duplication scrambles the order
    # (which matters e.g. for author lists).
    seen = set()
    unique = []
    for item in out:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    out = unique

    if kb:
        if kb_default_output == "{value}":
            # "{value}" means: fall back to the unprocessed value itself
            out = [bfo.kb(kb, item, item) for item in out]
        else:
            out = [bfo.kb(kb, item, kb_default_output) for item in out]
    return '\n'.join([create_metatag(name=name, content=item) for item in out])
Example #4
0
def format_element(bfo, name, tag_name='', tag='', kb='', kb_default_output='', var='', protocol='googlescholar'):
    """Prints a custom field in a way suitable to be used in HTML META
    tags.  In particular conforms to Google Scholar harvesting protocol as
    defined http://scholar.google.com/intl/en/scholar/inclusion.html and
    Open Graph http://ogp.me/

    @param tag_name: the name, from tag table, of the field to be exported
    looks initially for names prefixed by "meta-"<tag_name>
    then looks for exact name, then falls through to "tag"
    @param tag: the MARC tag to be exported (only if not defined by tag_name)
    @param name: name to be displayed in the meta headers, labelling this value.
    @param kb: a knowledge base through which to process the retrieved value if necessary.
    @param kb_default_output: when a '<code>kb</code>' is specified and no match for value is found, what shall we
               return? Either return the given parameter or specify "{value}" to return the retrieved
               value before processing through kb.
    @param var: the name of a variable to output instead of field from metadata.
                Allowed values are those supported by bfe_server_info and
                bfe_client_info. Takes precedence over <code>tag_name</code> and <code>tag</code>
    @param protocol: the protocol this tag is aimed at. Can be used to switch on/off support for a given "protocol". Can take values among 'googlescholar', 'opengraph'
    @return: the generated meta tags, one per line; an empty string when the
             protocol is disabled, the variable is unknown, or no tag could
             be resolved.
    @see: bfe_server_info.py, bfe_client_info.py
    """
    if protocol == 'googlescholar' and not CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR:
        return ""
    elif protocol == 'opengraph' and not CFG_WEBSEARCH_ENABLE_OPENGRAPH:
        return ""

    if var:
        # delegate to bfe_server_info or bfe_client_info:
        value = server_info(bfo, var)
        if value.startswith("Unknown variable: "):
            # Oops variable was not defined there
            value = client_info(bfo, var)
        if value.startswith("Unknown variable: "):
            return ""
        return create_metatag(name=name, content=cgi.escape(value, True)) or ""

    tags = []
    if tag_name:
        # First check for special meta named tags
        tags = get_field_tags("meta-" + tag_name)
        if not tags:
            # then check for regular tags
            tags = get_field_tags(tag_name)
    if not tags and tag:
        # fall back to explicit marc tag
        tags = [tag]
    if not tags:
        return ''

    # Flatten whatever bfo.fields() hands back (plain values, lists of
    # values, or dicts of subfields) into a single list of values.  Each
    # value is collected exactly once: appending the per-field list a second
    # time would both duplicate entries and push unhashable lists of
    # subfield values into the result, breaking the de-duplication below.
    # NOTE(review): escape=9 is forwarded untouched to bfo.fields(); its
    # exact escaping mode is defined by BibFormat -- confirm there.
    out = []
    for marctag in tags:
        value = bfo.fields(marctag, escape=9)
        if isinstance(value, list):
            for val in value:
                if isinstance(val, dict):
                    out.extend(val.values())
                else:
                    out.append(val)
        elif isinstance(value, dict):
            out.extend(value.values())
        else:
            out.append(value)

    if name == 'citation_date':
        # Normalise before de-duplicating so that e.g. "2010-01-01" and
        # "2010/01/01" do not end up as two identical meta tags.
        out = [item.replace('-', '/') for item in out]

    # Remove duplicates while keeping the order in which the values were
    # read from the record: a dict based de-duplication scrambles the order
    # (which matters e.g. for author lists) and, on Python 3, returns a view
    # that cannot be indexed.
    seen = set()
    unique = []
    for item in out:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    out = unique

    if kb:
        if kb_default_output == "{value}":
            # "{value}" means: fall back to the unprocessed value itself
            out = [bfo.kb(kb, item, item) for item in out]
        else:
            out = [bfo.kb(kb, item, kb_default_output) for item in out]
    return '\n'.join([create_metatag(name=name, content=item) for item in out])