Ejemplo n.º 1
0
def parse_for_metadata(context, data, html):
    """Copy metadata values selected from ``html`` into ``data``.

    ``context.params`` may carry two mappings, ``meta`` and ``meta_date``,
    each associating a target key with one path expression or a list of
    them. For every key the expressions are tried in order with
    ``html.find``; the first one that yields an element supplies the value
    (its whitespace-collapsed text content). Values for keys listed in
    ``meta_date`` are additionally passed through ``iso_date``. A value of
    ``None`` is never stored.

    Args:
        context: object exposing a ``params`` mapping.
        data: mapping that receives the extracted values; updated in place.
        html: parsed document offering ``find(path)``.

    Returns:
        The same ``data`` object that was passed in.
    """
    # ``or {}`` also covers a key that is present but explicitly ``None``.
    meta = context.params.get('meta') or {}
    meta_date = context.params.get('meta_date') or {}

    # Merge into a fresh dict: updating ``meta`` in place would leak the
    # ``meta_date`` entries into the shared ``context.params``.
    meta_paths = {**meta, **meta_date}

    for key, xpaths in meta_paths.items():
        for xpath in ensure_list(xpaths):
            element = html.find(xpath)
            if element is None:
                continue
            value = collapse_spaces(element.text_content())
            if key in meta_date:
                value = iso_date(value)
            if value is not None:
                data[key] = value
            # Stop at the first path that matched an element, even when
            # its value ended up being discarded as ``None``.
            break

    return data
Ejemplo n.º 2
0
def parse_for_metadata(context, data, html):
    """Copy metadata values selected from ``html`` into ``data``.

    ``context.params`` may carry two mappings, ``meta`` and ``meta_date``,
    each associating a target key with one XPath expression or a list of
    them. Every expression is evaluated with ``html.xpath`` and only the
    first result of each expression is used. The value is the
    whitespace-collapsed text content of that result, or its ``str()``
    form when the result has no ``text_content`` method. Values for keys
    listed in ``meta_date`` are additionally passed through ``iso_date``.
    A value of ``None`` is never stored.

    Args:
        context: object exposing a ``params`` mapping.
        data: mapping that receives the extracted values; updated in place.
        html: parsed document offering ``xpath(expression)``.

    Returns:
        The same ``data`` object that was passed in.
    """
    # ``or {}`` also covers a key that is present but explicitly ``None``.
    meta = context.params.get("meta") or {}
    meta_date = context.params.get("meta_date") or {}

    # Merge into a fresh dict: updating ``meta`` in place would leak the
    # ``meta_date`` entries into the shared ``context.params``.
    meta_paths = {**meta, **meta_date}

    for key, xpaths in meta_paths.items():
        for xpath in ensure_list(xpaths):
            for element in ensure_list(html.xpath(xpath)):
                try:
                    text = element.text_content()
                except AttributeError:
                    # useful when element is an attribute
                    text = str(element)
                value = collapse_spaces(text)
                if key in meta_date:
                    value = iso_date(value)
                if value is not None:
                    data[key] = value
                # NOTE(review): this only leaves the element loop, so a
                # later xpath that matches overwrites an earlier value --
                # confirm that "last matching xpath wins" is intended.
                break
    return data