Beispiel #1
0
def get_og_data(url: str, timeout: float = 10.0):
    """Download ``url`` and return a filtered subset of its OpenGraph tags.

    Args:
        url: Address of the page to fetch.
        timeout: Value forwarded to ``requests.get`` as ``timeout`` so that an
            unresponsive server cannot block the caller indefinitely.

    Returns:
        ``{}`` when the HTTP response is not OK, ``None`` when the page was
        fetched but ``OpenGraph.is_valid()`` is false, otherwise a dict
        containing only the whitelisted tags present on the page.

    Raises:
        Nothing is caught here: any exception raised by ``requests.get``
        (including its timeout error) or by ``OpenGraph`` propagates.
    """
    resp = requests.get(url, timeout=timeout)
    if not resp.ok:
        logger.info(f"could not get opengraph data for url: {url}: {resp}")
        return {}

    og_article = OpenGraph(html=resp.content, scrape=True)
    if not og_article.is_valid():
        # Deliberately None (not {}): this is what callers received before.
        return None

    # A set gives O(1) membership tests for the filter below.
    wanted_tags = {
        "site_name",
        "type",
        "title",
        "description",
        "image",
        "image:alt",
        "image:height",
        "image:width",
    }
    return {k: v for k, v in og_article.items() if k in wanted_tags}
Beispiel #2
0
    def details(self, url):  # pylint: disable=no-self-use
        """
            Return bug details for later use.

            The default implementation yields the OpenGraph metadata (dict)
            of ``url``, which the UI displays as tooltips. Subclasses may
            override this method to supply other information.
        """
        metadata = OpenGraph(url, scrape=True)

        # drop the entries we have no use for
        for unwanted in ('_url', 'url', 'scrape', 'type'):
            if unwanted in metadata:
                del metadata[unwanted]

        return metadata
Beispiel #3
0
    def _is_video(self, url: str) -> 'dict | None':
        """Describe ``url`` as a video when it matches ``VK_VIDEO_RE``.

        Args:
            url: The URL to inspect.

        Returns:
            ``None`` when the URL does not match the video regex. Otherwise a
            dict with the keys ``url``, ``title`` and ``description``. The
            title defaults to ``'video'`` and the description to ``''``; both
            are overwritten from the page's OpenGraph data when that data is
            valid.
        """
        log.debug('Detect if `%s` is video URL', url)

        if not VK_VIDEO_RE.search(url):
            log.debug('URL `%s` not match video regex', url)
            return None

        data = {
            'url': url,
            'title': 'video',
            'description': '',
        }
        resp = OpenGraph(url=url, scrape=True)
        log.debug('OpenGraph for `%s` is:\n%s', url, resp)

        if resp.is_valid():
            # Prefer the generic tag, then the video-specific one, then the URL.
            data['title'] = resp.get(
                'title', resp.get('video:title', data['url']))
            # NOTE(review): the final fallback here is the URL, not the ''
            # default set above — confirm this is intended.
            data['description'] = resp.get(
                'description', resp.get('video:description', data['url']))

        log.debug('URL `%s` defined as video', url)
        return data
def extract_opengraph_data(html_content):
    """Parse ``html_content`` with ``OpenGraph`` and return the result.

    Returns the populated ``OpenGraph`` object when its ``is_valid()`` check
    passes, and ``None`` otherwise.
    """
    parsed = OpenGraph()
    parsed.parser(html_content)

    if not parsed.is_valid():
        return None
    return parsed