def get_og_data(url: str, timeout: float = 10.0):
    """Download ``url`` and return a whitelisted subset of its OpenGraph tags.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the server before ``requests`` raises
            a timeout error instead of blocking indefinitely.

    Returns:
        ``{}`` when the HTTP response is not OK, ``None`` when
        ``OpenGraph.is_valid()`` reports false for the page, otherwise a
        dict containing only the whitelisted tags present on the page.
        NOTE(review): the ``{}`` / ``None`` difference is kept as-is because
        callers may distinguish the two cases.
    """
    # Without an explicit timeout requests waits forever on a silent server.
    resp = requests.get(url, timeout=timeout)
    if not resp.ok:
        logger.info(f"could not get opengraph data for url: {url}: {resp}")
        return {}

    og_article = OpenGraph(html=resp.content, scrape=True)
    if not og_article.is_valid():
        return None

    # Only these tags are passed on to the caller.
    tags = {
        "site_name",
        "type",
        "title",
        "description",
        "image",
        "image:alt",
        "image:height",
        "image:width",
    }
    return {k: v for k, v in og_article.items() if k in tags}
def details(self, url):  # pylint: disable=no-self-use
    """
        Collect the details of a bug for later use.

        The default implementation scrapes ``url`` for OpenGraph metadata
        and returns it (a dict) without a few bookkeeping entries; the UI
        shows this metadata as tooltips. Override this method to supply
        different information.
    """
    metadata = OpenGraph(url, scrape=True)

    # drop the entries we have no use for
    unwanted = ('_url', 'url', 'scrape', 'type')
    for name in [key for key in unwanted if key in metadata]:
        del metadata[name]

    return metadata
def _is_video(self, url: str) -> 'dict | None':
    """Describe ``url`` as a video when it matches ``VK_VIDEO_RE``.

    Args:
        url: The URL to inspect.

    Returns:
        ``None`` when the URL does not match ``VK_VIDEO_RE``. Otherwise a
        dict with the keys ``url``, ``title`` and ``description``. Title and
        description start as ``'video'`` and ``''`` and are replaced from the
        scraped OpenGraph data when ``is_valid()`` reports true.
    """
    log.debug('Detect if `%s` is video URL', url)
    if not VK_VIDEO_RE.search(url):
        log.debug('URL `%s` not match video regex', url)
        return None

    data = {
        'url': url,
        'title': 'video',
        'description': '',
    }
    resp = OpenGraph(url=url, scrape=True)
    log.debug('OpenGraph for `%s` is:\n%s', url, resp)
    if resp.is_valid():
        # Prefer the generic tag, then the video-specific one, then the URL.
        data['title'] = resp.get(
            'title', resp.get('video:title', data['url']))
        # NOTE(review): the last-resort description is the URL, not the
        # empty default set above - confirm this is intended.
        data['description'] = resp.get(
            'description', resp.get('video:description', data['url']))
    log.debug('URL `%s` defined as video', url)
    return data
def extract_opengraph_data(html_content):
    """Parse ``html_content`` for OpenGraph data.

    Returns the populated ``OpenGraph`` object when its ``is_valid()`` check
    passes, otherwise ``None``.
    """
    parsed = OpenGraph()
    parsed.parser(html_content)
    if not parsed.is_valid():
        return None
    return parsed