def get_image_url(soup, url):
    """Return the page's declared image joined onto ``url``, or None.

    The ``'image'`` property is looked up with ``get_og_property`` first and
    ``get_twitter_property`` second. The first truthy value found is passed
    through ``urljoin(url, value)`` and returned.

    :param soup: parsed document handed to the property helpers.
    :param url: base URL the image reference is joined onto.
    :return: the joined URL, or ``None`` when neither lookup yields a value.
    """
    for lookup in (get_og_property, get_twitter_property):
        candidate = lookup(soup, 'image')
        if candidate:
            return urljoin(url, candidate)
    # Neither source declared an image.
    return None
def get_preview_video(soup):
    """Return a preview video reference for the page, or ``''``.

    Lookup order, first hit wins:

    1. an entry of ``get_og_property(soup, 'video', deep=True)`` ending in
       ``.mp4``;
    2. the first entry of that same result, whatever its extension;
    3. ``get_twitter_property(soup, 'player')``;
    4. ``get_og_property(soup, 'video:url')``.

    :param soup: parsed document; a falsy value short-circuits to ``''``.
    :return: the selected value, or an empty string when nothing matches.
    """
    # TODO: find using property="twitter:player:url" and "twitter:player:stream:url"
    # (ref: view-source:https://www.kickstarter.com/projects/1087256999/odiun-a-web-site)
    if not soup:
        return ''

    videos = get_og_property(soup, 'video', deep=True)

    # An .mp4 entry is preferred over whatever happens to be listed first.
    mp4 = next((video for video in videos if video.endswith('.mp4')), None)
    if mp4 is not None:
        return mp4
    if videos:
        return videos[0]

    return (
        get_twitter_property(soup, 'player')
        or get_og_property(soup, 'video:url')
        or ''
    )
def get_preview_picture(soup):
    """Return the page's preview picture reference, or ``''``.

    Uses ``get_og_property(soup, 'image')`` and falls back to
    ``get_twitter_property(soup, 'image')``. The value is returned as found;
    it is not joined onto any base URL here.

    :param soup: parsed document; a falsy value short-circuits to ``''``.
    :return: the first truthy lookup result, or an empty string.
    """
    if not soup:
        return ''
    return (
        get_og_property(soup, 'image')
        or get_twitter_property(soup, 'image')
        or ''
    )
def get_type(url, soup):
    """Map the page to a content type via ``ContentType.get_content_type``.

    URLs containing ``'imgur.com'`` are treated as type ``'image'`` without
    consulting the document. Otherwise the type string comes from
    ``get_og_property(soup, 'type')``. Only the part before the first ``.``
    is passed on (e.g. ``'video.movie'`` becomes ``'video'``).

    :param url: page URL, checked for the imgur special case.
    :param soup: parsed document handed to ``get_og_property``.
    :return: whatever ``ContentType.get_content_type`` returns, or ``None``
        when no type string is available.
    """
    raw_type = 'image' if 'imgur.com' in url else get_og_property(soup, 'type')
    if not raw_type:
        return None
    top_level = raw_type.partition('.')[0]
    return ContentType.get_content_type(top_level)
def get_description(soup, feed):
    """Return the best available description for a feed entry's page.

    The ``'description'`` property is tried through ``get_og_property``,
    ``get_twitter_property`` and ``get_meta_property`` in that order. If all
    three are falsy, the result is ``clean_html(feed.description)``.

    :param soup: parsed document handed to the property helpers.
    :param feed: object exposing a ``description`` attribute used as fallback.
    :return: the first truthy lookup result, otherwise the cleaned feed text.
    """
    # `or` short-circuits, so later lookups (and the feed fallback) only run
    # when every earlier source came back empty.
    return (
        get_og_property(soup, 'description')
        or get_twitter_property(soup, 'description')
        or get_meta_property(soup, 'description')
        or clean_html(feed.description)
    )
def get_title(soup, feed):
    """Return the best available title for a feed entry's page.

    The ``'title'`` property is tried through ``get_og_property``,
    ``get_twitter_property`` and ``get_meta_property`` in that order. If none
    yields a truthy value, the result is ``clean_html(feed.title)``.

    :param soup: parsed document handed to the property helpers.
    :param feed: object exposing a ``title`` attribute used as fallback.
    :return: the first truthy lookup result, otherwise the cleaned feed title.
    """
    for lookup in (get_og_property, get_twitter_property, get_meta_property):
        found = lookup(soup, 'title')
        if found:
            return found
    return clean_html(feed.title)
def get_metadata_description(soup):
    """Return the description declared in the page's metadata, or ``''``.

    Sources are tried in order: ``get_og_property``, ``get_twitter_property``,
    then ``get_meta_property`` with the key ``'description'`` and finally
    with the capitalised key ``'Description'``.

    :param soup: parsed document handed to the property helpers.
    :return: the first truthy lookup result, or an empty string.
    """
    return (
        get_og_property(soup, 'description')
        or get_twitter_property(soup, 'description')
        or get_meta_property(soup, 'description')
        # Some pages capitalise the meta name, so try that spelling too.
        or get_meta_property(soup, 'Description')
        or ''
    )
def get_metadata_title(soup):
    """Return the title declared by the page itself, or ``''``.

    The ``'title'`` property is tried through ``get_og_property``,
    ``get_twitter_property`` and ``get_meta_property`` in that order. If all
    are falsy and ``soup`` is truthy, the text of the first ``title``
    element found by ``soup.find('title')`` is used.

    :param soup: parsed document (presumably a BeautifulSoup tree -- it must
        provide ``find``); may be falsy, in which case the element fallback
        is skipped.
    :return: the first truthy lookup result, the ``title`` element's text,
        or an empty string.
    """
    og_title = get_og_property(soup, 'title')
    if og_title:
        return og_title

    tw_title = get_twitter_property(soup, 'title')
    if tw_title:
        return tw_title

    meta_title = get_meta_property(soup, 'title')
    if meta_title:
        return meta_title

    if soup:
        # Look the element up once and reuse it, rather than searching the
        # document a second time just to read its text.
        title_tag = soup.find('title')
        if title_tag:
            return title_tag.get_text()
    return ''
def get_site_name(soup, session):
    """Resolve the page's declared site name via ``SiteName``.

    Reads ``get_og_property(soup, 'site_name')``. When that is truthy, it is
    passed with ``session`` to ``SiteName.get_or_create_site_name``.

    :param soup: parsed document handed to ``get_og_property``.
    :param session: forwarded unchanged to
        ``SiteName.get_or_create_site_name`` (presumably a database session).
    :return: the result of ``SiteName.get_or_create_site_name``, or ``None``
        when the page declares no site name.
    """
    name = get_og_property(soup, 'site_name')
    if not name:
        return None
    return SiteName.get_or_create_site_name(session, name)
def get_url(page_request, soup):
    """Return the URL to use for the page.

    The value of ``get_og_property(soup, 'url')`` is used when it is truthy
    and ``is_valid_url`` accepts it. Otherwise the result is
    ``page_request.url``.

    :param page_request: object exposing a ``url`` attribute used as fallback.
    :param soup: parsed document handed to ``get_og_property``.
    :return: the declared URL if usable, else ``page_request.url``.
    """
    declared = get_og_property(soup, 'url')
    if declared and is_valid_url(declared):
        return declared
    return page_request.url