Exemple #1
0
def get_image_url(soup, url):
    """Return an absolute image URL for the page, or None when none is found.

    The ``'image'`` property is requested first from ``get_og_property`` and
    then from ``get_twitter_property`` (presumably the og:image and
    twitter:image meta tags -- confirm against those helpers). The first
    truthy value is resolved against ``url`` with ``urljoin``.

    Args:
        soup: Parsed document handed to the lookup helpers.
        url: Base URL used to resolve the image reference.
    """
    for lookup in (get_og_property, get_twitter_property):
        candidate = lookup(soup, 'image')
        if candidate:
            return urljoin(url, candidate)
    # Neither lookup produced a value.
    return None
Exemple #2
0
def get_preview_video(soup):
    """Return a preview video reference for the page, or '' when none is found.

    Lookup order:
      1. the first value from ``get_og_property(soup, 'video', deep=True)``
         that ends with ``.mp4``;
      2. otherwise the first value of that same result, if it is non-empty;
      3. otherwise ``get_twitter_property(soup, 'player')``;
      4. otherwise ``get_og_property(soup, 'video:url')``.

    A falsy ``soup`` short-circuits to ``''`` without calling any helper.
    """
    # TODO: find using property="twitter:player:url" and "twitter:player:stream:url"
    # (ref: view-source:https://www.kickstarter.com/projects/1087256999/odiun-a-web-site)
    if not soup:
        return ''

    candidates = get_og_property(soup, 'video', deep=True)
    # An .mp4 entry wins over whatever else is listed.
    mp4 = next((c for c in candidates if c.endswith('.mp4')), None)
    if mp4 is not None:
        return mp4
    if len(candidates) > 0:
        return candidates[0]

    # Short-circuit chain: the second lookup only runs if the first is falsy.
    return (
        get_twitter_property(soup, 'player')
        or get_og_property(soup, 'video:url')
        or ''
    )
Exemple #3
0
def get_preview_picture(soup):
    """Return a preview picture reference for the page, or '' when none is found.

    Uses the ``'image'`` value from ``get_og_property`` if it is truthy,
    otherwise the one from ``get_twitter_property``. Unlike a URL resolver,
    the value is returned exactly as the helper produced it. A falsy ``soup``
    yields ``''`` without calling any helper.
    """
    if not soup:
        return ''
    # The Twitter lookup is only evaluated when the first lookup is falsy.
    return (
        get_og_property(soup, 'image')
        or get_twitter_property(soup, 'image')
        or ''
    )
Exemple #4
0
def get_type(url, soup):
    """Return the content type for a page, or None when it cannot be determined.

    URLs containing ``'imgur.com'`` are always treated as ``'image'``; for any
    other URL the type string comes from ``get_og_property(soup, 'type')``.
    Only the part before the first ``.`` is kept before it is passed to
    ``ContentType.get_content_type``.
    """
    kind = 'image' if 'imgur.com' in url else get_og_property(soup, 'type')
    if not kind:
        return None
    # Drop any dotted suffix, e.g. 'video.movie' -> 'video'.
    base_kind, _, _ = kind.partition('.')
    return ContentType.get_content_type(base_kind)
Exemple #5
0
def get_description(soup, feed):
    """Return a description for the page, falling back to the feed entry.

    ``get_og_property``, ``get_twitter_property`` and ``get_meta_property``
    are asked for ``'description'`` in that order, and the first truthy
    answer is returned. If all three come back falsy, the result is
    ``clean_html(feed.description)``.
    """
    for lookup in (get_og_property, get_twitter_property, get_meta_property):
        found = lookup(soup, 'description')
        if found:
            return found
    # Nothing usable in the page metadata: use the feed's own description.
    return clean_html(feed.description)
Exemple #6
0
def get_title(soup, feed):
    """Return a title for the page, falling back to the feed entry.

    ``get_og_property``, ``get_twitter_property`` and ``get_meta_property``
    are asked for ``'title'`` in that order, and the first truthy answer is
    returned. If all three come back falsy, the result is
    ``clean_html(feed.title)``.
    """
    for lookup in (get_og_property, get_twitter_property, get_meta_property):
        found = lookup(soup, 'title')
        if found:
            return found
    # Nothing usable in the page metadata: use the feed's own title.
    return clean_html(feed.title)
Exemple #7
0
def get_metadata_description(soup):
    """Return the page description from its metadata, or '' when none is found.

    Lookups are tried in order and stop at the first truthy result:
    ``get_og_property``, ``get_twitter_property``, then ``get_meta_property``
    with ``'description'`` and finally with the capitalised ``'Description'``.
    """
    # `or` short-circuits, so later lookups run only when earlier ones are falsy.
    return (
        get_og_property(soup, 'description')
        or get_twitter_property(soup, 'description')
        or get_meta_property(soup, 'description')
        or get_meta_property(soup, 'Description')
        or ''
    )
Exemple #8
0
def get_metadata_title(soup):
    """Return the page title from its metadata, or '' when none is found.

    ``get_og_property``, ``get_twitter_property`` and ``get_meta_property``
    are asked for ``'title'`` in that order, and the first truthy answer is
    returned. If none of them yields a value and ``soup`` is truthy, the text
    of the element returned by ``soup.find('title')`` is used instead.
    """
    for lookup in (get_og_property, get_twitter_property, get_meta_property):
        title = lookup(soup, 'title')
        if title:
            return title
    if soup:
        # Search for the <title> element once and reuse the result rather
        # than running the same search twice.
        title_tag = soup.find('title')
        if title_tag:
            return title_tag.get_text()
    return ''
Exemple #9
0
def get_site_name(soup, session):
    """Return the SiteName for the page, or None when no site name is found.

    The name string comes from ``get_og_property(soup, 'site_name')``. When
    it is truthy, it is handed together with ``session`` to
    ``SiteName.get_or_create_site_name`` and that call's result is returned.
    """
    name = get_og_property(soup, 'site_name')
    if not name:
        return None
    return SiteName.get_or_create_site_name(session, name)
Exemple #10
0
def get_url(page_request, soup):
    """Return the URL to record for a page.

    The value of ``get_og_property(soup, 'url')`` is used when it is truthy
    and ``is_valid_url`` accepts it; otherwise the result is
    ``page_request.url``.
    """
    candidate = get_og_property(soup, 'url')
    # is_valid_url is only consulted when a candidate was found.
    if candidate and is_valid_url(candidate):
        return candidate
    return page_request.url