コード例 #1
0
ファイル: preview.py プロジェクト: code-dub/zulip
def get_link_embed_data(
        url: str,
        maxwidth: Optional[int] = 640,
        maxheight: Optional[int] = 480) -> Optional[Dict[str, Any]]:
    """Collect link-preview metadata for ``url``.

    Args:
        url: The address to build a preview for.
        maxwidth: Forwarded to ``get_oembed_data``.
        maxheight: Forwarded to ``get_oembed_data``.

    Returns:
        ``None`` when ``is_link`` or ``valid_content_type`` rejects the URL.
        Otherwise a dict (possibly empty): the oEmbed data as-is when it
        carries a truthy ``'oembed'`` entry, else whatever oEmbed returned
        with missing ``title``/``description``/``image`` entries filled in
        from Open Graph tags and then from generic meta tags.
    """
    if not is_link(url):
        return None

    if not valid_content_type(url):
        return None

    # We are using two different mechanisms to get the embed data
    # 1. Use OEmbed data, if found, for photo and video "type" sites
    # 2. Otherwise, use a combination of Open Graph tags and Meta tags
    data = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight) or {}
    if data.get('oembed'):
        return data

    response = requests.get(mark_sanitized(url),
                            stream=True,
                            headers=HEADERS,
                            timeout=TIMEOUT)
    try:
        if response.ok:
            og_data = OpenGraphParser(response.text).extract_data()
            for key in ['title', 'description', 'image']:
                # Never overwrite a value an earlier source already supplied.
                if not data.get(key) and og_data.get(key):
                    data[key] = og_data[key]

            generic_data = GenericParser(response.text).extract_data() or {}
            for key in ['title', 'description', 'image']:
                if not data.get(key) and generic_data.get(key):
                    data[key] = generic_data[key]
    finally:
        # With stream=True the connection is only released once the body has
        # been fully read or the response is closed; on a non-OK response the
        # body is never read, so close explicitly.
        response.close()
    return data
コード例 #2
0
ファイル: preview.py プロジェクト: pastewka/zulip
def get_link_embed_data(url: str,
                        maxwidth: int = 640,
                        maxheight: int = 480) -> Optional[Dict[str, Any]]:
    """Collect link-preview metadata for ``url``.

    Args:
        url: The address to build a preview for.
        maxwidth: Forwarded to ``get_oembed_data``.
        maxheight: Forwarded to ``get_oembed_data``.

    Returns:
        ``None`` when ``is_link`` or ``valid_content_type`` rejects the URL.
        Otherwise a dict (possibly empty): the oEmbed data as-is when it
        carries a truthy ``"oembed"`` entry, else whatever oEmbed returned
        with missing ``title``/``description``/``image`` entries filled in
        from Open Graph tags and then from generic meta tags.
    """
    if not is_link(url):
        return None

    if not valid_content_type(url):
        return None

    # We are using two different mechanisms to get the embed data
    # 1. Use OEmbed data, if found, for photo and video "type" sites
    # 2. Otherwise, use a combination of Open Graph tags and Meta tags
    data = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight) or {}
    if data.get("oembed"):
        return data

    response = PreviewSession().get(mark_sanitized(url), stream=True)
    try:
        if response.ok:
            # Both parsers receive the raw bytes plus the Content-Type header.
            content_type = response.headers.get("Content-Type")

            og_data = OpenGraphParser(response.content, content_type).extract_data()
            for key in ["title", "description", "image"]:
                # Never overwrite a value an earlier source already supplied.
                if not data.get(key) and og_data.get(key):
                    data[key] = og_data[key]

            generic_data = GenericParser(response.content, content_type).extract_data() or {}
            for key in ["title", "description", "image"]:
                if not data.get(key) and generic_data.get(key):
                    data[key] = generic_data[key]
    finally:
        # With stream=True the connection is only released once the body has
        # been fully read or the response is closed; on a non-OK response the
        # body is never read, so close explicitly.
        response.close()
    return data
コード例 #3
0
ファイル: test_link_embed.py プロジェクト: yushao2/zulip
 def test_charset_in_header(self) -> None:
     """A Big5-encoded page is parsed correctly when the charset is given in the content type."""
     markup = """<html>
       <head>
         <meta property="og:title" content="中文" />
       </head>
     </html>"""
     # Hand the parser raw Big5 bytes together with a matching Content-Type value.
     payload = markup.encode("big5")
     extracted = OpenGraphParser(payload, "text/html; charset=Big5").extract_data()
     self.assertEqual(extracted["title"], "中文")
コード例 #4
0
ファイル: test_link_embed.py プロジェクト: priyank-p/zulip
    def test_page_with_og(self) -> None:
        """Open Graph title and description are exposed as attributes of the parsed result."""
        page = b"""<html>
          <head>
          <meta property="og:title" content="The Rock" />
          <meta property="og:type" content="video.movie" />
          <meta property="og:url" content="http://www.imdb.com/title/tt0117500/" />
          <meta property="og:image" content="http://ia.media-imdb.com/images/rock.jpg" />
          <meta property="og:description" content="The Rock film" />
          </head>
        </html>"""

        og = OpenGraphParser(page, "text/html; charset=UTF-8").extract_data()
        self.assertEqual(og.title, "The Rock")
        self.assertEqual(og.description, "The Rock film")
コード例 #5
0
ファイル: test_link_embed.py プロジェクト: rohanpra/zulip
    def test_page_with_og(self) -> None:
        """Open Graph title and description are extracted into a dict-like result."""
        page = """<html>
          <head>
          <meta property="og:title" content="The Rock" />
          <meta property="og:type" content="video.movie" />
          <meta property="og:url" content="http://www.imdb.com/title/tt0117500/" />
          <meta property="og:image" content="http://ia.media-imdb.com/images/rock.jpg" />
          <meta property="og:description" content="The Rock film" />
          </head>
        </html>"""

        og = OpenGraphParser(page).extract_data()
        # The title must be present before its value is compared.
        self.assertIn('title', og)
        self.assertEqual(og['title'], 'The Rock')
        self.assertEqual(og.get('description'), 'The Rock film')
コード例 #6
0
def get_link_embed_data(url, maxwidth=640, maxheight=480):
    # type: (Text, Optional[int], Optional[int]) -> Optional[Dict[Any, Any]]
    """Collect link-preview metadata for ``url``.

    Returns ``None`` when ``is_link`` rejects the URL or when
    ``get_oembed_data`` raises ``requests.exceptions.RequestException``
    (the failure is logged). Otherwise returns a dict (possibly empty)
    holding the oEmbed data, with missing ``title``/``description``/``image``
    entries filled in from Open Graph tags and then from generic meta tags.
    """
    if not is_link(url):
        return None
    # Fetch information from URL.
    # We are using three sources in next order; a later source only fills
    # in entries that every earlier source left empty:
    # 1. OEmbed
    # 2. Open Graph
    # 3. Meta tags
    try:
        data = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight)
    except requests.exceptions.RequestException:
        msg = 'Unable to fetch information from url {0}, traceback: {1}'
        logging.error(msg.format(url, traceback.format_exc()))
        return None
    data = data or {}
    # NOTE(review): no timeout is passed here, so this call can block
    # indefinitely on an unresponsive host; consider adding one.
    response = requests.get(url)
    if response.ok:
        og_data = OpenGraphParser(response.text).extract_data() or {}
        generic_data = GenericParser(response.text).extract_data() or {}
        # Page-supplied tags must not override oEmbed data or add arbitrary
        # keys (e.g. 'oembed' or 'html'), so copy only the known preview
        # fields, and only when they are still missing.
        for source in (og_data, generic_data):
            for key in ['title', 'description', 'image']:
                if not data.get(key) and source.get(key):
                    data[key] = source[key]
    return data
コード例 #7
0
ファイル: test_link_embed.py プロジェクト: 284928489/zulip
    def test_page_with_og(self) -> None:
        """The parser returns the og:title and og:description values of a simple page."""
        document = """<html>
          <head>
          <meta property="og:title" content="The Rock" />
          <meta property="og:type" content="video.movie" />
          <meta property="og:url" content="http://www.imdb.com/title/tt0117500/" />
          <meta property="og:image" content="http://ia.media-imdb.com/images/rock.jpg" />
          <meta property="og:description" content="The Rock film" />
          </head>
        </html>"""

        og_parser = OpenGraphParser(document)
        extracted = og_parser.extract_data()
        # Presence check first, then the exact values.
        self.assertIn('title', extracted)
        self.assertEqual(extracted['title'], 'The Rock')
        self.assertEqual(extracted.get('description'), 'The Rock film')
コード例 #8
0
def get_link_embed_data(
        url: str,
        maxwidth: Optional[int] = 640,
        maxheight: Optional[int] = 480) -> Optional[Dict[str, Any]]:
    """Collect link-preview metadata for ``url``.

    Args:
        url: The address to build a preview for.
        maxwidth: Forwarded to ``get_oembed_data``.
        maxheight: Forwarded to ``get_oembed_data``.

    Returns:
        ``None`` when ``is_link`` rejects the URL or when ``get_oembed_data``
        raises ``requests.exceptions.RequestException``. Otherwise a dict
        (possibly empty) holding the oEmbed data, with missing
        ``title``/``description``/``image`` entries filled in from Open Graph
        tags and then from generic meta tags.
    """
    if not is_link(url):
        return None
    # Fetch information from URL.
    # We are using three sources in next order; a later source only fills
    # in entries that every earlier source left empty:
    # 1. OEmbed
    # 2. Open Graph
    # 3. Meta tags
    try:
        data = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight)
    except requests.exceptions.RequestException:
        # This is what happens if the target URL cannot be fetched; in
        # that case, there's nothing to do here, and this URL has no
        # open graph data.
        return None
    data = data or {}
    # NOTE(review): no timeout is passed here, so this call can block
    # indefinitely on an unresponsive host; consider adding one.
    response = requests.get(url)
    if response.ok:
        og_data = OpenGraphParser(response.text).extract_data() or {}
        generic_data = GenericParser(response.text).extract_data() or {}
        # Page-supplied tags must not override oEmbed data or add arbitrary
        # keys (e.g. 'oembed' or 'html'), so copy only the known preview
        # fields, and only when they are still missing.
        for source in (og_data, generic_data):
            for key in ['title', 'description', 'image']:
                if not data.get(key) and source.get(key):
                    data[key] = source[key]
    return data
コード例 #9
0
ファイル: preview.py プロジェクト: saranyagokulramkumar/zulip
def get_link_embed_data(
        url: str,
        maxwidth: Optional[int] = 640,
        maxheight: Optional[int] = 480) -> Optional[Dict[str, Any]]:
    """Collect link-preview metadata for ``url``.

    Args:
        url: The address to build a preview for.
        maxwidth: Forwarded to ``get_oembed_data``.
        maxheight: Forwarded to ``get_oembed_data``.

    Returns:
        ``None`` when ``is_link`` or ``valid_content_type`` rejects the URL.
        Otherwise a dict (possibly empty) holding the oEmbed data, with
        missing ``title``/``description``/``image`` entries filled in from
        Open Graph tags and then from generic meta tags.
    """
    if not is_link(url):
        return None

    if not valid_content_type(url):
        return None

    # Fetch information from URL.
    # We are using three sources in next order; a later source only fills
    # in entries that every earlier source left empty:
    # 1. OEmbed
    # 2. Open Graph
    # 3. Meta tags
    data = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight) or {}
    response = requests.get(url, stream=True, headers=HEADERS, timeout=TIMEOUT)
    try:
        if response.ok:
            og_data = OpenGraphParser(response.text).extract_data() or {}
            generic_data = GenericParser(response.text).extract_data() or {}
            # Page-supplied tags must not override oEmbed data or add
            # arbitrary keys (e.g. 'oembed' or 'html'), so copy only the
            # known preview fields, and only when they are still missing.
            for source in (og_data, generic_data):
                for key in ['title', 'description', 'image']:
                    if not data.get(key) and source.get(key):
                        data[key] = source[key]
    finally:
        # With stream=True the connection is only released once the body has
        # been fully read or the response is closed; on a non-OK response the
        # body is never read, so close explicitly.
        response.close()
    return data
コード例 #10
0
ファイル: test_link_embed.py プロジェクト: yushao2/zulip
    def test_page_with_evil_og_tags(self) -> None:
        """og:html and og:oembed tags on a page must not show up in the parsed result."""
        page = b"""<html>
          <head>
          <meta property="og:title" content="The Rock" />
          <meta property="og:type" content="video.movie" />
          <meta property="og:url" content="http://www.imdb.com/title/tt0117500/" />
          <meta property="og:image" content="http://ia.media-imdb.com/images/rock.jpg" />
          <meta property="og:description" content="The Rock film" />
          <meta property="og:html" content="<script>alert(window.location)</script>" />
          <meta property="og:oembed" content="True" />
          </head>
        </html>"""

        extracted = OpenGraphParser(page, "text/html; charset=UTF-8").extract_data()

        # The legitimate tags are still picked up.
        self.assertIn("title", extracted)
        self.assertEqual(extracted["title"], "The Rock")
        self.assertEqual(extracted.get("description"), "The Rock film")

        # The hostile tags are not.
        for forbidden_key in ("oembed", "html"):
            self.assertEqual(extracted.get(forbidden_key), None)