def get_link_embed_data(url: str, maxwidth: Optional[int]=640, maxheight: Optional[int]=480) -> Optional[Dict[Any, Any]]:
    """Gather embed/preview metadata for ``url``.

    Sources are consulted in this order: oEmbed, Open Graph, then generic
    meta tags.  Open Graph values are merged over the oEmbed result, while
    generic values only fill in a missing title, description or image.

    Returns ``None`` when ``url`` is not a link or when the oEmbed lookup
    raises a ``requests`` exception (which is logged); otherwise returns the
    collected dictionary, which may be empty.
    """
    if not is_link(url):
        return None

    try:
        oembed = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight)
    except requests.exceptions.RequestException:
        msg = 'Unable to fetch information from url {0}, traceback: {1}'
        logging.error(msg.format(url, traceback.format_exc()))
        return None

    embed = oembed if oembed else {}
    page = requests.get(url)
    if not page.ok:
        # Nothing more to learn from the page itself.
        return embed

    og_data = OpenGraphParser(page.text).extract_data()
    if og_data:
        embed.update(og_data)

    fallback = GenericParser(page.text).extract_data() or {}
    for field in ('title', 'description', 'image'):
        if embed.get(field):
            continue
        if fallback.get(field):
            embed[field] = fallback[field]
    return embed
def get_link_embed_data(url: str, maxwidth: int = 640, maxheight: int = 480) -> Optional[Dict[str, Any]]:
    """Gather embed/preview metadata for ``url``.

    Two mechanisms are used:
    1. oEmbed data, if found, for photo and video "type" sites; a result
       with a truthy ``"oembed"`` entry is returned unchanged.
    2. Otherwise, a combination of Open Graph tags and meta tags, each of
       which only fills in a title, description or image that is still
       missing.

    Returns ``None`` when ``url`` fails ``is_link`` or ``valid_content_type``.
    """
    if not (is_link(url) and valid_content_type(url)):
        return None

    embed = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight) or {}
    if embed.get("oembed"):
        return embed

    def fill_missing(source: Dict[str, Any]) -> None:
        # Copy only the fields that are still empty in the result.
        for field in ("title", "description", "image"):
            if not embed.get(field) and source.get(field):
                embed[field] = source[field]

    page = PreviewSession().get(mark_sanitized(url), stream=True)
    if not page.ok:
        return embed

    og_parser = OpenGraphParser(page.content, page.headers.get("Content-Type"))
    fill_missing(og_parser.extract_data())

    generic_parser = GenericParser(page.content, page.headers.get("Content-Type"))
    fill_missing(generic_parser.extract_data() or {})
    return embed
def test_error_request(self, get):
    # type: (Any) -> None
    # The patched getter hands back a response whose ``ok`` flag is False;
    # get_oembed_data is then expected to return None.
    failed_response = mock.Mock()
    failed_response.ok = False
    get.return_value = failed_response

    result = get_oembed_data('http://instagram.com/p/BLtI2WdAymy')
    self.assertIsNone(result)
def get_link_embed_data(
    url: str, maxwidth: Optional[int] = 640, maxheight: Optional[int] = 480
) -> Optional[Dict[str, Any]]:
    """Gather embed/preview metadata for ``url``.

    Sources are consulted in this order: oEmbed, Open Graph, then generic
    meta tags.  Open Graph values are merged over the oEmbed result, while
    generic values only fill in a missing title, description or image.

    Returns ``None`` when ``url`` fails ``is_link`` or ``valid_content_type``;
    otherwise returns the collected dictionary, which may be empty.
    """
    if not (is_link(url) and valid_content_type(url)):
        return None

    embed = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight) or {}

    page = requests.get(url, stream=True, headers=HEADERS, timeout=TIMEOUT)
    if not page.ok:
        return embed

    og_data = OpenGraphParser(page.text).extract_data()
    if og_data:
        embed.update(og_data)

    fallback = GenericParser(page.text).extract_data() or {}
    for field in ('title', 'description', 'image'):
        if embed.get(field):
            continue
        if fallback.get(field):
            embed[field] = fallback[field]
    return embed
def get_link_embed_data(
    url: str, maxwidth: Optional[int] = 640, maxheight: Optional[int] = 480
) -> Optional[Dict[str, Any]]:
    """Gather embed/preview metadata for ``url``.

    Sources are consulted in this order: oEmbed, Open Graph, then generic
    meta tags.  Open Graph values are merged over the oEmbed result, while
    generic values only fill in a missing title, description or image.

    Returns ``None`` when ``url`` is not a link or when the oEmbed lookup
    raises a ``requests`` exception; otherwise returns the collected
    dictionary, which may be empty.
    """
    if not is_link(url):
        return None

    try:
        oembed = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight)
    except requests.exceptions.RequestException:
        # This is what happens if the target URL cannot be fetched; in
        # that case, there's nothing to do here, and this URL has no
        # open graph data.
        return None

    embed = oembed if oembed else {}
    page = requests.get(url)
    if not page.ok:
        return embed

    og_data = OpenGraphParser(page.text).extract_data()
    if og_data:
        embed.update(og_data)

    fallback = GenericParser(page.text).extract_data() or {}
    for field in ('title', 'description', 'image'):
        if embed.get(field):
            continue
        if fallback.get(field):
            embed[field] = fallback[field]
    return embed
def get_link_embed_data(url: str, maxwidth: int = 640, maxheight: int = 480) -> Optional[UrlEmbedData]:
    """Gather embed/preview metadata for ``url``.

    The oembed data from pyoembed may be complete enough to return as-is
    (it is a ``UrlOEmbedData``); if so, we use it.  Otherwise, we use it as
    a base for the Open Graph and generic parsers, whose results are merged
    in that order as successive fallbacks.  A found image is resolved
    against the final response URL.

    Returns ``None`` when ``url`` fails ``is_link`` or ``valid_content_type``,
    or when fetching the page does not yield an OK response.
    """
    if not (is_link(url) and valid_content_type(url)):
        return None

    data = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight)
    if isinstance(data, UrlOEmbedData):
        return data

    page = PreviewSession().get(mark_sanitized(url), stream=True)
    if not page.ok:
        return None

    if data is None:
        data = UrlEmbedData()

    og_parser = OpenGraphParser(page.content, page.headers.get("Content-Type"))
    data.merge(og_parser.extract_data())
    generic_parser = GenericParser(page.content, page.headers.get("Content-Type"))
    data.merge(generic_parser.extract_data())

    if data.image:
        data.image = urljoin(page.url, data.image)
    return data
def test_error_request(self, get):
    # type: (Any) -> None
    # With the patched getter returning a response whose ``ok`` flag is
    # False, get_oembed_data is expected to return None.
    url = 'http://instagram.com/p/BLtI2WdAymy'
    not_ok = mock.Mock()
    not_ok.ok = False
    get.return_value = not_ok

    self.assertIsNone(get_oembed_data(url))
def get_link_embed_data(url: str, maxwidth: Optional[int]=640, maxheight: Optional[int]=480) -> Optional[Dict[str, Any]]:
    """Gather embed/preview metadata for ``url``.

    Sources are consulted in this order: oEmbed, Open Graph, then generic
    meta tags.  Open Graph values are merged over the oEmbed result, while
    generic values only fill in a missing title, description or image.

    Returns ``None`` when ``url`` is not a link or when the oEmbed lookup
    raises a ``requests`` exception; otherwise returns the collected
    dictionary, which may be empty.
    """
    if not is_link(url):
        return None

    try:
        oembed = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight)
    except requests.exceptions.RequestException:
        # This is what happens if the target URL cannot be fetched; in
        # that case, there's nothing to do here, and this URL has no
        # open graph data.
        return None

    result = oembed if oembed else {}
    page = requests.get(url)
    if page.ok:
        open_graph = OpenGraphParser(page.text).extract_data()
        if open_graph:
            result.update(open_graph)

        generic = GenericParser(page.text).extract_data() or {}
        wanted = ('title', 'description', 'image')
        for name in wanted:
            already_set = result.get(name)
            if not already_set and generic.get(name):
                result[name] = generic[name]
    return result
def test_video_provider(self) -> None:
    # Register a canned "video" oEmbed payload for the URL produced by
    # reconstruct_url, then check that the result is a UrlOEmbedData
    # carrying the payload's title.
    url = "http://blip.tv/video/158727223"
    payload = {
        "type": "video",
        "thumbnail_url": "https://scontent.cdninstagram.com/t51.2885-15/n.jpg",
        "thumbnail_width": 640,
        "thumbnail_height": 426,
        "title": "NASA",
        "html": "<p>test</p>",
        "version": "1.0",
        "width": 658,
        "height": 400,
    }
    responses.add(responses.GET, reconstruct_url(url), json=payload, status=200)

    embed = get_oembed_data(url)
    assert embed is not None
    self.assertIsInstance(embed, UrlOEmbedData)
    self.assertEqual(embed.title, payload["title"])
def get_link_embed_data(
    url: str, maxwidth: Optional[int] = 640, maxheight: Optional[int] = 480
) -> Optional[Dict[str, Any]]:
    """Gather embed/preview metadata for ``url``.

    Two mechanisms are used:
    1. oEmbed data, if found, for photo and video "type" sites; a result
       with a truthy ``'oembed'`` entry is returned unchanged.
    2. Otherwise, a combination of Open Graph tags and meta tags, each of
       which only fills in a title, description or image that is still
       missing.

    Returns ``None`` when ``url`` fails ``is_link`` or ``valid_content_type``.
    """
    if not (is_link(url) and valid_content_type(url)):
        return None

    embed = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight) or {}
    if embed.get('oembed'):
        return embed

    def fill_missing(source: Dict[str, Any]) -> None:
        # Copy only the fields that are still empty in the result.
        for field in ('title', 'description', 'image'):
            if not embed.get(field) and source.get(field):
                embed[field] = source[field]

    page = requests.get(mark_sanitized(url), stream=True, headers=HEADERS, timeout=TIMEOUT)
    if not page.ok:
        return embed

    fill_missing(OpenGraphParser(page.text).extract_data())
    fill_missing(GenericParser(page.text).extract_data() or {})
    return embed
def test_photo_provider(self, get: Any) -> None:
    # The patched getter returns an OK JSON response describing a "photo";
    # the result must be a dict exposing the title and a truthy "oembed" flag.
    payload = {
        "type": "photo",
        "thumbnail_url": "https://scontent.cdninstagram.com/t51.2885-15/n.jpg",
        "url": "https://scontent.cdninstagram.com/t51.2885-15/n.jpg",
        "thumbnail_width": 640,
        "thumbnail_height": 426,
        "title": "NASA",
        "html": "<p>test</p>",
        "version": "1.0",
        "width": 658,
        "height": 400,
    }
    fake_response = mock.Mock()
    fake_response.ok = True
    fake_response.headers = {"content-type": "application/json"}
    fake_response.text = orjson.dumps(payload).decode()
    get.return_value = fake_response

    data = get_oembed_data("http://imgur.com/photo/158727223")
    self.assertIsInstance(data, dict)
    self.assertIn("title", data)
    assert data is not None  # allow mypy to infer data is indexable
    self.assertEqual(data["title"], payload["title"])
    self.assertTrue(data["oembed"])
def get_link_embed_data(url, maxwidth=640, maxheight=480):
    # type: (Text, Optional[int], Optional[int]) -> Optional[Dict[Any, Any]]
    """Gather embed/preview metadata for ``url``.

    Sources are consulted in this order: oEmbed, Open Graph, then generic
    meta tags.  Open Graph values are merged over the oEmbed result, while
    generic values only fill in a missing title, description or image.

    Returns ``None`` when ``url`` is not a link or when the oEmbed lookup
    raises a ``requests`` exception (which is logged); otherwise returns the
    collected dictionary, which may be empty.
    """
    if not is_link(url):
        return None

    try:
        oembed = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight)
    except requests.exceptions.RequestException:
        msg = 'Unable to fetch information from url {0}, traceback: {1}'
        logging.error(msg.format(url, traceback.format_exc()))
        return None

    result = oembed if oembed else {}
    page = requests.get(url)
    if page.ok:
        open_graph = OpenGraphParser(page.text).extract_data()
        if open_graph:
            result.update(open_graph)

        generic = GenericParser(page.text).extract_data() or {}
        for name in ('title', 'description', 'image'):
            already_set = result.get(name)
            if not already_set and generic.get(name):
                result[name] = generic[name]
    return result
def test_photo_provider(self, get: Any) -> None:
    # The patched getter returns an OK JSON response describing a 'photo';
    # the result must be a dict exposing the title and a truthy 'oembed' flag.
    url = 'http://imgur.com/photo/158727223'
    payload = {
        'type': 'photo',
        'thumbnail_url': 'https://scontent.cdninstagram.com/t51.2885-15/n.jpg',
        'url': 'https://scontent.cdninstagram.com/t51.2885-15/n.jpg',
        'thumbnail_width': 640,
        'thumbnail_height': 426,
        'title': 'NASA',
        'html': '<p>test</p>',
        'version': '1.0',
        'width': 658,
        'height': 400,
    }
    fake_response = mock.Mock()
    fake_response.headers = {'content-type': 'application/json'}
    fake_response.ok = True
    fake_response.text = ujson.dumps(payload)
    get.return_value = fake_response

    data = get_oembed_data(url)
    self.assertIsInstance(data, dict)
    self.assertIn('title', data)
    assert data is not None  # allow mypy to infer data is indexable
    self.assertEqual(data['title'], payload['title'])
    self.assertTrue(data['oembed'])
def test_photo_provider(self) -> None:
    # Register a canned "photo" oEmbed payload for the URL produced by
    # reconstruct_url; the result must be a dict exposing the title and a
    # truthy "oembed" flag.
    #
    # pyoembed.providers.imgur only works with http:// URLs, not https:// (!)
    url = "http://imgur.com/photo/158727223"
    payload = {
        "type": "photo",
        "thumbnail_url": "https://scontent.cdninstagram.com/t51.2885-15/n.jpg",
        "url": "https://scontent.cdninstagram.com/t51.2885-15/n.jpg",
        "thumbnail_width": 640,
        "thumbnail_height": 426,
        "title": "NASA",
        "html": "<p>test</p>",
        "version": "1.0",
        "width": 658,
        "height": 400,
    }
    responses.add(responses.GET, reconstruct_url(url), json=payload, status=200)

    data = get_oembed_data(url)
    self.assertIsInstance(data, dict)
    self.assertIn("title", data)
    assert data is not None  # allow mypy to infer data is indexable
    self.assertEqual(data["title"], payload["title"])
    self.assertTrue(data["oembed"])
def test_present_provider(self) -> None:
    # Register a canned "rich" oEmbed payload for the URL produced by
    # reconstruct_url; the result must be a dict carrying the payload's title.
    url = "http://instagram.com/p/BLtI2WdAymy"
    payload = {
        "type": "rich",
        "thumbnail_url": "https://scontent.cdninstagram.com/t51.2885-15/n.jpg",
        "thumbnail_width": 640,
        "thumbnail_height": 426,
        "title": "NASA",
        "html": "<p>test</p>",
        "version": "1.0",
        "width": 658,
        "height": 400,
    }
    responses.add(responses.GET, reconstruct_url(url), json=payload, status=200)

    data = get_oembed_data(url)
    self.assertIsInstance(data, dict)
    self.assertIn("title", data)
    assert data is not None  # allow mypy to infer data is indexable
    self.assertEqual(data["title"], payload["title"])
def test_invalid_json_in_response(self, get: Any) -> None:
    # The patched getter returns an OK response that claims to be JSON but
    # whose text is not parseable; get_oembed_data must return None.
    broken = mock.Mock()
    broken.ok = True
    broken.headers = {"content-type": "application/json"}
    broken.text = "{invalid json}"
    get.return_value = broken

    result = get_oembed_data("http://instagram.com/p/BLtI2WdAymy")
    self.assertIsNone(result)
def test_invalid_json_in_response(self) -> None:
    # Serve a body that is genuinely malformed JSON and expect no embed data.
    #
    # The payload is passed as a raw ``body`` with an explicit JSON content
    # type: handing the string to ``json=`` would have it serialized into a
    # *valid* JSON string literal, so the malformed-payload path would never
    # be exercised.
    # NOTE(review): assumes get_oembed_data returns None on a JSON decode
    # error — confirm against its implementation.
    url = "http://instagram.com/p/BLtI2WdAymy"
    reconstructed_url = reconstruct_url(url)
    responses.add(
        responses.GET,
        reconstructed_url,
        body="{invalid json}",
        content_type="application/json",
        status=200,
    )
    data = get_oembed_data(url)
    self.assertIsNone(data)
def test_present_provider(self, get: Any) -> None:
    # The patched getter returns an OK JSON response describing a 'rich'
    # embed (with a null height); the result must be a dict carrying the
    # payload's title.
    payload = {
        'type': 'rich',
        'thumbnail_url': 'https://scontent.cdninstagram.com/t51.2885-15/n.jpg',
        'thumbnail_width': 640,
        'thumbnail_height': 426,
        'title': 'NASA',
        'html': '<p>test</p>',
        'version': '1.0',
        'width': 658,
        'height': None,
    }
    fake_response = mock.Mock()
    fake_response.ok = True
    fake_response.headers = {'content-type': 'application/json'}
    fake_response.text = ujson.dumps(payload)
    get.return_value = fake_response

    data = get_oembed_data('http://instagram.com/p/BLtI2WdAymy')
    self.assertIsInstance(data, dict)
    self.assertIn('title', data)
    self.assertEqual(data['title'], payload['title'])
def test_present_provider(self, get: Any) -> None:
    # The patched getter returns an OK JSON response describing a 'rich'
    # embed (with a null height); the result must be a dict carrying the
    # payload's title.
    url = 'http://instagram.com/p/BLtI2WdAymy'
    expected_title = 'NASA'
    payload = {
        'type': 'rich',
        'thumbnail_url': 'https://scontent.cdninstagram.com/t51.2885-15/n.jpg',
        'thumbnail_width': 640,
        'thumbnail_height': 426,
        'title': expected_title,
        'html': '<p>test</p>',
        'version': '1.0',
        'width': 658,
        'height': None,
    }
    canned = mock.Mock()
    canned.headers = {'content-type': 'application/json'}
    canned.ok = True
    canned.text = ujson.dumps(payload)
    get.return_value = canned

    data = get_oembed_data(url)
    self.assertIsInstance(data, dict)
    self.assertIn('title', data)
    self.assertEqual(data['title'], expected_title)
def test_500_error_request(self) -> None:
    # An HTTP 500 registered for the URL produced by reconstruct_url must
    # make get_oembed_data return None.
    target = "http://instagram.com/p/BLtI2WdAymy"
    responses.add(responses.GET, reconstruct_url(target), status=500)

    self.assertIsNone(get_oembed_data(target))
def test_connect_error_request(self) -> None:
    # A ConnectionError registered as the body for the URL produced by
    # reconstruct_url must make get_oembed_data return None.
    target = "http://instagram.com/p/BLtI2WdAymy"
    responses.add(responses.GET, reconstruct_url(target), body=ConnectionError())

    self.assertIsNone(get_oembed_data(target))