def test_core_class_vs_method_settings(self):
    """Check that changing ``open_graph`` on the instance changes what ``fetch`` returns.

    The same URL is fetched twice with the same ``Lassie`` instance: once as
    constructed, and once after ``open_graph`` has been set to ``False``.
    """
    page_url = "http://lassie.it/core/class_vs_method_settings.html"
    client = Lassie()

    first_result = client.fetch(page_url)
    self.assertEqual(len(first_result["images"]), 1)

    client.open_graph = False
    second_result = client.fetch(page_url)

    # open_graph is False now, so the image list is expected to be empty this time.
    self.assertEqual(len(second_result["images"]), 0)
def test_core_class_vs_method_settings(self):
    """Check that changing ``open_graph`` on the instance changes what ``fetch`` returns.

    The same URL is fetched twice with the same ``Lassie`` instance: once as
    constructed, and once after ``open_graph`` has been set to ``False``.
    """
    page_url = 'http://lassie.it/core/class_vs_method_settings.html'
    client = Lassie()

    first_result = client.fetch(page_url)
    self.assertEqual(len(first_result['images']), 1)

    client.open_graph = False
    second_result = client.fetch(page_url)

    # open_graph is False now, so the image list is expected to be empty this time.
    self.assertEqual(len(second_result['images']), 0)
def test_no_html_tag(self):
    """The title fetched from ``core/no_html_tag.html`` contains ``'no_html_tag'``."""
    url = 'http://lassie.it/core/no_html_tag.html'

    client = Lassie()
    data = client.fetch(url)

    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn('no_html_tag', data['title'])
def test_str_image(self):
    """Fetching ``amp/str_image.html`` yields exactly one image."""
    client = Lassie()
    result = client.fetch('http://lassie.it/amp/str_image.html')

    images = result['images']
    self.assertEqual(1, len(images))
def test_bad_json(self):
    """Fetching ``amp/bad_json.html`` still returns data whose ``url`` contains ``'amp'``."""
    url = 'http://lassie.it/amp/bad_json.html'

    client = Lassie()
    data = client.fetch(url)

    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn('amp', data['url'])
def test_video_objects(self):
    """Fetching ``amp/video_objects.html`` yields exactly one video."""
    client = Lassie()
    result = client.fetch('http://lassie.it/amp/video_objects.html')

    videos = result['videos']
    self.assertEqual(1, len(videos))
def test_list_thumbnail_image(self):
    """Fetching ``amp/list_thumbnail_image.html`` yields exactly two images."""
    client = Lassie()
    result = client.fetch('http://lassie.it/amp/list_thumbnail_image.html')

    images = result['images']
    self.assertEqual(2, len(images))
def test_bad_url(self):
    """For ``youtube/bad_url_123456.json``, ``data.get('oembed')`` is ``None``."""
    client = Lassie()
    result = client.fetch('http://lassie.it/youtube/bad_url_123456.json')

    oembed = result.get('oembed')
    self.assertIsNone(oembed)
def test_list_json(self):
    """The description fetched from ``amp/list_json.html`` mentions ``'Pixar'``."""
    url = 'http://lassie.it/amp/list_json.html'

    client = Lassie()
    data = client.fetch(url)

    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn('Pixar', data['description'])
def test_list_image_empty(self):
    """Fetching ``amp/list_image_empty.html`` yields exactly one image."""
    client = Lassie()
    result = client.fetch('http://lassie.it/amp/list_image_empty.html')

    images = result['images']
    self.assertEqual(1, len(images))
def test_youtube_good(self):
    """Fetching ``youtube/good.json`` yields one video and one image."""
    client = Lassie()
    result = client.fetch('http://lassie.it/youtube/good.json')

    video_count = len(result['videos'])
    self.assertEqual(video_count, 1)

    image_count = len(result['images'])
    self.assertEqual(image_count, 1)
def test_core_class_setting_is_none(self):
    """With ``open_graph`` set to ``None`` on the instance, ``fetch(open_graph=False)`` returns no images."""
    page_url = "http://lassie.it/core/class_setting_is_none.html"

    client = Lassie()
    # An odd use-case: the attribute is explicitly set to None on the
    # instance. Unlikely, but it might happen, so it is covered here.
    client.open_graph = None

    result = client.fetch(page_url, open_graph=False)
    images = result["images"]
    self.assertEqual(len(images), 0)
def test_core_class_setting_is_none(self):
    """With ``open_graph`` set to ``None`` on the instance, ``fetch(open_graph=False)`` returns no images."""
    page_url = 'http://lassie.it/core/class_setting_is_none.html'

    client = Lassie()
    # An odd use-case: the attribute is explicitly set to None on the
    # instance. Unlikely, but it might happen, so it is covered here.
    client.open_graph = None

    result = client.fetch(page_url, open_graph=False)
    images = result['images']
    self.assertEqual(len(images), 0)
def test_all_properites(self):
    """Fetching ``amp/all_properties.html`` with ``all_images=True`` gives three images and the expected title."""
    expected_title = 'Google Glass Is Dead, Long Live Snapchat Spectacles'

    client = Lassie()
    result = client.fetch('http://lassie.it/amp/all_properties.html', all_images=True)

    image_count = len(result['images'])
    self.assertEqual(image_count, 3)
    self.assertEqual(result['title'], expected_title)
def test_bad_image_dimensions(self):
    """The first image from ``core/bad_image_dimensions.html`` has no ``width`` or ``height`` key."""
    url = "http://lassie.it/core/bad_image_dimensions.html"

    client = Lassie()
    data = client.fetch(url, all_images=True)

    # Per the original author's note: lassie.utils.convert_to_int catches
    # TypeError/ValueError and passes, so no width/height is set on the image.
    image = data["images"][0]

    # assertNotIn reports the key and the container on failure, unlike
    # assertTrue(not key in container).
    self.assertNotIn("width", image)
    self.assertNotIn("height", image)
def test_bad_image_dimensions(self):
    """The first image from ``core/bad_image_dimensions.html`` has no ``width`` or ``height`` key."""
    url = 'http://lassie.it/core/bad_image_dimensions.html'

    client = Lassie()
    data = client.fetch(url, all_images=True)

    # Per the original author's note: lassie.utils.convert_to_int catches
    # TypeError/ValueError and passes, so no width/height is set on the image.
    image = data['images'][0]

    # assertNotIn reports the key and the container on failure, unlike
    # assertTrue(not key in container).
    self.assertNotIn('width', image)
    self.assertNotIn('height', image)
def test_core_retrieve_all_images(self):
    """With ``all_images`` enabled on the instance, three images come back and the third is 550x365."""
    client = Lassie()
    client.all_images = True

    result = client.fetch('http://lassie.it/core/retrieve_all_images.html')
    images = result['images']
    self.assertEqual(len(images), 3)

    third_image = images[2]
    self.assertEqual(third_image['width'], 550)
    self.assertEqual(third_image['height'], 365)
def test_core_retrieve_all_images(self):
    """With ``all_images`` enabled on the instance, three images come back and the third is 550x365."""
    client = Lassie()
    client.all_images = True

    result = client.fetch("http://lassie.it/core/retrieve_all_images.html")
    images = result["images"]
    self.assertEqual(len(images), 3)

    third_image = images[2]
    self.assertEqual(third_image["width"], 550)
    self.assertEqual(third_image["height"], 365)
def test_image_dimensions(self):
    """Fetching ``core/image_dimensions.html`` with ``all_images=True`` gives four 100x100 images."""
    client = Lassie()
    result = client.fetch('http://lassie.it/core/image_dimensions.html', all_images=True)

    images = result['images']
    self.assertEqual(len(images), 4)

    # Each of the four images is checked in order, width first, then height.
    for position in range(4):
        image = images[position]
        self.assertEqual(image['width'], 100)
        self.assertEqual(image['height'], 100)
def test_image_dimensions(self):
    """Fetching ``core/image_dimensions.html`` with ``all_images=True`` gives four 100x100 images."""
    client = Lassie()
    result = client.fetch("http://lassie.it/core/image_dimensions.html", all_images=True)

    images = result["images"]
    self.assertEqual(len(images), 4)

    # Each of the four images is checked in order, width first, then height.
    for position in range(4):
        image = images[position]
        self.assertEqual(image["width"], 100)
        self.assertEqual(image["height"], 100)
def get_page_info(url: str, timeout: int = 4) -> Optional[PageInfo]:
    """Return information about the page at the given address, or None.

    None is returned when ``url`` is empty, when the fetch raises
    ``LassieError``, or when the reported status code is not 200.

    :param url: Address of the page to describe.
    :param timeout: Connection timeout, passed to Lassie via ``request_opts``.
    """
    if not url:
        return None

    fetcher = Lassie()
    fetcher.request_opts = {'timeout': timeout}

    try:
        page = fetcher.fetch(url, touch_icon=False, favicon=False)
    except LassieError:
        # LassieError wraps exceptions raised by requests,
        # including connection errors, timeouts and the like.
        return None

    if page['status_code'] != 200:
        return None

    return PageInfo(
        title=page.get('title', ''),
        description=page.get('description', ''),
        site_name=page.get('site_name', ''),
        images=page['images'],
    )
try: resp = webclient.head(url, timeout=10, headers={'User-Agent': user_agent}) b['status'] = resp.status_code except Exception as err: print('Request failed: {}'.format(err)) continue if b['status'] != 200: not_ok.append(b) continue # Follow redirects one hop. if resp.is_redirect: url = resp.headers['Location'] if resp.headers.get('content-type', '').startswith('text/html'): try: summary = l.fetch(url) b['title'] = summary['title'].strip() b['url'] = summary['url'].strip() except Exception as err: print('Fetching {} failed with error:\n{}'.format(url, err)) bookmarks.append(b) data['bookmarks'] = bookmarks with open('cleaned_' + args.bmfile, 'w') as f: json.dump(data, f) with open('not_ok.json', 'w') as f: json.dump(not_ok, f)
def test_core_bad_keywords(self):
    """For ``core/bad_keywords.html``, ``data.get('keywords')`` equals an empty list."""
    client = Lassie()
    result = client.fetch('http://lassie.it/core/bad_keywords.html')

    keywords = result.get('keywords')
    self.assertEqual(keywords, [])
def test_youtube_no_type(self):
    """Fetch ``youtube/no_type.json``; the test only requires that ``fetch`` does not raise."""
    page_url = 'http://lassie.it/youtube/no_type.json'
    client = Lassie()

    # NOTE(review): nothing is asserted on the result — confirm whether an
    # assertion was intended here.
    data = client.fetch(page_url)
def test_youtube_bad_html(self):
    """Fetch ``youtube/bad_html.json``; the test only requires that ``fetch`` does not raise."""
    page_url = 'http://lassie.it/youtube/bad_html.json'
    client = Lassie()

    # NOTE(review): nothing is asserted on the result — confirm whether an
    # assertion was intended here.
    data = client.fetch(page_url)
# Show the current `sample` twice: raw via print, then pretty-printed.
# (`sample`, `pprint` and `lassie` are bound earlier in the script, outside this view.)
print(sample)
pprint(sample)
print("*" * 100)

# Fetch the same kind of result through `lassie.fetch`, this time with all_images=True.
sample = lassie.fetch('https://www.youtube.com/watch?v=R6IT_f0XPT8', all_images=True)
print(sample)
pprint(sample)
print("*" * 100)

# Repeat the fetch through an explicit Lassie instance, with no extra keyword arguments.
from lassie import Lassie
l = Lassie()
sample = l.fetch('https://www.youtube.com/watch?v=R6IT_f0XPT8')
print(sample)
pprint(sample)
print("*" * 100)

# Configure request options on the instance: a browser-like User-Agent header.
l.request_opts = {
    'headers': {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/605.1.15 (KHTML, like Gecko) '
                      'Version/12.1.1 Safari/605.1.15 '
    }
}
# NOTE(review): this assignment replaces the dict assigned just above, so the
# 'headers' entry is no longer part of request_opts afterwards — confirm intended.
l.request_opts = {'timeout': 0.1}