Example #1
0
def fetch(url, **kwargs):
    """Fetch ``url`` through a freshly created :class:`Lassie <Lassie>`.

    Convenience wrapper: a new ``Lassie`` instance is built and its
    ``fetch`` method is called with ``url`` and every keyword argument
    forwarded unchanged. The content at ``url`` is retrieved and parsed,
    and a dictionary of important information about the web page is
    returned.

    Sources are prioritised in this order:
        1. Open Graph
        2. Twitter Card
        3. Other meta content (e.g. description, keywords)

    :param url: URL to send a GET request to
    :param open_graph: (optional) If ``True``, filters web page content for Open Graph meta tags. The content of these properties has top priority on return values.
    :type open_graph: bool
    :param twitter_card: (optional) If ``True``, filters web page content for Twitter Card meta tags
    :type twitter_card: bool
    :param touch_icon: (optional) If ``True``, retrieves Apple touch icons and includes them in the response ``images`` array
    :type touch_icon: bool
    :param favicon: (optional) If ``True``, retrieves any favicon images and includes them in the response ``images`` array
    :type favicon: bool
    :param all_images: (optional) If ``True``, retrieves images inside the web page's body and includes them in the response ``images`` array. Default: False
    :type all_images: bool
    :param parser: (optional) String reference for the parser that BeautifulSoup will use
    :type parser: string
    :return: the value returned by ``Lassie.fetch`` for ``url``

    """
    return Lassie().fetch(url, **kwargs)
Example #2
0
def fetch(url, **kwargs):
    """Build a one-off :class:`Lassie <Lassie>` and delegate to its ``fetch``.

    The content at ``url`` is retrieved and parsed, and a dictionary of
    important information about that web page is returned. All keyword
    arguments are passed straight through to ``Lassie.fetch``.

    Priority of the metadata sources:
        1. Open Graph
        2. Twitter Card
        3. Other meta content (e.g. description, keywords)

    :param url: URL to send a GET request to
    :param open_graph: (optional) If ``True``, filters web page content for Open Graph meta tags. The content of these properties has top priority on return values.
    :type open_graph: bool
    :param twitter_card: (optional) If ``True``, filters web page content for Twitter Card meta tags
    :type twitter_card: bool
    :param touch_icon: (optional) If ``True``, retrieves Apple touch icons and includes them in the response ``images`` array
    :type touch_icon: bool
    :param favicon: (optional) If ``True``, retrieves any favicon images and includes them in the response ``images`` array
    :type favicon: bool
    :param all_images: (optional) If ``True``, retrieves images inside the web page's body and includes them in the response ``images`` array. Default: False
    :type all_images: bool
    :param parser: (optional) String reference for the parser that BeautifulSoup will use
    :type parser: string
    :return: the value returned by ``Lassie.fetch`` for ``url``

    """
    lassie = Lassie()
    result = lassie.fetch(url, **kwargs)
    return result
Example #3
0
    def test_core_class_setting_is_none(self):
        # Unusual but possible: the instance-level ``open_graph`` setting is
        # None while the per-call keyword explicitly passes False.
        lassie = Lassie()
        lassie.open_graph = None

        page_url = "http://lassie.it/core/class_setting_is_none.html"
        result = lassie.fetch(page_url, open_graph=False)

        # No images are expected in the result for this page.
        images = result["images"]
        self.assertEqual(len(images), 0)
Example #4
0
    def test_core_class_setting_is_none(self):
        # Edge case: someone sets the instance attribute ``open_graph`` to
        # None and then calls fetch with ``open_graph=False``.
        fetcher = Lassie()
        fetcher.open_graph = None

        target = 'http://lassie.it/core/class_setting_is_none.html'
        fetched = fetcher.fetch(target, open_graph=False)

        # The images list must come back empty.
        image_count = len(fetched['images'])
        self.assertEqual(image_count, 0)
Example #5
0
    def test_bad_image_dimensions(self):
        # The first image found on this page must carry neither a 'width'
        # nor a 'height' key.
        url = 'http://lassie.it/core/bad_image_dimensions.html'

        lassie = Lassie()
        data = lassie.fetch(url, all_images=True)

        # lassie.utils.convert_to_int will catch a TypeError or ValueError and
        # pass (not setting a width/height on the image).
        image = data['images'][0]
        # assertNotIn reports the offending key and container on failure,
        # unlike assertTrue(not ... in ...), which only says "False is not true".
        self.assertNotIn('width', image)
        self.assertNotIn('height', image)
Example #6
0
    def test_request_opts(self):
        # Both assigned keys must be readable back from ``request_opts``.
        lassie = Lassie()
        lassie.request_opts = {"headers": {"User-Agent": "lassie python"}, "timeout": 3}

        expected_keys = {"headers", "timeout"}
        self.assertTrue(expected_keys.issubset(lassie.request_opts))

        # Modify one key's value in place, then re-read the attribute each
        # time to make sure the change actually happened.
        lassie.request_opts["headers"].update({"Content-Type": "application/json"})
        self.assertEqual(len(lassie.request_opts["headers"]), 2)

        expected_headers = {"User-Agent", "Content-Type"}
        self.assertTrue(expected_headers.issubset(lassie.request_opts["headers"]))
Example #7
0
    def test_bad_image_dimensions(self):
        # The first image returned for this page must have no "width" and no
        # "height" entry.
        url = "http://lassie.it/core/bad_image_dimensions.html"

        lassie = Lassie()
        data = lassie.fetch(url, all_images=True)

        # lassie.utils.convert_to_int will catch a TypeError or ValueError and
        # pass (not setting a width/height on the image).
        image = data["images"][0]
        # Dedicated membership assertions give a failure message that names
        # the key and shows the container contents.
        self.assertNotIn("width", image)
        self.assertNotIn("height", image)
Example #8
0
    def test_bad_request_opts(self):
        # After assigning options that include 'bad_key', reading
        # ``request_opts`` back must not expose 'bad_key' but must still
        # expose 'headers'.
        lassie = Lassie()
        lassie.request_opts = {
            'bad_key': True,
            'headers': {
                'User-Agent': 'lassie python'
            }
        }

        # assertNotIn/assertIn print the key and the container on failure,
        # which assertTrue(<membership expression>) cannot do.
        self.assertNotIn('bad_key', lassie.request_opts)
        self.assertIn('headers', lassie.request_opts)
Example #9
0
    def test_core_retrieve_all_images(self):
        # With the instance-level ``all_images`` setting enabled, the page is
        # expected to yield three images; the third one is 550x365.
        lassie = Lassie()
        lassie.all_images = True

        page_url = 'http://lassie.it/core/retrieve_all_images.html'
        result = lassie.fetch(page_url)

        image_count = len(result['images'])
        self.assertEqual(image_count, 3)

        third_image = result['images'][2]
        self.assertEqual(third_image['width'], 550)
        self.assertEqual(third_image['height'], 365)
Example #10
0
    def test_core_retrieve_all_images(self):
        # ``all_images`` is switched on via the instance attribute rather than
        # a fetch() keyword; three images are expected, the last being 550x365.
        fetcher = Lassie()
        fetcher.all_images = True

        target = "http://lassie.it/core/retrieve_all_images.html"
        fetched = fetcher.fetch(target)

        self.assertEqual(len(fetched["images"]), 3)

        final_image = fetched["images"][2]
        width = final_image["width"]
        self.assertEqual(width, 550)
        height = final_image["height"]
        self.assertEqual(height, 365)
Example #11
0
    def test_core_class_vs_method_settings(self):
        # Fetch the same page twice: once with the instance as constructed,
        # once after turning ``open_graph`` off on the instance.
        page_url = 'http://lassie.it/core/class_vs_method_settings.html'
        lassie = Lassie()

        first_result = lassie.fetch(page_url)
        self.assertEqual(len(first_result['images']), 1)

        lassie.open_graph = False
        second_result = lassie.fetch(page_url)

        # open_graph is set to False so there shouldn't be any images in the
        # list this time around.
        remaining = len(second_result['images'])
        self.assertEqual(remaining, 0)
Example #12
0
    def test_core_class_vs_method_settings(self):
        # The first fetch uses the instance as constructed and expects one
        # image; the second runs with ``open_graph`` disabled on the instance.
        target = "http://lassie.it/core/class_vs_method_settings.html"
        fetcher = Lassie()

        before = fetcher.fetch(target)
        images_before = before["images"]
        self.assertEqual(len(images_before), 1)

        fetcher.open_graph = False
        after = fetcher.fetch(target)

        # With open_graph set to False the images list should now be empty.
        images_after = after["images"]
        self.assertEqual(len(images_after), 0)
Example #13
0
    def test_request_opts(self):
        # Both assigned keys must be readable back from ``request_opts``.
        lassie = Lassie()
        lassie.request_opts = {'headers': {'User-Agent': 'lassie python'}, 'timeout': 3}

        option_keys = {'headers', 'timeout'}
        self.assertTrue(option_keys.issubset(lassie.request_opts))

        # If they modify one of the keys' values, make sure it actually
        # happened; the attribute is deliberately re-read on every access.
        lassie.request_opts['headers'].update({'Content-Type': 'application/json'})
        self.assertEqual(len(lassie.request_opts['headers']), 2)

        header_names = {'User-Agent', 'Content-Type'}
        self.assertTrue(header_names.issubset(lassie.request_opts['headers']))
Example #14
0
    def test_image_dimensions(self):
        # The page is expected to yield four images, each reported as 100x100.
        lassie = Lassie()
        page_url = 'http://lassie.it/core/image_dimensions.html'
        data = lassie.fetch(page_url, all_images=True)

        self.assertEqual(len(data['images']), 4)

        # Check the images in order, index 0 through 3, width before height.
        for index in range(4):
            image = data['images'][index]
            self.assertEqual(image['width'], 100)
            self.assertEqual(image['height'], 100)
Example #15
0
    def test_image_dimensions(self):
        # Four images are expected, and every one must be 100 wide by 100 high.
        fetcher = Lassie()
        target = "http://lassie.it/core/image_dimensions.html"
        data = fetcher.fetch(target, all_images=True)

        self.assertEqual(len(data["images"]), 4)

        # Walk positions 0..3 in order, asserting width first, then height.
        position = 0
        while position < 4:
            image = data["images"][position]
            self.assertEqual(image["width"], 100)
            self.assertEqual(image["height"], 100)
            position += 1
Example #16
0
    def test_core_no_content_raises_error(self):
        # Fetching this page is expected to raise LassieError.
        lassie = Lassie()
        page_url = 'http://lassie.it/core/empty.html'

        with self.assertRaises(LassieError):
            lassie.fetch(page_url)
Example #17
0
    def test_bad_request_opts(self):
        # After assigning options that include "bad_key", reading
        # ``request_opts`` back must not expose "bad_key" but must still
        # expose "headers".
        lassie = Lassie()
        lassie.request_opts = {"bad_key": True, "headers": {"User-Agent": "lassie python"}}

        # Dedicated membership assertions name the key and show the container
        # on failure instead of the opaque "False is not true".
        self.assertNotIn("bad_key", lassie.request_opts)
        self.assertIn("headers", lassie.request_opts)