Esempio n. 1
0
def scrape_images(url: str):
    """
    Scrape a webpage for images and persist what was found.

    One PageImage is saved for the page itself, then one Image is saved for
    every (image URL, image data) pair yielded by ``get_all_images_data``.

    Args:
        url (str): URL of the page to be scraped
    """
    scraped_pairs = get_all_images_data(url)

    page_record = PageImage(url=url)
    # The page is saved before the loop; its id is read for every Image below.
    page_record.save_to_db()

    for src, payload in scraped_pairs:
        Image(img_url=src, page_id=page_record.id, data=payload).save_to_db()
Esempio n. 2
0
    def test_save_to_db(self):
        """
        Check that save_to_db stores a PageImage and leaves it with an ID
        """
        record = PageImage(url="http://example.com", created_at=datetime.now())

        # Before saving: the object has no ID and the query returns nothing
        self.assertIsNone(record.id)
        self.assertEqual(PageImage.query.all(), [])

        record.save_to_db()

        # After saving: the ID is set and the first queried row is this object
        self.assertIsNotNone(record.id)
        stored = PageImage.query.all()
        self.assertEqual(stored[0], record)
Esempio n. 3
0
    def get(self):
        """
        Get the PageImage for an already scraped page.

        The "url" argument is read from ``PageImageView.parser`` and handed to
        ``PageImage.find_url_or_404``.
        NOTE(review): presumably a 404 is produced when no page matches the
        url -- confirm against ``find_url_or_404``.

        Returns:
            The ``to_json()`` result of the PageImage that was found.
        """
        args = PageImageView.parser.parse_args()
        page = PageImage.find_url_or_404(args["url"])
        return page.to_json()
Esempio n. 4
0
    def test_get_all_images_list_view(self):
        """
        GET /images should answer with the JSON of every saved PageImage
        """
        first = PageImage(url="http://example.com", created_at=datetime.now())
        first.save_to_db()
        second = PageImage(url="http://other.com", created_at=datetime.now())
        second.save_to_db()

        payload = self.client.get("/images").json

        # Both pages are listed, in the order they were saved
        self.assertEqual(len(payload), 2)
        self.assertEqual(payload, [first.to_json(), second.to_json()])
Esempio n. 5
0
    def test_find_url_or_404(self):
        """
        With two PageImage rows sharing a URL, the newer one must be returned
        """
        shared_url = "http://example.com"
        session = self.db.session

        # Older entry, created one day earlier than the second one
        with freeze_time("2021-03-24 14:15:16"):
            older = PageImage(url=shared_url, created_at=datetime.now())
        session.add(older)
        session.commit()

        with freeze_time("2021-03-25 17:15:16"):
            newer = PageImage(url=shared_url, created_at=datetime.now())
        session.add(newer)
        session.commit()

        found = PageImage.find_url_or_404(url=shared_url)
        self.assertEqual(found, newer)
Esempio n. 6
0
    def test_get_specific_image_view(self):
        """
        GET /image should answer with the JSON of the requested url only
        """
        wanted_url = "http://other.com"

        # A second, unrelated page is saved so the view has something to filter
        decoy = PageImage(url="http://example.com", created_at=datetime.now())
        decoy.save_to_db()
        wanted = PageImage(url=wanted_url, created_at=datetime.now())
        wanted.save_to_db()

        resp = self.client.get("/image", data={"url": wanted_url})
        self.assertEqual(resp.json, wanted.to_json())
Esempio n. 7
0
 def test_to_json_method(self):
     """
     Check the dict built by PageImage.to_json for a page with one image
     """
     session = self.db.session

     with freeze_time("2021-03-25 14:15:16"):
         page = PageImage(url="http://example.com", created_at=datetime.now())
     session.add(page)
     session.commit()

     # The image is attached to the page through the page's committed id
     image = Image(
         img_url="http://example.com/img_1.png",
         page_id=page.id,
         data=b"123",
     )
     session.add(image)
     session.commit()

     # created_at is expected in the "%d/%m/%y %H:%M:%S" format
     expected = {
         "id": page.id,
         "url": "http://example.com",
         "images": ["<Image url: http://example.com/img_1.png>"],
         "created_at": "25/03/21 14:15:16",
     }
     actual = page.to_json()
     self.assertEqual(actual, expected)
Esempio n. 8
0
    def get(self):
        """
        Send all images stored for an already scraped url as a zip attachment.

        The "url" argument is read from ``PageImageView.parser`` and the page
        is looked up with ``PageImage.find_url_or_404``.

        Returns:
            The ``send_file`` response wrapping the bytes produced by
            ``archive_bytes_stream``, or None when the page has no image data.
        """
        args = PageImageView.parser.parse_args()
        page = PageImage.find_url_or_404(args["url"])

        blobs = [image.data for image in page.images]
        if not blobs:
            # Nothing to archive: no explicit response is built in this case.
            return None

        archive = archive_bytes_stream(blobs)
        # NOTE(review): newer Flask releases call this parameter
        # ``download_name`` -- confirm against the pinned Flask version.
        return send_file(
            BytesIO(archive),
            attachment_filename=f"{page.url}_images.zip",
            as_attachment=True,
        )
Esempio n. 9
0
 def test_find_url_or_404_no_object(self):
     """
     Looking up a URL that has no stored PageImage must raise NotFound
     """
     # Callable form: assertRaises invokes the classmethod with the keyword
     self.assertRaises(
         NotFound,
         PageImage.find_url_or_404,
         url="http://example.com",
     )