Ejemplo n.º 1
0
    def test_index_can_store_photo(self):
        """Test index can store a photo.

        ``time.time`` is frozen while the photo is saved so the expected
        ``timestamp`` in the indexed body is deterministic. The real
        function is restored afterwards so the patch cannot leak into
        other tests running in the same process.
        """
        self.index.es.index = MagicMock()

        real_time = time.time
        time.time = MagicMock(return_value=real_time())
        try:
            url = Url.from_string('http://example.com')
            path = PhotoPath(self.datadir)
            path.filesize = MagicMock(return_value=10000)

            photo = LoadingPhoto(url=url, path=path,
                                 refresh_rate=refresh.Hourly)

            self.index.save_photo(photo)
            self.index.es.index.assert_called_with(
                index='photos',
                doc_type='photo',
                id=path.uuid,
                body={
                    'url_id': url.hash(),
                    'refresh_rate': refresh.Hourly.lock_format(),
                    'captured_at': refresh.Hourly().lock(),
                    'filesize': photo.filesize(),
                    'filename': photo.filename(),
                    'directory': photo.directory(),
                    'domain': photo.domain(),
                    'timestamp': int(time.time())
                })
        finally:
            # always undo the module-level patch, even on assertion failure
            time.time = real_time
Ejemplo n.º 2
0
 def test_loading_photo_can_be_saved_to_datadir(self):
     """Test a loading photo can be saved to data directory.

     After the loading text is saved, a file must exist at the full path
     of the photo and the raw content of the photo must be 'loading'.
     """
     target = PhotoPath(self.datadir)
     loading_photo = LoadingPhoto(
         url=Url.from_string('https://example.com'),
         path=target,
         refresh_rate=refresh.Hourly,
     )

     loading_photo.save_loading_text()

     saved_file = target.full_path()
     self.assertTrue(isfile(saved_file))
     self.assertEqual('loading', loading_photo.get_raw())
Ejemplo n.º 3
0
    def test_camera_can_save_screenshot(self):
        """Test camera can save screenshot.

        The webdriver methods involved are replaced with mocks; saving
        must pass the full path of the photo to ``save_screenshot``.
        """
        self.creates_webdriver()
        for mocked in ('get_window_size', 'save', 'save_screenshot'):
            setattr(self.camera.webdriver, mocked, MagicMock())

        photo_path = PhotoPath(self.datadir)
        self.camera._save(photo_path)

        expected_target = photo_path.full_path()
        self.camera.webdriver.save_screenshot.assert_called_with(
            expected_target)
Ejemplo n.º 4
0
    def test_filesystem_can_translate_path_to_file_in_datadir(self):
        """Test filesystem can translate path to file in datadir.

        A screenshot is saved through the index with a mocked
        elasticsearch client, the index lookups are then mocked to return
        that screenshot, and the translated path must equal the full path
        of the screenshot in the data directory.
        """
        stored_path = PhotoPath(self.datadir)
        screenshot = Screenshot(
            Url.from_string('https://example.com/foo/bar'),
            stored_path,
            self.refresh_rate,
        )

        self.index.es.index = MagicMock()
        screenshot.path.filesize = MagicMock(return_value=10000)
        self.index.save_photo(screenshot)

        # make the index lookups resolve to the screenshot stored above
        self.index.photos_file_exists = MagicMock(return_value=123000)
        self.index.photos_get_photo = MagicMock(return_value=screenshot)

        requested = '/example.com/2019-01-13H20:00/foo/bar.png'
        translated = self.filesystem._translate_path(requested)
        self.assertEqual(stored_path.full_path(), translated)
Ejemplo n.º 5
0
    def _save(self, path: PhotoPath):
        """Save screen shot.

        Logs the viewport resolution and the output resolution (viewport
        scaled by ``self.dpi``), then asks the webdriver to write the
        screenshot to the full path of the photo.

        Args:
            path: PhotoPath object used to retrieve path in data directory
            to save png file in
        """
        # query the window size once so width and height come from the
        # same result instead of two separate webdriver calls
        window_size = self.webdriver.get_window_size()
        height = window_size['height']
        width = window_size['width']
        console.dca(f'saving png with viewport resolution [{width}x{height}]')

        output_width = int(width * self.dpi)
        output_height = int(height * self.dpi)
        console.dca(
            f'png output resolution [{output_width}x{output_height}]')

        self.webdriver.save_screenshot(path.full_path())
Ejemplo n.º 6
0
    def tick(self):
        """Tick.

        Run one cycle: check out a url from the index, save and index a
        loading placeholder for it, take the photo with a new camera and
        update the index with the resulting photo metadata.

        An EmptySearchResultException raised during the cycle is ignored.
        Every cycle ends with a one second sleep, whether it succeeded
        or not.
        """
        try:
            started_at = time.time()
            url = self._checkout_url()

            console.dp(f'taking photo of {url.to_string()}')

            # save and index a loading placeholder before the camera runs
            path = PhotoPath(self.datadir)
            placeholder = LoadingPhoto(url=url,
                                       path=path,
                                       refresh_rate=self.refresh_rate)
            placeholder.save_loading_text()
            self.index.save_photo(placeholder)

            camera_options = {
                'viewport_width': self.viewport_width,
                'viewport_height': self.viewport_height,
                'viewport_max_height': self.viewport_max_height,
                'addons': {
                    'IDCAC': Addons.IDCAC,
                    'REFERER_HEADER': Addons.REFERER_HEADER,
                    'UBLOCK_ORIGIN': Addons.UBLOCK_ORIGIN,
                },
            }
            camera = c.Camera(**camera_options)

            # index the real photo, saved under the same path
            taken = camera.take_picture(url, path, self.refresh_rate)
            self.index.save_photo(taken)

            elapsed = int(time.time() - started_at)
            console.p(
                f'photo was taken of {url.to_string()} took: {elapsed}s'
            )
        except EmptySearchResultException:
            pass
        finally:
            time.sleep(1)
Ejemplo n.º 7
0
    def photos_get_photo(self, domain: str, captured_at: str,
                         full_filename: str,
                         refresh_rate: Type[RefreshRate]) -> Photo:
        """Get photo from photos index.

        Args:
            domain: domain photo belongs to
            captured_at: when photo was captured
            full_filename: full filename of photo
                eg. /some/path/some-filename.png
            refresh_rate: Given refresh rate photo was taken with

        Returns:
            Requested photo metadata
            Photo

        Raises:
            PhotoNotFoundException: If photo was not found
            Exception: If the index has no data dir to resolve the photo
                path against
        """
        # split into a directory that always ends with exactly one '/'
        # and the bare filename after the last '/'
        directory, _, filename = full_filename.rpartition('/')
        directory = directory.rstrip('/') + '/'

        captured_at = file.LastCapture.translate(captured_at, domain, self,
                                                 refresh_rate)

        # every field must match exactly; one term filter per field
        terms = {
            'domain': domain,
            'refresh_rate': refresh_rate.lock_format(),
            'captured_at': captured_at,
            'directory': directory,
            'filename': filename,
        }
        must = [{'term': {field: value}} for field, value in terms.items()]

        res = self.es.search(index=Index.PHOTOS,
                             size=1,
                             body={'query': {'bool': {'must': must}}})

        # Check the returned hits rather than ``hits.total``: the total is
        # an integer in older Elasticsearch responses but an object in
        # newer ones, so comparing it with 0 is not reliable.
        hits = res['hits']['hits']
        if not hits:
            raise PhotoNotFoundException('no photo was found')

        hit = hits[0]

        if self.datadir is None:
            raise Exception('Cannot get photo from Index without a data dir')

        # the document id is used as the uuid of the photo path
        path = PhotoPath(self.datadir, uuid=hit['_id'])
        source = hit['_source']

        return Screenshot(url=UrlId(source['url_id']),
                          path=path,
                          refresh_rate=refresh_rate,
                          index_filesize=source['filesize'])
Ejemplo n.º 8
0
 def test_photo_path_can_be_created(self):
     """Test a photo path can be created.

     A new path must get a uuid with a length greater than 20 and the
     root of the data directory must be found in its full path.
     """
     photo_path = PhotoPath(self.datadir)
     uuid_length = len(photo_path.uuid)
     self.assertGreater(uuid_length, 20)
     self.assertIn(self.datadir.root, photo_path.full_path())
Ejemplo n.º 9
0
    def take_picture(self,
                     url: Url,
                     path: PhotoPath,
                     refresh_rate: Type[RefreshRate],
                     retry: int = 5) -> Screenshot:
        """Take picture of url.

        Uses the selenium webdriver to load url in firefox,
        make sure the entire page and its assets are loaded
        then write it to data directory as a png.

        A non-zero ``self.viewport_height`` produces a screenshot with
        that fixed height. A height of 0 produces a fullpage screenshot,
        capped at ``self.viewport_max_height`` when that is set.

        RemoteDisconnected, ProtocolError and MaxRetryError raised during
        the capture are swallowed; a Screenshot for the given path is
        still returned in that case.

        Args:
            url: Url to take picture of
            path: Path to store url at
            refresh_rate: Refresh rate for photo
            retry: Number of times to retry if a timeout exception is
                thrown (default: 5)

        Returns:
            A picture of the given url
            Screenshot

        Raises:
            TimeoutException: If routing still times out once all
                retries are used up
        """
        try:
            console.dca('launching firefox, camera: {}x{} [{}]'.format(
                self.viewport_width,
                self.viewport_height if self.viewport_height != 0 else 'full',
                self.dpi))

            profile = self._create_webdriver_profile()
            self.webdriver = self._create_webdriver(profile)
            self._install_webdriver_addons(self.addons)

            # register the pid of the webdriver service process with the
            # thread controller
            threads.Controller.webdrivers.append(
                self.webdriver.service.process.pid)

            console.dca(f'routing camera to {url.to_string()}')

            try:
                self._route_to_blank()
                # NOTE(review): the url is routed to three times in a row;
                # presumably deliberate - confirm before simplifying
                self._route(url)
                self._route(url)
                self._route(url)
            except TimeoutException as e:
                # retry with a fresh webdriver until the retry budget is
                # used up, then let the timeout propagate
                retry = retry - 1
                if retry < 0:
                    raise e
                console.dca('routing reached timeout, retrying')
                self.webdriver.quit()
                # NOTE(review): the recursive call assigns a new
                # self.webdriver and quits it in its own finally; the
                # finally of this frame then calls quit() on it again
                return self.take_picture(url, path, refresh_rate, retry)

            if self.viewport_height != 0:
                # fixed height
                console.dca('taking fixed screenshot')
                self._set_resolution(self.viewport_width, self.viewport_height)
                self._start_images_monitor()
                self._wait_for_images_to_load()
            else:
                # fullpage screenshot
                console.dca('taking fullpage screenshot')
                # start with a height of 1080; the final height is set
                # from the document height further down
                self._set_resolution(self.viewport_width, 1080)
                self._start_images_monitor()
                self._wait_for_images_to_load()

                # scroll down the page to trigger load of images
                console.dca('making sure all images have loaded')
                # scroll in steps of 800 (presumably pixels - confirm
                # against _scroll_y_axis), starting at offset 800
                steps = int(self._document_height() / 800)
                for i in range(1, steps):
                    scroll_to = i * 800
                    self._scroll_y_axis(scroll_to)
                    self._wait_for_images_to_load()
                    time.sleep(0.5)

                # resize the viewport and make sure that it's scrolled
                # all the way to the top
                console.dca(f'resizing camera viewport for {url.to_string()}')
                self._scroll_y_axis(self._document_height() * -1)
                # use the document height, capped at viewport_max_height
                # when a maximum is configured
                if self.viewport_max_height is None:
                    height = self._document_height()
                elif self._document_height() > self.viewport_max_height:
                    height = self.viewport_max_height
                else:
                    height = self._document_height()
                self._wait_for_images_to_load()
                self._set_resolution(self.viewport_width, height)
                self._wait_for_images_to_load()
                self._scroll_y_axis(-height)
                self._wait_for_resize()

            console.dca(f'saving screenshot of {url.to_string()}')
            self._save(path)
        # NOTE(review): these connection errors are swallowed without any
        # log output and the method carries on to return a Screenshot
        except RemoteDisconnected:
            pass
        except ProtocolError:
            pass
        except MaxRetryError:
            pass
        finally:
            # NOTE(review): relies on self.webdriver already existing as
            # an attribute if an error occurs before it is assigned above
            # - confirm it is initialised elsewhere
            if self.webdriver:
                self.webdriver.quit()

        # optimization runs after the webdriver has been quit
        if path.should_optimize():
            console.dca(f'optimizing screenshot of {url.to_string()}')
            timer = time.time()
            path.optimize()
            seconds = int(time.time() - timer)
            console.dca(f'optimizing of {url.to_string()} took {seconds}s')

        return Screenshot(url=url, path=path, refresh_rate=refresh_rate)