예제 #1
0
def test_scan_scan_xz():
    """
    Unit test for FileIO.scan on the TEST_CSV_XZ fixture.

    Each row yielded by the scan must equal the row at the same position
    yielded by util_test_csv(TEST_CSV).
    """
    scanned_rows = FileIO().scan(TEST_CSV_XZ)
    expected_rows = util_test_csv(TEST_CSV)

    for scanned, expected in zip(scanned_rows, expected_rows):
        assert scanned == expected
예제 #2
0
def test_scan_scan_bzip2():
    """
    Unit test for scan method, bzip2 case.

    Scans the TEST_CSV_BZ2 fixture and checks every yielded row against
    the row at the same position yielded by util_test_csv(TEST_CSV).
    """
    file_io = FileIO()

    for method_row, util_row in zip(file_io.scan(TEST_CSV_BZ2),
                                    util_test_csv(TEST_CSV)):
        assert method_row == util_row
예제 #3
0
def test_scan_tar_files():
    """
    Unit test for FileIO.scan over every entry of TEST_TAR_LOCATION.

    Each value yielded by the scan must be a path that exists on disk.
    """
    file_io = FileIO()

    for archive_name in os.listdir(TEST_TAR_LOCATION):
        archive_path = os.path.join(TEST_TAR_LOCATION, archive_name)

        for extracted in file_io.scan(archive_path):
            assert os.path.exists(extracted)
예제 #4
0
def test__get_url_unsuccessful():
    """
    Unit test for the _get_url method, unsuccessful case.

    FileIO._get_url(TEST_UNKNOWN) is expected to raise IOError. The test
    fails explicitly when the call returns normally, so it cannot pass
    vacuously.
    """
    try:
        FileIO._get_url(TEST_UNKNOWN)
    except IOError:
        # Expected outcome: the failure was reported as IOError.
        pass
    else:
        raise AssertionError('FileIO._get_url did not raise IOError')
예제 #5
0
def test_scan_csv():
    """
    Unit test for FileIO.scan on the TEST_CSV fixture.

    Each row yielded by the scan must equal the row at the same position
    yielded by util_test_csv(TEST_CSV).
    """
    scanned_rows = FileIO().scan(TEST_CSV)
    expected_rows = util_test_csv(TEST_CSV)

    for scanned, expected in zip(scanned_rows, expected_rows):
        assert scanned == expected
예제 #6
0
    def test_scan_unknown_file(self):
        """
        Unit test for FileIO.scan with the TEST_UNKNOWN fixture.

        Requesting the first item of the scan must raise
        FileTypeNotSupportedYet.
        """
        scanner = FileIO()

        with self.assertRaises(FileTypeNotSupportedYet):
            rows = scanner.scan(TEST_UNKNOWN)
            next(rows)
예제 #7
0
def test_infer_file_type_from_uri_remote():
    """
    Unit test for FileIO.infer_file_type_from_uri with TEST_URL.

    The inferred type must be 'JPG'.
    """
    inferred_type = FileIO().infer_file_type_from_uri(TEST_URL)

    assert inferred_type == 'JPG'
예제 #8
0
def test_resolve_path_end():
    """
    Unit test for FileIO.resolve_path_end.

    A path given with or without a trailing separator must resolve to
    the same path without the trailing separator.
    """
    expected_path = '/just/a/test/path'

    path_with_sep = '/just/a/test/path/'
    path_without_sep = '/just/a/test/path'

    assert FileIO.resolve_path_end(path_with_sep) == expected_path
    assert FileIO.resolve_path_end(path_without_sep) == expected_path
예제 #9
0
def test_infer_file_type_from_uri_no_mimetype():
    """
    Unit test for FileIO.infer_file_type_from_uri with mimetype=False.

    For TEST_LOCAL the inferred type must be 'JPG'.
    """
    inferred_type = FileIO().infer_file_type_from_uri(
        TEST_LOCAL,
        mimetype=False
    )

    assert inferred_type == 'JPG'
예제 #10
0
def test_scan_compressed_tar_file_http():
    """
    Unit test for FileIO.scan_compressed_tar_file, http case.

    Every value yielded while scanning the remote archive must be a path
    that exists on disk.
    """
    remote_archive = 'http://localhost:8888/images.tar.xz'
    reader_mode = 'r|xz'

    scanned = FileIO().scan_compressed_tar_file(remote_archive, reader_mode)

    for extracted in scanned:
        assert os.path.exists(extracted)
예제 #11
0
def test_infer_file_type_from_uri_with_mimetype():
    """
    Unit test for FileIO.infer_file_type_from_uri with mimetype=True.

    The call is unpacked as a pair; its second element must be
    'image/jpeg' for TEST_LOCAL.
    """
    _file_type, mime_type = FileIO().infer_file_type_from_uri(
        TEST_LOCAL,
        mimetype=True
    )

    assert mime_type == 'image/jpeg'
예제 #12
0
    def test_infer_file_type_from_uri_unsupported(self):
        """
        Unit test for FileIO.infer_file_type_from_uri, unsupported case.

        Inferring the type of TEST_UNSUPPORTED_FILE_TYPE must raise
        FileTypeNotSupportedYet.
        """
        inferrer = FileIO()

        with self.assertRaises(FileTypeNotSupportedYet):
            inferrer.infer_file_type_from_uri(
                TEST_UNSUPPORTED_FILE_TYPE,
                mimetype=True
            )
예제 #13
0
def test_dump():
    """
    Unit test for FileIO.dump.

    Dumps TEST_SCAN_DIR into the system temporary directory and checks
    that '<directory name>.pupyl' there is a tar file.
    """
    file_io = FileIO()
    temp_dir = tempfile.gettempdir()

    file_io.dump(TEST_SCAN_DIR, temp_dir)

    dir_name = os.path.basename(file_io.resolve_path_end(TEST_SCAN_DIR))
    dump_path = os.path.join(temp_dir, f'{dir_name}.pupyl')

    assert is_tarfile(dump_path)
예제 #14
0
def test_progress_not_precise():
    """
    Unit test for FileIO.progress with precise=False.

    Wrapping a range and a list must yield the same values, in the same
    order, as the wrapped iterable.
    """
    source_range = range(10)
    source_list = [*source_range]

    wrapped_range = FileIO.progress(source_range, precise=False)
    wrapped_list = FileIO.progress(source_list, precise=False)

    for expected, produced in zip(source_range, wrapped_range):
        assert expected == produced

    for expected, produced in zip(source_list, wrapped_list):
        assert expected == produced
예제 #15
0
def test_scan_directory():
    """
    Unit test for FileIO.scan, directory case.

    The scan of TEST_SCAN_DIR must list, in order, the absolute paths of
    the files reported by the first entry of walk(TEST_SCAN_DIR).
    """
    file_io = FileIO()

    # Only the top-level entry of the walk is used: (root, dirs, files).
    _, _, file_names = list(walk(TEST_SCAN_DIR))[0]

    expected_tree = []
    for file_name in file_names:
        expected_tree.append(abspath(f'{TEST_SCAN_DIR}{file_name}'))

    scanned_tree = list(file_io.scan(abspath(TEST_SCAN_DIR)))

    assert scanned_tree == expected_tree
예제 #16
0
def test_infer_file_type_tar_files():
    """
    Unit test for FileIO.infer_file_type_from_uri, tar file case.

    For every entry of TEST_TAR_LOCATION the inferred value must match
    TarCompressedTypes.mime / TarCompressedTypes.name of the encoding
    guessed by mimetypes.
    """
    file_io = FileIO()

    for tar_name in os.listdir(TEST_TAR_LOCATION):
        tar_path = os.path.join(TEST_TAR_LOCATION, tar_name)

        # guess_type returns (type, encoding); only the encoding is used.
        _, encoding = mimetypes.guess_type(tar_path)

        with_mime = file_io.infer_file_type_from_uri(tar_path, mimetype=True)
        assert with_mime == TarCompressedTypes.mime(encoding)

        without_mime = file_io.infer_file_type_from_uri(
            tar_path,
            mimetype=False
        )
        assert without_mime == TarCompressedTypes.name(encoding)
예제 #17
0
def test_remove():
    """
    Unit test for Index.remove.

    Fills an index with 16 random vectors, reopens it, removes one
    position and checks that the length dropped by one and that the value
    now stored at that position differs from the removed one.
    """
    removed_position = 8

    index_file = FileIO.safe_temp_file(file_name='pupyl.index')
    index_dir = os.path.dirname(index_file)

    with Index(TEST_VECTOR_SIZE, data_dir=index_dir) as index:
        for _ in range(16):
            index.append(numpy.random.normal(size=TEST_VECTOR_SIZE))

        size_before = len(index)
        value_before = index[removed_position]

    with Index(TEST_VECTOR_SIZE, data_dir=index_dir) as index:
        index.remove(removed_position)

        assert len(index) == size_before - 1

        value_after = index[removed_position]

        # The arrays must differ, hence assert_array_equal must fail.
        with numpy.testing.assert_raises(AssertionError):
            numpy.testing.assert_array_equal(value_before, value_after)
예제 #18
0
    def __init__(self, data_dir=None, **kwargs):
        """
        Set up the data directory, index configuration and image database.

        When a stored configuration is returned by
        ``self._index_configuration('r')`` it takes precedence and the
        keyword arguments are ignored; otherwise the keyword arguments
        (or their defaults) are used.

        Parameters
        ----------
        data_dir (optional): str
            Location of the data. When not given,
            ``FileIO.pupyl_temp_data_dir()`` is used.

        import_images (optional)(default: True): bool
            Forwarded to ``ImageDatabase``. An explicit ``False`` is
            honoured; only an omitted (or ``None``) value falls back to
            ``True``.

        characteristic (optional): Characteristics
            Defaults to ``Characteristics.HEAVYWEIGHT_HUGE_PRECISION``.
        """
        if data_dir:
            self._data_dir = data_dir
        else:
            self._data_dir = FileIO.pupyl_temp_data_dir()

        self._index_config_path = os.path.join(self._data_dir, 'index.json')

        configurations = self._index_configuration('r')

        if configurations:
            self._import_images = configurations['import_images']
            self._characteristic = Characteristics.by_name(
                configurations['characteristic'])

            # ``_feature_size`` is only set when the stored configuration
            # carries a truthy value for it.
            if configurations.get('feature_size'):
                self._feature_size = configurations['feature_size']
        else:
            import_images = kwargs.get('import_images')
            characteristic = kwargs.get('characteristic')

            # Compare against None instead of relying on truthiness, so
            # that ``import_images=False`` is not turned into True.
            if import_images is None:
                self._import_images = True
            else:
                self._import_images = import_images

            if characteristic:
                self._characteristic = characteristic
            else:
                self._characteristic = Characteristics.\
                    HEAVYWEIGHT_HUGE_PRECISION

        self.image_database = ImageDatabase(import_images=self._import_images,
                                            data_dir=self._data_dir)
예제 #19
0
    def __init__(self, size, data_dir=None, trees=.001, volatile=False):
        """
        Indexing tensors operations and nearest neighbours search.

        Parameters
        ----------
        size: int
            Shape of the unidimensional vectors to be indexed.

        data_dir: str
            Directory where the index is loaded from or saved to.

        trees (optional): float
            Defines the number of trees to create based on the dataset
            size. Should be a number between 0 and 1.

        volatile (optional): bool
            Whether the index is temporary or not.

        Raises
        ------
        OSError:
            If a permanent index is requested and ``data_dir`` is an
            existing file.

        NoDataDirForPermanentIndex:
            If the index is not volatile and no ``data_dir`` was given.

        DataDirDefinedForVolatileIndex:
            If the index is volatile and a ``data_dir`` was given.

        FileIsNotAnIndex:
            If loading the existing index file raises ``OSError``.
        """
        self._position = -1
        self._size = size
        self._data_dir = data_dir
        self._trees = trees
        self._volatile = volatile

        if self._volatile:
            # A volatile index lives in a temporary file, so an explicit
            # data directory is rejected.
            if self._data_dir:
                raise DataDirDefinedForVolatileIndex

            _temp_file = FileIO.safe_temp_file()
            self._data_dir = os.path.dirname(_temp_file)
            self._path = _temp_file
        else:
            if not self._data_dir:
                raise NoDataDirForPermanentIndex

            if os.path.isfile(self._data_dir):
                raise OSError('data_dir parameter is not a directory')

            os.makedirs(self._data_dir, exist_ok=True)
            self._path = os.path.join(self._data_dir, self.index_name)

        if not os.path.isfile(self._path):
            # No index file yet: start from an empty tree.
            self.tree = AnnoyIndex(size, metric='angular')
            self._is_new_index = True
        else:
            try:
                self.tree = AnnoyIndex(size, metric='angular')

                self.tree.load(self._path)

                self._is_new_index = False
            except OSError as os_error:
                raise FileIsNotAnIndex from os_error

        self._image_database = ImageDatabase(
            import_images=True,
            data_dir=self._data_dir,
        )
예제 #20
0
def test_safe_temp_file_exists():
    """
    Unit test for method safe_temp_file, file exists case.

    A file is created in the system temporary directory beforehand; after
    calling safe_temp_file with the same file name, that file must no
    longer exist.
    """
    test_temp_file_name = 'just_a_temp_file.txt'
    test_temp_file_path = join(tempfile.gettempdir(), test_temp_file_name)

    Path(test_temp_file_path).touch()

    _ = FileIO.safe_temp_file(file_name=test_temp_file_name)

    # Check the path that was actually touched: the bare file name would
    # be resolved against the current working directory instead.
    assert not exists(test_temp_file_path)
예제 #21
0
def test_scan_compressed_tar_file_local():
    """
    Unit test for FileIO.scan_compressed_tar_file, local case.

    Every entry of TEST_TAR_LOCATION is scanned with the reader mode
    mapped from its TarCompressedTypes name; each yielded value must be a
    path that exists on disk.
    """
    readers_by_type = {
        'TZ2': 'r:bz2',
        'TGZ': 'r:gz',
        'TXZ': 'r:xz'
    }

    file_io = FileIO()

    for tar_name in os.listdir(TEST_TAR_LOCATION):
        tar_path = os.path.join(TEST_TAR_LOCATION, tar_name)

        # guess_type returns (type, encoding); only the encoding is used.
        encoding = mimetypes.guess_type(tar_path)[1]
        reader_mode = readers_by_type[TarCompressedTypes.name(encoding)]

        scanned = file_io.scan_compressed_tar_file(tar_path, reader_mode)

        for extracted in scanned:
            assert os.path.exists(extracted)
예제 #22
0
def test_get_metadata_local():
    """
    Unit test for FileIO.get_metadata, local case.

    The access time entry is dropped before comparing, leaving only the
    file name, path and size entries.
    """
    expected_metadata = {
        'original_file_name': 'test_image.jpg',
        'original_path': abspath('tests'),
        'original_file_size': '5K'
    }

    local_metadata = FileIO.get_metadata(TEST_LOCAL)
    local_metadata.pop('original_access_time')

    assert expected_metadata == local_metadata
예제 #23
0
def test_progress_precise():
    """
    Unit test for method progress, precise case.

    Wrapping a range, a list and a generator with precise=True must
    yield the same values, in the same order, as range(10).
    """
    def test_gen():
        """Closure to test functions which returns generators."""
        for value in range(10):
            yield value

    test_generator = range(10)
    test_unpacked = [*test_generator]

    test_result_generator = FileIO.progress(test_generator, precise=True)
    test_result_unpacked = FileIO.progress(test_unpacked, precise=True)
    test_result_func_gen = FileIO.progress(test_gen(), precise=True)

    for t_gen, r_gen in zip(test_generator, test_result_generator):
        assert t_gen == r_gen

    for t_unp, r_unp in zip(test_unpacked, test_result_unpacked):
        assert t_unp == r_unp

    # The generator function yields the same values as range(10), so the
    # range is reused as the expected sequence.
    for t_fgen, r_fgen in zip(test_generator, test_result_func_gen):
        assert t_fgen == r_fgen
예제 #24
0
def test_get_metadata_http_no_date():
    """
    Unit test for FileIO.get_metadata, http and no date case.

    The access time and file size entries are dropped before comparing,
    leaving only the file name and path entries.
    """
    # The literal is split over two lines; the line break and the
    # indentation are stripped to rebuild the path.
    expected_path = """http://images.protopage.com/view/
        572714""".replace('\n        ', '')

    expected_metadata = {
        'original_file_name': 'axuvb8oxm7liskynxggfczfus.jpg',
        'original_path': expected_path
    }

    request_metadata = FileIO.get_metadata(TEST_URL_NO_DATE)

    for ignored_key in ('original_access_time', 'original_file_size'):
        del request_metadata[ignored_key]

    assert expected_metadata == request_metadata
예제 #25
0
def test_bind():
    """
    Unit test for FileIO.bind.

    Dumps TEST_SCAN_DIR into the system temporary directory, binds the
    resulting '<directory name>.pupyl' file to a target path inside the
    same temporary directory and checks that the target is a directory.
    """
    file_io = FileIO()
    temp_dir = tempfile.gettempdir()

    file_io.dump(TEST_SCAN_DIR, temp_dir)

    dir_name = os.path.basename(file_io.resolve_path_end(TEST_SCAN_DIR))
    dump_path = os.path.join(temp_dir, '.'.join((dir_name, 'pupyl')))
    bind_target = os.path.join(temp_dir, TEST_SCAN_DIR)

    file_io.bind(dump_path, bind_target)

    assert os.path.isdir(bind_target)
예제 #26
0
def test_get_metadata_http():
    """
    Unit test for FileIO.get_metadata, http case.

    The access time entry is dropped before comparing, leaving only the
    file name, path and size entries.
    """
    # The literal is split over two lines; the line break and the
    # indentation are stripped to rebuild the path.
    expected_path = """https://upload.wikimedia.org/wikipedia/commons/
        thumb/e/e4/Cheshm-Nazar.JPG""".replace('\n        ', '')

    expected_metadata = {
        'original_file_name': '320px-Cheshm-Nazar.JPG',
        'original_path': expected_path,
        'original_file_size': '9K'
    }

    request_metadata = FileIO.get_metadata(TEST_URL)
    request_metadata.pop('original_access_time')

    assert expected_metadata == request_metadata
예제 #27
0
def test_pop():
    """
    Unit test for Index.pop.

    Fills an index with 16 random vectors, reopens it and pops once: the
    length must drop by one and the popped value must equal the value
    previously read at position -1.
    """
    index_file = FileIO.safe_temp_file(file_name='pupyl.index')
    index_dir = os.path.dirname(index_file)

    with Index(TEST_VECTOR_SIZE, data_dir=index_dir) as index:
        for _ in range(16):
            index.append(numpy.random.normal(size=TEST_VECTOR_SIZE))

        size_before = len(index)
        last_value = index[-1]

    with Index(TEST_VECTOR_SIZE, data_dir=index_dir) as index:
        popped_value = index.pop()

        assert len(index) == size_before - 1

        numpy.testing.assert_array_equal(last_value, popped_value)
예제 #28
0
    def export_by_group_by(self, path, top=10, **kwargs):
        """
        Export images into one directory per group.

        For every element produced by ``group_by`` a directory named
        after the group item is created under ``path``. The item image is
        copied there as ``group.jpg`` and each similar image as
        ``<rank>.jpg``, with ranks starting at 1. Missing image files are
        skipped; a missing group image skips the whole group.

        Parameters
        ----------
        path: str
            Place to create the directories and export images

        top (optional, default 10):
            How many similar internal images should be returned

        position (optional): int
            Returns the groups based on a specified position.
        """
        groups = self.group_by(top=top, position=kwargs.get('position'))

        for element in FileIO.progress(groups):
            if isinstance(element, dict):
                # Dict elements map the group item to its similar images.
                item = list(element.keys())[0]
                similars = element[item]
            elif isinstance(element, list):
                # List elements carry only the similar images; the group
                # item is the requested position.
                item = kwargs['position']
                similars = element

            save_path = os.path.join(path, str(item))
            os.makedirs(save_path, exist_ok=True)

            try:
                group_source = self._image_database.mount_file_name(
                    item, 'jpg')
                copyfile(group_source, os.path.join(save_path, 'group.jpg'))
            except FileNotFoundError:
                continue

            for rank, similar in enumerate(similars, start=1):
                original_file_path = self._image_database.mount_file_name(
                    similar, 'jpg')

                try:
                    copyfile(
                        original_file_path,
                        os.path.join(save_path, f'{rank}.jpg')
                    )
                except FileNotFoundError:
                    continue
예제 #29
0
def serve(data_dir=None, port=8080):
    """
    Start the web server.

    Parameters
    ----------
    data_dir (optional): str
        Location of the data used to build the image search. When not
        given, ``FileIO.pupyl_temp_data_dir()`` is used.

    port (optional)(default: 8080): int
        Defines the network port which the web server
        will start listening.
    """
    # Imported locally so that the block does not depend on a file-level
    # import being present.
    from html import escape

    if not data_dir:
        data_dir = FileIO.pupyl_temp_data_dir()

    pupyl_image_search = PupylImageSearch(data_dir)

    class RequestHandler(SimpleHTTPRequestHandler):
        """A web request handler."""

        _data_dir = data_dir

        def __init__(self, request, client_address, server):
            SimpleHTTPRequestHandler.__init__(self, request, client_address,
                                              server)

        def do_GET(self):
            """Handler for GET request methods."""

            query_image = None

            self.send_response(200)
            self.send_header('Content-type', 'text/html')
            self.end_headers()

            query_string = parse_qs(urlparse(self.path).query)

            query_list = query_string.get('uri', None)

            image_tags = self.images(query_list)

            if query_list:
                # The ``uri`` value comes straight from the request, so it
                # is escaped before being placed inside the attribute.
                safe_uri = escape(query_list[0], quote=True)

                query_image = '<img class="img-thumbnail" ' + \
                    f'src="{safe_uri}">' + \
                    '<figcaption class="figure-caption">' + \
                    'Query image used in the search.</figcaption>'

            self.wfile.write(
                bytes(
                    TEMPLATE.format(images=image_tags,
                                    query=query_image if query_image else ''),
                    'utf-8'))

        @staticmethod
        def filter_metadata(index):
            """
            Return a filtered metadata information.

            Parameters
            ----------
            index: int
                Index number of image

            Returns
            -------
            str:
                The values of the pre-filtered metadata, joined by
                ``', '``.
            """
            metadata = pupyl_image_search.image_database.\
                load_image_metadata(
                    index, filtered=(
                        'original_file_name',
                        'original_file_size'
                    )
                )

            return ', '.join(map(str, metadata.values()))

        def images(self, query_uri=None, top=None):
            """
            Return image tags from database.

            Parameters
            ----------
            query_uri (optional): list
                Values of the ``uri`` query parameter; only the first one
                is used as the location of the query image.

            top (optional)(default: 24): int
                How many results should be returned from some search request.

            Returns
            -------
            str:
                Concatenated ``<figure>`` tags. Without a query, the tags
                of the images returned by ``list_images(top=9)``.
            """

            image_tags = ''
            # A space separates the ``class`` and ``src`` attributes so
            # that the tag is well formed.
            img_src = '<figure class="figure">' + \
                '<img class="img-fluid border" ' + \
                'src="data:image/jpg;base64, {image_b64}" ' + \
                'alt="&#129535; Pupyl"><figcaption class="figure-caption">' + \
                '{figure_caption}</figcaption></figure>'

            top = top if top else 24

            if query_uri:
                query_uri = query_uri[0]

                for result in pupyl_image_search.search(query_uri, top=top):
                    result = int(result)

                    image = pupyl_image_search.image_database.\
                        get_image_bytes_to_base64(
                            pupyl_image_search.image_database.
                            load_image(result)
                        ).decode('utf-8')

                    # Metadata is stored text, escaped before being used
                    # as HTML content.
                    filtered_metadata = escape(self.filter_metadata(result))

                    image_tags += img_src.format(
                        image_b64=image, figure_caption=filtered_metadata)

                return image_tags

            for index, image in pupyl_image_search.image_database.list_images(
                    return_index=True, top=9):
                image_base64 = pupyl_image_search.image_database.\
                    get_image_base64(
                        image
                    ).decode('utf-8')

                filtered_metadata = escape(self.filter_metadata(index))

                image_tags += img_src.format(image_b64=image_base64,
                                             figure_caption=filtered_metadata)

            return image_tags

    if not port:
        port = 8080

    try:
        with socketserver.TCPServer(('', port), RequestHandler) as httpd:
            print(
                termcolor.colored(f'Server listening on port {port}.',
                                  color='green',
                                  attrs=['bold']))

            webbrowser.open_new_tab(f'http://localhost:{port}')
            httpd.serve_forever()

    except OSError:
        # Any OSError is treated as "port in use": retry on the next port.
        print(
            termcolor.colored(
                f'Port {port} already in use. Trying {port + 1}...',
                color='red',
                attrs=['bold']))

        serve(data_dir=data_dir, port=port + 1)
    except KeyboardInterrupt:
        print('🧿 Pupyl says bye.')
예제 #30
0
def test_safe_temp_file():
    """
    Unit test for FileIO.safe_temp_file without arguments.

    The returned path must not exist on disk.
    """
    generated_path = FileIO.safe_temp_file()

    assert not exists(generated_path)