Esempio n. 1
0
def call_dir(model: callable, src: Path, dst: Path, min_siz: int = 50, bs: int = None, parts: int = None,
             stem: str = None, verbose: bool = False, step: int = 100):
    """
    Index image representations for every image in a directory.
    Args:
        model: model for representation
        src: source directory of images
        dst: destination directory for indexing
        min_siz: minimum image size
        bs: batch size (None means a single batch holding all images)
        parts: number of batches per serialized partition (None keeps one chunk)
        stem: file name stem for the partition files
        verbose: logging flag
        step: step to log after
    """
    paths_list = [pt for pt in src.iterdir() if pt.suffix in IMG_EXTS]
    # BUG FIX: bs defaulted to None but was used directly as a range() step,
    # raising TypeError. A missing batch size now means "one batch of everything".
    batch_size = bs if bs else max(len(paths_list), 1)
    paths = [paths_list[i:i + batch_size] for i in range(0, len(paths_list), batch_size)]
    # BUG FIX: the message counted batches (len(paths)) while claiming images
    logger.print_texts(verbose, f'there are {len(paths_list)} images to index')
    chunks_bt = [paths] if parts is None else [paths[i:i + parts] for i in range(0, len(paths), parts)]
    for sidx, chunk in enumerate(chunks_bt):
        vec_bts = list(_encode_all(model, *chunk, min_siz=min_siz, verbose=verbose, step=step))
        vecs = listify_results(vec_bts)
        part_dst = _prepare_dir(dst, sidx, parts=parts, stem=stem)
        _dump_data(str(part_dst), vecs, verbose=verbose)
        logger.print_texts(verbose, f'chunk {sidx + 1} is indexed')
        # drop the (potentially large) vector lists before processing the next chunk
        del vec_bts
        del vecs
Esempio n. 2
0
def _dump_data(dst: str, reprs: list, verbose: bool = True):
    """
    Serialize vectors to a file with pickle.
    Args:
        dst: destination file path
        reprs: vectors to serialize
        verbose: logging flag
    """
    with open(dst, 'wb') as vecs:
        pkl.dump(reprs, vecs)
        # BUG FIX: message referenced a non-existent 'full_dicts' variable;
        # it reports len(reprs), the list actually serialized
        logger.print_texts(verbose, f'Saved len(reprs) = {len(reprs)}')
Esempio n. 3
0
def load_dbvecs(vectors_file: str, verbose: bool = True):
    """
    Deserialize previously stored image vectors from a pickle file.
    Args:
        vectors_file: path of the serialized vectors file
        verbose: logging flag

    Returns:
        images_dict: list of dictionaries of vectorized images
    """
    with open(vectors_file, 'rb') as vec_file:
        images_dict = pkl.load(vec_file)
        logger.print_texts(verbose, f'{len(images_dict)} vectors are extracted')

    return images_dict
Esempio n. 4
0
def dump(src_files: list, dst_dir: Path, func_valid: callable = lambda x: x, verbose: bool = False):
    """
    Copy validated source files into the destination directory.
    Args:
        src_files: source files to dump
        dst_dir: destination directory
        func_valid: validation function for source file
        verbose: logging flag
    """
    for idx, src_file in enumerate(src_files):
        src_file_txt = str(src_file)
        if func_valid(src_file_txt):
            dst_file = dst_dir / src_file.name
            # copy2 preserves metadata (timestamps) along with the content
            shutil.copy2(src_file_txt, str(dst_file))
            # BUG FIX: log message typo 'data if' -> 'data of'
            logger.print_texts(verbose, f'{idx} data of {len(src_files)} is processed')
Esempio n. 5
0
def generate_data(src_root: Path,
                  dst_root: Path,
                  h: int = 224,
                  w: int = 224,
                  tr_dir: str = 'train',
                  val_dir: str = 'valid',
                  tst_dir: str = None,
                  lazy_read: bool = False,
                  verbose: bool = True):
    """
    Generate the rotation data-set for the train/valid (and optional test) splits.
    Args:
        src_root: source root directory
        dst_root: destination root directory
        h: height of image
        w: width of image
        tr_dir: training directory
        val_dir: validation directory
        tst_dir: test directory
        lazy_read: lazy load of classes
        verbose: logging flag
    """
    src_dirs = [src_root / name for name in (tr_dir, val_dir)]
    dst_dirs = [dst_root / name for name in (tr_dir, val_dir)]
    logger.print_texts(
        verbose,
        f'train and validation directories src_dirs = {src_dirs}, dst_dirs = {dst_dirs}'
    )
    # may append the test split to both lists in place
    _add_tests(src_root,
               dst_root,
               src_dirs,
               dst_dirs,
               tst_dir=tst_dir,
               verbose=verbose)
    rotations = _init_rotation_matrix(h=h, w=w)
    for src_dir, dst_dir in zip(src_dirs, dst_dirs):
        if not src_dir.exists():
            continue
        dst_dir.mkdir(exist_ok=True)
        generate_classes(src_dir,
                         dst_dir,
                         h=h,
                         w=w,
                         tr_mx=rotations,
                         lazy_read=lazy_read,
                         verbose=verbose)
Esempio n. 6
0
def search_dir(model: Encoder, paths: list, db_vecs: list = None, index: Path = None, n_results: int = None,
               verbose: bool = False) -> tuple:
    """
    Vectorize the query images and search them against the database vectors.
    Args:
        model: model for representation
        paths: path of images to search
        db_vecs: database vectors (loaded from `index` when not given)
        index: index file
        n_results: number of results
        verbose: logging flag

    Returns:
        res_vecs: (image, distances, path) results per query image
        dbs_vecs: database vectors actually searched
    """
    res_vecs = list()
    src_vec_bts = list(_encode(model, path) for path in paths)
    # BUG FIX: the `vec_bt is not None` guard ran after zip(vec_bt, ...) had
    # already been evaluated, so a None batch raised TypeError before filtering;
    # the check now precedes the inner zip
    src_vecs = [(vec, img, path) for vec_bt, img_bt, path_bt in src_vec_bts if vec_bt is not None
                for vec, img, path in zip(vec_bt, img_bt, path_bt)]
    logger.print_texts(verbose, f'len(src_vecs) = {len(src_vecs)} query images are vectorized')
    dbs_vecs = db_vecs if db_vecs else load_dbvecs(str(index))
    logger.print_texts(verbose, f'{len(dbs_vecs)} is loaded from disk')
    for idx, (vec1, img, pt) in enumerate(src_vecs):
        dists = search_img(vec1, dbs_vecs, n_results=n_results, verbose=verbose)
        res_vecs.append((img, dists, pt))
        logger.print_texts(verbose, f'result for {idx + 1} out of {len(src_vecs)} extracted , path - {pt}')

    # BUG FIX: returned the raw db_vecs argument (None when the database was
    # loaded from disk) instead of the vectors actually used for the search
    return res_vecs, dbs_vecs
Esempio n. 7
0
def _add_tests(src_root: Path,
               dst_root: Path,
               src_dirs: list,
               dst_dirs: list,
               tst_dir: str = None,
               verbose: bool = True):
    """
    Add test directory to source and destination paths
    Args:
        src_root: source root
        dst_root: destination root
        src_dirs: source directories
        dst_dirs: destination directories
        tst_dir: test directory name
        verbose: logging flag
    """
    if tst_dir:
        src_dirs += [src_root / tst_dir]
        dst_dirs += [dst_root / tst_dir]
        logger.print_texts(
            verbose,
            f'test directories src_dirs = {src_dirs}, dst_dirs = {dst_dirs}')
Esempio n. 8
0
def _encode_all(model: callable, *paths: list, min_siz: int = 50, verbose: bool = False, step: int = 100) -> np.ndarray:
    """
    Encode batches of image paths into vectors, skipping invalid images.

    This is a generator: one (vecs, valid_paths) pair is yielded per batch
    that produced at least one valid encoding.
    Args:
        model: representation extractor model
        paths: batches (lists) of image paths
        min_siz: minimum image size
        verbose: logging flag
        step: step to log after

    Yields:
        vecs: vectors extracted for the batch
        valid_paths: paths of the images that were successfully encoded
    """
    # running total of images the encoder rejected across all batches
    ivld_cnt = 0
    for idx, path in enumerate(paths):
        # `path` is a whole batch; _encode drops entries below min_siz
        vecs, _, valid_paths = _encode(model, *path, min_siz=min_siz)
        img_diff = len(path) - len(valid_paths)
        ivld_cnt += max(img_diff, 0)
        _log_diff(img_diff, ivld_cnt, verbose)
        if valid_paths:
            # NOTE(review): `idx * len(path)` assumes all batches have the same
            # size as the current one — confirm against the caller's batching
            logger.print_texts(verbose and idx % step == 0,
                               f'{idx} data is processed, out of {len(paths)}, valid - {idx * len(path) - ivld_cnt}')
            yield vecs, valid_paths
Esempio n. 9
0
def generate_classes(src_dir: Path,
                     dst_dir: Path,
                     h: int = 224,
                     w: int = 224,
                     tr_mx: dict = None,
                     interpolation: int = cv2.INTER_LINEAR,
                     lazy_read: bool = False,
                     verbose: bool = False):
    """
    Generate rotation data-set for classification.
    Args:
        src_dir: source directory
        dst_dir: destination directory
        h: height of image
        w: width of image
        tr_mx: rotation matrices
        interpolation: interpolation for resize
        lazy_read: lazy reading of image
        verbose: logging flag
    """
    dest_dirs = make_dirs(dst_dir)
    img_dict = {
        str(p.name): _image_data(p, h, w, interpolation, lazy_read=lazy_read)
        for p in src_dir.iterdir() if _valid_image(p)
    }
    ms = _init_rotation_matrix(h=h, w=w, tr_mx=tr_mx)
    logger.print_texts(
        verbose,
        f'generates classes for dst_dirs = {dst_dir} for rotations {ms}')
    # BUG FIX: `for idx, k, v in enumerate(img_dict.items())` raised
    # ValueError — the (key, value) pair must be unpacked as a nested tuple
    for idx, (k, v) in enumerate(img_dict.items()):
        for d, m in ms.items():
            try:
                im = _read_resize(v, h, w, interpolation) if lazy_read else v
                # NOTE(review): cv2.warpAffine expects dsize as (width, height);
                # (h, w) only matches for square images — confirm for non-square
                mod = im if d == '0' else cv2.warpAffine(im, m, (h, w))
                # reuse the computed path instead of rebuilding it for imwrite
                dst_file = str(dest_dirs[d] / k)
                cv2.imwrite(dst_file, mod)
                logger.print_texts(verbose, f'writes {dst_file}')
            except Exception as ex:
                # best-effort: keep generating the remaining rotations/images
                print(f'Error on rotating image {k} ', ex)
        logger.print_texts(verbose, f'{idx} out of {len(img_dict)}')
Esempio n. 10
0
def _log_diff(img_diff: int, ivld_cnt: int, verbose: bool):
    if img_diff > 0:
        logger.print_texts(verbose, f'there are {ivld_cnt} invalid images')