def main(imagedir, sim=0.5):
    """Example main app using this library.

    The fingerprints database lives at
    ``<imagedir>/<ic_base_dir>/fingerprints.pk``. If it does not exist yet,
    all files returned by ``co.get_files(imagedir)`` are run through the
    model and the resulting fingerprints are written there. Otherwise the
    existing database is loaded. In both cases the fingerprints are then
    clustered and links are created under ``<imagedir>/<ic_base_dir>/clusters``.

    Parameters
    ----------
    imagedir : str
        path to directory with images
    sim : float (0..1)
        similarity index (see imagecluster.cluster())
    """
    dbfn = pj(imagedir, ic_base_dir, 'fingerprints.pk')
    if os.path.exists(dbfn):
        print("loading fingerprints database {} ...".format(dbfn))
        fps = co.read_pk(dbfn)
    else:
        # Explicit existence check instead of makedirs(exist_ok=True) so the
        # function keeps working on interpreters without that keyword.
        dbdir = os.path.dirname(dbfn)
        if not os.path.exists(dbdir):
            os.makedirs(dbdir)
        print("no fingerprints database {} found".format(dbfn))
        files = co.get_files(imagedir)
        model = get_model()
        print("running all images through NN model ...")
        fps = fingerprints(files, model, size=(224, 224))
        co.write_pk(fps, dbfn)

    # Clustering and link creation are identical for freshly computed and
    # cached fingerprints, so they are done once here.
    print("clustering ...")
    make_links(cluster(fps, sim), pj(imagedir, ic_base_dir, 'clusters'))
def link_parts(ic_base_dir=ic_base_dir, sim=0.55):
    """Re-cluster the sampled fingerprints and rebuild the cluster links.

    Every immediate sub-directory of `ic_base_dir` is removed first, then the
    fingerprints stored at the module-level ``sampled_fp_path`` are clustered
    with ``method='average'`` and links are written to
    ``<ic_base_dir>/cluster``.

    Parameters
    ----------
    ic_base_dir : str
        directory whose sub-directories are wiped and which receives the new
        ``cluster`` directory
    sim : float
        similarity index passed to ``ic.cluster``
    """
    # Only the first os.walk() entry (the top level) is needed. The default
    # covers a missing or unreadable directory, for which os.walk() yields
    # nothing; previously that left the directory list unbound.
    _, subdirs, _ = next(os.walk(ic_base_dir), (ic_base_dir, [], []))
    for subdir in subdirs:
        shutil.rmtree(os.path.join(ic_base_dir, subdir))

    fpdict = co.read_pk(sampled_fp_path)
    print("Dict has %d is clusting [sim=%f]" % (len(fpdict), sim))
    # ic.cluster receives a plain-dict copy of whatever read_pk returned.
    clusters = ic.cluster(dict(fpdict), sim, method='average')
    ic.make_links(clusters, os.path.join(ic_base_dir, 'cluster'))
def get_fp():
    """Extract the fingerprints of the sampled images into their own database.

    Reads the full fingerprint database from the module-level
    ``full_fp_path``, reads the list of selected keys from the first column of
    the header-less CSV at ``converted_txt_path``, and writes a dict holding
    only those entries to ``sampled_fp_path``. A percentage progress indicator
    is written to stdout while copying.

    Raises
    ------
    KeyError
        if a key listed in the CSV is missing from the full database
    """
    fp = dict(co.read_pk(full_fp_path))
    # Show a few keys as a sanity check. list() is required because dict
    # views cannot be sliced on Python 3; the call form of print works on
    # both Python 2 and 3.
    print(list(fp.keys())[0:5])

    result_list = pd.read_csv(converted_txt_path, header=None)[0]
    num = len(result_list)
    new_dic = {}
    for i, res in enumerate(result_list, 1):
        new_dic[res] = fp[res]
        # '\r' rewinds to the line start so the percentage updates in place.
        sys.stdout.write('\r[%.2f%%]' % (i * 100.0 / num))
        sys.stdout.flush()
    print('\n')
    co.write_pk(new_dic, sampled_fp_path)
def get_iter_fp(txt_path, save_path, formerFP_path):
    """Build a fingerprint database restricted to the files in ``nones.txt``.

    Parameters
    ----------
    txt_path : str
        directory containing ``nones.txt``; the first column of that
        header-less CSV lists the file names to keep
    save_path : str
        directory that receives the new ``fingerprints.pk``
    formerFP_path : str
        directory containing the previous ``fingerprints.pk``

    Notes
    -----
    Keys of the previous database are looked up as ``imagedir + '/' + name``,
    where ``imagedir`` is a name taken from the enclosing module scope. A
    name missing from the previous database raises ``KeyError``.
    """
    names_file = txt_path + '/nones.txt'
    former_db = formerFP_path + '/fingerprints.pk'

    table = pd.read_csv(names_file, header=None, index_col=None)
    files = list(table[0])
    # Shuffled twice, exactly as before, so RNG consumption is unchanged.
    for _ in range(2):
        random.shuffle(files)
    print('nones.txt contain %d ' % len(files))

    fpdict = co.read_pk(former_db)
    print('former fingerprints contain %d ' % len(fpdict))

    keys = (imagedir + '/' + fname for fname in files)
    newdict = {tkey: fpdict[tkey] for tkey in keys}
    co.write_pk(newdict, save_path + '/fingerprints.pk')
def main(imagedir, sim=0.5, layer='fc2', size=(224,224), links=True, vis=False,
         max_csize=None, pca=False, pca_params=dict(n_components=0.9)):
    """Example front-end: fingerprint, cluster, link and optionally plot images.

    Two databases are kept below ``<imagedir>/<ic_base_dir>/``:
    ``images.pk`` (image arrays) and ``fingerprints.pk``. When the
    fingerprint database is missing it is computed (building the image array
    database first if that is missing too) and written to disk. When it
    exists it is simply loaded, so repeated calls only redo

    * the optional PCA step
    * clustering
    * creation of links to files in clusters (if `links`)
    * visualization (if `vis`)

    which makes it cheap to experiment with `sim`.

    Parameters
    ----------
    imagedir : str
        path to directory with images
    sim : float (0..1)
        similarity index (see :func:`calc.cluster`)
    layer : str
        which layer to use as feature vector (see :func:`calc.get_model`)
    size : tuple
        input image size (width, height), must match `model`, e.g. (224,224)
    links : bool
        create dirs with links
    vis : bool
        plot images in clusters
    max_csize : int or None
        max number of images per cluster for visualization (see
        :mod:`~postproc`)
    pca : bool
        Perform PCA on fingerprints before clustering, using `pca_params`.
    pca_params : dict
        kwargs to sklearn's PCA

    Notes
    -----
    To process only a subset of images, create a separate `imagedir` and
    symlink the selected images into it. Passing an explicit file list is not
    supported; this function is only an example front-end.
    """
    fps_fn = pj(imagedir, ic_base_dir, 'fingerprints.pk')
    ias_fn = pj(imagedir, ic_base_dir, 'images.pk')
    ias = None

    if os.path.exists(fps_fn):
        print(f"loading fingerprints database {fps_fn} ...")
        fps = co.read_pk(fps_fn)
    else:
        print(f"no fingerprints database {fps_fn} found")
        os.makedirs(os.path.dirname(fps_fn), exist_ok=True)
        model = ic.get_model(layer=layer)
        if os.path.exists(ias_fn):
            ias = co.read_pk(ias_fn)
        else:
            print(f"create image array database {ias_fn}")
            ias = ic.image_arrays(imagedir, size=size)
            co.write_pk(ias, ias_fn)
        print("running all images through NN model ...")
        fps = ic.fingerprints(ias, model)
        co.write_pk(fps, fps_fn)

    if pca:
        fps = ic.pca(fps, **pca_params)
        first_fp = list(fps.values())[0]
        print("pca dims:", first_fp.shape[0])

    print("clustering ...")
    clusters = ic.cluster(fps, sim)

    if links:
        link_root = pj(imagedir, ic_base_dir, 'clusters')
        pp.make_links(clusters, link_root)

    if vis:
        # The image arrays are only in memory if they were needed above to
        # compute fingerprints; otherwise load them from disk now.
        if ias is None:
            ias = co.read_pk(ias_fn)
        pp.visualize(clusters, ias, max_csize=max_csize)
def main_kmeans(imagedir, n_clusters=5, layer='fc2', size=(224, 224),
                links=True, pca=False, pca_params=dict(n_components=0.9)):
    """Example main app using this library, clustering with k-means.

    Upon first invocation, the image and fingerprint databases are built and
    written to disk below ``<imagedir>/<ic_base_dir>/``. Each new invocation
    loads the fingerprint database and only repeats

    * the optional PCA step
    * clustering
    * creation of links to files in clusters (if `links`)

    This is good for playing around with the `n_clusters` parameter, for
    instance, which only influences clustering.

    Any exception raised while running is written to the ``kmeans`` logger
    and not propagated to the caller.

    Parameters
    ----------
    imagedir : str
        path to directory with images
    n_clusters : int
        number of k-means clusters (see :func:`calc.cluster_kmeans`)
    layer : str
        which layer to use as feature vector (see :func:`calc.get_model`)
    size : tuple
        input image size (width, height), must match `model`, e.g. (224,224)
    links : bool
        create dirs with links
    pca : bool
        Perform PCA on fingerprints before clustering, using `pca_params`.
    pca_params : dict
        kwargs to sklearn's PCA

    Notes
    -----
    To process only a subset of images, create a separate `imagedir` and
    symlink the selected images into it. Passing an explicit file list is not
    supported; this function is only an example front-end.
    """
    fps_fn = pj(imagedir, ic_base_dir, 'fingerprints.pk')
    ias_fn = pj(imagedir, ic_base_dir, 'images.pk')
    logger_kmeans = log(logger_name='kmeans').logger
    try:
        if not os.path.exists(fps_fn):
            print("no fingerprints database {} found".format(fps_fn))
            logger_kmeans.info(
                "no fingerprints database {} found".format(fps_fn))
            os.makedirs(os.path.dirname(fps_fn), exist_ok=True)
            # A failure to load the model is deliberately not caught here:
            # without a model no fingerprints can be computed, so the error
            # goes to the handler below, which logs it and stops the run.
            model = ic.get_model(layer=layer)
            if not os.path.exists(ias_fn):
                logger_kmeans.info(
                    "create image array database {}".format(ias_fn))
                print("create image array database {}".format(ias_fn))
                ias = ic.image_arrays(imagedir, size=size)
                co.write_pk(ias, ias_fn)
            else:
                ias = co.read_pk(ias_fn)
            print("running all images through NN model ...")
            fps = ic.fingerprints(ias, model)
            co.write_pk(fps, fps_fn)
        else:
            print("loading fingerprints database {} ...".format(fps_fn))
            fps = co.read_pk(fps_fn)

        if pca:
            fps = ic.pca(fps, **pca_params)
            pca_dims = list(fps.values())[0].shape[0]
            print("pca dims:", pca_dims)
            logger_kmeans.info("pca dims: " + str(pca_dims))

        # Every image is now represented by a vector; run the clustering.
        print("clustering ...")
        logger_kmeans.info("clustering ...")
        clusters = ic.cluster_kmeans(fps, n_clusters=n_clusters)
        if links:
            pp.make_links_v2(clusters, pj(imagedir, ic_base_dir, 'clusters'))
    except Exception as e:
        # Top-level boundary of this example app: report the failure through
        # the logger instead of raising to the caller.
        logger_kmeans.error(e)