def main(imagedir, sim=0.5):
    """Example main app using this library.

    Builds (or loads a cached) fingerprints database for all images in
    *imagedir*, clusters the fingerprints, and creates symlink folders
    for the clusters under ``<imagedir>/<ic_base_dir>/clusters``.

    Parameters
    ----------
    imagedir : str
        path to directory with images
    sim : float (0..1)
        similarity index (see imagecluster.cluster())
    """
    dbfn = pj(imagedir, ic_base_dir, 'fingerprints.pk')
    if not os.path.exists(dbfn):
        os.makedirs(os.path.dirname(dbfn), exist_ok=True)
        print("no fingerprints database {} found".format(dbfn))
        files = co.get_files(imagedir)
        model = ic.get_model()
        # NN pass is the expensive step; result is cached in dbfn below.
        print("running all images through NN model ...")
        fps = ic.fingerprints(files, model, size=(224, 224))
        co.write_pk(fps, dbfn)
    else:
        print("loading fingerprints database {} ...".format(dbfn))
        fps = co.read_pk(dbfn)
    print("clustering ...")
    print(len(fps))
    # Cluster once and reuse the result; the original clustered twice and
    # also called builtin help() with two positional args (a TypeError).
    clusters = ic.cluster(fps, sim)
    ic.make_links(clusters, pj(imagedir, ic_base_dir, 'clusters'))
def get_fp(imagedir, ic_base_dir='imagecluster'):
    """Force-rebuild the fingerprints database for *imagedir*.

    Any existing database directory is deleted first, so the NN pass
    always runs from scratch.

    Parameters
    ----------
    imagedir : str
        path to directory with images
    ic_base_dir : str
        directory in which ``fingerprints.pk`` is (re)created
    """
    dbfn = pj(ic_base_dir, 'fingerprints.pk')
    dbdir = os.path.dirname(dbfn)
    # Wipe any previous database so the rebuild is clean.
    if os.path.exists(dbdir):
        shutil.rmtree(dbdir)
    os.makedirs(dbdir)
    # The original printed "no fingerprints database ... found" here,
    # which was misleading: this function unconditionally rebuilds.
    print("rebuilding fingerprints database {} ...".format(dbfn))
    files = co.get_files(imagedir)
    model = ic.get_model()
    print("running all images through NN model ...")
    fps = ic.fingerprints(files, model, size=(224, 224))
    co.write_pk(fps, dbfn)
def main(imagedir, processingDir, similarity=.4):
    """Fingerprint, cluster, and symlink the images in *imagedir*.

    Parameters
    ----------
    imagedir : str
        path to directory with images
    processingDir : str
        directory holding the ``imagefeatures.pk`` cache
    similarity : float (0..1)
        similarity index passed to imagecluster.cluster()
    """
    imageFeaturePath = pathJoin(processingDir, 'imagefeatures.pk')
    if not os.path.exists(imageFeaturePath):
        # Create the PARENT directory of the pickle file.  The original
        # called makeDir(imageFeaturePath), creating a directory at the
        # file path itself, which would then break write_pk below.
        # (Assumes common.makeDir behaves like os.makedirs — TODO confirm.)
        common.makeDir(os.path.dirname(imageFeaturePath))
        print("No imagefeatures database {} found".format(imageFeaturePath))
        files = common.get_files(imagedir)
        model = imagecluster.get_model()
        fps = imagecluster.fingerprints(files, model, size=(224, 224))
        common.write_pk(fps, imageFeaturePath)
    else:
        print("loading fingerprints database {} ...".format(imageFeaturePath))
        fps = common.read_pk(imageFeaturePath)
    print("clustering ...")
    imagecluster.make_links(imagecluster.cluster(fps, similarity),
                            pathJoin(imagedir, processingDir, 'clusters'))
def spiltFP(fpdir=fingerPrintDir, ic_base_dir=ic_base_dir):
    """Split a large fingerprint file into several smaller ones.

    Reads ``<fpdir>/fingerprints.pk`` and writes chunks of roughly
    ``stepNum`` entries each to ``<ic_base_dir>/part<i>/fingerprints.pk``.

    :param fpdir: directory holding the source fingerprints.pk
    :param ic_base_dir: root directory for the split parts
    :return: nothing
    """
    fps = co.read_pk(fpdir + '/fingerprints.pk')
    total = len(fps)  # don't shadow builtin len() as the original did
    print('total num of fingerprints : %d' % total)
    step = stepNum  # pre-set chunk size
    # Integer division is essential here: the original used float
    # division, which made the "fold a too-small last chunk into the
    # previous ones" branch below unreachable (the remainder expression
    # always evaluated to exactly `step`).
    spiltnum = total // step
    if spiltnum > 1 and (total - step * (spiltnum - 1)) < step / 2:
        spiltnum -= 1
        step = math.ceil(total * 1.0 / spiltnum)
    dicts = [{}]
    i = 0
    count = 0
    for k, v in fps.items():
        dicts[i][k] = v
        count += 1
        if count == step:
            i += 1
            count = 0
            dicts.append({})
    # When total is an exact multiple of step, the loop leaves a trailing
    # empty dict; drop it so no empty part file is written.
    if len(dicts) > 1 and not dicts[-1]:
        dicts.pop()
    print('Spilt fps in %d dicts ' % len(dicts))
    for i, fpdict in enumerate(dicts):
        partdir = ic_base_dir + '/part' + str(i)
        fp_newdir = partdir + '/fingerprints.pk'
        print("[dict%d] has %d element writen in %s " % (i, len(fpdict), fp_newdir))
        if not os.path.exists(partdir):
            os.makedirs(partdir)
        co.write_pk(fpdict, fp_newdir)
    del fps, dicts
def cluster(df, dbfn, sim=0.5):
    """Cluster image fingerprints and persist the result.

    Creates cluster symlink folders via ``ic.make_links`` and adds a
    ``'cluster'`` column to *df* from an average-linkage hierarchical
    clustering of the fingerprints, then pickles *df* to *dbfn*.

    Parameters
    ----------
    df : pandas.DataFrame
        must contain 'filename' and 'cropped_fingerprints' columns
    dbfn : str
        path of the pickle file to write the updated dataframe to
    sim : float (0..1)
        similarity index; the dendrogram is cut at
        ``dfps.max() * (1 - sim)`` (was hard-coded to 0.5)
    """
    print("> Clustering ...")
    fingerprint_column = 'cropped_fingerprints'
    fingerprintdict = df.set_index('filename')[fingerprint_column].to_dict()
    # cluster and save files in folders
    ic.make_links(ic.cluster(fingerprintdict, sim),
                  os.path.join(imagedir, ic_base_dir, 'clusters'))
    # cluster and save results in dataframe
    fps = df[fingerprint_column]
    dfps = distance.pdist(np.array(list(fps)), metric='euclidean')
    Z = hierarchy.linkage(dfps, method='average', metric='euclidean')
    # fcluster with criterion='distance' cuts the tree at the given height.
    cut = hierarchy.fcluster(Z, t=dfps.max() * (1.0 - sim), criterion='distance')
    df['cluster'] = cut
    # save database to file
    co.write_pk(df, dbfn)
    print("done.")
def init_df():
    """Initialize the dataframe: load the cached db or run the pipeline.

    Returns
    -------
    (df, dbfn) : tuple
        the dataframe and the path of its pickle file
    """
    dbfn = os.path.join(imagedir, ic_base_dir, 'db.pk')
    if not os.path.exists(dbfn):
        os.makedirs(os.path.dirname(dbfn), exist_ok=True)
        print("no fingerprints database found in {}".format(dbfn))
        # Reuse a module-level df if one exists.  The original tested
        # "'df' in locals() or 'df' in globals()": the locals() check is
        # always False here (df is not bound yet), and when only a
        # GLOBAL df existed the subsequent write_pk(df, ...) raised
        # UnboundLocalError because df is a local name in this function.
        df = globals().get('df')
        if df is None:
            print("Running processing pipeline ...")
            df = process_dataset(imagedir)
        else:
            print("df exists already.")
        print("writing {}".format(dbfn))
        co.write_pk(df, dbfn)
    else:
        print("loading fingerprints database {} ...".format(dbfn))
        df = co.read_pk(dbfn)
    print("done.")
    return df, dbfn