def main(imagedir, sim=0.5):
    """Example main app using this library.

    Looks for a fingerprints database at
    ``pj(imagedir, ic_base_dir, 'fingerprints.pk')``. If it is missing, the
    images found by ``co.get_files(imagedir)`` are run through the model from
    ``ic.get_model()`` and the resulting fingerprints are written to that
    path; otherwise the existing database is loaded. The fingerprints are
    then clustered once and the clusters are handed to ``ic.make_links``
    with ``pj(imagedir, ic_base_dir, 'clusters')`` as target.

    Parameters
    ----------
    imagedir : str
        path to directory with images
    sim : float (0..1)
        similarity index (see imagecluster.cluster())
    """
    dbfn = pj(imagedir, ic_base_dir, 'fingerprints.pk')
    if not os.path.exists(dbfn):
        os.makedirs(os.path.dirname(dbfn), exist_ok=True)
        print("no fingerprints database {} found".format(dbfn))
        files = co.get_files(imagedir)
        model = ic.get_model()
        print("running all images through NN model ...")
        fps = ic.fingerprints(files, model, size=(224, 224))
        co.write_pk(fps, dbfn)
    else:
        print("loading fingerprints database {} ...".format(dbfn))
        fps = co.read_pk(dbfn)
    print("clustering ...")
    print(len(fps))
    # Cluster once and reuse the result below; the previous version ran the
    # same ic.cluster(fps, sim) call a second time just for make_links.
    clusters = ic.cluster(fps, sim)
    # NOTE(review): the builtin `help` does not accept two arguments, so this
    # presumably resolves to a project helper defined elsewhere in the
    # module -- confirm. It is assumed not to modify `clusters` in place.
    help(imagedir, clusters)
    ic.make_links(clusters, pj(imagedir, ic_base_dir, 'clusters'))
def linkParts(ic_base_dir=ic_base_dir, sim=sim):
    """
    Link all divided parts, each by its own divided finger-print.

    Every immediate sub-directory of ``ic_base_dir`` is expected to contain a
    ``fingerprints.pk`` file. Each one is loaded, clustered with
    ``ic.cluster(..., method='average')`` and linked into a ``cluster``
    directory inside that same sub-directory. When ``ic_base_dir`` has no
    sub-directories (or does not exist), ``spiltFP`` is called first and the
    directory is listed again.

    :param ic_base_dir: directory whose immediate sub-directories are the parts
    :param sim: similarity value forwarded to ``ic.cluster``
    :return: None
    """
    # os.walk yields nothing for a missing directory; the default keeps
    # that case from leaving the listing unbound.
    part_dirs = next(os.walk(ic_base_dir), (None, [], []))[1]
    if not part_dirs:
        print('Try to link images , no former folder found !')
        spiltFP(fpdir=fingerPrintDir, ic_base_dir=ic_base_dir)
        part_dirs = next(os.walk(ic_base_dir), (None, [], []))[1]
    for i, f_dir in enumerate(part_dirs):
        part_path = os.path.join(ic_base_dir, f_dir)
        fpdict = co.read_pk(os.path.join(part_path, 'fingerprints.pk'))
        print("[dict%d] with %d is clusting [sim=%f]" % (i, len(fpdict), sim))
        # Write the links next to the fingerprints they were computed from.
        # Directory listing order is arbitrary, so deriving the target from
        # the loop index ('part<i>') could pair a part with another part's
        # output directory.
        ic.make_links(ic.cluster(dict(fpdict), sim, method='average'),
                      os.path.join(part_path, 'cluster'))
        # Drop the reference before the next part is loaded.
        del fpdict
def main(imagedir, processingDir, similarity=.4):
    """Fingerprint the images in ``imagedir`` (or reuse stored ones) and link clusters.

    The fingerprints database lives at
    ``pathJoin(processingDir, 'imagefeatures.pk')``. If that path exists it is
    loaded with ``common.read_pk``; otherwise ``common.makeDir`` is called on
    the database path, the files from ``common.get_files(imagedir)`` are
    fingerprinted with the model from ``imagecluster.get_model()`` and the
    result is written with ``common.write_pk``. Finally the fingerprints are
    clustered with ``similarity`` and passed to ``imagecluster.make_links``
    together with ``pathJoin(imagedir, processingDir, 'clusters')``.
    """
    feature_db = pathJoin(processingDir, 'imagefeatures.pk')
    if os.path.exists(feature_db):
        print("loading fingerprints database {} ...".format(feature_db))
        fingerprints = common.read_pk(feature_db)
    else:
        # NOTE(review): makeDir receives the database *file* path, not its
        # parent directory -- confirm that is what the helper expects.
        common.makeDir(feature_db)
        print("No imagefeatures database {} found".format(feature_db))
        image_files = common.get_files(imagedir)
        nn_model = imagecluster.get_model()
        fingerprints = imagecluster.fingerprints(
            image_files, nn_model, size=(224, 224))
        common.write_pk(fingerprints, feature_db)
    print("clustering ...")
    clusters = imagecluster.cluster(fingerprints, similarity)
    links_dir = pathJoin(imagedir, processingDir, 'clusters')
    imagecluster.make_links(clusters, links_dir)
def cluster(df, dbfn, sim=0.5, fingerprint_column='cropped_fingerprints'):
    """Cluster the fingerprints in `df`, link the clusters and store the result.

    Two things happen with the same fingerprints:

    1. A ``filename -> fingerprint`` dict is clustered with ``ic.cluster``
       and passed to ``ic.make_links`` with
       ``os.path.join(imagedir, ic_base_dir, 'clusters')`` as target
       (``imagedir`` and ``ic_base_dir`` are module-level names).
    2. Flat cluster ids are computed here with average-linkage hierarchical
       clustering on Euclidean distances, cut at
       ``max_distance * (1 - sim)``, and stored in ``df['cluster']``.

    `df` is modified in place (the ``'cluster'`` column is added or
    overwritten) and then written to `dbfn` with ``co.write_pk``.

    Parameters
    ----------
    df : DataFrame-like
        must provide a ``'filename'`` column and the column named by
        `fingerprint_column`; presumably a pandas DataFrame -- TODO confirm
    dbfn : str
        path handed to ``co.write_pk`` for the updated `df`
    sim : float
        similarity index used for both clustering steps (default 0.5, the
        value that was previously hard-coded)
    fingerprint_column : str
        name of the column holding one fingerprint vector per row
    """
    print("> Clustering ...")
    fingerprintdict = df.set_index('filename')[fingerprint_column].to_dict()
    # cluster and save files in folders
    ic.make_links(ic.cluster(fingerprintdict, sim),
                  os.path.join(imagedir, ic_base_dir, 'clusters'))
    # cluster and save results in dataframe
    fps = df[fingerprint_column]
    dfps = distance.pdist(np.array(list(fps)), metric='euclidean')
    Z = hierarchy.linkage(dfps, method='average', metric='euclidean')
    # A higher `sim` lowers the distance threshold, giving tighter clusters.
    cut = hierarchy.fcluster(Z, t=dfps.max() * (1.0 - sim),
                             criterion='distance')
    df['cluster'] = cut
    # save database to file
    co.write_pk(df, dbfn)
    print("done.")
def linkTest(ic_base_dir=ic_base_dir, sim=sim):
    """
    Randomly link one divided part by its divided finger-print.

    One immediate sub-directory of ``ic_base_dir`` is picked at random, its
    ``fingerprints.pk`` is loaded, clustered with
    ``ic.cluster(..., method='average')`` and linked into a ``cluster``
    directory inside that sub-directory. When ``ic_base_dir`` has no
    sub-directories (or does not exist), ``spiltFP`` is called first and the
    directory is listed again.

    :param ic_base_dir: directory whose immediate sub-directories are the parts
    :param sim: similarity value forwarded to ``ic.cluster``
    :return: None
    :raises ValueError: if there is still no part directory after splitting
    """
    # os.walk yields nothing for a missing directory; the default keeps
    # that case from leaving the listing unbound.
    part_dirs = next(os.walk(ic_base_dir), (None, [], []))[1]
    if not part_dirs:
        print('no former folder found !')
        # Split into the directory that is about to be read, then refresh
        # the listing; without the refresh the random pick below always
        # failed on the stale empty list.
        spiltFP(ic_base_dir=ic_base_dir)
        part_dirs = next(os.walk(ic_base_dir), (None, [], []))[1]
        if not part_dirs:
            raise ValueError(
                'no part directory found under %s' % (ic_base_dir,))
    print('Link test')
    f_dir = random.choice(part_dirs)
    part_path = os.path.join(ic_base_dir, f_dir)
    fpdict = co.read_pk(os.path.join(part_path, 'fingerprints.pk'))
    print("[%s] with %d is clusting " % (f_dir, len(fpdict)))
    ic.make_links(ic.cluster(dict(fpdict), sim, method='average'),
                  os.path.join(part_path, 'cluster'))
    del fpdict
    gc.collect()