Exemple #1
0
def main(imagedir, sim=0.5):
    """Example main app using this library.

    Build the fingerprints database for ``imagedir`` (or load it when the
    cache file already exists), cluster the fingerprints and hand the
    clusters to ``ic.make_links``.

    Parameters
    ----------
    imagedir : str
        path to directory with images
    sim : float (0..1)
        similarity index (see imagecluster.cluster())
    """
    dbfn = pj(imagedir, ic_base_dir, 'fingerprints.pk')
    if not os.path.exists(dbfn):
        os.makedirs(os.path.dirname(dbfn), exist_ok=True)
        print("no fingerprints database {} found".format(dbfn))
        files = co.get_files(imagedir)
        model = ic.get_model()
        print("running all images through NN model ...")
        fps = ic.fingerprints(files, model, size=(224, 224))
        co.write_pk(fps, dbfn)
    else:
        print("loading fingerprints database {} ...".format(dbfn))
        fps = co.read_pk(dbfn)
    print("clustering ...")
    print(len(fps))

    # Cluster exactly once; the result feeds both the helper call and the
    # link creation below instead of re-running the clustering.
    clusters = ic.cluster(fps, sim)
    # NOTE(review): `help` is called with two arguments, so it is presumably
    # a project-level function shadowing the builtin -- confirm. It is
    # assumed not to modify `clusters` in place.
    help(imagedir, clusters)

    ic.make_links(clusters, pj(imagedir, ic_base_dir, 'clusters'))
Exemple #2
0
def get_fp(imagedir, ic_base_dir='imagecluster'):
    """Recompute the fingerprints database from scratch.

    The directory that holds the database file
    (``pj(ic_base_dir, 'fingerprints.pk')``) is deleted if present and
    created again, the files returned by ``co.get_files(imagedir)`` are
    fingerprinted with the model from ``ic.get_model()``, and the result is
    written to the database file. Nothing is returned.

    Parameters
    ----------
    imagedir : str
        directory passed to ``co.get_files`` to collect the input files
    ic_base_dir : str
        directory in which ``fingerprints.pk`` is written
    """
    db_path = pj(ic_base_dir, 'fingerprints.pk')
    db_dir = os.path.dirname(db_path)

    # Always start from an empty database directory.
    if os.path.exists(db_dir):
        shutil.rmtree(db_dir)
    os.makedirs(db_dir)

    print("no fingerprints database {} found".format(db_path))
    image_files = co.get_files(imagedir)
    nn_model = ic.get_model()
    print("running all images through NN model ...")
    fingerprints = ic.fingerprints(image_files, nn_model, size=(224, 224))
    co.write_pk(fingerprints, db_path)
Exemple #3
0
def main(imagedir, processingDir, similarity=.4):
    """Fingerprint the images in ``imagedir`` and link them into clusters.

    The fingerprints are cached in ``imagefeatures.pk`` below
    ``processingDir``: when that file exists it is loaded, otherwise the
    fingerprints are computed and written there. The clusters produced by
    ``imagecluster.cluster`` are then passed to ``imagecluster.make_links``.

    :param imagedir: directory passed to ``common.get_files``
    :param processingDir: directory holding the fingerprint cache
    :param similarity: similarity value forwarded to ``imagecluster.cluster``
    """
    feature_db = pathJoin(processingDir, 'imagefeatures.pk')

    if os.path.exists(feature_db):
        print("loading fingerprints database {} ...".format(feature_db))
        fps = common.read_pk(feature_db)
    else:
        common.makeDir(feature_db)
        print("No imagefeatures database {} found".format(feature_db))
        image_files = common.get_files(imagedir)
        nn_model = imagecluster.get_model()
        fps = imagecluster.fingerprints(image_files, nn_model,
                                        size=(224, 224))
        common.write_pk(fps, feature_db)

    print("clustering ...")
    clusters = imagecluster.cluster(fps, similarity)
    links_dir = pathJoin(imagedir, processingDir, 'clusters')
    imagecluster.make_links(clusters, links_dir)
def spiltFP(fpdir=fingerPrintDir, ic_base_dir=ic_base_dir):
    """
    Split a large fingerprint file into several smaller ones.

    Reads ``<fpdir>/fingerprints.pk`` and writes its entries, in iteration
    order, to ``<ic_base_dir>/part<i>/fingerprints.pk`` for i = 0, 1, ...
    The module-level ``stepNum`` is the target number of entries per part;
    the number of parts is ``total // stepNum`` (at least 1) and the entries
    are spread evenly over those parts, so no tiny remainder part is created.
    Nothing is written when the source file holds no fingerprints.

    :param fpdir: directory containing the source ``fingerprints.pk``
    :param ic_base_dir: root directory below which the ``part<i>``
        directories are created
    :return: nothing
    """
    fps = co.read_pk(os.path.join(fpdir, 'fingerprints.pk'))
    total = len(fps)
    print('total num of fingerprints : %d' % total)

    if total == 0:
        # Nothing to split; also avoids dividing by a zero part count below.
        return

    # Integer division is required here: with true division the part count
    # becomes a float and the rebalancing below degenerates to `stepNum`.
    # With floor division the last part always absorbs the remainder, so the
    # former "merge a short tail" check could never fire and is omitted.
    num_parts = max(1, total // stepNum)

    # Ceiling division in pure integer arithmetic: balanced part size.
    chunk = -(-total // num_parts)

    # Slicing by `chunk` never yields an empty trailing part, unlike
    # appending a fresh dict every time a part fills up.
    entries = list(fps.items())
    parts = [dict(entries[start:start + chunk])
             for start in range(0, total, chunk)]
    print('Spilt fps in %d dicts ' % len(parts))

    for idx, part in enumerate(parts):
        part_dir = os.path.join(ic_base_dir, 'part' + str(idx))
        part_file = os.path.join(part_dir, 'fingerprints.pk')
        print("[dict%d] has %d element writen in %s " %
              (idx, len(part), part_file))
        if not os.path.exists(part_dir):
            os.makedirs(part_dir)
        co.write_pk(part, part_file)
Exemple #5
0
def cluster(df, dbfn, sim=0.5, fingerprint_column='cropped_fingerprints'):
    """Cluster the fingerprints stored in ``df`` and persist the result.

    Two things happen:

    1. A ``filename -> fingerprint`` dict is clustered with ``ic.cluster``
       and the clusters are passed to ``ic.make_links`` with the target
       ``os.path.join(imagedir, ic_base_dir, 'clusters')`` (``imagedir`` and
       ``ic_base_dir`` are module-level names).
    2. The same fingerprints are clustered with average-linkage hierarchical
       clustering on Euclidean distances, cut at
       ``max_distance * (1 - sim)``; the flat cluster labels are stored in
       ``df['cluster']`` (``df`` is modified in place) and ``df`` is written
       to ``dbfn`` with ``co.write_pk``.

    Parameters
    ----------
    df : DataFrame-like
        must provide a ``filename`` column and ``fingerprint_column``
    dbfn : str
        path passed to ``co.write_pk``
    sim : float
        similarity index; forwarded to ``ic.cluster`` and used for the
        distance cut-off (default 0.5, the previously hard-coded value)
    fingerprint_column : str
        name of the column holding the fingerprints (default
        ``'cropped_fingerprints'``, the previously hard-coded value)
    """
    print("> Clustering ...")

    fingerprintdict = df.set_index('filename')[fingerprint_column].to_dict()
    # cluster and save files in folders
    ic.make_links(ic.cluster(fingerprintdict, sim),
                  os.path.join(imagedir, ic_base_dir, 'clusters'))

    # cluster and save results in dataframe
    fps = df[fingerprint_column]
    dfps = distance.pdist(np.array(list(fps)), metric='euclidean')
    Z = hierarchy.linkage(dfps, method='average', metric='euclidean')
    # Higher similarity -> smaller distance threshold -> more, tighter clusters.
    cut = hierarchy.fcluster(Z, t=dfps.max() * (1.0 - sim),
                             criterion='distance')
    df['cluster'] = cut

    # save database to file
    co.write_pk(df, dbfn)
    print("done.")
Exemple #6
0
def init_df():
    """Load the cached database or build it, and return it with its path.

    The cache file is ``os.path.join(imagedir, ic_base_dir, 'db.pk')``
    (``imagedir`` and ``ic_base_dir`` are module-level names). When it does
    not exist, its directory is created, the data is taken from a
    module-level ``df`` if one is defined or otherwise produced by
    ``process_dataset(imagedir)``, and the result is written to the cache.
    When the file exists it is simply read back.

    Returns
    -------
    (df, dbfn)
        the loaded or freshly built object (presumably a DataFrame -- confirm
        against ``process_dataset``) and the cache file path
    """
    # initialize dataframe and run pipeline
    dbfn = os.path.join(imagedir, ic_base_dir, 'db.pk')
    if not os.path.exists(dbfn):
        os.makedirs(os.path.dirname(dbfn), exist_ok=True)
        print("no fingerprints database found in {}".format(dbfn))
        # `df` is assigned in this function and is therefore a local name
        # that is still unbound here, so a pre-existing `df` can only live at
        # module level. Fetch it explicitly: reading the bare name would
        # raise UnboundLocalError.
        if 'df' in globals():
            print("df exists already.")
            df = globals()['df']
        else:
            print("Running processing pipeline ...")
            df = process_dataset(imagedir)
        print("writing {}".format(dbfn))
        co.write_pk(df, dbfn)
    else:
        print("loading fingerprints database {} ...".format(dbfn))
        df = co.read_pk(dbfn)
        print("done.")
    return df, dbfn