def KMeans_A(rootdir, ft):
    """Split the images found under ``rootdir`` into two K-Means clusters.

    Feature vectors are produced by the extractor selected with ``ft``,
    reduced with PCA and clustered with ``KMeans(n_clusters=2)``.  Image
    names whose cluster label is 0 are written to ``A.txt`` and those whose
    label is 1 to ``B.txt`` (one name per line, in the current working
    directory).

    Args:
        rootdir: Folder handed to the extractor's ``gen_folder``.
        ft: Feature type; one of ``'lbp'``, ``'gabor'``, ``'hog'``, ``'dwt'``.

    Returns:
        None.  An unknown ``ft`` or a ``None`` feature set only prints a
        message and returns without writing any file.
    """
    # Plain equality replaces the Python-2-only ``cmp``; the single-argument
    # ``print(...)`` form emits the same text on Python 2 and Python 3.
    if ft == 'lbp':
        print("ft : LBP")
        gbf = ilbpf.LBP_FEAT()
    elif ft == 'gabor':
        print("ft : GABOR")
        gbf = igbf.GABOR_FEAT()
    elif ft == 'hog':
        print('ft : HOG')
        gbf = ihogf.HOG_FEAT()
    elif ft == 'dwt':
        print('ft : DWT')
        gbf = idwtf.DWT_FEAT()
    else:
        print('unknown ft')
        return

    # NOTE(review): 5000 is presumably an upper bound on the number of
    # images read from the folder -- confirm against gen_folder.
    fvs, imgs = gbf.gen_folder(rootdir, 5000)
    if fvs is None:
        print('JPG None  %s' % rootdir)
        return

    samples = np.array(list(fvs))
    imgs = list(imgs)

    # Keep the component count below the sample count, but never let it
    # drop to zero or below for very small folders (n - 10 <= 0).
    com_num = max(1, min(300, samples.shape[0] - 10))
    print('before pca :  %s' % (samples.shape,))
    samples = PCA(n_components=com_num).fit_transform(samples)
    print('after pca :  %s' % (samples.shape,))

    prds = KMeans(n_clusters=2, n_jobs=-2, verbose=0).fit_predict(samples)

    for label, out_name in ((0, 'A.txt'), (1, 'B.txt')):
        members = [img for img, prd in zip(imgs, prds) if prd == label]
        with open(out_name, 'w') as f:
            # One write of the joined text instead of writelines() on a
            # string, which would iterate it character by character.
            f.write(''.join(name + '\n' for name in members))
    return
def DIST_B(rootdir, folderA, folderB, ft):
    """List the images of ``folderB`` that are close to images of ``folderA``.

    Features of both folders are extracted with the extractor selected by
    ``ft``, stacked (``folderA`` first) and reduced with PCA.
    ``calc_distance_set`` is then called with the ``folderA`` rows, the
    ``folderB`` rows and ``8``; every ``folderB`` image referenced by the
    returned ``(distance, index)`` pairs is written once to ``A.txt`` in
    the current working directory, one name per line.

    Args:
        rootdir: Directory containing both folders.
        folderA: Name of the query folder (relative to ``rootdir``).
        folderB: Name of the candidate folder (relative to ``rootdir``).
        ft: Feature type; one of ``'gabor'``, ``'lbp'``, ``'hog'``, ``'dwt'``.

    Returns:
        None.  An unknown ``ft`` or a ``None`` feature set for either folder
        only prints a message and returns without writing ``A.txt``.
    """
    if ft == 'gabor':
        print('feature type: GABOR')
        gbf = igbf.GABOR_FEAT()
    elif ft == 'lbp':
        print('feature type: LBP')
        gbf = ilbpf.LBP_FEAT()
    elif ft == 'hog':
        print('feature type: HOG')
        gbf = ihogf.HOG_FEAT()
    elif ft == 'dwt':
        print('feature type: DWT')
        gbf = idwtf.DWT_FEAT()
    else:
        print('unknown feature type')
        return

    # 1--folderA
    path_a = os.path.join(rootdir, folderA)
    fvs_a, imgs_a = gbf.gen_folder(path_a, 1000)
    if fvs_a is None:
        # gen_folder may hand back None (the sibling routines check for it);
        # bail out instead of failing inside list.extend().
        print('pos None  %s' % path_a)
        return

    # 2--folderB
    path_b = os.path.join(rootdir, folderB)
    fvs_b, imgs_b = gbf.gen_folder(path_b, 1000)
    if fvs_b is None:
        print('neg None  %s' % path_b)
        return

    pos = list(fvs_a)
    neg = list(fvs_b)
    imgsneg = list(imgs_b)

    # 3--match
    samples = np.array(pos + neg)
    img_count = len(list(imgs_a)) + len(imgsneg)
    # min(300, n - 10) as before, but clamped so tiny sets do not ask PCA
    # for a non-positive number of components.
    com_num = max(1, min(300, img_count - 10))
    samples = PCA(n_components=com_num).fit_transform(samples)
    print('after pca :  %s' % (samples.shape,))

    split = len(pos)
    dists = calc_distance_set(samples[0:split, :], samples[split:, :], 8)

    # Each entry of ``dists`` is iterated as (distance, index) pairs; the
    # index addresses the folderB images.
    matched = set(imgsneg[j] for dist in dists for _d, j in dist)

    with open('A.txt', 'w') as f:
        # Sorted so the file content does not depend on set iteration order.
        f.write(''.join(name + '\n' for name in sorted(matched)))
    return
def DIST_A(rootdir, posdir, posnum, negnum_p, ft):
    """Rank the folders listed in ``list.txt`` by feature distance.

    Every line of ``list.txt`` (current working directory) names a folder
    below ``rootdir``.  Features of all folders are extracted, reduced with
    PCA and passed to ``calc_distance_set(samples, samples, posnum * 3)``.
    For every pair of folders the smallest reported distance is kept, and:

    * ``result.txt`` receives, per folder, the folder name followed by its
      neighbour folders and distances in ascending distance order;
    * ``out/`` is recreated with one sub-folder per folder, which is then
      filled through ``copy_rename_jpgs``.

    Args:
        rootdir: Directory containing the listed folders.
        posdir: Folder name read with the ``posnum`` limit.
        posnum: ``gen_folder`` limit for ``posdir``; also sets the
            neighbour count (``posnum * 3``).
        negnum_p: ``gen_folder`` limit for every other folder.
        ft: Feature type; one of ``'gabor'``, ``'lbp'``, ``'hog'``, ``'dwt'``.

    Returns:
        None.  An unknown ``ft`` prints a message and returns before
        ``list.txt`` is read or any output is touched.
    """
    # Validate the feature type first so a bad argument fails fast, before
    # any file I/O.
    if ft == 'gabor':
        print('feature type: GABOR')
        gbf = igbf.GABOR_FEAT()
    elif ft == 'lbp':
        print('feature type: LBP')
        gbf = ilbpf.LBP_FEAT()
    elif ft == 'hog':
        print('feature type: HOG')
        gbf = ihogf.HOG_FEAT()
    elif ft == 'dwt':
        print('feature type: DWT')
        gbf = idwtf.DWT_FEAT()
    else:
        print('unknown feature type')
        return

    with open('list.txt', 'r') as f:
        folders = [line.strip() for line in f]

    pos, neg = [], []
    pathpos, pathneg = [], []
    imgspos, imgsneg = [], []
    for folder in folders:
        fname = os.path.join(rootdir, folder)
        is_pos = folder == posdir
        fvs, imgs = gbf.gen_folder(fname, posnum if is_pos else negnum_p)
        if fvs is None:
            print('%s None  %s' % ('pos' if is_pos else 'neg', fname))
            continue
        if is_pos:
            feats, names, owners = pos, imgspos, pathpos
        else:
            feats, names, owners = neg, imgsneg, pathneg
        feats.extend(fvs)
        names.extend(imgs)
        # One owning-folder entry per feature vector.
        owners.extend([folder] * len(fvs))

    samples = np.array(pos + neg)
    paths = pathpos + pathneg
    img_count = len(imgspos) + len(imgsneg)

    con_num = 300
    if con_num >= img_count:
        con_num = img_count - 10
    # Never ask PCA for a non-positive number of components.
    con_num = max(1, con_num)
    samples = PCA(n_components=con_num).fit_transform(samples)
    print('after pca :  %s' % (samples.shape,))

    topN = posnum * 3
    dists = calc_distance_set(samples, samples, topN)

    # smap[folder][neighbour folder] -> smallest distance seen, capped at
    # the 99999.0 starting value.
    smap = {}
    for k, pospath in enumerate(paths):
        nearest = smap.setdefault(pospath, {})
        for dist, j in dists[k]:
            negpath = paths[j]
            nearest[negpath] = min(nearest.get(negpath, 99999.0), dist)

    # Sort once and reuse for both the report and the view sets.
    ranked = [
        (pospath, sorted(negs.items(), key=lambda kv: kv[1]))
        for pospath, negs in sorted(smap.items(), key=lambda kv: kv[0])
    ]

    report = []
    for pospath, negpaths in ranked:
        report.append(pospath + '\n')
        for negpath, cnt in negpaths:
            report.append(' ' + negpath + '(' + str(cnt) + ')\n')
    with open('result.txt', 'w') as f:
        f.writelines(report)

    # Best effort: a missing ``out/`` on the first run is not an error.
    try:
        shutil.rmtree('out/')
    except OSError as e:
        print('catch exception  %s' % e)
    os.mkdir('out/')

    for pospath, negpaths in ranked:
        print('create viewset for  %s' % pospath)
        os.mkdir('out/' + pospath)
        for negpath, cnt in negpaths:
            dist = np.int32(cnt * 100)
            # NOTE(review): 'out\\' is a Windows-style prefix while the
            # directories above are created with 'out/' -- confirm how
            # copy_rename_jpgs joins its path arguments.
            copy_rename_jpgs(rootdir, negpath, 'out\\', pospath, 4, dist)
    return
def LDA_A(rootdir, posdir, posnum, negnum_p, ft):
    """Score every listed image with an LDA trained on ``posdir`` vs. the rest.

    Every line of ``list.txt`` (current working directory) names a folder
    below ``rootdir``.  Images of ``posdir`` get label 0, all others label
    1.  The features are reduced with ``PCA(0.98)``, an ``LDA`` is fitted
    on them and its ``decision_function`` is evaluated on the same samples.

    Output files (current working directory):

    * ``result.txt`` -- ``<score> <folder> <image>`` for every image,
      in ascending score order;
    * ``A.txt`` -- ``posdir`` images whose score is ``> 0``;
    * ``B.txt`` -- the remaining ``posdir`` images.

    Args:
        rootdir: Directory containing the listed folders.
        posdir: Folder name treated as label 0.
        posnum: ``gen_folder`` limit for ``posdir``.
        negnum_p: ``gen_folder`` limit for every other folder.
        ft: Feature type; one of ``'gabor'``, ``'hog'``, ``'lbp'``,
            ``'dwt'``, ``'yuv'``.

    Returns:
        None.  An unknown ``ft`` prints a message and returns before
        ``list.txt`` is read or any file is written.
    """
    # Validate the feature type first so a bad argument fails fast, before
    # any file I/O.
    if ft == 'gabor':
        print('feature type: GABOR')
        gbf = igbf.GABOR_FEAT()
    elif ft == 'hog':
        print('feature type: HOG')
        gbf = ihogf.HOG_FEAT()
    elif ft == 'lbp':
        print('feature type: LBP')
        gbf = ilbpf.LBP_FEAT()
    elif ft == 'dwt':
        print('feature type: DWT')
        gbf = idwtf.DWT_FEAT()
    elif ft == 'yuv':
        print('feature type: YUV')
        gbf = iyuvf.YUV_FEAT()
    else:
        print('unknown feature type')
        return

    with open('list.txt', 'r') as f:
        folders = [line.strip() for line in f]

    pos, neg = [], []
    pathpos, pathneg = [], []
    imgspos, imgsneg = [], []
    for folder in folders:
        fname = os.path.join(rootdir, folder)
        is_pos = folder == posdir
        fvs, imgs = gbf.gen_folder(fname, posnum if is_pos else negnum_p)
        if fvs is None:
            print('%s None  %s' % ('pos' if is_pos else 'neg', fname))
            continue
        if is_pos:
            feats, names, owners = pos, imgspos, pathpos
        else:
            feats, names, owners = neg, imgsneg, pathneg
        feats.extend(fvs)
        names.extend(imgs)
        # One owning-folder entry per feature vector.
        owners.extend([folder] * len(fvs))

    samples = np.array(pos + neg)
    labels = np.array([0] * len(pos) + [1] * len(neg))
    paths = pathpos + pathneg
    imgs = imgspos + imgsneg

    samples = PCA(0.98).fit_transform(samples)
    print('after pca :  %s' % (samples.shape,))

    clf = LDA()
    clf.fit(samples, labels)
    scores = clf.decision_function(samples)

    ranked = sorted(zip(paths, scores, imgs), key=lambda rec: rec[1])

    report = []
    above = []  # sometimes, the positive set is split into two parts
    below = []
    for path, score, img in ranked:
        report.append(str(score) + ' ' + path + ' ' + img + '\n')
        if path != posdir:
            continue
        (above if score > 0 else below).append(img + '\n')

    with open('A.txt', 'w') as f:
        f.writelines(above)
    with open('B.txt', 'w') as f:
        f.writelines(below)
    with open('result.txt', 'w') as f:
        f.writelines(report)
    return
def LDA_B(rootdir, folderA, folderB, folderC, ft):
    """Train an LDA on two folders and use it to split a third one.

    ``folderA`` images get label 0 and ``folderB`` images label 1.  Their
    features are reduced with ``PCA(0.95)`` and an ``LDA`` is fitted on the
    result.  The features of ``folderC`` go through the same fitted PCA and
    are scored with the LDA's ``decision_function``.  Images with a score
    ``> 0`` are written to ``A.txt``, all others to ``B.txt`` (current
    working directory, one name per line, ascending score order).

    Args:
        rootdir: Directory containing the three folders.
        folderA: Training folder for label 0.
        folderB: Training folder for label 1.
        folderC: Folder whose images are split.
        ft: Feature type; one of ``'gabor'``, ``'hog'``, ``'lbp'``,
            ``'dwt'``, ``'yuv'``.

    Returns:
        None.  An unknown ``ft`` or a ``None`` feature set for any folder
        only prints a message and returns without writing output.
    """
    if ft == 'gabor':
        print('feature type: GABOR')
        gbf = igbf.GABOR_FEAT()
    elif ft == 'hog':
        print('feature type: HOG')
        gbf = ihogf.HOG_FEAT()
    elif ft == 'lbp':
        print('feature type: LBP')
        gbf = ilbpf.LBP_FEAT()
    elif ft == 'dwt':
        print('feature type: DWT')
        gbf = idwtf.DWT_FEAT()
    elif ft == 'yuv':
        print('feature type: YUV')
        gbf = iyuvf.YUV_FEAT()
    else:
        print('unknown feature type')
        return

    # 1--class A
    path_a = os.path.join(rootdir, folderA)
    fvs_a, _imgs_a = gbf.gen_folder(path_a, 1000)
    if fvs_a is None:
        # gen_folder may hand back None (the sibling routines check for it);
        # bail out instead of failing inside list handling.
        print('class A None  %s' % path_a)
        return

    # 2--class B
    path_b = os.path.join(rootdir, folderB)
    fvs_b, _imgs_b = gbf.gen_folder(path_b, 1000)
    if fvs_b is None:
        print('class B None  %s' % path_b)
        return

    # 3--train
    pos = list(fvs_a)
    neg = list(fvs_b)
    samples = np.array(pos + neg)
    labels = np.array([0] * len(pos) + [1] * len(neg))
    print('before pca :  %s' % (samples.shape,))
    clf_pca = PCA(0.95)
    samples = clf_pca.fit_transform(samples)
    print('after pca :  %s' % (samples.shape,))
    clf_lda = LDA()
    clf_lda.fit(samples, labels)

    # 4--predict
    path_c = os.path.join(rootdir, folderC)
    fvs_c, imgs_c = gbf.gen_folder(path_c, 100000)
    if fvs_c is None:
        print('class C None  %s' % path_c)
        return
    scores = clf_lda.decision_function(clf_pca.transform(np.array(fvs_c)))

    ranked = sorted(zip(scores, imgs_c), key=lambda rec: rec[0])
    # sometimes, the positive set is split into two parts
    above = [img + '\n' for score, img in ranked if score > 0]
    below = [img + '\n' for score, img in ranked if not score > 0]

    with open('A.txt', 'w') as f:
        f.writelines(above)
    with open('B.txt', 'w') as f:
        f.writelines(below)
    return
def KNN_A(rootdir, posdir, posnum, negnum_p):
    """Write, for every image, the folders of its nearest neighbours.

    Every line of ``list.txt`` (current working directory) names a folder
    below ``rootdir``.  Gabor features of all folders are extracted,
    reduced with PCA (at most 100 components) and indexed with
    ``NearestNeighbors``.  For each sample, the owning folders of its (at
    most 5) nearest neighbours are written as one space-separated line to
    ``result.txt``.

    Args:
        rootdir: Directory containing the listed folders.
        posdir: Folder name read with the ``posnum`` limit.
        posnum: ``gen_folder`` limit for ``posdir``.
        negnum_p: ``gen_folder`` limit for every other folder.

    Returns:
        None.
    """
    with open('list.txt', 'r') as f:
        folders = [line.strip() for line in f]

    gbf = igbf.GABOR_FEAT()

    pos, neg = [], []
    pathpos, pathneg = [], []
    for folder in folders:
        fname = os.path.join(rootdir, folder)
        is_pos = folder == posdir
        fvs, _imgs = gbf.gen_folder(fname, posnum if is_pos else negnum_p)
        if fvs is None:
            print('%s None  %s' % ('pos' if is_pos else 'neg', fname))
            continue
        feats, owners = (pos, pathpos) if is_pos else (neg, pathneg)
        feats.extend(fvs)
        # One owning-folder entry per feature vector.
        owners.extend([folder] * len(fvs))

    samples = np.array(pos + neg)
    paths = pathpos + pathneg

    # PCA cannot produce more components than min(n_samples, n_features),
    # so cap the historical fixed value of 100 accordingly.
    n_components = min((100,) + samples.shape)
    print('before pca :  %s' % (samples.shape,))
    samples = PCA(n_components=n_components).fit_transform(samples)
    print('after pca :  %s' % (samples.shape,))

    # The disabled (``if 0:``) KNeighborsClassifier variant was unreachable
    # and has been dropped, together with the labels only it used.
    k = min(5, samples.shape[0])
    knn = NearestNeighbors(n_neighbors=k).fit(samples)
    _dists, idxs = knn.kneighbors(samples, k)

    lines = [''.join(paths[j] + ' ' for j in row) + '\n' for row in idxs]
    with open('result.txt', 'w') as f:
        f.writelines(lines)
    return