Esempio n. 1
0
def KMeans_A(rootdir, ft):
    pos = []
    imgspos = []
    if 0 == cmp(ft, 'lbp'):
        print "ft : LBP"
        gbf = ilbpf.LBP_FEAT()
    elif 0 == cmp(ft, 'gabor'):
        print "ft : GABOR"
        gbf = igbf.GABOR_FEAT()
    elif 0 == cmp(ft, 'hog'):
        print 'ft : HOG'
        gbf = ihogf.HOG_FEAT()
    elif 0 == cmp(ft, 'dwt'):
        print 'ft : DWT'
        gbf = idwtf.DWT_FEAT()
    else:
        print 'unknown ft'
        return
    fvs, imgs = gbf.gen_folder(rootdir, 5000)
    if fvs is None:
        print 'JPG None ', rootdir
        return
    pos.extend(fvs)
    imgspos.extend(imgs)
    samples = np.array(pos)
    imgs = imgspos
    com_num = np.minimum(300, samples.shape[0] - 10)
    clf = PCA(com_num)
    print 'before pca : ', samples.shape
    samples = clf.fit_transform(samples)
    print 'after pca : ', samples.shape
    clf = KMeans(n_clusters=2, n_jobs=-2, verbose=0)
    prds = clf.fit_predict(samples)
    line0 = ""
    line1 = ""
    # line2 = ""
    # line3 = ""
    for k in range(len(prds)):
        if prds[k] == 0:
            line0 += imgs[k] + '\n'
        elif prds[k] == 1:
            line1 += imgs[k] + '\n'
#     elif prds[k] == 2:
#         line2 += imgs[k] + '\n'
#     else:
#         line3 += imgs[k] + '\n'
    with open('A.txt', 'w') as f:
        f.writelines(line0)
    with open('B.txt', 'w') as f:
        f.writelines(line1)

# with open('C.txt', 'w') as f:
#     f.writelines(line2)
# with open('D.txt', 'w') as f:
#     f.writelines(line3)
    return
Esempio n. 2
0
def DIST_B(rootdir, folderA, folderB, ft):
    pos = []
    neg = []
    imgspos = []
    imgsneg = []

    if 0 == cmp(ft, 'gabor'):
        print 'feature type: GABOR'
        gbf = igbf.GABOR_FEAT()
    elif 0 == cmp(ft, 'lbp'):
        print 'feature type: LBP'
        gbf = ilbpf.LBP_FEAT()
    elif 0 == cmp(ft, 'hog'):
        print 'feature type: HOG'
        gbf = ihogf.HOG_FEAT()
    elif 0 == cmp(ft, 'dwt'):
        print 'feature type: DWT'
        gbf = idwtf.DWT_FEAT()
    else:
        print 'unknown feature type'
        return

    #1--folderA
    fvs, imgs = gbf.gen_folder(os.path.join(rootdir, folderA), 1000)
    pos.extend(fvs)
    imgspos.extend(imgs)
    #2--folderB
    fvs, imgs = gbf.gen_folder(os.path.join(rootdir, folderB), 1000)
    neg.extend(fvs)
    imgsneg.extend(imgs)

    #3--match
    samples = np.array(pos + neg)
    imgs = imgspos + imgsneg
    com_num = 300
    if com_num + 10 > len(imgs):
        com_num = len(imgs) - 10
    clf_pca = PCA(com_num)
    samples = clf_pca.fit_transform(samples)
    print 'after pca : ', samples.shape
    dists = calc_distance_set(samples[0:len(pos), :], samples[len(pos):, :], 8)

    templist = []
    for dist in dists:
        for d, j in dist:
            templist.append(imgsneg[j])
    templist = list(set(templist))
    lineA = ""
    for imgs in templist:
        lineA += imgs + '\n'
    with open('A.txt', 'w') as f:
        f.writelines(lineA)
    return
Esempio n. 3
0
def DIST_A(rootdir, posdir, posnum, negnum_p, ft):
    pos = []
    neg = []
    pathpos = []
    pathneg = []
    folders = []
    imgspos = []
    imgsneg = []
    with open('list.txt', 'r') as f:
        for line in f:
            line = line.strip()
            folders.append(line)
    if 0 == cmp(ft, 'gabor'):
        print 'feature type: GABOR'
        gbf = igbf.GABOR_FEAT()
    elif 0 == cmp(ft, 'lbp'):
        print 'feature type: LBP'
        gbf = ilbpf.LBP_FEAT()
    elif 0 == cmp(ft, 'hog'):
        print 'feature type: HOG'
        gbf = ihogf.HOG_FEAT()
    elif 0 == cmp(ft, 'dwt'):
        print 'feature type: DWT'
        gbf = idwtf.DWT_FEAT()
    else:
        print 'unknown feature type'
        return

    for folder in folders:
        fname = os.path.join(rootdir, folder)
        if 0 == cmp(folder, posdir):
            fvs, imgs = gbf.gen_folder(fname, posnum)
            if fvs is None:
                print 'pos None ', fname
                continue
            pos.extend(fvs)
            imgspos.extend(imgs)
            pathpos.extend([folder for k in range(len(fvs))])
        else:
            fvs, imgs = gbf.gen_folder(fname, negnum_p)
            if fvs is None:
                print 'neg None ', fname
                continue
            neg.extend(fvs)
            imgsneg.extend(imgs)
            pathneg.extend([folder for k in range(len(fvs))])
    samples = np.array(pos + neg)
    paths = pathpos + pathneg
    imgs = imgspos + imgsneg
    con_num = 300
    if con_num >= len(imgs):
        con_num = len(imgs) - 10
    clf = PCA(con_num)
    samples = clf.fit_transform(samples)
    print 'after pca : ', samples.shape
    topN = posnum * 3
    dists = calc_distance_set(samples, samples, topN)

    smap = {}
    for k in range(samples.shape[0]):
        pospath = paths[k]
        if pospath not in smap:
            smap[pospath] = {}
        for dist, j in dists[k]:
            negpath = paths[j]
            if negpath not in smap[pospath]:
                smap[pospath][negpath] = 99999.0
            if smap[pospath][negpath] > dist:
                smap[pospath][negpath] = dist

    slist = sorted(smap.iteritems(), key=lambda k: k[0])

    line = ""
    for pospath, negpaths in slist:
        line += pospath + '\n'
        negpaths = sorted(negpaths.iteritems(),
                          key=lambda k: k[1],
                          reverse=False)
        for negpath, cnt in negpaths:
            line += '    ' + negpath + '(' + str(cnt) + ')\n'

    with open('result.txt', 'w') as f:
        f.writelines(line)

    try:
        shutil.rmtree('out/')
    except Exception as e:
        print 'catch exception ', e

    os.mkdir('out/')
    for pospath, negpaths in slist:
        print 'create viewset for ', pospath
        os.mkdir('out/' + pospath)
        negpaths = sorted(negpaths.iteritems(),
                          key=lambda k: k[1],
                          reverse=False)
        for negpath, cnt in negpaths:
            dist = np.int32(cnt * 100)
            copy_rename_jpgs(rootdir, negpath, 'out\\', pospath, 4, dist)
    return
Esempio n. 4
0
def LDA_A(rootdir, posdir, posnum, negnum_p, ft):
    pos = []
    neg = []
    pathpos = []
    pathneg = []
    folders = []
    imgspos = []
    imgsneg = []
    with open('list.txt', 'r') as f:
        for line in f:
            line = line.strip()
            folders.append(line)
    if 0 == cmp(ft, 'gabor'):
        print 'feature type: GABOR'
        gbf = igbf.GABOR_FEAT()
    elif 0 == cmp(ft, 'hog'):
        print 'feature type: HOG'
        gbf = ihogf.HOG_FEAT()
    elif 0 == cmp(ft, 'lbp'):
        print 'feature type: LBP'
        gbf = ilbpf.LBP_FEAT()
    elif 0 == cmp(ft, 'dwt'):
        print 'feature type: DWT'
        gbf = idwtf.DWT_FEAT()
    elif 0 == cmp(ft, 'yuv'):
        print 'feature type: YUV'
        gbf = iyuvf.YUV_FEAT()
    else:
        print 'unknown feature type'
        return
    for folder in folders:
        fname = os.path.join(rootdir, folder)
        if 0 == cmp(folder, posdir):
            fvs, imgs = gbf.gen_folder(fname, posnum)
            if fvs is None:
                print 'pos None ', fname
                continue
            pos.extend(fvs)
            imgspos.extend(imgs)
            pathpos.extend([folder for k in range(len(fvs))])
        else:
            fvs, imgs = gbf.gen_folder(fname, negnum_p)
            if fvs is None:
                print 'neg None ', fname
                continue
            neg.extend(fvs)
            imgsneg.extend(imgs)
            pathneg.extend([folder for k in range(len(fvs))])
    label0 = [0 for k in range(len(pos))]
    label1 = [1 for k in range(len(neg))]
    samples = np.array(pos + neg)
    labels = np.array(label0 + label1)
    paths = pathpos + pathneg
    imgs = imgspos + imgsneg
    #com_num = np.minimum(300, samples.shape[0] - 10)
    clf = PCA(0.98)
    samples = clf.fit_transform(samples)
    print 'after pca : ', samples.shape
    clf = LDA()
    clf.fit(samples, labels)
    cnf = clf.decision_function(samples)
    X = []

    for k in range(len(paths)):
        X.append((paths[k], cnf[k], imgs[k]))
    X = sorted(X, key=lambda a: a[1])
    line = ""
    lineA = ""  #sometimes, the positive set is split into two parts
    lineB = ""
    for path, cnf, img in X:
        line += str(cnf) + ' ' + path + ' ' + img + '\n'
        if 0 != cmp(path, posdir):
            continue
        if cnf > 0:
            lineA += img + '\n'
        else:
            lineB += img + '\n'

    with open('A.txt', 'w') as f:
        f.writelines(lineA)
    with open('B.txt', 'w') as f:
        f.writelines(lineB)

    with open('result.txt', 'w') as f:
        f.writelines(line)

    return
Esempio n. 5
0
def LDA_B(rootdir, folderA, folderB, folderC, ft):
    pos = []
    neg = []
    imgspos = []
    imgsneg = []
    if 0 == cmp(ft, 'gabor'):
        print 'feature type: GABOR'
        gbf = igbf.GABOR_FEAT()
    elif 0 == cmp(ft, 'hog'):
        print 'feature type: HOG'
        gbf = ihogf.HOG_FEAT()
    elif 0 == cmp(ft, 'lbp'):
        print 'feature type: LBP'
        gbf = ilbpf.LBP_FEAT()
    elif 0 == cmp(ft, 'dwt'):
        print 'feature type: DWT'
        gbf = idwtf.DWT_FEAT()
    elif 0 == cmp(ft, 'yuv'):
        print 'feature type: YUV'
        gbf = iyuvf.YUV_FEAT()
    else:
        print 'unknown feature type'
        return

    #1--class A
    fvs, imgs = gbf.gen_folder(os.path.join(rootdir, folderA), 1000)
    pos.extend(fvs)
    imgspos.extend(imgs)
    #2--class B
    fvs, imgs = gbf.gen_folder(os.path.join(rootdir, folderB), 1000)
    neg.extend(fvs)
    imgsneg.extend(imgs)

    #3--train
    label0 = [0 for k in range(len(pos))]
    label1 = [1 for k in range(len(neg))]
    samples = np.array(pos + neg)
    labels = np.array(label0 + label1)
    imgs = imgspos + imgsneg

    print 'before pca : ', samples.shape
    # com_numx = 300
    # if com_numx + 10 > len(imgs):
    #     com_numx = len(imgs) - 10
    clf_pca = PCA(0.95)
    samples = clf_pca.fit_transform(samples)
    print 'after pca : ', samples.shape
    clf_lda = LDA()
    clf_lda.fit(samples, labels)

    #4--predict
    fvs, imgs = gbf.gen_folder(os.path.join(rootdir, folderC), 100000)
    samples = np.array(fvs)
    samples = clf_pca.transform(samples)
    cnf = clf_lda.decision_function(samples)
    X = []
    for k in range(len(imgs)):
        X.append((cnf[k], imgs[k]))
    X = sorted(X, key=lambda a: a[0])
    lineA = ""  #sometimes, the positive set is split into two parts
    lineB = ""
    for cnf, img in X:
        if cnf > 0:
            lineA += img + '\n'
        else:
            lineB += img + '\n'

    with open('A.txt', 'w') as f:
        f.writelines(lineA)
    with open('B.txt', 'w') as f:
        f.writelines(lineB)
    return
Esempio n. 6
0
def KNN_A(rootdir, posdir, posnum, negnum_p):
    pos = []
    neg = []
    pathpos = []
    pathneg = []
    folders = []
    imgspos = []
    imgsneg = []
    with open('list.txt', 'r') as f:
        for line in f:
            line = line.strip()
            folders.append(line)
    gbf = igbf.GABOR_FEAT()
    for folder in folders:
        fname = os.path.join(rootdir, folder)
        if 0 == cmp(folder, posdir):
            fvs, imgs = gbf.gen_folder(fname, posnum)
            if fvs is None:
                print 'pos None ', fname
                continue
            pos.extend(fvs)
            imgspos.extend(imgs)
            pathpos.extend([folder for k in range(len(fvs))])
        else:
            fvs, imgs = gbf.gen_folder(fname, negnum_p)
            if fvs is None:
                print 'neg None ', fname
                continue
            neg.extend(fvs)
            imgsneg.extend(imgs)
            pathneg.extend([folder for k in range(len(fvs))])
    label0 = [0 for k in range(len(pos))]
    label1 = [1 for k in range(len(neg))]
    samples = np.array(pos + neg)
    labels = np.array(label0 + label1)
    paths = pathpos + pathneg
    imgs = imgspos + imgsneg
    clf = PCA(100)
    print 'before pca : ', samples.shape
    samples = clf.fit_transform(samples)
    print 'after pca : ', samples.shape
    if 0:
        clf = KNeighborsClassifier(5)
        clf.fit(samples, labels)

        res = []
        for k in range(samples.shape[0]):
            prd = clf.predict(samples[k, :])
            res.append((paths[k], prd))
        res = sorted(res, key=lambda k: k[0])
        line = ""
        for path, prd in res:
            line += path + ' ' + str(prd) + '\n'
        with open('result.txt', 'w') as f:
            f.writelines(line)
    else:
        clf = NearestNeighbors(5).fit(samples)
        dists, idxs = clf.kneighbors(samples, 5)
        line = ""
        for k in range(len(idxs)):
            for j in range(len(idxs[k])):
                line += paths[idxs[k][j]] + ' '
            line += '\n'
        with open('result.txt', 'w') as f:
            f.writelines(line)
    return