コード例 #1
0
def _pyramid_cell_index(value, edges):
    """Return the index of the half-open cell [edges[i], edges[i+1]) holding value.

    The last cell is closed on its upper side so that a coordinate equal to
    the final edge is still assigned. Returns None when the value lies
    outside the range covered by ``edges``.
    """
    n_cells = len(edges) - 1
    idx = int(np.searchsorted(edges, value, side='right')) - 1
    if idx == n_cells and value == edges[-1]:
        idx = n_cells - 1
    if 0 <= idx < n_cells:
        return idx
    return None


def spatial_pyramid_fisher(size_image, descriptors, coordinates_keypoints, k,
                           gmm, levels_pyramid):
    """Concatenate Fisher vectors of the whole image and of every pyramid cell.

    Args:
        size_image: sequence whose entries 0 and 1 give the extent of the
            image along keypoint coordinates 0 and 1 respectively.
        descriptors: 2-D array, one descriptor per row, passed to
            ``ynumpy.fisher``.
        coordinates_keypoints: sequence of (c0, c1) positions, one per row
            of ``descriptors``.
        k: factor used to size each Fisher vector as ``2 * d * k``
            (presumably the number of GMM components -- TODO confirm).
        gmm: model handed unchanged to ``ynumpy.fisher``.
        levels_pyramid: sequence of (n0, n1) grid shapes, one per level.

    Returns:
        float32 array of shape ``(1, n_cells_total * 2 * d * k)``. Slot 0 is
        the whole image; the cells of each level follow in level order, and
        within a level in ``itertools.product`` order (coordinate 0 outer).
        Cells without any descriptor stay zero.

    Keypoints lying exactly on a cell border (including coordinate 0) are
    assigned to the cell that starts at that border; previously the strict
    comparisons silently discarded them.
    """
    cells_per_level = [1] + [int(g[0]) * int(g[1]) for g in levels_pyramid]
    dim_vec = 2 * int(descriptors.shape[1]) * k

    fisher_vector = np.zeros((1, sum(cells_per_level) * dim_vec),
                             dtype=np.float32)

    # Slot 0: Fisher vector of the whole image.
    fisher_vector[0, 0:dim_vec] = ynumpy.fisher(gmm,
                                                descriptors,
                                                include=['mu', 'sigma'])

    first_slot = 1  # index of the first cell of the current level
    for grid, n_cells in zip(levels_pyramid, cells_per_level[1:]):
        # Cell borders along each coordinate for this level.
        edges_0 = np.floor(np.linspace(0, size_image[0] - 1, num=grid[0] + 1))
        edges_1 = np.floor(np.linspace(0, size_image[1] - 1, num=grid[1] + 1))

        members = [[] for _ in range(n_cells)]
        for idx, point in enumerate(coordinates_keypoints):
            cell_0 = _pyramid_cell_index(point[0], edges_0)
            cell_1 = _pyramid_cell_index(point[1], edges_1)
            if cell_0 is None or cell_1 is None:
                continue  # keypoint outside the gridded area
            members[cell_0 * grid[1] + cell_1].append(descriptors[idx])

        # One Fisher vector per non-empty cell, written into its slot.
        for j, cell_descriptors in enumerate(members):
            if not cell_descriptors:
                continue
            start = dim_vec * (first_slot + j)
            fisher_vector[0, start:start + dim_vec] = ynumpy.fisher(
                gmm,
                np.array(cell_descriptors, dtype=np.float32),
                include=['mu', 'sigma'])

        first_slot += n_cells

    return fisher_vector
コード例 #2
0
def create_fisher_vector_unsaved(gmm_list, video_desc):
    """Build the concatenated, normalized Fisher vector of one video.

    ``video_desc`` must expose the attributes ``traj``, ``hog``, ``hof``,
    ``mbhx`` and ``mbhy`` (ndarrays). ``gmm_list`` supplies one
    ``(gmm, mean, pca_transform)`` triple per descriptor channel, in that
    same order; ``pca_transform`` may be None to skip the projection.

    Same computation as ``create_fisher_vector`` but nothing is written to
    disk. Returns the horizontally stacked per-channel Fisher vectors after
    a final global L2 normalization.
    """
    channels = (
        video_desc.traj,
        video_desc.hog,
        video_desc.hof,
        video_desc.mbhx,
        video_desc.mbhy,
    )

    per_channel = []
    for channel, (gmm, mean, pca_transform) in zip(channels, gmm_list):
        centered = channel.astype('float32') - mean
        if pca_transform is None:
            projected = centered
        else:
            projected = np.dot(centered, pca_transform)

        fv = ynumpy.fisher(gmm, projected, include=['mu', 'sigma'])
        # Signed square root (power normalization), then L2 normalization.
        fv = np.sign(fv) * (np.abs(fv)**0.5)
        fv /= np.sqrt(np.sum(fv**2))
        # A zero norm produces NaNs; they are replaced by the constant 100.
        fv[np.isnan(fv)] = 100
        per_channel.append(fv.T)

    stacked = np.hstack(per_channel)
    stacked /= np.sqrt(np.sum(stacked**2))
    return stacked
コード例 #3
0
def test_system(test_filenames, test_labels, detector, stdSlr_features, pca,
                gmm, stdSlr, clf, options):
    """Compute Fisher vectors for the test images and score the classifier.

    Args:
        test_filenames: paths of the images to evaluate.
        test_labels: ground-truth labels passed to ``clf.score``.
        detector: object providing ``detectAndCompute(gray, None)``.
        stdSlr_features, pca: transformers applied to the descriptors when
            ``options.apply_pca`` is set.
        gmm: model handed to ``ynumpy.fisher``.
        stdSlr: transformer applied to the Fisher-vector matrix.
        clf: classifier providing ``score``.
        options: reads ``apply_pca``, ``ncomp_pca``, ``kmeans``,
            ``apply_normalization`` and ``evaluation_measures``.

    Returns:
        Accuracy in percent (``100 * clf.score(...)``).
    """
    num_features = options.ncomp_pca if options.apply_pca else 128
    fisher_test = np.zeros(
        (len(test_filenames), options.kmeans * num_features * 2),
        dtype=np.float32)

    for i, filename in enumerate(test_filenames):
        print('Reading image ' + filename)
        ima = cv2.imread(filename)
        gray = cv2.cvtColor(ima, cv2.COLOR_BGR2GRAY)
        kpt, des = detector.detectAndCompute(gray, None)

        if options.apply_pca:
            des = stdSlr_features.transform(des)
            des = pca.transform(des)

        fisher_test[i, :] = ynumpy.fisher(gmm, des, include=['mu', 'sigma'])

    # Normalize once, after every row has been filled. Doing this inside the
    # loop re-normalized the already processed rows on every iteration.
    if options.apply_normalization and fisher_test.shape[0]:
        fisher_test = applyNormalization(fisher_test, options)

    test_fisher_vectors_scaled = stdSlr.transform(fisher_test)
    accuracy = 100 * clf.score(test_fisher_vectors_scaled, test_labels)

    if options.evaluation_measures:
        final_issues(test_fisher_vectors_scaled, test_labels, clf, options)

    return accuracy
コード例 #4
0
def calculateFV(img):
    """Fit a PCA and a diagonal GMM on ``img`` and return its Fisher vector.

    ``img`` is converted with ``np.array`` and treated as a sample matrix
    (one sample per row). The same constant ``k = 256`` is passed to
    ``myPCA`` and used as the number of mixture components.

    Returns the value of ``ynumpy.fisher`` computed with ``include='mu'``.
    """
    samples = np.array(img)
    # k is handed to myPCA and is also the number of GMM components
    k = 256

    # compute PCA and transform the samples
    pca_transform = myPCA(samples, k)
    samples = pca_transform.transform(samples)

    # train GMM
    print("Start fitting GMM")
    mixture = GaussianMixture(n_components=k,
                              covariance_type='diag',
                              verbose_interval=1)
    t1 = time.time()
    mixture.fit(samples)
    print("GMM fit in {}".format(time.time() - t1))

    # yael works on float32 data: convert the model parameters ...
    gmm = (mixture.weights_.astype('float32'),
           mixture.means_.astype('float32'),
           mixture.covariances_.astype('float32'))
    # ... and the samples as well (C-contiguous float32); only the model was
    # converted before, leaving the samples in whatever dtype PCA returned.
    samples32 = np.ascontiguousarray(samples, dtype=np.float32)

    print("Processing FV of image i")
    # compute the Fisher vector, using only the derivative w.r.t mu
    fv = ynumpy.fisher(gmm, samples32, include='mu')
    print("FV processed.")
    return fv
コード例 #5
0
	def EncodeSift(gmm, image_descs, pca_transform, mean):
		"""Encode each descriptor set as a normalized Fisher vector.

		Every entry of ``image_descs`` is centered with ``mean``, projected
		with ``pca_transform`` and encoded by ``ynumpy.fisher`` using only the
		derivative w.r.t. mu. The stacked result is power-normalized and
		L2-normalized row by row; NaN entries are replaced by 100.

		Returns one row per entry of ``image_descs``.
		"""
		encoded = [
			ynumpy.fisher(gmm, np.dot(desc - mean, pca_transform), include = 'mu')
			for desc in image_descs
		]

		# one matrix with all Fisher vectors, normalized in bulk
		matrix = np.vstack(encoded)

		# signed square root
		matrix = np.sign(matrix) * np.abs(matrix) ** 0.5

		# unit L2 norm per row
		row_norms = np.sqrt(np.sum(matrix ** 2, 1))
		matrix /= row_norms.reshape(-1, 1)

		# rows with a zero norm become NaN; 100 keeps them far from the rest
		matrix[np.isnan(matrix)] = 100

		return matrix
コード例 #6
0
def create_fisher_vector(gmm_list, video_desc, fisher_path):
  """
  Build, save and return the concatenated Fisher vector of one video.

  video_desc is a single object exposing the traj, hog, hof, mbhx and mbhy
  np.ndarrays. gmm_list holds one (gmm, mean, pca_transform) triple per
  descriptor channel, in that order; pca_transform may be None, in which
  case the centered descriptors are encoded without projection.
  fisher_path is the full path handed to np.savez (stored under key 'fish').

  Returns the globally L2-normalized concatenation of the per-channel
  Fisher vectors.
  """
  vid_desc_list = [
    video_desc.traj,
    video_desc.hog,
    video_desc.hof,
    video_desc.mbhx,
    video_desc.mbhy,
  ]
  fvs = []

  for descriptor, gmm_mean_pca in zip(vid_desc_list, gmm_list):
    gmm, mean, pca_transform = gmm_mean_pca
    descrip = descriptor.astype('float32') - mean

    # Debug output (same text as before, but safe when pca_transform is None).
    has_pca = pca_transform is not None
    print('%s %s %s' % (type(gmm), type(mean), type(pca_transform)))
    print('%s %s %s' % (len(gmm), len(mean),
                        len(pca_transform) if has_pca else None))

    # `pca_transform.all != None` compared a bound method with None: it was
    # always true and raised AttributeError for a None transform.
    if has_pca:
      descrip = np.dot(descrip, pca_transform)

    # compute the Fisher vector, using the derivative w.r.t mu and sigma
    fv = ynumpy.fisher(gmm, descrip, include = ['mu', 'sigma'])
    # normalizations are done on each descriptor channel individually
    # power-normalization
    fv = np.sign(fv) * (np.abs(fv) ** 0.5)
    # L2 normalize
    norms = np.sqrt(np.sum(fv ** 2))
    fv /= norms
    # a zero norm yields NaNs (100 = far away from "normal" vectors)
    fv[np.isnan(fv)] = 100

    print("Performing fvs")
    fvs.append(fv.T)

  output_fv = np.hstack(fvs)

  # L2 normalize the entire fv.
  norm = np.sqrt(np.sum(output_fv ** 2))
  output_fv /= norm

  # example name: 'v_Archery_g01_c01.fisher.npz'
  np.savez(fisher_path, fish=output_fv)
  print(fisher_path)
  return output_fv
コード例 #7
0
def get_distances(train_images):
    """Return pairwise Fisher-vector distances between the given images.

    SURF descriptors are extracted from every readable image, reduced with a
    PCA learned on all of them, and encoded as Fisher vectors (derivative
    w.r.t. mu) against a 128-component GMM learned by ``ynumpy.gmm_learn``.

    Images that cannot be read, that make OpenCV raise, or that yield no
    descriptors are skipped, so row/column ``i`` of the result refers to the
    i-th image that produced descriptors, not necessarily to
    ``train_images[i]``.

    Returns:
        Array where entry ``[i, j]`` is the distance reported by
        ``ynumpy.knn`` between image ``i`` and image ``j``.

    Raises:
        ValueError: if no image produced any descriptor.
    """
    surf = cv2.SURF(hessianThreshold=500, extended=True)
    image_descs = []
    for fname in train_images:
        try:
            img = cv2.imread(fname, 0)
            if img is None:
                # cv2.imread signals an unreadable file with None
                continue
            kp, des = surf.detectAndCompute(img, None)
        except cv2.error:
            continue
        if des is None or len(des) == 0:
            continue
        image_descs.append(des)

    if not image_descs:
        raise ValueError("no descriptors could be extracted from train_images")

    k = 128

    sample = np.vstack(image_descs).astype('float32')

    # PCA learned on all descriptors
    mean = sample.mean(axis = 0)
    sample = sample - mean
    cov = np.dot(sample.T, sample)

    eigvals, eigvecs = np.linalg.eig(cov)
    perm = eigvals.argsort()
    # keep the eigenvectors at positions 32..127 of the ascending order
    pca_transform = eigvecs[:, perm[32:128]]

    sample = np.dot(sample, pca_transform)
    gmm = ynumpy.gmm_learn(sample, k)

    image_fvs = []
    for image_desc in image_descs:
        image_desc = np.dot(image_desc - mean, pca_transform)
        fv = ynumpy.fisher(gmm, image_desc, include = 'mu')
        image_fvs.append(fv)

    # power normalization followed by row-wise L2 normalization
    image_fvs = np.vstack(image_fvs)
    image_fvs = np.sign(image_fvs) * np.abs(image_fvs) ** 0.5
    norms = np.sqrt(np.sum(image_fvs ** 2, 1))
    image_fvs /= norms.reshape(-1, 1)

    image_fvs[np.isnan(image_fvs)] = 100

    results, distances = ynumpy.knn(image_fvs, image_fvs, nnn = len(image_fvs))

    # knn returns neighbours ordered by distance; re-order every row by
    # neighbour index so that column j holds the distance to image j.
    s_results = np.argsort(results, axis = 1)
    s_distances = distances * 0
    for i in range(distances.shape[0]):
        s_distances[i, :] = distances[i, s_results[i, :]]

    return s_distances
コード例 #8
0
ファイル: computeFV.py プロジェクト: VeyronWang/CS221_Project
def create_fisher_vector(gmm_list, video_desc, fisher_path):
  """
  Build, save and return the concatenated Fisher vector of one video.

  video_desc is a single object exposing the traj, hog, hof, mbhx and mbhy
  np.ndarrays. gmm_list holds one (gmm, mean, pca_transform) triple per
  descriptor channel, in that order; pca_transform may be None, in which
  case the centered descriptors are encoded without projection.
  fisher_path is the full path handed to np.savez (stored under key 'fish').

  Returns the globally L2-normalized concatenation of the per-channel
  Fisher vectors.
  """
  vid_desc_list = [
    video_desc.traj,
    video_desc.hog,
    video_desc.hof,
    video_desc.mbhx,
    video_desc.mbhy,
  ]
  fvs = []
  for descriptor, gmm_mean_pca in zip(vid_desc_list, gmm_list):
    gmm, mean, pca_transform = gmm_mean_pca
    descrip = descriptor.astype('float32') - mean
    # Identity test: `!= None` on an ndarray is an element-wise comparison
    # whose truth value is ambiguous.
    if pca_transform is not None:
      descrip = np.dot(descrip, pca_transform)
    # compute the Fisher vector, using the derivative w.r.t mu and sigma
    fv = ynumpy.fisher(gmm, descrip, include = ['mu', 'sigma'])

    # normalizations are done on each descriptor channel individually
    # power-normalization
    fv = np.sign(fv) * (np.abs(fv) ** 0.5)
    # L2 normalize
    norms = np.sqrt(np.sum(fv ** 2))
    fv /= norms
    # a zero norm yields NaNs (100 = far away from "normal" vectors)
    fv[np.isnan(fv)] = 100
    fvs.append(fv.T)
  output_fv = np.hstack(fvs)

  # L2 normalize the entire fv.
  norm = np.sqrt(np.sum(output_fv ** 2))
  output_fv /= norm

  # example name: 'v_Archery_g01_c01.fisher.npz'
  np.savez(fisher_path, fish=output_fv)
  print(fisher_path)
  return output_fv
コード例 #9
0
ファイル: fisherVectors.py プロジェクト: eglrp/M3-Team5
def getFisherVectors(Train_descriptors, k, gmm):
    """Encode every descriptor set of ``Train_descriptors`` as a Fisher vector.

    Returns a float32 matrix with one row per entry and ``k * d * 2``
    columns, where ``d`` is the column count of the first descriptor set.
    The elapsed time is printed.
    """
    print('Computing Fisher vectors')
    descriptor_dim = int(Train_descriptors[0].shape[1])
    init = time.time()

    n_images = len(Train_descriptors)
    fisher = np.zeros((n_images, k * descriptor_dim * 2), dtype=np.float32)
    for row, image_descriptors in enumerate(Train_descriptors):
        fisher[row, :] = ynumpy.fisher(gmm,
                                       image_descriptors,
                                       include=['mu', 'sigma'])

    end = time.time()
    print('Done in ' + str(end - init) + ' secs.')

    return fisher
コード例 #10
0
def evaluate_test(clf, stdSl, test_images_filenames, k, detector, gmm,
                  n_components):
    """Score ``clf`` on the Fisher vectors of the test images.

    Each image is described with ``compute_dense`` and encoded with
    ``ynumpy.fisher`` (mu and sigma); the matrix is transformed by ``stdSl``
    before scoring.

    NOTE: ``test_labels`` is not a parameter; it is read from the enclosing
    (module) scope.

    Returns the accuracy in percent.
    """
    n_images = len(test_images_filenames)
    fisher_test = np.zeros((n_images, k * n_components * 2),
                           dtype=np.float32)

    for row, filename in enumerate(test_images_filenames):
        print('Reading image ' + filename)
        kpt, des = compute_dense(filename, detector)
        fisher_test[row, :] = ynumpy.fisher(gmm, des, include=['mu', 'sigma'])

    scaled = stdSl.transform(fisher_test)
    return 100 * clf.score(scaled, test_labels)
コード例 #11
0
def GetKnn(ID):
    """Match the frames of video ``ID`` against its panoramas.

    Reads the ``.sift`` files found in ``info['frame_sift_path']`` and
    ``info['pano_sift_path']``, encodes each as a normalized Fisher vector
    with the model stored in ``gmm_2step.npz``, runs a 10-nearest-neighbour
    search of frames against panoramas and saves the neighbour indices to
    ``<pano_path>/fisher_results``. Nothing is returned.
    """
    print(ID)
    info = Info.GetVideoInfo(ID)

    def sift_names(directory):
        # sorted names of the .sift files inside `directory`
        return [x for x in sorted(os.listdir(directory)) if x.endswith('.sift')]

    def read_descriptors(directory, names):
        # descriptors of every file; an empty file becomes a (0, 128) matrix
        loaded = []
        for one in names:
            desc = ReadSift.ReadSift(directory + '/' + one)[1]
            if desc.size == 0:
                desc = np.zeros((0, 128), dtype='uint8')
            loaded.append(desc)
        return loaded

    frame_sift_lst = sift_names(info['frame_sift_path'])
    pano_sift_lst = sift_names(info['pano_sift_path'])
    frame_desc = read_descriptors(info['frame_sift_path'], frame_sift_lst)
    pano_desc = read_descriptors(info['pano_sift_path'], pano_sift_lst)

    data = np.load(Info.Config.ROOT_PATH + '/gmm_2step.npz')
    gmm = (data['a'], data['b'], data['c'])
    mean = data['mean']
    pca_transform = data['pca_transform']

    # frames first, panoramas after: the split below relies on this order
    image_fvs = np.vstack([
        ynumpy.fisher(gmm, np.dot(desc - mean, pca_transform), include='mu')
        for desc in frame_desc + pano_desc
    ])
    # power normalization, row-wise L2 normalization, NaN -> 100
    image_fvs = np.sign(image_fvs) * np.abs(image_fvs)**0.5
    norms = np.sqrt(np.sum(image_fvs**2, 1))
    image_fvs /= norms.reshape(-1, 1)
    image_fvs[np.isnan(image_fvs)] = 100

    n_frames = len(frame_sift_lst)
    frame_fvs = image_fvs[0:n_frames]
    pano_fvs = image_fvs[n_frames:]

    results, distances = ynumpy.knn(frame_fvs, pano_fvs, nnn=10)
    np.save(info['pano_path'] + '/fisher_results', results)
コード例 #12
0
def getFisherForImage(filename):
    """Return the Fisher vector (mu and sigma) of the image at ``filename``.

    Configuration is read from the module-level ``data`` sequence:
    ``data[1]`` is the descriptor type, ``data[2]`` the GMM and ``data[4]``
    an optional PCA object (None to skip the projection).
    """
    descriptor_type = data[1]
    gmm = data[2]
    computedPca = data[4]

    _, des = getKptDesForImage(filename, descriptor_type)

    # identity test instead of `!= None`, which dispatches to __ne__
    if computedPca is not None:
        des = computedPca.transform(des)

    des = np.float32(des)

    fisher_test = ynumpy.fisher(gmm, des, include=['mu', 'sigma'])

    return fisher_test
    def transform(self, X):
        """Return one Fisher vector (derivative w.r.t. mu) per descriptor set.

        ``X`` is a mapping with the keys 'descriptors', 'positions' and
        'imsizes'; only 'descriptors' is used for the computation. Each
        descriptor set is centered with ``self.mean`` and projected with
        ``self.PCA`` before being encoded against ``self.gmm``.
        The elapsed time is printed.
        """
        print('Getting Fisher Vector representation')
        init = time.time()

        descriptor_sets = X['descriptors']
        # looked up (so a missing key still fails) but not used below
        _positions = X['positions']
        _imsizes = X['imsizes']

        image_fvs = []
        for descriptor_set in descriptor_sets:
            projected = self.PCA.transform(descriptor_set - self.mean)
            image_fvs.append(ynumpy.fisher(self.gmm, projected, include='mu'))

        end = time.time()
        print('\tDone in ' + str(end - init) + ' secs.')
        return image_fvs
コード例 #14
0
def GetKnn(ID):
    """Find, for every frame of video ``ID``, its 10 nearest panoramas.

    Descriptors are read from the ``.sift`` files of the frame and panorama
    directories, encoded as normalized Fisher vectors with the model stored
    in ``gmm_2step.npz``, and the neighbour indices returned by
    ``ynumpy.knn`` are saved to ``<pano_path>/fisher_results``.
    Nothing is returned.
    """
    print(ID)
    info = Info.GetVideoInfo(ID)
    frame_dir = info['frame_sift_path']
    frame_sift_lst = [x for x in sorted(os.listdir(frame_dir)) if x.endswith('.sift')]
    pano_dir = info['pano_sift_path']
    pano_sift_lst = [x for x in sorted(os.listdir(pano_dir)) if x.endswith('.sift')]

    # frames first, panoramas after: the split further down relies on it
    all_desc = []
    for directory, names in ((frame_dir, frame_sift_lst), (pano_dir, pano_sift_lst)):
        for one in names:
            desc = ReadSift.ReadSift(directory + '/' + one)[1]
            if desc.size == 0:
                # file without descriptors -> empty 128-column matrix
                desc = np.zeros((0, 128), dtype = 'uint8')
            all_desc.append(desc)

    data = np.load(Info.Config.ROOT_PATH + '/gmm_2step.npz')
    gmm = (data['a'], data['b'], data['c'])
    mean = data['mean']
    pca_transform = data['pca_transform']

    image_fvs = []
    for desc in all_desc:
        projected = np.dot(desc - mean, pca_transform)
        image_fvs.append(ynumpy.fisher(gmm, projected, include = 'mu'))
    image_fvs = np.vstack(image_fvs)

    # power normalization, row-wise L2 normalization, NaN -> 100
    image_fvs = np.sign(image_fvs) * np.abs(image_fvs) ** 0.5
    norms = np.sqrt(np.sum(image_fvs ** 2, 1))
    image_fvs /= norms.reshape(-1,1)
    image_fvs[np.isnan(image_fvs)] = 100

    split = len(frame_sift_lst)
    frame_fvs = image_fvs[0:split]
    pano_fvs = image_fvs[split:]

    results, distances = ynumpy.knn(frame_fvs, pano_fvs, nnn = 10)
    np.save(info['pano_path'] + '/fisher_results', results)
コード例 #15
0
def compute_fisher_vectors(D, n_components, k):
    """Learn a ``k``-component GMM on ``D`` and build the Fisher matrix.

    Returns ``(fisher, gmm)`` where ``fisher`` is a float32 matrix with one
    row per entry of the module-level ``Train_descriptors`` and
    ``k * n_components * 2`` columns.

    NOTE(review): every row holds the Fisher vector of the complete sample
    matrix ``D``; the per-image descriptors are never encoded. That looks
    unintended (``Train_descriptors[i]`` was probably meant) -- confirm with
    the caller before changing the result. The vector is now computed once
    and copied into all rows instead of being recomputed per row.
    """
    print('Computing gmm with ' + str(k) + ' centroids')
    init = time.time()
    samples = np.float32(D)
    gmm = ynumpy.gmm_learn(samples, k)
    end = time.time()
    print('Done in ' + str(end - init) + ' secs.')

    init = time.time()
    fisher = np.zeros((len(Train_descriptors), k * n_components * 2),
                      dtype=np.float32)
    if len(Train_descriptors):
        # loop-invariant: same model, same data for every row
        fisher[:] = ynumpy.fisher(gmm, samples, include=['mu', 'sigma'])

    end = time.time()
    print('Done in ' + str(end - init) + ' secs.')

    return (fisher, gmm)
コード例 #16
0
ファイル: search_ukbench.py プロジェクト: Erotemic/yael
# Script excerpt: builds one Fisher vector per dataset image and optionally
# shows the images. `show`, `image_range`, `sift_directory`,
# `image_directory`, `gmm` and `dataset` are defined outside this excerpt.
queries = []

if show:
    fig = plt.figure(figsize=(10, 10))
    fig.canvas.set_window_title("100 image dataset")
    plot_idx = 1

for i in image_range:
    filename = "%s/ukbench%05d.siftgeo" % (sift_directory, i)
    # progress output; the trailing "\r" returns the cursor to line start
    print("  " + filename + "\r")
    sys.stdout.flush()

    sift_descriptors, geometric_info = ynumpy.siftgeo_read(filename)

    # compute the Fisher vector using the GMM
    # (no `include` argument is passed, so ynumpy.fisher's default applies)
    fv = ynumpy.fisher(gmm, sift_descriptors.astype('float32'))

    dataset.append(fv)

    if show:
        # draw the image in the next slot of a 13 x 8 subplot grid
        imagename = "%s/ukbench%05d.jpg" % (image_directory, i)
        im = Image.open(imagename)
        ax = plt.subplot(13, 8, plot_idx)
        ax.axis('off')
        plt.imshow(im)
        # refresh the figure once every eight images
        if i % 8 == 7:
            plt.draw()
        plot_idx += 1

# one row per image
dataset = numpy.vstack(dataset)
コード例 #17
0
ファイル: test_fisher.py プロジェクト: smurakami/yael
import numpy as np
from yael import ynumpy

# Ad-hoc comparison of ynumpy.fisher outputs on a stored sample set.
dat = np.load("test/py/test_fisher_dat.npy")
gmm = np.load("test/py/test_gmm.pickle")

# the two halves of the samples (not used further down)
dat_a = dat[: len(dat) / 2]
dat_b = dat[len(dat) / 2 :]
# a: Fisher vector of the samples stacked twice; b: of the samples once
a = ynumpy.fisher(gmm, np.vstack([dat, dat]).astype(np.float32), include="mu+sigma")
b = ynumpy.fisher(gmm, np.vstack([dat]).astype(np.float32), include="mu+sigma")
# constant weights: 4 for one half, 2 for the other
sw_a = np.ones(len(dat) / 2) * 4
sw_b = np.ones(len(dat) / 2) * 2
# NOTE(review): np.vstack yields a (2, len(dat) / 2) matrix here; presumably
# fisher_sw expects one weight per sample -- confirm the expected shape.
c = ynumpy.fisher_sw(gmm, dat.astype(np.float32), np.vstack([sw_a, sw_b]).astype(np.float32), include="mu+sigma")

# print a - c
# difference between the duplicated-sample and single-sample Fisher vectors
print a - b
# print b - c

# sw = np.array([(i + 5) % 10 for i in xrange(len(dat))])

# dat_weighted = np.vstack([np.vstack([dat[i]] * sw[i])
#                           for i in range(len(dat)) if sw[i] != 0])

# # np.ones(len(dat), dtype=np.float32)

# a = ynumpy.fisher(gmm, dat_weighted.astype(np.float32), include='mu+sigma')
# b = ynumpy.fisher_sw(gmm, dat.astype(np.float32), (sw).astype(np.float32), include='mu+sigma')

# print sw
# print sw * 0.1
コード例 #18
0
def predict_fishergmm(gmm, des, options):
    """Return the Fisher vector (mu and sigma) of one image's features.

    ``des`` holds the features of a single image and is converted to
    float32 before encoding. ``options`` is accepted but not used.
    """
    return ynumpy.fisher(gmm, np.float32(des), include=['mu', 'sigma'])
コード例 #19
0
ファイル: demo.py プロジェクト: bityangke/yael
# Script excerpt: `eigvals`, `eigvecs`, `sample`, `k`, `mean` and
# `image_descs` are defined outside this excerpt.
perm = eigvals.argsort()                   # sort by increasing eigenvalue
pca_transform = eigvecs[:, perm[64:128]]   # eigenvectors for the 64 last eigenvalues

# transform sample with PCA (note that numpy imposes line-vectors,
# so we right-multiply the vectors)
sample = np.dot(sample, pca_transform)

# train GMM
gmm = ynumpy.gmm_learn(sample, k)

image_fvs = []
for image_desc in image_descs:
   # apply the PCA to the image descriptor
   image_desc = np.dot(image_desc - mean, pca_transform)
   # compute the Fisher vector, using the derivative w.r.t mu and sigma
   # (both names are passed inside one string)
   fv = ynumpy.fisher(gmm, image_desc, include = 'mu, sigma')
   image_fvs.append(fv)

# make one matrix with all FVs
image_fvs = np.vstack(image_fvs)

# normalizations are done on all descriptors at once

# power-normalization
image_fvs = np.sign(image_fvs) * np.abs(image_fvs) ** 0.5

# L2 normalize (one norm per row)
norms = np.sqrt(np.sum(image_fvs ** 2, 1))
image_fvs /= norms.reshape(-1, 1)

# handle images with 0 local descriptor (100 = far away from "normal" images)
# (the statement this comment introduces lies outside this excerpt)
コード例 #20
0
ファイル: search_ukbench.py プロジェクト: pioneer911/yael
# Script excerpt: builds one Fisher vector per dataset image and optionally
# shows the images. `show`, `image_range`, `sift_directory`,
# `image_directory`, `gmm` and `dataset` are defined outside this excerpt.
queries = []

if show:
    fig = plt.figure(figsize=(10, 10))
    fig.canvas.set_window_title("100 image dataset")
    plot_idx = 1

for i in image_range:
    filename = "%s/ukbench%05d.siftgeo" % (sift_directory, i)
    # progress output; the trailing "\r" returns the cursor to line start
    print("  " + filename + "\r")
    sys.stdout.flush()

    sift_descriptors, geometric_info = ynumpy.siftgeo_read(filename)

    # compute the Fisher vector using the GMM
    # (no `include` argument is passed, so ynumpy.fisher's default applies)
    fv = ynumpy.fisher(gmm, sift_descriptors.astype('float32'))

    dataset.append(fv)

    if show:
        # draw the image in the next slot of a 13 x 8 subplot grid
        imagename = "%s/ukbench%05d.jpg" % (image_directory, i)
        im = Image.open(imagename)
        ax = plt.subplot(13, 8, plot_idx)
        ax.axis('off')
        plt.imshow(im)
        # refresh the figure once every eight images
        if i % 8 == 7:
            plt.draw()
        plot_idx += 1

# one row per image
dataset = numpy.vstack(dataset)
コード例 #21
0
def generate_fisher(mirrored_features, gmm_results, gmm_object, ncomponents,
                    start_idxs, end_idxs):
    """
    Generate Fisher vector features for mirrored_features.

    One row is produced per sliding window. Windows whose start index is -1
    are skipped and keep an all-zero row. When the module-level USE_YAEL
    flag is set the row comes from ``ynumpy.fisher`` (w, mu and sigma);
    otherwise it is assembled here from the posteriors of ``gmm_object``,
    falling back to ``compute_gmm_probs`` if ``predict_proba`` fails.

    :param mirrored_features: Features to compute Fisher vector features for.
    :param gmm_results: (weights, means, sigmas) from fitted GMM.
    :param gmm_object: GMM object from either yael or scikit-learn.
    :param ncomponents: Number of components used in Gaussian mixture model.
        Sizes the output; without yael it is then replaced by
        ``len(weights)`` for the gradient computation.
    :param start_idxs: Start indices of each sliding window.
    :param end_idxs: End indices (inclusive) of each sliding window.
    :return: fv_features: Fisher vector features for mirrored_features,
        after ``power_l2_normalize(..., power_normalize=False)``.
    """
    print('Generating Fisher vector features...')
    fv_features = np.zeros(
        (len(start_idxs),
         2 * ncomponents * np.size(mirrored_features, 1) + ncomponents - 1))
    if not USE_YAEL:
        ws, mus, sigmas = gmm_results
        ncomponents = len(ws)
    for i in range(len(start_idxs)):
        if start_idxs[i] == -1:
            continue  # no window here: leave the row at zero

        if USE_YAEL:
            X = mirrored_features[int(start_idxs[i]):int(end_idxs[i]) +
                                  1, :].astype('float32')
            fv_features[i, :] = ynumpy.fisher(gmm_results,
                                              X,
                                              include=['w', 'mu', 'sigma'])
            continue

        X = mirrored_features[int(start_idxs[i]):int(end_idxs[i]) + 1, :]
        num_samples = np.size(X, 0)
        try:
            gammas = gmm_object.predict_proba(X)
        except Exception:
            # Was a bare `except:`, which also swallowed KeyboardInterrupt
            # and SystemExit. Fall back to per-sample posteriors.
            gammas = np.zeros((np.size(X, 0), len(ws)))
            for obs in range(len(X)):
                gammas[obs, :] = compute_gmm_probs(
                    X[obs, :], ws, mus, sigmas)

        # weight gradient, expressed relative to component 0
        accus = np.sum(gammas[:, 1:] / ws[1:] -
                       (gammas[:, 0] / ws[0])[:, np.newaxis],
                       axis=0)
        grad_alpha = [
            accus[idx] / np.sqrt((1 / ws[idx + 1] + 1 / ws[0]))
            for idx in range(0, ncomponents - 1)
        ]
        grad_mu = [
            np.sqrt(sigmas[k, :] / (ws[k])) *
            np.dot(gammas[:, k], (X - mus[k]) / sigmas[k])
            for k in range(ncomponents)
        ]
        grad_sigma = [
            np.sqrt(1 / (2 * ws[k])) *
            np.dot(gammas[:, k], (X - mus[k])**2 / sigmas[k] - 1)
            for k in range(ncomponents)
        ]

        # layout: [weights (ncomponents - 1) | means | sigmas]
        fv_features[i, :] = 1 / np.sqrt(num_samples) * np.concatenate(
            (grad_alpha, np.array(grad_mu).flatten(),
             np.array(grad_sigma).flatten()))

    # Normalize
    fv_features = power_l2_normalize(fv_features, power_normalize=False)
    return fv_features
コード例 #22
0
ファイル: fv.py プロジェクト: evhub/cnn-cbir-benchmark
    #n_sifts = image_desc.shape[0]
    #for i in range(n_sifts):
    #    if np.linalg.norm(image_desc[i], ord=1) == 0.0:
    #        continue
    #    image_desc[i] = np.sqrt(image_desc[i]/np.linalg.norm(image_desc[i], ord=1))

    #n_sifts = image_desc.shape[0]
    #for i in range(n_sifts):
    #    image_desc[i] = np.sign(image_desc[i]) * np.log(1.0 + np.abs(image_desc[i]))

    # apply the PCA to the image descriptor
    image_desc = np.dot(image_desc - mean, pca_transform)
    image_desc = image_desc.astype(np.float32)

    # compute the Fisher vector, using the derivatives w.r.t. mu and sigma
    fv = ynumpy.fisher(gmm, image_desc, include=['mu', 'sigma'])
    features.append(fv)
    image_names.append(img_name)

# make one matrix with all FVs
features = np.vstack(features)

# normalizations are done on all descriptors at once

# power-normalization
features = np.sign(features) * np.abs(features)**0.5

# L2 normalization is disabled (kept below for reference only)
#norms = np.sqrt(np.sum(image_fvs ** 2, 1))
#image_fvs /= norms.reshape(-1, 1)
コード例 #23
0
def Pyramid_BoW_fisher(gmm, Image_info, x_part, y_part):
    """Build a three-level spatial-pyramid Fisher descriptor per image.

    For every ``(img, label)`` pair of ``Image_info`` the descriptors
    ``img.des`` are distributed by keypoint position (``img.kpt[i].pt``)
    over a fine grid of ``x_part**2`` rows by ``y_part**2`` columns. An
    intermediate grid of ``x_part`` by ``y_part`` cells merges the fine
    cells it covers. Fisher vectors (mu and sigma) are computed for the
    whole image, then the intermediate cells, then the fine cells (both
    row-major), and concatenated in that order.

    Descriptors are scaled before encoding: 0.25 for the whole image and
    the intermediate cells, 0.5 for the fine cells. Keypoints lying exactly
    on a fine-grid border are not assigned to any cell.

    Cells without descriptors contribute a zero block as long as the Fisher
    vectors computed for that image; if the image has no descriptor at all
    the block length falls back to ``k``.

    Returns a list with one flat list of numbers per image.
    """
    try:
        k = gmm.shape[0]
    except AttributeError:
        # presumably a yael-style (w, mu, sigma) tuple -- TODO confirm
        k = len(gmm[0])

    total_rows = x_part**2
    total_columns = y_part**2

    visual_words = []
    for img, label in Image_info:
        # floor division keeps the integer step sizes
        x_step = img.shape[0] // total_rows
        y_step = img.shape[1] // total_columns

        # Fine grid: one independent list per cell, indexed [row][column].
        # (Cells used to be the int 0, so the first append raised.)
        fine = [[[] for _ in range(total_columns)] for _ in range(total_rows)]
        for kpt, desc in zip(img.kpt, img.des):
            # note: shape is (rows, columns) while a keypoint is (x, y)
            px, py = kpt.pt[0], kpt.pt[1]
            for row in range(total_rows):
                if not (x_step * row < py < x_step * (row + 1)):
                    continue
                for column in range(total_columns):
                    if y_step * column < px < y_step * (column + 1):
                        fine[row][column].append(desc.tolist())
                        break
                break

        # Intermediate grid: each cell gathers ALL fine cells it covers
        # (previously only the last sub-cell survived the assignment).
        coarse = [[[] for _ in range(y_part)] for _ in range(x_part)]
        for row in range(x_part):
            for column in range(y_part):
                for sub_r in range(x_part):
                    for sub_c in range(y_part):
                        coarse[row][column].extend(
                            fine[row * x_part + sub_r][column * y_part + sub_c])

        # Weighted descriptor sets, float32 as passed to ynumpy.fisher.
        des_array = [0.25 * np.asarray(img.des, dtype=np.float32)]
        for coarse_row in coarse:
            for cell in coarse_row:
                des_array.append(0.25 * np.asarray(cell, dtype=np.float32))
        for fine_row in fine:
            for cell in fine_row:
                des_array.append(0.5 * np.asarray(cell, dtype=np.float32))

        # Fisher vector per non-empty cell; None marks an empty cell.
        cell_fvs = []
        for q in des_array:
            if len(q):
                fv = ynumpy.fisher(gmm, q, include=['mu', 'sigma'])
                cell_fvs.append(np.asarray(fv).ravel().tolist())
            else:
                cell_fvs.append(None)

        # Zero blocks must be as long as the real Fisher vectors, otherwise
        # the position of every following cell would shift.
        fv_len = next((len(fv) for fv in cell_fvs if fv is not None), k)

        Pdesc = []
        for fv in cell_fvs:
            Pdesc += fv if fv is not None else [0] * fv_len

        visual_words.append(Pdesc)

    return visual_words
コード例 #24
0
ファイル: session3.py プロジェクト: vcampmany/M3_ImageClassi
def main(nfeatures=100,
         code_size=32,
         n_components=60,
         kernel='linear',
         C=1,
         reduction=None,
         features='sift',
         pyramid=False,
         grid_step=6):
    """Train an SVM on Fisher vectors of image descriptors and print its accuracy.

    Args:
        nfeatures: forwarded to ``features_detector`` and ``compute_codebook``.
        code_size: value ``k`` handed to ``compute_codebook``; one pyramid
            level contributes ``k * descriptor_dim * 2`` Fisher components.
        n_components: forwarded to ``PCA_reduce`` when ``reduction == 'pca'``.
        kernel: kernel name given to ``svm.SVC``, or ``'pyramid_match'`` to
            train on a precomputed matrix built by ``spatialPyramidKernel``.
        C: ``C`` parameter of ``svm.SVC``.
        reduction: ``'pca'`` reduces the descriptors before the codebook is
            computed; any other value leaves them untouched.
        features: forwarded to ``features_detector`` and ``compute_codebook``.
        pyramid: forwarded to ``getDescriptors``, ``detect_compute`` and
            ``spatialPyramidKernel``.
        grid_step: forwarded to ``features_detector`` and ``compute_codebook``.

    Returns:
        None. Progress, timings and the final accuracy are printed.
    """
    start = time.time()

    # read the train and test files
    (train_images_filenames, test_images_filenames, train_labels,
     test_labels) = get_dataset()

    # create the detector object
    SIFTdetector = features_detector(nfeatures, features, grid_step)

    # extract keypoints and descriptors; every image contributes one
    # descriptor array per pyramid level
    Train_descriptors, Train_label_per_descriptor = getDescriptors(
        SIFTdetector, train_images_filenames, train_labels, pyramid)

    Train_descriptors = np.asarray(Train_descriptors)
    n_levels = Train_descriptors.shape[1]

    # Transform everything to numpy arrays
    size_descriptors = Train_descriptors[0][0].shape[-1]
    # for D we only need the first level of the pyramid (because it already
    # contains all points)
    D = np.zeros(
        (np.sum([len(p[0]) for p in Train_descriptors]), size_descriptors),
        dtype=np.uint8)
    startingpoint = 0
    for image_levels in Train_descriptors:
        first_level = image_levels[0]
        D[startingpoint:startingpoint + len(first_level)] = first_level
        startingpoint += len(first_level)
    if reduction == 'pca':
        D, pca_reducer = PCA_reduce(D, n_components)

    k = code_size
    # Compute Codebook
    gmm = compute_codebook(D, k, nfeatures, None, features, grid_step,
                           D.shape[1])

    # length of the Fisher vector of a single pyramid level
    fv_dim = k * D.shape[1] * 2

    init = time.time()
    fisher = np.zeros((len(Train_descriptors), fv_dim * n_levels),
                      dtype=np.float32)
    for i in range(len(Train_descriptors)):
        for j in range(n_levels):
            des = Train_descriptors[i][j]  # descriptors of pyramid level j
            if reduction == 'pca':
                des = pca_reducer.transform(des)
            fisher[i, j * fv_dim:(j + 1) * fv_dim] = ynumpy.fisher(
                gmm, np.float32(des), include=['mu', 'sigma'])

    end = time.time()
    print('Done in ' + str(end - init) + ' secs.')

    # Train the SVM classifier on standardised Fisher vectors
    stdSlr = StandardScaler().fit(fisher)
    D_scaled = stdSlr.transform(fisher)
    print('Training the SVM classifier...')

    if kernel == 'pyramid_match':
        ker_matrix = spatialPyramidKernel(D_scaled, D_scaled, fv_dim, pyramid)
        clf = svm.SVC(kernel='precomputed', C=C)
        clf.fit(ker_matrix, train_labels)
    else:
        clf = svm.SVC(kernel=kernel, C=C).fit(D_scaled, train_labels)
    print('Done!')

    # get all the test data and predict their labels
    fisher_test = np.zeros((len(test_images_filenames), fv_dim * n_levels),
                           dtype=np.float32)
    for i in range(len(test_images_filenames)):
        filename = test_images_filenames[i]
        print('Reading image ' + filename)
        ima = cv2.imread(filename)
        gray = cv2.cvtColor(ima, cv2.COLOR_BGR2GRAY)
        all_kpt, all_des = SIFTdetector.detect_compute(gray, pyramid)
        for j in range(len(all_des)):  # number of levels
            des = all_des[j]
            if reduction == 'pca':
                des = pca_reducer.transform(des)
            fisher_test[i, j * fv_dim:(j + 1) * fv_dim] = ynumpy.fisher(
                gmm, np.float32(des), include=['mu', 'sigma'])

    # Score only after choosing the branch: a classifier fitted on a
    # precomputed kernel must be given the test-vs-train kernel matrix, not
    # the raw feature vectors.
    fisher_test = stdSlr.transform(fisher_test)
    if kernel == 'pyramid_match':
        predictMatrix = spatialPyramidKernel(fisher_test, D_scaled, fv_dim,
                                             pyramid)
        accuracy = 100 * clf.score(predictMatrix, test_labels)
    else:
        accuracy = 100 * clf.score(fisher_test, test_labels)

    print('Final accuracy: ' + str(accuracy))

    end = time.time()
    print('Done in ' + str(end - start) + ' secs.')
コード例 #25
0
ファイル: test_fisher.py プロジェクト: smurakami/yael
import numpy as np
from yael import ynumpy

# Load the test descriptors and the GMM from disk.
dat = np.load('test/py/test_fisher_dat.npy')
gmm = np.load('test/py/test_gmm.pickle')

# Two halves of the descriptor set (not used further down).
# NOTE(review): `len(dat) / 2` is only a valid slice index under Python 2
# integer division.
dat_a = dat[:len(dat) / 2]
dat_b = dat[len(dat) / 2:]
# a: Fisher vector of the descriptor set stacked twice.
a = ynumpy.fisher(gmm,
                  np.vstack([dat, dat]).astype(np.float32),
                  include='mu+sigma')
# b: Fisher vector of the descriptor set taken once.
b = ynumpy.fisher(gmm, np.vstack([dat]).astype(np.float32), include='mu+sigma')
# Weights: 4 for the first half, 2 for the second half.
# NOTE(review): np.vstack of two 1-D arrays gives a (2, len/2) array rather
# than a flat vector -- confirm this is the layout fisher_sw expects.
sw_a = np.ones(len(dat) / 2) * 4
sw_b = np.ones(len(dat) / 2) * 2
# c: weighted Fisher vector computed with the weights above.
c = ynumpy.fisher_sw(gmm,
                     dat.astype(np.float32),
                     np.vstack([sw_a, sw_b]).astype(np.float32),
                     include='mu+sigma')

# Only the difference between the duplicated and the single set is printed.
# print a - c
print a - b
# print b - c

# Disabled experiment: replicate each descriptor sw[i] times instead of
# passing weights.
# sw = np.array([(i + 5) % 10 for i in xrange(len(dat))])

# dat_weighted = np.vstack([np.vstack([dat[i]] * sw[i])
#                           for i in range(len(dat)) if sw[i] != 0])

# # np.ones(len(dat), dtype=np.float32)

# a = ynumpy.fisher(gmm, dat_weighted.astype(np.float32), include='mu+sigma')
コード例 #26
0
ファイル: test_ynumpy.py プロジェクト: ybrs/yael
# Cluster the points `v` (defined earlier in the script) into 3 centroids.
centroids = ynumpy.kmeans(v, 3)

print "result centroids ="
print centroids[:10, :]

print "gmm:"

# Learn a 3-component GMM on the same points.
gmm = ynumpy.gmm_learn(v, 3)

# The learned model unpacks into three parts, named here w, mu and sigma.
(w, mu, sigma) = gmm

print "mu = "
print mu

print "sigma = "
print sigma

# Two copies of the first row of mu, used as a tiny descriptor set.
muc = numpy.vstack((mu[0, :], mu[0, :]))

#                    mu[1, :],
#                    mu[1, :],
#                    mu[1, :]))

print "mu=", mu
# Perturb the copies with Gaussian noise (mean -0.02, standard deviation 0.02).
muc += numpy.random.normal(-0.02, 0.02, size=muc.shape)
print "muc=", muc

# Fisher vector of the perturbed points, with ynumpy.fisher's default `include`.
fish = ynumpy.fisher(gmm, muc)

print fish
コード例 #27
0
def process(signature=None):
    """Compute PCA-reduced Fisher vectors for the dataset or for one signature.

    With ``signature=None`` every row of ``feature_matrix.h5`` is processed.
    The GMM and descriptor PCA are loaded from ``GMM.h5`` and
    ``pca_transform_gmm.pkl``; if loading fails they are trained on a random
    sample and saved. A second PCA is then fitted on the Fisher vectors and
    saved to ``pca_transform_fvs.pkl``, and the reduced vectors are written
    to ``image_fvs.h5``.

    With a ``signature`` the saved models must already exist; the signature
    is treated as a single descriptor.

    Args:
        signature: a single descriptor (array-like), or ``None`` to process
            the entire dataset.

    Returns:
        * dataset mode: the string ``"YAEL SCRIPT: Mission accomplished!"``;
        * signature mode: the reduced Fisher vector as a nested list, or the
          string ``"No needed data found. Abort."`` when the GMM/PCA models
          could not be loaded.
    """
    # make a big matrix with all image descriptors
    all_desc = []
    # handle the case it's requested to process the entire dataset
    if signature is None:
        with h5py.File("feature_matrix.h5", 'r') as h5f:
            feats = h5f['feature_matrix'][:]

        # normalize input matrix to avoid GMM crash
        feats = normalize(feats, axis=1, norm='l2')

        # ensure that the descriptors are FP32 and put them in a matrix
        image_descs = np.array(feats).astype('float32')
        all_desc = np.vstack(image_descs)

    try:
        # if available, load GMM model and PCA; the context manager closes
        # the file even when a dataset or the pickle is missing, so GMM.h5
        # can be rewritten below
        with h5py.File("GMM.h5", 'r') as h5f:
            gmm = (np.array(h5f['gmm1']).astype('float32'),
                   np.array(h5f['gmm2']).astype('float32'),
                   np.array(h5f['gmm3']).astype('float32'))
        pca_transform = joblib.load('pca_transform_gmm.pkl')
        print("there are GMM and pca_transform")
    except Exception:
        # handle the case where there aren't the needed data to process.
        if signature is not None:
            error = "No needed data found. Abort."
            print(error)
            return error

        # in case it's needed to populate the DB
        print("there aren't GMM and pca_transform: computing.")
        # k is the GMM dimension
        k = 512
        n_sample = k * 100

        # choose n_sample descriptors at random
        sample_indices = np.random.choice(all_desc.shape[0], n_sample)
        sample = all_desc[sample_indices]

        # compute PCA and transform the samples
        pca_transform = myPCA(sample, k)
        sample = pca_transform.transform(sample)

        # train GMM
        print("Start fitting GMM")
        GMM_ = GaussianMixture(n_components=k,
                               covariance_type='diag',
                               verbose_interval=1)
        t1 = time.time()
        GMM_.fit(sample)
        # format inside the call: print() returns None under the print
        # function, so applying % to its result raises TypeError
        print("GMM fit in %s" % (time.time() - t1))

        # Get GMM matrices
        w_, mu_, sigma_ = GMM_.weights_, GMM_.means_, GMM_.covariances_

        # Convert to FP32 (from FP64)
        gmm = (w_.astype('float32'), mu_.astype('float32'),
               sigma_.astype('float32'))

        # Save GMM
        with h5py.File("GMM.h5", 'w') as h5f:
            h5f.create_dataset('gmm1', data=gmm[0])
            h5f.create_dataset('gmm2', data=gmm[1])
            h5f.create_dataset('gmm3', data=gmm[2])

        # Save PCA model
        joblib.dump(pca_transform, 'pca_transform_gmm.pkl')

    # compute FVS
    image_fvs = []
    if signature is not None:
        # a single signature becomes a one-row descriptor matrix
        image_descs = np.array(signature)
        image_descs = image_descs.reshape(1, -1)
        image_descs = image_descs.astype('float32')

    for image_desc in image_descs:
        # apply the PCA to the image descriptor
        image_desc = np.expand_dims(image_desc, axis=0)
        image_desc = pca_transform.transform(image_desc - image_desc.mean())
        # compute the Fisher vector, using only the derivative w.r.t mu
        fv = ynumpy.fisher(gmm, image_desc, include='mu')
        image_fvs.append(fv)

    print("FVS processed.")

    # make one matrix with all FVs
    image_fvs = np.vstack(image_fvs)

    # compute PCA to reduce FVs dimensionality
    if signature is None:
        pca_transform2 = myPCA(image_fvs, dim=512)
        image_fvs = pca_transform2.transform(image_fvs)

        # Save FVS PCA
        joblib.dump(pca_transform2, 'pca_transform_fvs.pkl')

        # Save processed vectors that must be inserted in the DB
        with h5py.File("image_fvs.h5", 'w') as h5f:
            h5f.create_dataset('image_fvs', data=np.real(image_fvs))

        print("YAEL SCRIPT: Mission accomplished!")
        return "YAEL SCRIPT: Mission accomplished!"

    pca_transform2 = joblib.load('pca_transform_fvs.pkl')
    image_fv = pca_transform2.transform(image_fvs)
    return image_fv.tolist()
コード例 #28
0
def create_fisher_vector(gmm_list, video_desc, fv_file, fv_sqrt=False, fv_l2=False):
    """Build one Fisher vector per descriptor type of a video and save them.

    Args:
        gmm_list: one ``(gmm, mean, pca_transform)`` triple per descriptor
            type, in the order traj, hog, hof, mbh. ``pca_transform`` may be
            ``None`` to skip the projection.
        video_desc: object exposing the ``traj``, ``hog``, ``hof`` and ``mbh``
            descriptor arrays of a single video.
        fv_file: output path; the result is written to ``fv_file + '.mat'``
            under the key ``'fv'``.
        fv_sqrt: apply signed square-root (power) normalization to each
            Fisher vector.
        fv_l2: L2-normalize each Fisher vector.

    Returns:
        List of Fisher vectors in traj, hog, hof, mbh order. Descriptor types
        whose array is empty are skipped, so the list may hold fewer than
        four entries.
    """
    vid_desc_list = [
        video_desc.traj,
        video_desc.hog,
        video_desc.hof,
        video_desc.mbh,
    ]

    fvs = []
    for descriptor, gmm_mean_pca in zip(vid_desc_list, gmm_list):
        if not descriptor.size:
            continue

        gmm, mean, pca_transform = gmm_mean_pca
        # centre the descriptors, then project them with the PCA matrix
        descrip = descriptor.astype('float32') - mean
        # identity test: `!= None` on an ndarray compares element-wise and
        # has no single truth value
        if pca_transform is not None:
            descrip = np.dot(descrip, pca_transform)

        # compute the Fisher vector, using the derivative w.r.t mu and sigma
        fv = ynumpy.fisher(gmm, descrip, include=['mu', 'sigma'])

        # normalizations are done on each descriptor individually
        if fv_sqrt:
            # power-normalization
            fv = np.sign(fv) * (np.abs(fv) ** 0.5)

        if fv_l2:
            # L2 normalize
            norms = np.sqrt(np.sum(fv ** 2))
            fv /= norms
            # a zero norm yields NaNs; 100 places such vectors far away from
            # the "normal" ones
            fv[np.isnan(fv)] = 100

        fvs.append(fv.T)

    scipy.io.savemat(fv_file + '.mat', mdict={'fv': fvs}, oned_as='row')
    print(fv_file)
    return fvs
コード例 #29
0
def train_system(train_filenames, train_labels, detector, options):
    """Train the Fisher-vector + linear-SVM pipeline on a set of images.

    Args:
        train_filenames: paths of the training images.
        train_labels: one label per training image.
        detector: feature detector handed to ``spatial_pyramid`` or
            ``extract_SIFT_features``.
        options: settings object; this function reads ``spatial_pyramids``,
            ``detector_options``, ``apply_pca``, ``ncomp_pca``, ``kmeans``
            and ``apply_normalization``.

    Returns:
        Tuple ``(stdSlr_features, pca, gmm, stdSlr, clf)``: the descriptor
        scaler (left unfitted when PCA is off), the PCA model or ``None``,
        the learned GMM, the Fisher-vector scaler and the trained SVM.
    """
    # Read the images and extract the features.
    Train_descriptors = []
    for filename in train_filenames:
        print('Reading image ' + filename)
        ima = cv2.imread(filename)
        gray = cv2.cvtColor(ima, cv2.COLOR_BGR2GRAY)
        if options.spatial_pyramids:
            des = spatial_pyramid(gray, detector, options)
        else:
            des = extract_SIFT_features(gray, detector,
                                        options.detector_options)
        Train_descriptors.append(des)

    # Stack all descriptors once instead of re-copying the growing matrix on
    # every image.
    D = np.vstack(Train_descriptors)

    stdSlr_features = StandardScaler()
    pca = None
    if options.apply_pca:
        stdSlr_features = StandardScaler().fit(D)
        D = stdSlr_features.transform(D)
        pca = PCA(n_components=options.ncomp_pca)
        pca.fit(D)
        D = pca.transform(D)

    print('Computing gmm with ' + str(options.kmeans) + ' centroids')
    init = time.time()
    gmm = ynumpy.gmm_learn(np.float32(D), options.kmeans)
    end = time.time()
    print('Done in ' + str(end - init) + ' secs.')

    # Dimension of the descriptors the GMM was trained on (after the optional
    # PCA); a Fisher vector with mu and sigma has kmeans * num_features * 2
    # components.
    num_features = D.shape[1]
    init = time.time()
    fisher = np.zeros(
        (len(Train_descriptors), options.kmeans * num_features * 2),
        dtype=np.float32)
    for i, descriptor in enumerate(Train_descriptors):
        if options.apply_pca:
            descriptor = stdSlr_features.transform(descriptor)
            descriptor = pca.transform(descriptor)
        # cast to float32 as done for gmm_learn above; the scaler/PCA output
        # is float64
        fisher[i, :] = ynumpy.fisher(gmm,
                                     np.float32(descriptor),
                                     include=['mu', 'sigma'])
    end = time.time()
    print('Done in ' + str(end - init) + ' secs.')

    if options.apply_normalization:
        fisher = applyNormalization(fisher, options)

    # Train a linear SVM classifier
    stdSlr = StandardScaler().fit(fisher)
    D_scaled = stdSlr.transform(fisher)
    print('Training the SVM classifier...')
    clf = svm.SVC(kernel='linear', C=1).fit(D_scaled, train_labels)
    print('Done!')

    return stdSlr_features, pca, gmm, stdSlr, clf
コード例 #30
0
def _compute_vd_descriptors(tracklets_path, intermediates_path, videonames, traintest_parts, indices, feat_types, feats_path, \
                            pca_reduction=False, treelike=True, clusters_path=None, verbose=False):
    try:
        makedirs(feats_path)
    except OSError:
        pass

    for k, part in enumerate(traintest_parts):
        # cach'd pca and gmm

        for j, feat_t in enumerate(feat_types):
            try:
                makedirs(join(feats_path, feat_t + '-' + str(k)))
            except OSError:
                pass

        cache = None

        # process videos
        total = len(videonames)
        for i in indices:
            # FV computed for all feature types? see the last in INTERNAL_PARAMETERS['feature_types']
            all_done = np.all([isfile(join(feats_path, feat_t + '-' + str(k), videonames[i] + '.pkl'))
                   for feat_t in feat_types])
            if all_done:
                if verbose:
                    print('[_compute_vd_descriptors] %s -> OK' % videonames[i])
                continue

            if cache is None:
                cache = dict()
                for j, feat_t in enumerate(feat_types):
                    with open(join(intermediates_path, 'gmm' + ('_pca-' if pca_reduction else '-') + feat_t + '-' + str(k) + '.pkl'), 'rb') as f:
                        cache[feat_t] = cPickle.load(f)

            start_time = time.time()

            # object features used for the per-frame FV representation computation (cach'd)
            with open(join(tracklets_path, 'obj', videonames[i] + '.pkl'), 'rb') as f:
                obj = cPickle.load(f)
            with open(join(clusters_path, videonames[i] + '.pkl'), 'rb') as f:
                clusters = cPickle.load(f)

            for j, feat_t in enumerate(feat_types):
                if isfile(join(feats_path, feat_t + '-' + str(k), videonames[i] + '.pkl')):
                    continue

                # load video tracklets' feature
                with open(join(tracklets_path, feat_t, videonames[i] + '.pkl'), 'rb') as f:
                    d = cPickle.load(f)

                if feat_t == 'trj': # (special case)
                    d = convert_positions_to_displacements(d)

                if feat_t == 'mbh':
                    dx = preprocessing.normalize(d[:,:d.shape[1]/2], norm='l1', axis=1)
                    dy = preprocessing.normalize(d[:,d.shape[1]/2:], norm='l1', axis=1)
                    d = np.hstack((dx,dy))
                else:
                    d = preprocessing.normalize(d, norm='l1', axis=1)

                d = rootSIFT(d)

                if pca_reduction:
                    d = cache[feat_t]['pca'].transform(d)  # reduce dimensionality

                d = np.ascontiguousarray(d, dtype=np.float32)  # required in many of Yael functions

                output_filepath = join(feats_path, feat_t + '-' + str(k), videonames[i] + '.pkl')
                # compute FV of the video
                if not treelike:
                    # (in a per-frame representation)
                    fids = np.unique(obj[:,0])
                    V = [] # row-wise fisher vectors (matrix)
                    for f in fids:
                        tmp = d[np.where(obj[:,0] == f)[0],:]  # hopefully this is contiguous if d already was
                        fv = ynumpy.fisher(cache[feat_t]['gmm'], tmp, include=INTERNAL_PARAMETERS['fv_repr_feats'])  # f-th frame fisher vec
                        V.append(fv)  # no normalization or nothing (it's done when computing darwin)

                    vd = videodarwin.darwin(np.array(V))

                    with open(output_filepath, 'wb') as f:
                        cPickle.dump(dict(v=vd), f)

                else:  # or separately the FVs of the tree nodes
                    vdtree = dict()
                    if len(clusters['tree']) == 1:
                        fids = np.unique(obj[:,0])
                        V = [ynumpy.fisher(cache[feat_t]['gmm'], d[np.where(obj[:,0] == f)[0],:], INTERNAL_PARAMETERS['fv_repr_feats'])
                             for f in fids]
                        vdtree[1] = videodarwin.darwin(np.array(V))
                    else:
                        T = reconstruct_tree_from_leafs(np.unique(clusters['int_paths']))
                        for parent_idx, children_inds in T.iteritems():
                            # (in a per-frame representation)
                            node_inds = np.where(np.any([clusters['int_paths'] == idx for idx in children_inds], axis=0))[0]
                            fids = np.unique(obj[node_inds,0])
                            V = []
                            for f in fids:
                                tmp = d[np.where(obj[node_inds,0] == f)[0],:]
                                fv = ynumpy.fisher(cache[feat_t]['gmm'], tmp, INTERNAL_PARAMETERS['fv_repr_feats'])
                                V.append(fv)  # no normalization or nothing (it's done when computing darwin)
                            vdtree[parent_idx] = videodarwin.darwin(np.array(V))

                    with open(output_filepath, 'wb') as f:
                        cPickle.dump(dict(tree=vdtree), f)

            elapsed_time = time.time() - start_time
            if verbose:
                print('[_compute_vd_descriptors] %s -> DONE (in %.2f secs)' % (videonames[i], elapsed_time))
コード例 #31
0
def _compute_vd_descriptors(tracklets_path, intermediates_path, videonames, traintest_parts, indices, feat_types, feats_path, \
                            pca_reduction=False, treelike=True, clusters_path=None, verbose=False):
    try:
        makedirs(feats_path)
    except OSError:
        pass

    for k, part in enumerate(traintest_parts):
        # cach'd pca and gmm

        for j, feat_t in enumerate(feat_types):
            try:
                makedirs(join(feats_path, feat_t + '-' + str(k)))
            except OSError:
                pass

        cache = None

        # process videos
        total = len(videonames)
        for i in indices:
            # FV computed for all feature types? see the last in INTERNAL_PARAMETERS['feature_types']
            all_done = np.all([
                isfile(
                    join(feats_path, feat_t + '-' + str(k),
                         videonames[i] + '.pkl')) for feat_t in feat_types
            ])
            if all_done:
                if verbose:
                    print('[_compute_vd_descriptors] %s -> OK' % videonames[i])
                continue

            if cache is None:
                cache = dict()
                for j, feat_t in enumerate(feat_types):
                    with open(
                            join(
                                intermediates_path,
                                'gmm' + ('_pca-' if pca_reduction else '-') +
                                feat_t + '-' + str(k) + '.pkl'), 'rb') as f:
                        cache[feat_t] = cPickle.load(f)

            start_time = time.time()

            # object features used for the per-frame FV representation computation (cach'd)
            with open(join(tracklets_path, 'obj', videonames[i] + '.pkl'),
                      'rb') as f:
                obj = cPickle.load(f)
            with open(join(clusters_path, videonames[i] + '.pkl'), 'rb') as f:
                clusters = cPickle.load(f)

            for j, feat_t in enumerate(feat_types):
                if isfile(
                        join(feats_path, feat_t + '-' + str(k),
                             videonames[i] + '.pkl')):
                    continue

                # load video tracklets' feature
                with open(join(tracklets_path, feat_t, videonames[i] + '.pkl'),
                          'rb') as f:
                    d = cPickle.load(f)

                if feat_t == 'trj':  # (special case)
                    d = convert_positions_to_displacements(d)

                if feat_t == 'mbh':
                    dx = preprocessing.normalize(d[:, :d.shape[1] / 2],
                                                 norm='l1',
                                                 axis=1)
                    dy = preprocessing.normalize(d[:, d.shape[1] / 2:],
                                                 norm='l1',
                                                 axis=1)
                    d = np.hstack((dx, dy))
                else:
                    d = preprocessing.normalize(d, norm='l1', axis=1)

                d = rootSIFT(d)

                if pca_reduction:
                    d = cache[feat_t]['pca'].transform(
                        d)  # reduce dimensionality

                d = np.ascontiguousarray(
                    d, dtype=np.float32)  # required in many of Yael functions

                output_filepath = join(feats_path, feat_t + '-' + str(k),
                                       videonames[i] + '.pkl')
                # compute FV of the video
                if not treelike:
                    # (in a per-frame representation)
                    fids = np.unique(obj[:, 0])
                    V = []  # row-wise fisher vectors (matrix)
                    for f in fids:
                        tmp = d[np.where(
                            obj[:, 0] == f
                        )[0], :]  # hopefully this is contiguous if d already was
                        fv = ynumpy.fisher(
                            cache[feat_t]['gmm'],
                            tmp,
                            include=INTERNAL_PARAMETERS['fv_repr_feats']
                        )  # f-th frame fisher vec
                        V.append(
                            fv
                        )  # no normalization or nothing (it's done when computing darwin)

                    vd = videodarwin.darwin(np.array(V))

                    with open(output_filepath, 'wb') as f:
                        cPickle.dump(dict(v=vd), f)

                else:  # or separately the FVs of the tree nodes
                    vdtree = dict()
                    if len(clusters['tree']) == 1:
                        fids = np.unique(obj[:, 0])
                        V = [
                            ynumpy.fisher(cache[feat_t]['gmm'],
                                          d[np.where(obj[:, 0] == f)[0], :],
                                          INTERNAL_PARAMETERS['fv_repr_feats'])
                            for f in fids
                        ]
                        vdtree[1] = videodarwin.darwin(np.array(V))
                    else:
                        T = reconstruct_tree_from_leafs(
                            np.unique(clusters['int_paths']))
                        for parent_idx, children_inds in T.iteritems():
                            # (in a per-frame representation)
                            node_inds = np.where(
                                np.any([
                                    clusters['int_paths'] == idx
                                    for idx in children_inds
                                ],
                                       axis=0))[0]
                            fids = np.unique(obj[node_inds, 0])
                            V = []
                            for f in fids:
                                tmp = d[np.where(obj[node_inds, 0] == f)[0], :]
                                fv = ynumpy.fisher(
                                    cache[feat_t]['gmm'], tmp,
                                    INTERNAL_PARAMETERS['fv_repr_feats'])
                                V.append(
                                    fv
                                )  # no normalization or nothing (it's done when computing darwin)
                            vdtree[parent_idx] = videodarwin.darwin(
                                np.array(V))

                    with open(output_filepath, 'wb') as f:
                        cPickle.dump(dict(tree=vdtree), f)

            elapsed_time = time.time() - start_time
            if verbose:
                print('[_compute_vd_descriptors] %s -> DONE (in %.2f secs)' %
                      (videonames[i], elapsed_time))
コード例 #32
0
      len(Train_descriptors[i])] = Train_descriptors[i]
    startingpoint += len(Train_descriptors[i])

# Value passed to gmm_learn as k; it also sizes the Fisher vectors below.
k = 32

print 'Computing gmm with ' + str(k) + ' centroids'
init = time.time()
# Learn the GMM on all training descriptors, cast to float32.
gmm = ynumpy.gmm_learn(np.float32(D), k)
end = time.time()
print 'Done in ' + str(end - init) + ' secs.'

init = time.time()
# One Fisher vector (mu and sigma gradients) per training image.
# NOTE(review): 128 is hard-coded as the descriptor dimension -- confirm it
# matches D.shape[1].
fisher = np.zeros((len(Train_descriptors), k * 128 * 2), dtype=np.float32)
for i in xrange(len(Train_descriptors)):
    fisher[i, :] = ynumpy.fisher(gmm,
                                 Train_descriptors[i],
                                 include=['mu', 'sigma'])

end = time.time()
print 'Done in ' + str(end - init) + ' secs.'

# Train a linear SVM classifier on the standardised Fisher vectors

stdSlr = StandardScaler().fit(fisher)
D_scaled = stdSlr.transform(fisher)
print 'Training the SVM classifier...'
clf = svm.SVC(kernel='linear', C=1).fit(D_scaled, train_labels)
print 'Done!'

# get all the test data and predict their labels
fisher_test = np.zeros((len(test_images_filenames), k * 128 * 2),
コード例 #33
0
def _compute_fv_descriptors(tracklets_path, intermediates_path, videonames, traintest_parts, indices, feat_types, feats_path, \
                            pca_reduction=True, treelike=True, clusters_path=None):
    if not exists(feats_path):
        makedirs(feats_path)

    for k, part in enumerate(traintest_parts):
        # cach'd pca and gmm
        cache = dict()
        for j, feat_t in enumerate(feat_types):
            if not exists(feats_path + feat_t + '-' + str(k)):
                makedirs(feats_path + feat_t + '-' + str(k))
            with open(intermediates_path + 'gmm' + ('_pca-' if pca_reduction else '-') + feat_t + '-' + str(k) + '.pkl', 'rb') as f:
                cache[feat_t] = cPickle.load(f)

        # process videos
        total = len(videonames)
        for i in indices:
            # FV computed for all feature types? see the last in INTERNAL_PARAMETERS['feature_types']
            output_filepath = join(feats_path, feat_types[-1] + '-' + str(k), videonames[i] + '.pkl')
            if isfile(output_filepath):
                # for j, feat_t in enumerate(feat_types):
                #     featnames.setdefault(feat_t, []).append(feats_path + feat_t + '/' + videonames[i] + '-fvtree.pkl')
                print('%s -> OK' % output_filepath)
                continue

            start_time = time.time()

            # object features used for the per-frame FV representation computation (cach'd)
            with open(tracklets_path + 'obj/' + videonames[i] + '.pkl', 'rb') as f:
                obj = cPickle.load(f)
            with open(clusters_path + videonames[i] + '.pkl', 'rb') as f:
                clusters = cPickle.load(f)

            for j, feat_t in enumerate(feat_types):
                # load video tracklets' feature
                with open(tracklets_path + feat_t + '/' + videonames[i] + '.pkl', 'rb') as f:
                    d = cPickle.load(f)
                    if feat_t == 'trj': # (special case)
                        d = convert_positions_to_displacements(d)

                # pre-processing
                d = rootSIFT(preprocessing.normalize(d, norm='l1', axis=1))  # https://hal.inria.fr/hal-00873267v2/document

                if pca_reduction:
                    d = cache[feat_t]['pca'].transform(d)  # reduce dimensionality

                d = np.ascontiguousarray(d, dtype=np.float32)  # required in many of Yael functions

                output_filepath = join(feats_path, feat_t + '-' + str(k), videonames[i] + '.pkl')
                # compute FV of the video
                if not treelike:
                    fv = ynumpy.fisher(cache[feat_t]['gmm'], d, INTERNAL_PARAMETERS['fv_repr_feats'])  # fisher vec
                    fv = preprocessing.normalize(fv)
                    with open(output_filepath, 'wb') as f:
                        cPickle.dump(dict(v=fv), f)

                else:  # or separately the FVs of the tree nodes
                    T = reconstruct_tree_from_leafs(np.unique(clusters['int_paths']))
                    fvtree = dict()
                    for parent_idx, children_inds in T.iteritems():
                        # (in a global representation)
                        node_inds = np.where(np.any([clusters['int_paths'] == idx for idx in children_inds], axis=0))[0]
                        fv = ynumpy.fisher(cache[feat_t]['gmm'], d[node_inds,:], INTERNAL_PARAMETERS['fv_repr_feats'])  # fisher vec
                        fvtree[parent_idx] = normalize(rootSIFT(fv,p=0.5), norm='l2')  # https://www.robots.ox.ac.uk/~vgg/rg/papers/peronnin_etal_ECCV10.pdf

                    with open(output_filepath, 'wb') as f:
                        cPickle.dump(dict(tree=fvtree), f)

            elapsed_time = time.time() - start_time
            print('%s -> DONE (in %.2f secs)' % (videonames[i], elapsed_time))
コード例 #34
0
    # Load the feature file of one video (vname comes from the enclosing loop).
    video_data = np.genfromtxt(DATASET_PATH + vname)
    # delete first ten columns
    video_data = video_data[:, 10:]
    video_data = video_data.astype('float32')
    # separate data into different features by column range
    video_data_traj = video_data[:, 0:30]
    video_data_hog = video_data[:, 30:126]
    video_data_hof = video_data[:, 126:234]
    video_data_mbh = video_data[:, 234:426]
    # centre each feature block and project it with its PCA matrix
    video_data_traj = np.dot(video_data_traj - mean_traj, pca_traj)
    video_data_hog = np.dot(video_data_hog - mean_hog, pca_hog)
    video_data_hof = np.dot(video_data_hof - mean_hof, pca_hof)
    video_data_mbh = np.dot(video_data_mbh - mean_mbh, pca_mbh)
    # compute the Fisher vector, using the derivative w.r.t mu and sigma
    # NOTE(review): `include` is the single string 'mu, sigma' here, not a
    # list -- confirm ynumpy.fisher accepts this form.
    fv_traj = ynumpy.fisher(gmm_traj, video_data_traj, include='mu, sigma')
    fv_hog = ynumpy.fisher(gmm_hog, video_data_hog, include='mu, sigma')
    fv_hof = ynumpy.fisher(gmm_hof, video_data_hof, include='mu, sigma')
    fv_mbh = ynumpy.fisher(gmm_mbh, video_data_mbh, include='mu, sigma')
    # concatenate the fisher vectors
    fv = np.concatenate((fv_traj, fv_hog, fv_hof, fv_mbh))
    print fv.shape
    image_fvs.append(fv)

# make one matrix with all FVs (one row per video)
image_fvs = np.vstack(image_fvs)
# normalizations are done on all descriptors at once
# power-normalization (signed square root)
image_fvs = np.sign(image_fvs) * np.abs(image_fvs)**0.5
# L2 normalize: per-row Euclidean norms
norms = np.sqrt(np.sum(image_fvs**2, 1))
コード例 #35
0
def _per_frame_fisher_vectors(gmm, feats, frame_ids, include):
    """Return one Fisher vector per distinct frame id, stacked row-wise.

    `feats` and `frame_ids` must be row-aligned: row r of `feats` belongs to
    the frame `frame_ids[r]`.  Frames are visited in the sorted order given
    by `np.unique`.
    """
    V = []
    for fid in np.unique(frame_ids):
        # Yael expects contiguous float32 input; make that explicit.
        rows = np.ascontiguousarray(feats[frame_ids == fid, :])
        V.append(ynumpy.fisher(gmm, rows, include=include))  # no normalization (done when computing darwin)
    return np.array(V)


def _compute_vd_descriptors(tracklets_path, intermediates_path, videonames, traintest_parts, indices, feat_types, feats_path, \
                            pca_reduction=True, treelike=True, clusters_path=None):
    """Compute and pickle videodarwin descriptors for the selected videos.

    For every train/test part `k` and every feature type, the pickled model
    (a mapping with a 'gmm' entry and, when `pca_reduction` is set, a 'pca'
    entry) is loaded from `intermediates_path`.  Then, for each video index in
    `indices`, per-frame Fisher vectors are computed and fed to
    `videodarwin.darwin`; the normalized result is written to
    `<feats_path>/<feat_type>-<k>/<videoname>.pkl`.

    Parameters:
        tracklets_path: directory prefix holding 'obj/<video>.pkl' and
            '<feat_type>/<video>.pkl' pickles (joined by string concatenation,
            so it must end with a path separator).
        intermediates_path: directory prefix holding the pickled GMM/PCA models.
        videonames: sequence of video names, indexed by `indices`.
        traintest_parts: only its length is used (one model set per part).
        indices: indices into `videonames` of the videos to process.
        feat_types: feature type names; the output of the LAST one marks a
            video as already processed.
        feats_path: output directory prefix (created if missing).
        pca_reduction: apply the cached PCA before Fisher encoding.
        treelike: if True, store one descriptor per tree node as
            dict(tree=...); otherwise one per video as dict(v=...).
        clusters_path: directory prefix of the per-video cluster pickles;
            only read when `treelike` is True.

    NOTE(review): inputs are loaded with cPickle; only use trusted files.
    """
    if not exists(feats_path):
        makedirs(feats_path)

    for k, _ in enumerate(traintest_parts):
        # cached pca and gmm models for this part
        cache = dict()
        for feat_t in feat_types:
            if not exists(feats_path + feat_t + '-' + str(k)):
                makedirs(feats_path + feat_t + '-' + str(k))
            with open(intermediates_path + 'gmm' + ('_pca-' if pca_reduction else '-') + feat_t + '-' + str(k) + '.pkl', 'rb') as f:
                cache[feat_t] = cPickle.load(f)

        # process videos
        for i in indices:
            # The last feature type is written last, so its output file tells
            # whether every feature type was already computed for this video.
            done_filepath = join(feats_path, feat_types[-1] + '-' + str(k), videonames[i] + '.pkl')
            if isfile(done_filepath):
                print('%s -> OK' % done_filepath)
                continue

            start_time = time.time()

            # object features used for the per-frame FV representation;
            # column 0 is used as the frame id of each tracklet
            with open(tracklets_path + 'obj/' + videonames[i] + '.pkl', 'rb') as f:
                obj = cPickle.load(f)

            # Clusters are only needed for the tree representation; loading
            # them unconditionally failed when clusters_path was left at None.
            clusters = None
            if treelike:
                with open(clusters_path + videonames[i] + '.pkl', 'rb') as f:
                    clusters = cPickle.load(f)

            for feat_t in feat_types:
                # load video tracklets' feature
                with open(tracklets_path + feat_t + '/' + videonames[i] + '.pkl', 'rb') as f:
                    d = cPickle.load(f)
                if feat_t == 'trj':  # (special case)
                    d = convert_positions_to_displacements(d)

                # pre-processing
                d = rootSIFT(preprocessing.normalize(d, norm='l1', axis=1))  # https://hal.inria.fr/hal-00873267v2/document

                if pca_reduction:
                    d = cache[feat_t]['pca'].transform(d)  # reduce dimensionality

                d = np.ascontiguousarray(d, dtype=np.float32)  # required in many of Yael functions

                output_filepath = join(feats_path, feat_t + '-' + str(k), videonames[i] + '.pkl')
                gmm = cache[feat_t]['gmm']
                fv_include = INTERNAL_PARAMETERS['fv_repr_feats']

                if not treelike:
                    # one darwin descriptor for the whole video
                    V = _per_frame_fisher_vectors(gmm, d, obj[:, 0], fv_include)
                    vd = normalize(videodarwin.darwin(V))

                    with open(output_filepath, 'wb') as f:
                        cPickle.dump(dict(v=vd), f)

                else:  # or separately the descriptors of the tree nodes
                    T = reconstruct_tree_from_leafs(np.unique(clusters['int_paths']))
                    vdtree = dict()
                    for parent_idx, children_inds in T.iteritems():
                        # tracklets belonging to any child of this node
                        node_inds = np.where(np.any([clusters['int_paths'] == idx for idx in children_inds], axis=0))[0]
                        # Restrict BOTH the features and the frame ids to the
                        # node's tracklets so they stay row-aligned (the frame
                        # mask used to be applied to the full `d`, selecting
                        # unrelated rows).
                        V = _per_frame_fisher_vectors(gmm, d[node_inds, :], obj[node_inds, 0], fv_include)
                        vdtree[parent_idx] = normalize(videodarwin.darwin(V))

                    with open(output_filepath, 'wb') as f:
                        cPickle.dump(dict(tree=vdtree), f)

            elapsed_time = time.time() - start_time
            print('%s -> DONE (in %.2f secs)' % (videonames[i], elapsed_time))
コード例 #36
0
ファイル: test_ynumpy.py プロジェクト: GarfieldEr007/yael
# NOTE(review): excerpt of a test script; `centroids` and the sample matrix
# `v` are produced by code that is not part of this excerpt.
print "result centroids ="
print centroids[:10,:]

print "gmm:"

# Learn a GMM on `v` (second argument presumably the number of components --
# TODO confirm against ynumpy.gmm_learn).
gmm = ynumpy.gmm_learn(v, 3)

# The learned model unpacks into three parts: w, mu and sigma.
(w, mu, sigma) = gmm

print "mu = "
print mu

print "sigma = "
print sigma


# Build a two-row test matrix from two copies of the first row of `mu`.
muc = numpy.vstack((mu[0, :],
                    mu[0, :])); 
                    
# (disabled: additional rows taken from mu[1, :])
#                    mu[1, :],
#                    mu[1, :],
#                    mu[1, :]))

print "mu=", mu
# Perturb the copies in place with Gaussian noise (loc=-0.02, scale=0.02).
# NOTE(review): the arguments look like a range; a zero-mean or uniform
# perturbation may have been intended -- confirm.
muc += numpy.random.normal(-0.02, 0.02, size = muc.shape)
print "muc=", muc

# Fisher vector of the perturbed points, with ynumpy.fisher's default options.
fish = ynumpy.fisher(gmm, muc)

print fish
コード例 #37
0
ファイル: train_video.py プロジェクト: ai3DVision/yael
	# NOTE(review): this excerpt starts inside a loop whose header is not
	# visible here; `vname`, `image_fvs`, the `mean_*`/`pca_*` arrays and the
	# `gmm_*` models are defined outside the excerpt.
	# Load the descriptor table stored for `vname`.
	video_data = np.genfromtxt(DATASET_PATH + vname)
	# drop the first ten columns
	video_data = video_data[:,10:]
	video_data = video_data.astype('float32')
	# separate the remaining columns into the four feature groups
	# (column widths: 30, 96, 108 and 192)
	video_data_traj = video_data[:, 0:30]
	video_data_hog  = video_data[:, 30:126]
	video_data_hof  = video_data[:, 126:234]
	video_data_mbh  = video_data[:, 234:426]
	# centre each group with its mean and project it with its PCA matrix
	video_data_traj = np.dot(video_data_traj - mean_traj, pca_traj)
	video_data_hog = np.dot(video_data_hog - mean_hog, pca_hog)
	video_data_hof = np.dot(video_data_hof - mean_hof, pca_hof)
	video_data_mbh = np.dot(video_data_mbh - mean_mbh, pca_mbh)
	# compute the Fisher vector, using the derivative w.r.t mu and sigma
	fv_traj = ynumpy.fisher(gmm_traj, video_data_traj, include = 'mu, sigma')
	fv_hog = ynumpy.fisher(gmm_hog, video_data_hog, include = 'mu, sigma')
	fv_hof = ynumpy.fisher(gmm_hof, video_data_hof, include = 'mu, sigma')
	fv_mbh = ynumpy.fisher(gmm_mbh, video_data_mbh, include = 'mu, sigma')
	# concatenate the four Fisher vectors into one descriptor for this video
	fv = np.concatenate((fv_traj, fv_hog, fv_hof, fv_mbh))
	print fv.shape
	image_fvs.append(fv)

# make one matrix with all FVs (one row per appended vector)
image_fvs = np.vstack(image_fvs)
# normalizations are done on all descriptors at once
# power-normalization: signed square root of every component
image_fvs = np.sign(image_fvs) * np.abs(image_fvs) ** 0.5
# L2 normalize: per-row Euclidean norms (the division by `norms` is not part
# of this excerpt)
norms = np.sqrt(np.sum(image_fvs ** 2, 1))
コード例 #38
0
def getCrossVal(folds_num, folds_descriptors, start, nfeatures, code_size,
                kernel, C, output_layer, n_comps, reduction, decision,
                sampling_step, sampling_type):
    accuracies = []

    for fold_i in range(folds_num):  # 5 folds
        # Transform everything to numpy arrays
        Train_descriptors = []
        train_labels = []

        # select training images
        for j in range(folds_num):
            if fold_i != j:
                Train_descriptors.extend(folds_descriptors[j]['descriptors'])
                train_labels.extend(
                    folds_descriptors[j]['label_per_descriptor'])

        Train_descriptors = np.asarray(Train_descriptors)

        # Transform everything to numpy arrays
        size_descriptors = Train_descriptors[0][0].shape[-1]
        # for D we only need the first level of the pyramid (because it already contains all points)
        D = np.zeros(
            (np.sum([len(p[0]) for p in Train_descriptors]), size_descriptors),
            dtype=np.uint8)
        startingpoint = 0
        for i in range(len(Train_descriptors)):
            D[startingpoint:startingpoint +
              len(Train_descriptors[i][0])] = Train_descriptors[i][0]
            startingpoint += len(Train_descriptors[i][0])
        if reduction == 'pca':
            D, pca_reducer = PCA_reduce(D, n_comps)

        if decision == 'bow':
            k = code_size
            # Compute Codebook
            gmm = compute_codebook(D, k, nfeatures, fold_i, output_layer,
                                   D.shape[1], sampling_step, sampling_type)

            init = time.time()
            samples = np.zeros(
                (len(Train_descriptors),
                 k * D.shape[1] * 2 * Train_descriptors.shape[1]),
                dtype=np.float32)  #TODO: change 128
            for i in xrange(len(Train_descriptors)):
                for j in range(Train_descriptors.shape[1]):  #number of levels
                    if reduction == 'pca':
                        des = pca_reducer.transform(
                            Train_descriptors[i][j])  # for pyramid level j
                    else:
                        des = Train_descriptors[i][j]  # for pyramid level j
                    samples[i,
                            j * k * D.shape[1] * 2:(j + 1) * k * D.shape[1] *
                            2] = ynumpy.fisher(gmm,
                                               np.float32(des),
                                               include=['mu', 'sigma'])

            end = time.time()
            print 'Done in ' + str(end - init) + ' secs.'
        elif decision == 'svm':
            samples = D
        else:
            print 'wrong decision type use: bow or svm'
            quit()

        # Train a linear SVM classifier
        stdSlr = StandardScaler().fit(samples)
        D_scaled = stdSlr.transform(samples)

        print 'Training the SVM classifier...'
        clf = svm.SVC(kernel=kernel, C=C).fit(D_scaled, train_labels)
        print 'Done!'

        # get all the test data and predict their labels
        test_images_desc = folds_descriptors[fold_i]['descriptors']
        #print folds_descriptors[fold_i]['descriptors'][0].shape
        test_labels = folds_descriptors[fold_i]['label_per_descriptor']

        test_images_desc = np.asarray(test_images_desc)
        #test_images_desc = test_images_desc.squeeze()
        print test_images_desc.shape

        # Apply BoW
        if decision == 'bow':
            fisher_test = np.zeros(
                (len(test_images_desc),
                 k * D.shape[1] * 2 * test_images_desc.shape[1]),
                dtype=np.float32)
            for i in range(len(test_images_desc)):
                for j in range(test_images_desc.shape[1]):  #number of levels
                    des = test_images_desc[i][
                        j]  # now only working with 1 PYRAMID LEVEL [0]
                    if reduction == 'pca':
                        des = pca_reducer.transform(des)
                    fisher_test[i, j * k * D.shape[1] * 2:(j + 1) * k *
                                D.shape[1] * 2] = ynumpy.fisher(
                                    gmm,
                                    np.float32(des),
                                    include=['mu', 'sigma'])
            test_images_desc = fisher_test
        else:
            test_images_desc = test_images_desc.squeeze()
            if reduction == 'pca':
                test_images_desc = pca_reducer.transform(test_images_desc)

        test_images_desc = stdSlr.transform(test_images_desc)
        accuracy = 100 * clf.score(test_images_desc, test_labels)

        print 'Fold ' + str(fold_i) + ' accuracy: ' + str(accuracy)

        accuracies.append(accuracy)

    return np.asarray(accuracies)
コード例 #39
0
ファイル: session4.py プロジェクト: vcampmany/M3_ImageClassi
def main(nfeatures=100,
         code_size=512,
         n_components=60,
         kernel='linear',
         C=1,
         reduction=None,
         output_layer='fc2',
         decision='svm',
         sampling_step=4,
         sampling_type='default'):
    start = time.time()

    # read the train and test files
    train_images_filenames, test_images_filenames, train_labels, test_labels = get_dataset(
    )

    # create the CNN detector object
    cnn_model = cnn_features(output_layer)

    # extract SIFT keypoints and descriptors
    # store descriptors in a python list of numpy arrays

    Train_descriptors, Train_label_per_descriptor = getDescriptors(
        cnn_model, train_images_filenames, train_labels, decision,
        sampling_step, sampling_type)

    Train_descriptors = np.asarray(Train_descriptors)

    # Transform everything to numpy arrays
    size_descriptors = Train_descriptors[0][0].shape[-1]
    # for D we only need the first level of the pyramid (because it already contains all points)
    D = np.zeros(
        (np.sum([len(p[0]) for p in Train_descriptors]), size_descriptors),
        dtype=np.uint8)
    startingpoint = 0
    for i in range(len(Train_descriptors)):
        D[startingpoint:startingpoint +
          len(Train_descriptors[i][0])] = Train_descriptors[i][0]
        startingpoint += len(Train_descriptors[i][0])
    if reduction == 'pca':
        D, pca_reducer = PCA_reduce(D, n_components)

    if decision == 'bow':
        k = code_size
        # Compute Codebook
        gmm = compute_codebook(D, k, nfeatures, None, output_layer, D.shape[1],
                               sampling_step, sampling_type)

        init = time.time()
        samples = np.zeros((len(Train_descriptors),
                            k * D.shape[1] * 2 * Train_descriptors.shape[1]),
                           dtype=np.float32)  #TODO: change 128
        for i in xrange(len(Train_descriptors)):
            for j in range(Train_descriptors.shape[1]):  #number of levels
                if reduction == 'pca':
                    des = pca_reducer.transform(
                        Train_descriptors[i][j])  # for pyramid level j
                else:
                    des = Train_descriptors[i][j]  # for pyramid level j
                samples[i, j * k * D.shape[1] * 2:(j + 1) * k * D.shape[1] *
                        2] = ynumpy.fisher(gmm,
                                           np.float32(des),
                                           include=['mu', 'sigma'])

        end = time.time()
        print 'Done in ' + str(end - init) + ' secs.'
    else:
        samples = D

    # Train a linear SVM classifier
    stdSlr = StandardScaler().fit(samples)
    D_scaled = stdSlr.transform(samples)

    print 'Training the SVM classifier...'
    clf = svm.SVC(kernel=kernel, C=C).fit(D_scaled, train_labels)
    print 'Done!'

    # Apply BoW
    if decision == 'bow':

        test_descriptors, test_label_per_descriptor = getDescriptors(
            cnn_model, test_images_filenames, test_labels, decision,
            sampling_step, sampling_type)
        test_descriptors = np.asarray(test_descriptors)

        fisher_test = np.zeros(
            (len(test_descriptors),
             k * D.shape[1] * 2 * Train_descriptors.shape[1]),
            dtype=np.float32)

        for i in range(len(test_descriptors)):
            for j in range(test_descriptors.shape[1]):  #number of levels
                des = test_descriptors[i][
                    j]  # now only working with 1 PYRAMID LEVEL [0]
                if reduction == 'pca':
                    des = pca_reducer.transform(des)
                fisher_test[i,
                            j * k * D.shape[1] * 2:(j + 1) * k * D.shape[1] *
                            2] = ynumpy.fisher(gmm,
                                               np.float32(des),
                                               include=['mu', 'sigma'])
        test_images_desc = fisher_test
    else:
        test_descriptors, test_label_per_descriptor = getDescriptors(
            cnn_model, test_images_filenames, test_labels, decision,
            sampling_step, sampling_type)
        test_descriptors = np.asarray(test_descriptors)

        test_images_desc = test_descriptors.squeeze()
        if reduction == 'pca':
            test_images_desc = pca_reducer.transform(test_images_desc)

    test_images_desc = stdSlr.transform(test_images_desc)
    accuracy = 100 * clf.score(test_images_desc, test_labels)

    print 'Final accuracy: ' + str(accuracy)

    end = time.time()
    print 'Done in ' + str(end - start) + ' secs.'