Example no. 1
def gmm_model(sample, k_gmm, PCA=False):
    """
    Returns a tuple: (gmm,mean,pca_transform)
    gmm is the ynumpy gmm model for the sample data.
    pca_transform is None if PCA is False.
    Reduces the dimensions of the sample (by 50%) if PCA is True.
    """

    print "Building GMM model"
    # until now sample was in uint8. Convert to float32
    sample = sample.astype('float32')
    # compute mean and covariance matrix for the PCA
    mean = sample.mean(axis = 0)  # per-dimension mean (averaged over the rows)
    sample = sample - mean
    pca_transform = None
    if PCA:
        cov = np.dot(sample.T, sample)

        # keep half of the original components, i.e. sample.shape[1] / 2
        # compute the PCA matrix and keep only half of the dimensions
        orig_comps = sample.shape[1]
        pca_dim = orig_comps // 2
        #eigvecs are normalized.
        eigvals, eigvecs = np.linalg.eig(cov)
        perm = eigvals.argsort() # sort by increasing eigenvalue 
        pca_transform = eigvecs[:, perm[orig_comps-pca_dim:orig_comps]]   # eigenvectors for the pca_dim largest eigenvalues
        # transform sample with PCA (note that numpy imposes line-vectors,
        # so we right-multiply the vectors)
        sample = np.dot(sample, pca_transform)
    # train GMM
    gmm = ynumpy.gmm_learn(sample, k_gmm)
    return gmm, mean, pca_transform
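A minimal usage sketch for gmm_model (an illustration added here, not part of the original source; the descriptor array, its dimensionality and the k_gmm value are made up):

import numpy as np

# stand-in for real local descriptors (e.g. HOG rows); normally loaded from data
descriptors = (np.random.rand(10000, 96) * 255).astype('uint8')

gmm, mean, pca_transform = gmm_model(descriptors, k_gmm=64, PCA=True)

# new descriptors must be preprocessed exactly like the training sample
new_desc = np.random.rand(500, 96).astype('float32')
new_desc = new_desc - mean
if pca_transform is not None:
    new_desc = np.dot(new_desc, pca_transform)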
Example no. 2
def save_model(sample, type_feature, RESULT_PATH, TYPE):
	'''Save GMM models, mean, pca_transform of sample data

	Parameters
	----------
	sample: np.ndarray type,
		Sample data 
	type_feature: {'traj', 'hog', 'hof', 'mbh'}
		One of four types of feature
	RESULT_PATH: string
		Path of result files
	TYPE: {'video', 'sensor'}
		Type of data

	Returns
	-------
	gmm: gmm model
	mean: mean value of data
	pca_transform: pca transform of data
	'''
	# GMM part
	k = 25
	if type_feature == 'traj':
		DIM_AFTER_PCA = 0.5 * 30
	elif type_feature == 'hog':
		DIM_AFTER_PCA = 0.5 * 96
	elif type_feature == 'hof':
		DIM_AFTER_PCA = 0.5 * 108
	elif type_feature == 'mbh':
		DIM_AFTER_PCA = 0.5 * 192

	# compute mean and covariance matrix for the PCA
	mean = sample.mean(axis = 0)
	sample = sample - mean
	cov = np.dot(sample.T, sample)
	# compute PCA matrix and keep only DIM_AFTER_PCA dimensions
	eigvals, eigvecs = np.linalg.eig(cov)
	# sort by increasing eigenvalue
	perm = eigvals.argsort()
	# eigenvectors for the DIM_AFTER_PCA largest eigenvalues
	pca_transform = eigvecs[:, perm[-int(DIM_AFTER_PCA):]]   # int(): DIM_AFTER_PCA is computed as a float above
	# transform sample with PCA (note that numpy imposes line-vectors,
	# so we right-multiply the vectors)
	sample = np.dot(sample, pca_transform)
	# train GMM
	gmm = ynumpy.gmm_learn(sample, k)
	# save mean value to file
	file_mean = open(RESULT_PATH + 'mean_' + type_feature + '_' + TYPE + '.pkl', 'wb')
	pickle.dump(mean, file_mean)
	file_mean.close()
	# save pca transform to file
	file_pca = open(RESULT_PATH + 'pca_' + type_feature + '_' + TYPE + '.pkl', 'wb')
	pickle.dump(pca_transform, file_pca)
	file_pca.close()
	# save GMM model to file
	file_gmm = open(RESULT_PATH + 'gmm_' + type_feature + '_' + TYPE + '.pkl', 'wb')
	pickle.dump(gmm, file_gmm)
	file_gmm.close()

	return gmm, mean, pca_transform
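A hypothetical follow-up (not in the original module) showing how the pickled artifacts could be reloaded and applied to new descriptors before Fisher-vector encoding; the path and the 'hog'/'video' combination are assumptions:

import pickle
import numpy as np

RESULT_PATH = './'   # assumed: the same path that was passed to save_model

# reload what save_model(sample, 'hog', RESULT_PATH, 'video') wrote out
with open(RESULT_PATH + 'mean_hog_video.pkl', 'rb') as f:
    mean = pickle.load(f)
with open(RESULT_PATH + 'pca_hog_video.pkl', 'rb') as f:
    pca_transform = pickle.load(f)
with open(RESULT_PATH + 'gmm_hog_video.pkl', 'rb') as f:
    gmm = pickle.load(f)

# project new descriptors with the stored mean and PCA, as done at training time
new_sample = np.random.rand(1000, 96).astype('float32')   # stand-in for real HOG rows
new_sample = np.dot(new_sample - mean, pca_transform)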
Example no. 3
def train_fv_gmms(tracklets_path, videonames, traintest_parts, feat_types, intermediates_path, pca_reduction=False, nt=4, verbose=False):
    try:
        makedirs(intermediates_path)
    except OSError:
        pass

    for k, part in enumerate(traintest_parts):
        train_inds = np.where(np.array(part) <= 0)[0]  # train codebook for each possible training partition
        num_samples_per_vid = int(INTERNAL_PARAMETERS['n_samples'] / float(len(train_inds)))

        # process the videos
        for i, feat_t in enumerate(feat_types):
            D = None

            # Train GMMs
            output_filepath = join(intermediates_path, 'gmm' + ('_pca-' if pca_reduction else '-') + feat_t + '-' + str(k) + '.pkl')
            if isfile(output_filepath):
                if verbose:
                    print('[train_fv_gmms] %s -> OK' % output_filepath)
                continue

            start_time = time.time()

            D = load_tracklets_sample(tracklets_path, videonames, train_inds, feat_t, num_samples_per_vid, verbose=verbose)

            # (special case) trajectory features are originally positions
            if feat_t == 'trj':
                D = convert_positions_to_displacements(D)

            if feat_t == 'mbh':
                Dx = preprocessing.normalize(D[:, :D.shape[1] // 2], norm='l1', axis=1)
                Dy = preprocessing.normalize(D[:, D.shape[1] // 2:], norm='l1', axis=1)
                D = np.hstack((Dx,Dy))
            else:
                D = preprocessing.normalize(D, norm='l1', axis=1)

            if feat_t != 'trj':
                D = rootSIFT(D)

            # compute PCA map and reduce dimensionality
            if pca_reduction:
                pca = PCA(n_components=int(INTERNAL_PARAMETERS['reduction_factor']*D.shape[1]), copy=False)
                D = pca.fit_transform(D)

            # train GMMs for later FV computation
            D = np.ascontiguousarray(D, dtype=np.float32)
            gmm = ynumpy.gmm_learn(D, INTERNAL_PARAMETERS['fv_gmm_k'], nt=nt, niter=500, redo=1, verbose=verbose)

            with open(output_filepath, 'wb') as f:
                cPickle.dump(dict(pca=(pca if pca_reduction else None), gmm=gmm), f)
            # with open(join(intermediates_path, 'gmm-sample' + ('_pca-' if pca_reduction else '-') + feat_t + '-' + str(k) + '.pkl'), 'wb') as f:
            #     cPickle.dump(D,f)

            elapsed_time = time.time() - start_time
            if verbose:
                print('[train_fv_gmms] %s -> DONE (in %.2f secs)' % (feat_t, elapsed_time))
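rootSIFT and convert_positions_to_displacements are helper functions not included in this excerpt. A minimal sketch of what the rootSIFT step typically does, assuming the caller has already L1-normalized the rows as above (an assumption about the helper, not the original code):

import numpy as np

def rootSIFT(X):
    # signed square root of the (already L1-normalized) descriptor rows,
    # i.e. the usual rootSIFT power-normalization
    return np.sign(X) * np.sqrt(np.abs(X))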
Example no. 4
    print "vectors = "
    print v
    print "meta info = "
    print meta


print "kmeans:"

centroids = ynumpy.kmeans(v, 3)

print "result centroids ="
print centroids[:10,:]

print "gmm:"

gmm = ynumpy.gmm_learn(v, 3)

(w, mu, sigma) = gmm

print "mu = "
print mu

print "sigma = "
print sigma


muc = numpy.vstack((mu[0, :],
                    mu[0, :]))
#                    mu[1, :],
#                    mu[1, :],
Example no. 5
# compute mean and covariance matrix for the PCA
mean = sample.mean(axis = 0)
sample = sample - mean
cov = np.dot(sample.T, sample)

# compute PCA matrix and keep only 64 dimensions
eigvals, eigvecs = np.linalg.eig(cov)
perm = eigvals.argsort()                   # sort by increasing eigenvalue
pca_transform = eigvecs[:, perm[64:128]]   # eigenvectors for the 64 last eigenvalues

# transform sample with PCA (note that numpy imposes line-vectors,
# so we right-multiply the vectors)
sample = np.dot(sample, pca_transform)

# train GMM
gmm = ynumpy.gmm_learn(sample, k)

image_fvs = []
for image_desc in image_descs:
   # apply the PCA to the image descriptor
   image_desc = np.dot(image_desc - mean, pca_transform)
   # compute the Fisher vector, using the derivative w.r.t mu and sigma
   fv = ynumpy.fisher(gmm, image_desc, include = 'mu, sigma')
   image_fvs.append(fv)

# make one matrix with all FVs
image_fvs = np.vstack(image_fvs)

# normalizations are done on all descriptors at once

# power-normalization
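The excerpt is cut off at the power-normalization comment. A typical continuation, operating on the image_fvs matrix built above (standard Fisher-vector post-processing, not necessarily the original code):

# signed square root (power-normalization), then L2-normalize each Fisher vector
image_fvs = np.sign(image_fvs) * np.abs(image_fvs) ** 0.5
norms = np.sqrt(np.sum(image_fvs ** 2, axis=1))
image_fvs /= norms.reshape(-1, 1)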
Example no. 6
print("Training set of %d local descriptors in %d dimensions" % (train_set.shape[0], train_set.shape[1]))


trainset_size = num_gmm_components * 1000

if trainset_size < train_set.shape[0]:
    print("Subsampling to %d points" % trainset_size)
    subset = numpy.array(
        random.sample(range(train_set.shape[0]), trainset_size))
    train_set = train_set[subset]


print("Training Gaussian mixture model with %d components" % num_gmm_components)

train_set = train_set.astype('float32')
gmm = ynumpy.gmm_learn(train_set, num_gmm_components)

print("Make the image index")

dataset = []
queries = []

if show:
    fig = plt.figure(figsize=(10, 10))
    fig.canvas.set_window_title("100 image dataset")
    plot_idx = 1

for i in image_range:
    filename = "%s/ukbench%05d.siftgeo" % (sift_directory, i)
    print("  " + filename + "\r")
    sys.stdout.flush()
Example no. 7
def train_fv_gmms(tracklets_path, videonames, traintest_parts, feat_types, intermediates_path, pca_reduction=True, nt=4):
    if not exists(intermediates_path):
        makedirs(intermediates_path)

    for k, part in enumerate(traintest_parts):
        train_inds = np.where(part <= 0)[0]  # train codebook for each possible training partition
        total = len(train_inds)
        num_samples_per_vid = int(INTERNAL_PARAMETERS['n_samples'] / float(total))

        # process the videos
        for i, feat_t in enumerate(feat_types):
            output_filepath = intermediates_path + 'gmm' + ('_pca-' if pca_reduction else '-') + feat_t + '-' + str(k) + '.pkl'
            if isfile(output_filepath):
                print('%s -> OK' % output_filepath)
                continue

            start_time = time.time()

            D = None  # feat_t's sampled tracklets
            ptr = 0
            for j in range(0, total):
                idx = train_inds[j]

                filepath = tracklets_path + feat_t + '/' + videonames[idx] + '.pkl'
                if not isfile(filepath):
                    sys.stderr.write('# ERROR: missing training instance'
                                     ' {}\n'.format(filepath))
                    sys.stderr.flush()
                    quit()

                with open(filepath, 'rb') as f:
                    d = cPickle.load(f)

                # init sample
                if D is None:
                    D = np.zeros((INTERNAL_PARAMETERS['n_samples'], d.shape[1]), dtype=np.float32)
                # create a random permutation for sampling some tracklets in this vids
                randp = np.random.permutation(d.shape[0])
                if d.shape[0] > num_samples_per_vid:
                    randp = randp[:num_samples_per_vid]
                D[ptr:ptr+len(randp),:] = d[randp,:]
                ptr += len(randp)
            D = D[:ptr,:]  # cut out extra reserved space


            # (special case) trajectory features are originally positions
            if feat_t == 'trj':
                D = convert_positions_to_displacements(D)

            # scale (rootSIFT)
            D = rootSIFT(preprocessing.normalize(D, norm='l1', axis=1))

            # compute PCA map and reduce dimensionality
            if pca_reduction:
                pca = PCA(n_components=int(INTERNAL_PARAMETERS['reduction_factor']*D.shape[1]), copy=False)
                D = pca.fit_transform(D)

            # train GMMs for later FV computation
            D = np.ascontiguousarray(D, dtype=np.float32)
            gmm = ynumpy.gmm_learn(D, INTERNAL_PARAMETERS['fv_gmm_k'], nt=nt, niter=100, redo=3)

            with open(output_filepath, 'wb') as f:
                cPickle.dump(dict(pca=(pca if pca_reduction else None), gmm=gmm), f)

            elapsed_time = time.time() - start_time
            print('%s -> DONE (in %.2f secs)' % (feat_t, elapsed_time))
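convert_positions_to_displacements is likewise not shown here. A plausible sketch, under the assumption that each row stores a trajectory as interleaved (x, y) positions, so a displacement is the difference between consecutive points:

import numpy as np

def convert_positions_to_displacements(P):
    # assumed layout: each row is (x1, y1, x2, y2, ..., xL, yL)
    X, Y = P[:, 0::2], P[:, 1::2]
    dX = X[:, 1:] - X[:, :-1]
    dY = Y[:, 1:] - Y[:, :-1]
    return np.hstack((dX, dY))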
Example no. 8
dat = np.load('test/py/test_dat.npy')

cluster_num = len(set(label))
print cluster_num

cluster_w = np.array([(i + 2) % 5 != 0 for i in xrange(cluster_num)])

obs = np.vstack([np.vstack([dat[label == c]] * cluster_w[c]) for c in xrange(cluster_num) if cluster_w[c] != 0])

obs_w = dat.copy()
weight = cluster_w[label]

print obs_w.shape
print weight.shape

ret_original = ynumpy.gmm_learn(obs.astype(np.float32), cluster_num - 1)
ret_modified = ynumpy.gmm_learn_sw(obs_w.astype(np.float32), weight.astype(np.float32), cluster_num - 1)

print ''
print '======= result ======'
print ''

print ret_original
print ret_modified

orig_w, orig_mu, orig_sigma = ret_original
modi_w, modi_mu, modi_sigma = ret_modified

orig_i = np.argsort(orig_w)
modi_i = np.argsort(modi_w)
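The excerpt stops after sorting the mixture weights; a natural continuation (an illustration, not the original test script) would align the components of the two models by weight and compare their parameters:

# align components by increasing weight and compare the two trainings
print(orig_w[orig_i])
print(modi_w[modi_i])
print(np.abs(orig_mu[orig_i] - modi_mu[modi_i]).max())
print(np.abs(orig_sigma[orig_i] - modi_sigma[modi_i]).max())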