def test_kmeans_ANN():
    """Exercise ANN k-means on tight, synthetic clusters.

    Random integer-valued centers are drawn, every sample is a center plus a
    small uniform offset, and the test checks the dtype/shape of the centers
    returned by ``kmeans``, their distance to the true centers, and that the
    quantized assignments follow the round-robin layout of the data.
    """
    num_data = 5000
    num_centers = 4
    dimension = 8
    noise_level = 0.1

    centers = np.random.randint(
        -40, 40, (num_centers, dimension)).astype(np.float32)

    # Sample ``row`` belongs to center ``row % num_centers``.
    data = np.empty((num_data, dimension), dtype=np.float32)
    for row in range(num_data):
        offset = np.random.random_sample(dimension) * noise_level
        data[row] = centers[row % num_centers] + offset

    found_centers = kmeans(data, num_centers,
                           initialization="PLUSPLUS",
                           algorithm="ANN")
    found_assignments = kmeans_quantize(data, found_centers,
                                        algorithm="ANN")

    assert found_centers.dtype == np.float32
    assert found_centers.shape == (num_centers, dimension)

    dist = set_distance(centers, found_centers)
    assert dist <= noise_level, dist

    # The first ``num_centers`` samples come from distinct clusters, so
    # their labels must be pairwise different.
    for first in range(num_centers):
        for second in range(num_centers):
            if first == second:
                continue
            assert found_assignments[first] != found_assignments[second]

    # Every sample must share the label of the first sample of its cluster.
    for row in range(num_data):
        reference = found_assignments[row % num_centers]
        assert found_assignments[row] == reference
def build_vocabulary(image_paths, vocab_size, step):
    """Build a visual-word vocabulary from dense SIFT descriptors.

    Every image in ``image_paths`` is opened, converted to a float32 array
    and passed to ``dsift(img, step, fast=True)``.  The descriptors of all
    images are stacked and clustered with ``kmeans`` (k-means++
    initialization); the resulting cluster centers are the vocabulary.

    Notes carried over from the assignment text:
      * You do not need as many SIFT features here as when building the
        bags of SIFT; a representative sample (tens of thousands of
        descriptors) is enough, so a large step size is fine.
      * ``fast=True`` selects the approximate dsift variant, which the
        assignment describes as roughly 20 times faster to compute.
      * The assignment describes the descriptors as an N x 128 matrix.

    Args:
        image_paths: iterable of paths readable by ``Image.open``.
        vocab_size: number of clusters (visual words) requested.
        step: sampling step forwarded unchanged to ``dsift``.

    Returns:
        The cluster centers returned by ``kmeans``.
    """
    bag_of_features = []

    print("Extract SIFT features")
    for path in image_paths:
        img = np.asarray(Image.open(path), dtype='float32')
        # Keypoint locations are not needed for clustering.
        _, descriptors = dsift(img, step, fast=True)
        bag_of_features.append(descriptors)
    bag_of_features = np.concatenate(bag_of_features, axis=0).astype('float32')

    print("Compute vocab")
    start_t = time()
    vocab = kmeans(bag_of_features, vocab_size, initialization="PLUSPLUS")
    end_t = time()
    # Elapsed time is end minus start; the reverse printed a negative value.
    print("It takes ", (end_t - start_t), " to compute vocab.")

    return vocab
def test_kmeans_ANN():
    """Exercise ANN k-means on tight, synthetic clusters.

    Random integer-valued centers are drawn, every sample is a center plus a
    small uniform offset, and the test checks the dtype/shape of the centers
    returned by ``kmeans``, their distance to the true centers, and that the
    quantized assignments follow the round-robin layout of the data.
    """
    num_data = 5000
    num_centers = 4
    dimension = 8
    noise_level = 0.1

    # ``np.random.random_integers`` is deprecated; ``randint`` has an
    # exclusive upper bound, so 41 keeps the original inclusive [-40, 40].
    centers = np.random.randint(
        -40, 41, (num_centers, dimension)).astype(np.float32)

    # Sample ``i`` belongs to center ``i % num_centers``.
    data = np.empty((num_data, dimension), dtype=np.float32)
    for i in range(num_data):
        noise = np.random.random_sample(dimension) * noise_level
        data[i] = centers[i % num_centers] + noise

    found_centers = kmeans(data, num_centers,
                           initialization="PLUSPLUS",
                           algorithm="ANN")
    found_assignments = kmeans_quantize(data, found_centers,
                                        algorithm="ANN")

    assert found_centers.dtype == np.float32
    assert found_centers.shape == (num_centers, dimension)

    dist = set_distance(centers, found_centers)
    assert dist <= noise_level, dist

    # The first ``num_centers`` samples come from distinct clusters, so
    # their labels must be pairwise different.
    for i in range(num_centers):
        for j in range(num_centers):
            if i != j:
                assert found_assignments[i] != found_assignments[j]

    # Every sample must share the label of the first sample of its cluster.
    for i in range(num_data):
        assert found_assignments[i] == found_assignments[i % num_centers]
def trainVLADCodeBook(images, dmd_options, kmeans_options):
    """Train a VLAD codebook by clustering sampled SDMD descriptors.

    ``computeSDMD`` is run for every image in a pool of 8 worker processes.
    From each result a random subset of columns is kept, the subsets are
    joined, and up to ``num_descriptor`` of them are clustered with
    ``kmeans`` (k-means++ initialization).

    Args:
        images: sequence of inputs accepted by ``computeSDMD``.
        dmd_options: options forwarded unchanged to ``computeSDMD``.
        kmeans_options: mapping with the keys ``'num_descriptor'`` (upper
            bound on descriptors used for clustering) and ``'num_cluster'``
            (number of centers).

    Returns:
        ``{'centers': centers}`` where ``centers`` is the float32 result of
        ``kmeans``.
    """
    max_descriptor = kmeans_options['num_descriptor']
    num_cluster = kmeans_options['num_cluster']
    num_images = len(images)
    num_descriptor_per_image = int(np.ceil(max_descriptor / num_images))

    # First compute the SDMD features of the images, then cluster them.
    # The context manager guarantees the workers are torn down even if a
    # submission fails; results are already stored once join() returns.
    with Pool(processes=8) as pool:
        mul_features = [
            pool.apply_async(computeSDMD, args=(img, dmd_options, 0))
            for img in images
        ]
        pool.close()
        pool.join()

    print('进入HVLAD')
    sampled = []
    for pending in mul_features:
        features = pending.get()
        # Keep a random subset of this image's descriptor columns.
        sel = np.random.permutation(
            features.shape[1])[:num_descriptor_per_image]
        sampled.append(features[:, sel])
    # One concatenation instead of re-copying the buffer per image.
    descrs = np.concatenate(sampled, axis=1)

    # One descriptor per row for kmeans.  Slicing (rather than copying into
    # a pre-sized zero buffer) also works when the images yielded fewer than
    # ``max_descriptor`` descriptors, which used to raise a broadcast error.
    new_descrs = np.ascontiguousarray(descrs.T[:max_descriptor, :],
                                      dtype=np.float32)

    centers = kmeans(new_descrs,
                     num_centers=num_cluster,
                     verbose=True,
                     initialization='PLUSPLUS',
                     min_energy_variation=0.000001).astype(np.float32)
    return {'centers': centers}
def matlab_train_one_vs_rest_SVM(path_boxes_np, CAE_model_path, K, args):
    """Cluster extracted features and train one-vs-rest SVMs in MATLAB.

    Steps:
      1. Extract features with ``extract_features``.
      2. Cluster them into ``K`` groups with ``kmeans`` and assign every
         sample to a cluster with ``kmeans_quantize``.
      3. Build a ``K x N`` target matrix holding +1 where sample ``n``
         belongs to cluster ``k`` and -1 elsewhere.
      4. Save features and targets to ``../matlab_files`` and call the
         MATLAB function ``SVM_train``.
      5. Prefix the resulting ``.mat`` files with ``args.dataset``.

    Args:
        path_boxes_np: forwarded to ``extract_features``.
        CAE_model_path: forwarded to ``extract_features``.
        K: number of clusters / one-vs-rest classifiers.
        args: object providing ``dataset`` (used as the file-name prefix);
            also forwarded to ``extract_features``.
    """
    data = extract_features(path_boxes_np, CAE_model_path, args)
    centers = kmeans(data,
                     num_centers=K,
                     initialization='PLUSPLUS',
                     num_repetitions=10,
                     max_num_comparisons=100,
                     max_num_iterations=100,
                     algorithm='LLOYD',
                     num_trees=3)
    # ``np.int`` no longer exists in current NumPy; the builtin is equivalent.
    labels = np.array(kmeans_quantize(data, centers), dtype=int)

    # Row k is the +1/-1 target vector for "cluster k vs. rest".  Built from
    # the untouched labels: the earlier in-place loop aliased ``labels`` and
    # overwrote it on the first pass, so all rows ended up identical.
    cluster_ids = np.arange(K)[:, np.newaxis]
    _labels = np.where(labels[np.newaxis, :] == cluster_ids, 1, -1).astype(int)

    data = data.tolist()

    import matlab
    import matlab.engine
    import scipy.io as io

    # to save data into data.mat
    io.savemat('../matlab_files/data.mat', {'data': data})
    # to save _labels into labels.mat
    io.savemat('../matlab_files/labels.mat', {'labels': _labels})

    eng = matlab.engine.start_matlab()
    try:
        print('use matlab backend to train!')
        eng.SVM_train(nargout=0)
    finally:
        # Always shut the engine down, even if training fails.
        eng.quit()

    # rename; weights.mat / biases.mat are presumably written by SVM_train
    # (TODO confirm against the MATLAB script).
    for stem in ('data', 'labels', 'weights', 'biases'):
        os.rename('../matlab_files/{}.mat'.format(stem),
                  '../matlab_files/{}_{}.mat'.format(args.dataset, stem))
def cal_cluster(cluster_num=500):
    """Calculate the BoVW clustering centres.

    Reads ``train_list.csv``, computes descriptors for every row in parallel
    with ``cal_descriptors`` and clusters all of them with ``kmeans``
    (k-means++ initialization).

    Args:
        cluster_num: number of cluster centres to compute.

    Returns:
        The cluster centres returned by ``kmeans``.
    """
    pandarallel.initialize(nb_workers=50, use_memory_fs=False)
    train_list = pd.read_csv('train_list.csv')

    features = train_list.parallel_apply(cal_descriptors, axis=1)
    bag_of_features = []
    for per_row in features:
        bag_of_features.extend(per_row)

    # Honour the requested number of clusters instead of a hard-coded 500.
    clusters = kmeans(np.array(bag_of_features).astype('float32'),
                      cluster_num,
                      initialization="PLUSPLUS")
    return clusters
def build_vocabulary(image_paths, vocab_size):
    """Cluster dense SIFT descriptors of the given images into a vocabulary.

    Args:
        image_paths: iterable of paths readable by ``Image.open``.
        vocab_size: number of cluster centers requested from ``kmeans``.

    Returns:
        The cluster centers returned by ``kmeans``.
    """
    print("SIFT features extracting")
    per_image_descriptors = []
    for image_path in image_paths:
        pixels = np.asarray(Image.open(image_path), dtype='float32')
        # Keypoint locations are not used for building the vocabulary.
        _, descriptors = dsift(pixels, step=[5, 5], fast=True)
        per_image_descriptors.append(descriptors)

    stacked = np.concatenate(per_image_descriptors, axis=0)
    stacked = stacked.astype('float32')

    print("Computing vocabulary")
    return kmeans(stacked, vocab_size, initialization="PLUSPLUS")
def build_vocabulary(image_paths, vocab_size):
    """Cluster dense SIFT descriptors of the given images into a vocabulary.

    Each path is printed as it is processed; the time spent in ``kmeans``
    is printed at the end.

    Args:
        image_paths: iterable of paths readable by ``Image.open``.
        vocab_size: number of cluster centers requested from ``kmeans``.

    Returns:
        The cluster centers returned by ``kmeans``.
    """
    bag_of_features = []

    print("Extract SIFT features")
    for path in image_paths:
        print(path)
        img = np.asarray(Image.open(path), dtype='float32')
        # Keypoint locations are not used for building the vocabulary.
        _, descriptors = dsift(img, step=[5, 5], fast=True)
        bag_of_features.append(descriptors)
    bag_of_features = np.concatenate(bag_of_features, axis=0).astype('float32')

    print("Compute vocab")
    start_time = time()
    vocab = kmeans(bag_of_features, vocab_size, initialization="PLUSPLUS")
    end_time = time()
    # Elapsed time is end minus start; the reverse printed a negative value.
    print("It takes ", (end_time - start_time), " to compute vocab.")

    return vocab
def fit(self, X, y=None):
    """Fit cluster centers with k-means on a random subset of pixels.

    The input is flattened to rows of channel values, ``samples_number``
    rows are drawn without replacement, and the centers computed by
    ``kmeans`` are stored on ``self.centers``.

    Args:
        X: 3D array with shape
            (number of samples, image width * height, number of channels).
        y: unused.

    Returns:
        self

    Raises:
        AttributeError: if fewer than ``samples_number`` rows are available
            after flattening (an equal count is accepted).
    """
    channels = X.shape[2]
    rows = X.reshape(-1, channels)

    if self.samples_number > len(rows):
        raise AttributeError(
            'Number of samples must be greater than declared in initialization'
        )

    picked = np.random.choice(rows.shape[0], self.samples_number,
                              replace=False)
    subset = rows[picked, :]
    self.centers = kmeans(subset, self.clusters_number,
                          algorithm=self.algorithm)
    return self
def sift_features(images, size):
    """Cluster dense SIFT descriptors of all images into ``size`` centers.

    Args:
        images: mapping whose values are iterables of images accepted by
            ``dsift``; the keys are not used.
        size: number of cluster centers requested from ``kmeans``.

    Returns:
        The cluster centers returned by ``kmeans``.
    """
    print("feature number", size)
    bag_of_features = []

    print("Extract SIFT features...")
    for _, group in tqdm(images.items()):
        for img in group:
            _, descriptors = dsift(img, step=[5, 5], fast=True)
            if descriptors is None:
                continue
            # Add every descriptor row of this image to the bag.
            bag_of_features.extend(descriptors)

    print("Compute kmeans in dimensions:", size)
    samples = np.array(bag_of_features).astype('float32')
    return kmeans(samples, size, initialization="PLUSPLUS")
def train_one_vs_rest_SVM(path_boxes_np, CAE_model_path, K, args):
    """Cluster extracted features and fit a one-vs-rest linear SVM on them.

    The features are clustered into ``K`` groups with ``kmeans``; the
    cluster index of every sample serves as its class label for a
    ``LinearSVC``.  The fitted classifier is dumped with ``joblib`` to
    ``svm_save_path_pre + args.dataset + '.m'``.

    Args:
        path_boxes_np: forwarded to ``extract_features``.
        CAE_model_path: forwarded to ``extract_features``.
        K: number of clusters.
        args: object providing ``dataset``; also forwarded to
            ``extract_features``.
    """
    data = extract_features(path_boxes_np, CAE_model_path, args)
    print('feature extraction finish!')

    # Cluster the features with k-means (k-means++ seeding, 10 restarts).
    clustering_options = dict(num_centers=K,
                              initialization='PLUSPLUS',
                              num_repetitions=10,
                              max_num_comparisons=300,
                              max_num_iterations=300)
    centers = kmeans(data, **clustering_options)
    labels = kmeans_quantize(data, centers)
    print('clustering finished!')

    # One-vs-rest SVM trained on the cluster assignments.
    iteration_cap = len(labels) * 5
    clf = svm.LinearSVC(C=1.0, multi_class='ovr', max_iter=iteration_cap)
    clf.fit(data, labels)

    model_path = svm_save_path_pre + args.dataset + '.m'
    joblib.dump(clf, model_path)
    print('train finished!')
def train_one_vs_rest_SVM(path_boxes_np, CAE_model_path, K, args):
    """Cluster extracted features and fit one-vs-rest hinge-loss classifiers.

    The features are clustered into ``K`` groups with ``kmeans``; the
    cluster assignments are turned into a +1/-1 indicator matrix on which a
    ``OneVsRestClassifier`` wrapping ``SGDClassifier(loss='hinge')`` is
    trained.  The fitted model is dumped with ``joblib`` to
    ``{svm_save_dir}/{args.dataset}.m``.

    Args:
        path_boxes_np: forwarded to ``extract_features``.
        CAE_model_path: forwarded to ``extract_features``.
        K: number of clusters.
        args: object providing ``dataset``; also forwarded to
            ``extract_features``.
    """
    data = extract_features(path_boxes_np, CAE_model_path, args)
    print('feature extraction finish!')

    # Cluster the features with Lloyd k-means (k-means++ seeding).
    clustering_options = dict(num_centers=K,
                              initialization='PLUSPLUS',
                              num_repetitions=10,
                              max_num_comparisons=100,
                              max_num_iterations=100,
                              algorithm='LLOYD',
                              num_trees=3)
    centers = kmeans(data, **clustering_options)
    labels = kmeans_quantize(data, centers)

    # One-hot rows per sample, rescaled from {0, 1} to {-1, +1}.
    sparse_labels = (np.eye(K)[labels] - 0.5) * 2
    print('clustering finished!')

    # SGD classifier with hinge loss inside a one-vs-rest wrapper; per the
    # original author's note this stands in for the paper's OvR SVM that
    # was trained with an SDCA optimizer.
    base_estimizer = SGDClassifier(max_iter=10000,
                                   warm_start=True,
                                   loss='hinge',
                                   early_stopping=True,
                                   n_iter_no_change=50,
                                   l1_ratio=0)
    ovr_classifer = OneVsRestClassifier(base_estimizer)
    ovr_classifer.fit(data, sparse_labels)

    svm_model_path = f'{svm_save_dir}/{args.dataset}.m'
    joblib.dump(ovr_classifer, svm_model_path)
    print('train finished!')
testlabels = numpy.array(testlabels) trainvectors = [] for i in trainfeatures: a=numpy.array(Image.fromarray(i.reshape((32,32,3),order='F'))\ .transpose(Image.TRANSPOSE).convert('L')) trainvectors.append(sift.sift(a, compute_descriptor='True')[1]) bag = [] for i in trainvectors: for j in i: bag.append(j) bag = numpy.array(bag) bag = bag.astype(numpy.float32) num_of_words = 8 words = numpy.array(kmeans.kmeans(bag, num_centers=num_of_words)) trainwords = [] testvectors = [] for i in testfeatures: a=numpy.array(Image.fromarray(i.reshape((32,32,3),order='F'))\ .transpose(Image.TRANSPOSE).convert('L')) testvectors.append(sift.sift(a, compute_descriptor='True')[1]) testwords = [] for i in trainvectors: result = [0] * num_of_words for k1 in range(0, i.shape[0]): target = 0 distance = numpy.sum(numpy.square(i[k1] - words[0])) for k2 in range(1, num_of_words):
def dictionary_comp(descriptors, k, distance_measure='l2'):
    """Compute a ``k``-word dictionary by k-means over the descriptors.

    Args:
        descriptors: array of descriptors; converted with ``astype(float)``
            before clustering.
        k: number of cluster centers.
        distance_measure: value passed as ``distance`` to ``kmeans.kmeans``.

    Returns:
        The cluster centers returned by ``kmeans.kmeans`` (k-means++
        initialization).
    """
    samples = descriptors.astype(float)
    return kmeans.kmeans(samples,
                         k,
                         distance=distance_measure,
                         initialization='PLUSPLUS')
# Get some PHOW descriptors to train the dictionary selTrainFeats = random_subset(selTrain, 30) descrs = [] for ii in range(len(selTrainFeats)): im = imageio.imread(images[selTrainFeats[ii]]) im = standarizeImage(im) descrs.append(vl_phow(im, **model.phowOpts)[1]) descrs = np.concatenate(descrs, axis=1).T descrs = random_subset(descrs.tolist(), int(40e4)) descrs = np.array(descrs).T.astype(np.float32) # Quantize the descriptors to get the visual words vocab = kmeans(descrs.T.copy(order='C'), conf.numWords, verbose=True, algorithm='ELKAN', max_num_iterations=100) # Required .copy(order='C') because of cython vocab = vocab.T save(conf.vocabPath, vocab) else: vocab = load(conf.vocabPath) model.vocab = vocab if model.quantizer == 'kdtree': from scipy.spatial import KDTree model.kdtree = KDTree(vocab.T) # --------------------------------------------------------------------
def generate_kmeans(X, _clusters=32):
    """Run verbose k-means on ``X`` and return the cluster centers.

    Args:
        X: data passed unchanged to ``kmeans``.
        _clusters: number of centers to compute.

    Returns:
        The centers returned by ``kmeans`` (at most 10000 iterations).
    """
    return kmeans(X,
                  num_centers=_clusters,
                  max_num_iterations=10000,
                  verbose=True)
def build_vocabulary(image_paths, vocab_size):
    """
    This function samples SIFT descriptors from the training images,
    clusters them with kmeans, and then returns the cluster centers.

    Useful functions:
    -   Use load_image(path) to load RGB images and load_image_gray(path) to
        load grayscale images
    -   frames, descriptors = vlfeat.sift.dsift(img)
          http://www.vlfeat.org/matlab/vl_dsift.html
          -  frames is a N x 2 matrix of locations, which can be thrown away
             here (but possibly used for extra credit in get_bags_of_sifts
             if you're making a "spatial pyramid").
          -  descriptors is a N x 128 matrix of SIFT features
        Note: there are step, bin size, and smoothing parameters you can
        manipulate for dsift(). We recommend debugging with the 'fast'
        parameter. This approximate version of SIFT is about 20 times faster
        to compute. Also, be sure not to use the default value of step size.
        It will be very slow and you'll see relatively little performance
        gain from extremely dense sampling. You are welcome to use your own
        SIFT feature code! It will probably be slower, though.
    -   cluster_centers = vlfeat.kmeans.kmeans(X, K)
          -  X is a N x d numpy array of sampled SIFT features, where N is
             the number of features sampled. N should be pretty large!
          -  K is the number of clusters desired (vocab_size)
             cluster_centers is a K x d matrix of cluster centers. This is
             your vocabulary.

    Args:
    -   image_paths: list of image paths.
    -   vocab_size: size of vocabulary

    Returns:
    -   vocab: This is a vocab_size x d numpy array (vocabulary). Each row
        is a cluster center / visual word
    """
    # Only a representative sample of descriptors is needed here, so a
    # coarser step than in get_bags_of_sifts is used.  Once tens of
    # thousands of SIFT features from many training images are collected,
    # they are clustered with kmeans; the centroids are the visual words.
    bag_of_features = []
    for path in image_paths:
        # Load as grayscale and hand dsift a float32 array.
        img = np.asarray(load_image_gray(path), dtype='float32')
        # Dense SIFT with a step of 10 px in its fast, approximate mode;
        # keypoint locations are not needed for the vocabulary.
        _, descriptors = dsift(img, step=[10, 10], fast=True)
        bag_of_features.append(descriptors)
    # Stack the per-image descriptor arrays into one float32 matrix.
    bag_of_features = np.concatenate(bag_of_features, axis=0).astype('float32')

    print("Compute vocab")
    start_time = time.time()
    vocab = kmeans(bag_of_features, vocab_size, initialization="PLUSPLUS")
    end_time = time.time()
    print("It takes ", (end_time - start_time), " to compute vocab.")

    return vocab
# Load the images; format: {'catagory': ['img_name', img]}
train_data = load_img_from_folder(train_data_path)
test_data = load_img_from_folder(test_data_path)
print("finish load image...")

# SIFT description of the training and test images.
description_bag, sift_data_dic = create_sift_discription(train_data)
_, test_sift_data_dic = create_sift_discription(test_data)
print("finish sift description...")

cluster_num = 200
bag_of_features = np.array(description_bag).astype(np.float32)

# Vocabulary: reuse the cached file when present, otherwise run k-means
# and cache the result.  ``end`` holds the start timestamp of the timing.
end = time.time()
if os.path.exists("vocab_" + str(cluster_num) + ".npy"):
    vocab = np.load("vocab_" + str(cluster_num) + ".npy")
else:
    vocab = kmeans(bag_of_features, cluster_num, initialization='PLUSPLUS')
    print("finish kmeans " + str(cluster_num) + ":", time.time()-end, "s")
    np.save("vocab_" + str(cluster_num) + ".npy", vocab)

# Training histograms: same caching scheme, keyed on the histogram file.
if os.path.exists("train_hist_" + str(cluster_num) + ".npy"):
    train_feats = np.load("train_hist_" + str(cluster_num) + ".npy")
    train_labels = np.load("train_label_" + str(cluster_num) + ".npy")
else:
    end = time.time()
    train_feats, train_labels = get_bags_of_sifts(sift_data_dic, vocab)
    np.save("train_hist_" + str(cluster_num) + ".npy",
            np.array(train_feats))
    np.save("train_label_" + str(cluster_num) + ".npy",
            np.array(train_labels))
    print("finish training hist...", time.time() - end, "s")
def build_vocabulary(image_paths, vocab_size):
    '''
    Sample dense SIFT descriptors from the training images, cluster them
    with kmeans, and return the cluster centers.

    NOTE: the assignment text this function was written for describes HOG
    features (skimage.feature.hog) clustered with sklearn.cluster.KMeans.
    This implementation instead extracts dense SIFT descriptors with
    dsift(..., step=[5, 5], fast=True) and clusters them with kmeans
    (k-means++ initialization).

    Why clustering is needed (from the assignment text): returning every
    descriptor as the vocabulary would produce an absolutely massive
    vocabulary, which would make matching inefficient AND inaccurate, so
    K-Means is used to find a much smaller (vocab_size) number of
    representative points.

    Inputs:
        image_paths: a Python list of image path strings
        vocab_size: an integer indicating the number of words desired for
                    the bag of words vocab set

    Outputs:
        The cluster centers returned by kmeans for vocab_size clusters.
    '''
    bag_of_features = []

    print("Extract SIFT features")
    for path in tqdm(image_paths, desc='build_vocabulary'):
        img = np.asarray(Image.open(path), dtype='float32')
        # Keypoint locations are not used for building the vocabulary.
        _, descriptors = dsift(img, step=[5, 5], fast=True)
        bag_of_features.append(descriptors)
    bag_of_features = np.concatenate(bag_of_features, axis=0).astype('float32')

    print("Compute vocab")
    start_time = time()
    vocab = kmeans(bag_of_features, vocab_size, initialization="PLUSPLUS")
    end_time = time()
    # Elapsed time is end minus start; the reverse printed a negative value.
    print("It takes ", (end_time - start_time), " to compute vocab.")

    return vocab
points_org.append(harrispoints) [(hogvalues,valid_points)] = calcHog(gray,points_org,patchsize,ncells,i-1) for m in range(hogvalues.shape[0]): hogvalues[m] /= sum(hogvalues[m]) hogmatrix.append(hogvalues) img[dmax]=[0,0,255] # cv2.imshow('image',img) # cv2.waitKey(0) # cv2.destroyAllWindows() hogmatrixcat = np.concatenate((hogmatrix[:]),axis=0) kmeancenters = kmeans.kmeans(hogmatrixcat,kvalue,initialization='PLUSPLUS') kmeanclusters = kmeans.kmeans_quantize(hogmatrixcat,kmeancenters) label_org = [] clustercount = 0 for i in range(len(hogmatrix)): count = 0 label2 = [] while count < hogmatrix[i].shape[0]: label2.append(kmeanclusters[clustercount]) clustercount += 1 count += 1 label_org.append(label2) hist_org=np.zeros((len(label_org),kvalue))