def train(self, featurefiles, k=100, subsampling=10):
    """Train a visual vocabulary from SIFT features.

    Reads the descriptors of every file in *featurefiles*, clusters them
    with k-means into *k* visual words, and computes per-word idf weights
    over the training images.

    Args:
        featurefiles: list of feature-file paths readable by
            sift.read_features_from_file.
        k: number of visual words (k-means cluster centers).
        subsampling: keep every subsampling-th descriptor for the
            k-means step to speed up training.

    Side effects: sets self.voc, self.nbr_words, self.idf and
    self.trainingdata.
    """
    nbr_images = len(featurefiles)
    # read the descriptors of every image (index [1] is the descriptor
    # array; index [0] would be the keypoint locations)
    descr = [sift.read_features_from_file(ff)[1] for ff in featurefiles]
    # stack all features for k-means in a single call: the original
    # pairwise vstack inside the loop re-copied the growing array each
    # iteration, i.e. accidental O(n^2) in copied data
    print("start vstack descriptors")
    descriptors = vstack(descr)
    # k-means: last argument is the number of runs
    print("start kmeans")
    self.voc, distortion = kmeans(descriptors[::subsampling, :], k, 1)
    self.nbr_words = self.voc.shape[0]
    # project every training image onto the vocabulary to get its
    # word-frequency histogram
    imwords = zeros((nbr_images, self.nbr_words))
    for i in range(nbr_images):
        imwords[i] = self.project(array(descr[i]))
    # number of images in which each word occurs at least once
    nbr_occurences = sum((imwords > 0) * 1, axis=0)
    # idf weighting, see e.g.
    # http://zhangjunhd.github.io/2014/09/30/text-clustering.html
    # NOTE(review): only the idf part is computed here; the original
    # author flagged this formula as possibly problematic — verify
    self.idf = log((1.0 * nbr_images) / (1.0 * nbr_occurences + 1))
    self.trainingdata = featurefiles
def get_database(self):
    """Build the bag-of-features index database.

    Loads the trained vocabulary, projects the features of up to the
    first 1000 images onto it, stores the result in 'index_bof.db' and
    prints two sanity-check queries.
    """
    # load vocabulary
    with open('vocabulary_bof.pkl', 'rb') as f:
        voc = pickle.load(f)
    # set up the indexer backed by the SQLite database
    index = imagesearch.Indexer('index_bof.db', voc)
    index.create_tables()
    # project features on vocabulary; cap at the first 1000 images
    # (min(...) replaces the original range(...)[:1000] slice)
    # NOTE(review): images_num / feature_list / image_list are read from
    # enclosing/module scope — confirm they are defined by the caller
    for i in range(min(images_num, 1000)):
        locs, descr = sift.read_features_from_file(feature_list[i])
        index.add_to_index(image_list[i], descr)
    # commit to database
    index.db_commit()
    # sanity check: the original leaked this connection; close it
    con = sqlite.connect('index_bof.db')
    try:
        print(con.execute('select count (filename) from imlist').fetchone())
        print(con.execute('select * from imlist').fetchone())
    finally:
        con.close()
def bof_image_retrieval(self):
    """Query the BoF index for the query image and show the results.

    Prints and plots the plain bag-of-features ranking; when
    self.bof_rearrange is set, re-ranks the top results by the number
    of RANSAC homography inliers against the query features.
    """
    # load vocabulary-backed searcher and query feature
    src = self.image_searcher()
    q_descr, fp = self.load_query_feature()
    # RANSAC model for homography fitting
    model = homography.RansacModel()
    rank = {}
    # query ONCE and reuse: the original issued the identical database
    # query twice (once for scores, once for indices)
    # NOTE(review): q_ind / nbr_results come from enclosing scope — confirm
    results = src.query(self.imlist[q_ind])[:nbr_results]
    match_scores = [w[0] for w in results]
    res_reg = [w[1] for w in results]
    print('top matches:', res_reg)
    self.plot_results(res_reg[:6], match_scores[:6])
    if self.bof_rearrange:
        # load image features for each result and count homography inliers
        for ndx in res_reg[1:]:
            locs, descr = sift.read_features_from_file(self.featlist[ndx])
            # get matches between query and candidate descriptors
            matches = sift.match(q_descr, descr)
            ind = matches.nonzero()[0]
            ind2 = matches[ind]
            locs = np.array(locs)
            tp = homography.make_homog(locs[:, :2].T)
            # compute homography, count inliers; a failed fit counts as 0
            # ('match_theshold' (sic) is the keyword PCV actually defines)
            try:
                H, inliers = homography.H_from_ransac(fp[:, ind], tp[:, ind2],
                                                     model, match_theshold=4)
            except Exception:
                inliers = []
            # store inlier count
            rank[ndx] = len(inliers)
        # sort dictionary to get the most inliers first; the top raw
        # result stays first
        sorted_rank = sorted(rank.items(), key=lambda t: t[1], reverse=True)
        res_geom = [res_reg[0]] + [s[0] for s in sorted_rank]
        # show the geometrically re-ranked results
        self.plot_results(res_geom[:6], match_scores[:6])
import pickle
from numpy import *
from imagesearch import imagesearch
from localdescriptors import sift
from sqlite3 import dbapi2 as sqlite
from tools.imtools import get_imlist

# build the lists of image files and their precomputed .sift feature files
imlist = get_imlist('./first500/')
nbr_images = len(imlist)
featlist = [imlist[i][:-3] + 'sift' for i in range(nbr_images)]

# load the trained vocabulary; the context manager guarantees the file
# is closed (the original used open/close by hand)
with open('./vocabulary.pkl', 'rb') as f:
    voc = pickle.load(f)

# project the first image's descriptors onto the vocabulary
src = imagesearch.Searcher('web.db', voc)
locs, descr = sift.read_features_from_file(featlist[0])
iw = voc.project(array(descr))

# the original used Python 2 print statements, which are syntax errors
# under Python 3 and inconsistent with the rest of this file
print('ask using a histogram...')
print(src.candidates_from_histogram(iw)[:10])

# full query: rank images by similarity to imlist[12] and plot the top hits
src = imagesearch.Searcher('web.db', voc)
print('try a query...')
nbr_results = 12
res = [w[1] for w in src.query(imlist[12])[:nbr_results]]
imagesearch.plot_results(src, res)
def load_query_feature(self):
    """Load the query image's SIFT features.

    Returns:
        tuple: (descriptors, fp) — the query descriptors and its
        feature points as homogeneous coordinates.
    """
    # NOTE(review): q_ind is read from enclosing/module scope — confirm
    locations, descriptors = sift.read_features_from_file(self.featlist[q_ind])
    # keep only the (x, y) columns and lift them to homogeneous coordinates
    xy = np.array(locations)[:, :2]
    fp = homography.make_homog(xy.T)
    return descriptors, fp
import pickle
from numpy import *
from imagesearch import imagesearch
from localdescriptors import sift
from sqlite3 import dbapi2 as sqlite
from tools.imtools import get_imlist

# NOTE(review): this script duplicates the earlier query script in this
# file — consider deleting one copy.

# image files and their precomputed .sift feature files
imlist = get_imlist('./first500/')
nbr_images = len(imlist)
featlist = [imlist[i][:-3] + 'sift' for i in range(nbr_images)]

# load the trained vocabulary; 'with' guarantees the file is closed
# (the original opened and closed the handle by hand)
with open('./vocabulary.pkl', 'rb') as f:
    voc = pickle.load(f)

# project the first image's descriptors onto the vocabulary
src = imagesearch.Searcher('web.db', voc)
locs, descr = sift.read_features_from_file(featlist[0])
iw = voc.project(array(descr))

# the original used Python 2 print statements, which are syntax errors
# under Python 3 and inconsistent with the rest of this file
print('ask using a histogram...')
print(src.candidates_from_histogram(iw)[:10])

# full query: rank images by similarity to imlist[12] and plot the top hits
src = imagesearch.Searcher('web.db', voc)
print('try a query...')
nbr_results = 12
res = [w[1] for w in src.query(imlist[12])[:nbr_results]]
imagesearch.plot_results(src, res)