def bag_of_words(self, words, vocab):
    """Return a count vector describing `words` in terms of `vocab`.

    The raw text is passed, in order, through ``b.rem_special_chars``,
    ``b.convert_to_lowcase``, ``b.rem_stopwords`` and ``b.lemma`` and is
    then split into tokens with ``self.text_to_word``.

    Args:
        words: Raw input text to encode.
        vocab: Sequence of vocabulary tokens. Entries may repeat; every
            position holding a token is incremented when that token occurs.
            Entries are assumed to be hashable (e.g. ``str``) -- TODO confirm
            against callers.

    Returns:
        A float ``numpy`` array of length ``len(vocab)`` where element ``i``
        is the number of tokens in ``words`` equal to ``vocab[i]``.
    """
    words = b.rem_special_chars(words)
    words = b.convert_to_lowcase(words)
    words = b.rem_stopwords(words)
    words = b.lemma(words)
    words = self.text_to_word(words)

    # Index the vocabulary once (token -> every position it occupies) so each
    # input token is a dictionary lookup instead of a full scan of `vocab`.
    positions = {}
    for i, x in enumerate(vocab):
        positions.setdefault(x, []).append(i)

    bag = np.zeros(len(vocab))
    for w in words:
        for i in positions.get(w, ()):
            bag[i] += 1
    return bag
def read_part_sift_descriptors(rate=0.05):
    """
    Load the SIFT descriptors for a sampled fraction of the images.

    A share of the images of each class, given by ``rate``, is selected and
    the descriptors of their SIFT features are returned. The descriptors
    correspond to the selection made by
    ``read_part_sift_features_images(rate)``.

    The work is delegated to ``BOW.read_list_sift_features(rate)``.

    Example:
        part_sift_descriptors = read_part_sift_descriptors()
        print(part_sift_descriptors.shape)
    Output:
        (3236336, 128)
    """
    # Imported lazily, inside the function, exactly as the module is only
    # needed when this reader is actually called.
    import BOW as bow_module

    descriptors = bow_module.read_list_sift_features(rate)
    return descriptors
def process_df_frames_train_set(item, method='surf', histogram_norm='l1'):
    """Build (histogram, category) training samples for one group of frames.

    Args:
        item: A ``(filename, df_frames)`` pair. ``filename`` is not used;
            ``df_frames`` must expose a ``'frame_index'`` column supporting
            ``.iloc`` (presumably a pandas DataFrame -- confirm with caller).
        method: Feature extractor name, forwarded to
            ``FeatureExtraction.feature_extraction`` and
            ``BOW.compute_histogram``; also selects the k-means file
            ``kmeans_{method}_128.pkl``.
        histogram_norm: Normalisation name forwarded to
            ``BOW.compute_histogram`` as ``norm``.

    Returns:
        A list of ``(histogram, category)`` tuples, one per box for which
        descriptors were found. The list is empty when the group is skipped.
    """
    _filename, df_frames = item

    # Sub-sampling: only groups whose first frame index is 24 modulo 100
    # contribute to the training set.
    if df_frames['frame_index'].iloc[0] % 100 != 24:
        return []

    # Imported after the guard so skipped groups do no extra work.
    import FeatureExtraction

    boxes_dict = Preprocessing.extract_box_img(df_frames)
    train_set = []
    kmeans = BOW.load_kmeans(f'kmeans_{method}_128.pkl')
    for category, boxes in boxes_dict.items():
        for box in boxes:
            _, des = FeatureExtraction.feature_extraction(box, method)
            if des is None:
                # No descriptors for this box; nothing to encode.
                continue
            histogram = BOW.compute_histogram(des, kmeans, method=method, norm=histogram_norm)
            train_set.append((histogram, category))
    return train_set
def __init__(self):
    # One bag-of-words model per descriptor channel, together with that
    # channel's descriptor dimensionality (dim*) and vocabulary size (vocsz*).
    # Channel names presumably follow the usual dense-trajectory descriptors
    # (HOG, HOF and the x/y motion-boundary components) -- TODO confirm.
    self.bowHOG, self.dimHOG, self.vocszHOG = BOW(), 96, 128
    self.bowHOF, self.dimHOF, self.vocszHOF = BOW(), 108, 128
    self.bowMBFx, self.dimMBFx, self.vocszMBFx = BOW(), 96, 128
    self.bowMBFy, self.dimMBFy, self.vocszMBFy = BOW(), 96, 128
class DenseTrajBOW:
    """Bag-of-words models for four descriptor channels (HOG, HOF, MBFx, MBFy).

    Each channel owns a ``BOW`` instance plus its descriptor dimensionality
    (``dim*``) and vocabulary size (``vocsz*``).

    NOTE(review): ``build`` and ``calcFeatures`` currently use the HOG channel
    only; the HOF/MBFx/MBFy arguments are accepted but ignored.
    """

    def __init__(self):
        # (model, descriptor dimension, vocabulary size) for every channel.
        self.bowHOG, self.dimHOG, self.vocszHOG = BOW(), 96, 128
        self.bowHOF, self.dimHOF, self.vocszHOF = BOW(), 108, 128
        self.bowMBFx, self.dimMBFx, self.vocszMBFx = BOW(), 96, 128
        self.bowMBFy, self.dimMBFy, self.vocszMBFy = BOW(), 96, 128

    def build(self, dataHOG, dataHOF, dataMBFx, dataMBFy):
        """Quantise ``dataHOG`` into a vocabulary of ``self.vocszHOG`` words.

        ``dataHOF``, ``dataMBFx`` and ``dataMBFy`` are not used.
        """
        hog_model = self.bowHOG
        hog_model.vq(data=dataHOG, voc_size=self.vocszHOG, gt_labels=None)

    def calcFeatures(self, dataHOG, dataHOF, dataMBFx, dataMBFy):
        """Compute and return the HOG bag-of-words representation.

        ``dataHOF``, ``dataMBFx`` and ``dataMBFy`` are not used.

        Returns:
            The ``bow`` attribute of the HOG model after
            ``calc_bow_representation(fv=dataHOG)`` has run.
        """
        hog_model = self.bowHOG
        hog_model.calc_bow_representation(fv=dataHOG)
        return hog_model.bow
def read_bowed_labeled_features(use_dl=False, use_cv=False):
    """Read features together with their labels.

    Args:
        use_dl: When true, use deep-learning features (``read_features``)
            and read integer labels, one per line, from
            ``global_defs.PATH_LABELS``. When false, use BOW features.
        use_cv: Only consulted when ``use_dl`` is false. Selects
            ``sift_bow_cv.read_labeled_BOWed_features_cv()`` instead of
            ``BOW.read_BOWed_labeled_features()``.

    Returns:
        For ``use_dl``: the list ``[features, labels]`` with ``labels`` a
        numpy array. Otherwise whatever the selected BOW reader returns
        (presumably the same ``[features, labels]`` layout -- TODO confirm).
    """
    if not use_dl:
        if use_cv:
            import sift_bow_cv
            return sift_bow_cv.read_labeled_BOWed_features_cv()
        import BOW
        return BOW.read_BOWed_labeled_features()

    features = read_features(use_dl=True)
    # Context manager guarantees the label file is closed; whitespace-only
    # lines (e.g. a trailing blank line) are skipped instead of crashing int().
    with open(global_defs.PATH_LABELS, 'r') as label_file:
        labels = np.array([int(line) for line in label_file if line.strip()])
    return [features, labels]
words = b.convert_to_lowcase(words) words = b.rem_stopwords(words) words = b.lemma(words) words = self.text_to_word(words) bag = np.zeros(len(vocab)) for w in words: for i, x in enumerate(vocab): if x == w: bag[i] += 1 return np.array(bag) # Read data from a file f = open('text2.txt', 'r') corpus = f.read() corpus = b.rem_special_chars(corpus) corpus = b.convert_to_lowcase(corpus) corpus = b.rem_stopwords(corpus) corpus = b.lemma(corpus) t = Tokenizer() s = t.text_to_word(corpus) print("word count=", t.word_count(s)) print("number of unique words", t.number_of_unique_words(s)) inp = "autonomous individuals mutual aid self governance" print("input = ", inp) print("bag of words = ", t.bag_of_words(inp, s))
def _scaled_windows(start_window, max_h, max_w, scale):
    """Yield (height, width) windows grown by `scale` while they still fit."""
    window = start_window
    while window[0] <= max_h and window[1] <= max_w:
        yield window
        grown = (int(window[0] * scale), int(window[1] * scale))
        if grown == window:
            # int() truncation cancelled the growth; stop rather than loop forever.
            return
        window = grown


def clfs_rolling_windows(img, clf, stride=8, padding=0, ratios=((1, 1), (3, 1), (1, 3)), scale=1.5,
                         prob_threshold=0.75, min_side_length=16, method='sift', kmeans_path=None):
    """Classify multi-scale sliding windows of `img` from bag-of-words histograms.

    Features are extracted once for the whole image. Each window gathers the
    descriptors of the keypoints strictly inside it, converts them to a
    histogram with ``BOW.compute_histogram`` and the histograms are scored in
    one ``clf.predict_proba`` call.

    Args:
        img: Image array; only ``img.shape[:2]`` is read here. It is also
            passed to ``FeatureExtraction.feature_extraction``.
        clf: Classifier exposing ``predict_proba``.
        stride: Step in pixels for the smallest window; multiplied by
            ``sqrt(scale)`` (truncated) each time the window grows.
        padding: Border width added on every side of the image. Windows slide
            over the padded extent and boxes are expressed in padded
            coordinates.
        ratios: Iterable of ``(height, width)`` aspect ratios.
        scale: Growth factor between successive window sizes; must be > 1.
        prob_threshold: Minimum winning-class probability to keep a box.
        min_side_length: Shorter side of the smallest window.
        method: ``'sift'`` or ``'surf'``.
        kmeans_path: Path of the pickled k-means model; defaults to
            ``kmeans_{method}_128.pkl``.

    Returns:
        A pandas DataFrame with columns ``box`` (``(top, left, bottom,
        right)``), ``histogram``, ``high_prob`` and ``high_category``, limited
        to rows whose ``high_category`` is not 0 (presumably the background
        class -- TODO confirm) and whose ``high_prob`` exceeds
        ``prob_threshold``. Empty when no features or no usable windows exist.

    Raises:
        ValueError: If ``method`` is unsupported, ``scale`` is not greater
            than 1, or ``padding`` is negative.
    """
    if method not in ('sift', 'surf'):
        raise ValueError("method param should be sift or surf")
    if scale <= 1:
        # A non-growing window would never leave the scale loop.
        raise ValueError("scale param should be greater than 1")
    if padding < 0:
        raise ValueError("padding param should not be negative")
    if kmeans_path is None:
        kmeans_path = f'kmeans_{method}_128.pkl'

    columns = ('box', 'histogram', 'high_prob', 'high_category')

    # Only the padded extent is needed; no padded copy of the pixels is built.
    h = img.shape[0] + 2 * padding
    w = img.shape[1] + 2 * padding

    kmeans = BOW.load_kmeans(kmeans_path)
    kp, des = FeatureExtraction.feature_extraction(img, method)
    if des is None or len(des) == 0:
        # No descriptors at all: nothing can be classified.
        return pd.DataFrame({name: [] for name in columns})

    # Keypoints are detected on the unpadded image, boxes live in padded
    # coordinates, so shift the keypoints by the padding to compare them.
    kp_x = np.array([item.pt[0] for item in kp], dtype=float) + padding
    kp_y = np.array([item.pt[1] for item in kp], dtype=float) + padding

    boxes = []
    histograms = []
    for ratio in ratios:
        print(f'ratio {ratio}')
        ratio_h, ratio_w = ratio
        # Windows are (height, width); the shorter side starts at min_side_length.
        if ratio_h < ratio_w:
            start_window = (min_side_length, int(min_side_length * ratio_w / ratio_h))
        else:
            start_window = (int(min_side_length * ratio_h / ratio_w), min_side_length)

        windows = list(_scaled_windows(start_window, h, w, scale))
        this_stride = stride
        pbar = tqdm(total=len(windows))
        for win_h, win_w in windows:
            for i in range(0, h - win_h, this_stride):
                in_rows = (i < kp_y) & (kp_y < i + win_h)
                for j in range(0, w - win_w, this_stride):
                    box = (i, j, i + win_h, j + win_w)
                    inside = in_rows & (j < kp_x) & (kp_x < j + win_w)
                    des_this = des[inside]
                    if len(des_this) == 0:
                        continue
                    histogram = BOW.compute_histogram(des_this, kmeans, method=method, norm='l0')
                    if histogram is None:
                        continue
                    boxes.append(box)
                    histograms.append(histogram)
            # Larger windows move in proportionally larger steps.
            this_stride = int(this_stride * np.sqrt(scale))
            pbar.update(1)
        pbar.close()

    if not histograms:
        # predict_proba cannot be called on an empty sample matrix.
        return pd.DataFrame({name: [] for name in columns})

    X = np.array(histograms)
    y_prob = clf.predict_proba(X)
    df_box_hist = pd.DataFrame({
        'box': boxes,
        'histogram': histograms,
        'high_prob': np.max(y_prob, axis=1).ravel().tolist(),
        'high_category': np.argmax(y_prob, axis=1).ravel().tolist(),
    })
    df_box_hist = df_box_hist[df_box_hist['high_category'] != 0]
    df_box_hist = df_box_hist[df_box_hist['high_prob'] > prob_threshold]
    return df_box_hist
#B_gt_labels = np.ones(N_B,dtype='int')
#feat = np.concatenate((A,B), axis=0)
#gt_labels = np.concatenate((A_gt_labels,B_gt_labels), axis=0)

# Test Case 2: Tiny dataset
#
# Eleven 2-D descriptors with a ground-truth label each.
desc = [
    [1, 1],
    [1.5, 1],
    [1, 15],
    [1.5, 1],
    [1, 1.5],
    [1, 1.6],
    [10, 10],
    [12, 10],
    [10, 13],
    [14, 10],
    [10, 15],
]
# NOTE(review): `sp` is presumably SciPy's top-level namespace, whose
# `vstack` is a re-export of NumPy's -- confirm before replacing it with
# `np.vstack`.
feat = sp.vstack(tuple(desc))
gt_labels = np.array([0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1])

# Build a two-word vocabulary from the descriptors.
V = BOW(data=feat, voc_size=2, gt_labels=gt_labels)
#print V.vq_data

# Encode the descriptor set stacked on top of itself and show the result.
feat2 = np.concatenate((feat, feat), axis=0)
V.calc_bow_representation(fv=feat2)
# Parenthesised form prints the same on Python 2 and is valid on Python 3.
print(V.bow)