def generate_captions(filename, top_n=5):
    """Return the ``top_n`` best captions found by a shrinking beam search.

    ``filename`` is handed to ``get_features``; the resulting features are
    passed to ``predict_probs`` together with the token prefix of every
    candidate caption.

    While searching, a candidate's score is the running mean of the negative
    log-probabilities of its generated tokens (lower is better).  Before
    returning, each score is multiplied by the number of generated tokens,
    i.e. converted back to a summed negative log-probability.

    Args:
        filename: Input forwarded unchanged to ``get_features``.
        top_n: Maximum number of captions to return.

    Returns:
        ``zip(*pool)``: the captions (space-joined words, without the
        start/stop tokens) followed by their scores, ordered best first.
        On Python 3 this is a lazy iterator.

    NOTE(review): the loop only ends once every candidate ends with the stop
    token; there is no maximum caption length.
    """
    cnn_features = get_features(filename)
    stop_idx = word_to_idx['<START/STOP>']

    # The beam narrows by one candidate per step.  It must never become
    # narrower than the number of captions requested: the previous fixed
    # floor of 5 silently capped the result at five captions for top_n > 5.
    min_beam_size = max(5, top_n)
    beam_size = max(20, min_beam_size)

    # Each pool entry is [token indices, score]; the search starts from the
    # shared start/stop token.
    pool = [[[stop_idx], 0]]
    first_pass = True

    # n.b. this is suboptimal: a lot of recalculations occur
    while first_pass or any(tokens[-1] != stop_idx for tokens, _ in pool):
        new_pool = []
        for entry in pool:
            tokens, mean_score = entry
            # Finished captions are carried over untouched.  On the first
            # pass the lone start token must still be expanded.
            if tokens[-1] == stop_idx and not first_pass:
                new_pool.append(entry)
                continue

            probs = predict_probs(tokens, cnn_features).flatten()
            generated = len(tokens) - 1  # tokens produced so far
            for word_idx in probs.argsort()[-beam_size:]:
                score = (mean_score * generated
                         - np.log(probs[word_idx])) / (generated + 1)
                new_pool.append([tokens + [word_idx], score])

        pool = sorted(new_pool, key=lambda e: e[1])[:beam_size]
        first_pass = False
        if beam_size > min_beam_size:
            beam_size -= 1

    pool = sorted(pool, key=lambda e: e[1])[:top_n]
    for entry in pool:
        # Mean -> total negative log-probability, then indices -> words
        # (dropping the leading and trailing start/stop token).
        entry[1] *= len(entry[0]) - 1
        entry[0] = u' '.join(
            [idx_to_word[word_idx] for word_idx in entry[0][1:-1]])
    pool = sorted(pool, key=lambda e: e[1])
    return zip(*pool)
def generate_captions(filename, top_n=5):
    """Return the ``top_n`` best captions found by a shrinking beam search.

    ``filename`` is handed to ``get_features``; the resulting features are
    passed to ``predict_probs`` together with the token prefix of every
    candidate caption.

    While searching, a candidate's score is the running mean of the negative
    log-probabilities of its generated tokens (lower is better).  Before
    returning, each score is multiplied by the number of generated tokens,
    i.e. converted back to a summed negative log-probability.

    Args:
        filename: Input forwarded unchanged to ``get_features``.
        top_n: Maximum number of captions to return.

    Returns:
        ``zip(*pool)``: the captions (space-joined words, without the
        start/stop tokens) followed by their scores, ordered best first.
        On Python 3 this is a lazy iterator.

    NOTE(review): the loop only ends once every candidate ends with the stop
    token; there is no maximum caption length.
    """
    cnn_features = get_features(filename)
    stop_idx = word_to_idx['<START/STOP>']

    # The beam narrows by one candidate per step.  It must never become
    # narrower than the number of captions requested: the previous fixed
    # floor of 5 silently capped the result at five captions for top_n > 5.
    min_beam_size = max(5, top_n)
    beam_size = max(20, min_beam_size)

    # Each pool entry is [token indices, score]; the search starts from the
    # shared start/stop token.
    pool = [[[stop_idx], 0]]
    first_pass = True

    # n.b. this is suboptimal: a lot of recalculations occur
    while first_pass or any(tokens[-1] != stop_idx for tokens, _ in pool):
        new_pool = []
        for entry in pool:
            tokens, mean_score = entry
            # Finished captions are carried over untouched.  On the first
            # pass the lone start token must still be expanded.
            if tokens[-1] == stop_idx and not first_pass:
                new_pool.append(entry)
                continue

            probs = predict_probs(tokens, cnn_features).flatten()
            generated = len(tokens) - 1  # tokens produced so far
            for word_idx in probs.argsort()[-beam_size:]:
                score = (mean_score * generated
                         - np.log(probs[word_idx])) / (generated + 1)
                new_pool.append([tokens + [word_idx], score])

        pool = sorted(new_pool, key=lambda e: e[1])[:beam_size]
        first_pass = False
        if beam_size > min_beam_size:
            beam_size -= 1

    pool = sorted(pool, key=lambda e: e[1])[:top_n]
    for entry in pool:
        # Mean -> total negative log-probability, then indices -> words
        # (dropping the leading and trailing start/stop token).
        entry[1] *= len(entry[0]) - 1
        entry[0] = u' '.join(
            [idx_to_word[word_idx] for word_idx in entry[0][1:-1]])
    pool = sorted(pool, key=lambda e: e[1])
    return zip(*pool)
def extract_features(image_dict, seg_dict):
    """Compute features for every entry of ``image_dict``.

    Each name is printed as it is processed, then
    ``feature_extractor.get_features`` is called with the image, its name
    and the entry of ``seg_dict`` stored under the same name.

    Args:
        image_dict: Mapping of image name to image data.
        seg_dict: Mapping with (at least) the same keys as ``image_dict``.

    Returns:
        dict mapping each image name to the extractor's result.

    Raises:
        KeyError: If a name in ``image_dict`` is missing from ``seg_dict``.
    """
    features = {}
    for image_name, image in image_dict.items():
        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3; the bare statement form is a SyntaxError on Python 3.
        print(image_name)
        features[image_name] = feature_extractor.get_features(
            image, image_name, seg_dict[image_name])
    return features
def predictNeural(self, image):
    """Run ``self.nnet`` on the features of ``image`` and return its output.

    The image is converted from BGR to grayscale and passed to
    ``fe.get_features``.  The network is fed a two-row input: the image's
    features followed by a fixed, hard-coded vector.  Only the prediction
    for the first row (the image) is returned.

    NOTE(review): why the constant second row is needed is not visible in
    this block — confirm before removing it.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    rows = [
        fe.get_features(gray),
        np.array([
            0., 10., 22., 26., 43., 37., 30., 27., 36., 48.,
            59., 70., 80., 82., 80., 75., 66., 57., 49., 123.,
            213., 123., 29., 25., 25., 24., 25., 30., 38., 57.,
            70., 92., 115., 122., 174., 199., 224., 255., 235., 116.,
            43., 26., 20., 18., 15., 1., 14., 12., 12., 10.,
            11., 13., 17., 18., 11., 10., 124., 18., 13., 13.,
            37., 17., 5., 1., 8.80175781, 2.59635413, 1.29720053]),
    ]
    inputs = np.array(rows)

    # Output buffer with one row per input row; predict() fills it in place.
    predictions = np.empty((len(inputs), 1), 'float')
    self.nnet.predict(inputs, predictions)

    return predictions[0]
def test(self, sample_img, numNeigh=11):
    """Query ``self.knn`` with the features of ``sample_img``.

    Args:
        sample_img: BGR image; converted to grayscale before feature
            extraction with ``fe.get_features``.
        numNeigh: Number of neighbours passed to ``find_nearest``.

    Returns:
        The first element of the value returned by ``self.knn.find_nearest``.
    """
    gray = cv2.cvtColor(sample_img, cv2.COLOR_BGR2GRAY)
    features = fe.get_features(gray)
    # find_nearest is given a single float32 row holding all features.
    query = np.array(features, np.float32).reshape((1, len(features)))
    return self.knn.find_nearest(query, numNeigh)[0]
def predictSVM(self, im):
    """Classify ``im`` with ``self.svm`` and return the first prediction.

    The BGR image is converted to grayscale, its features are extracted with
    ``fe.get_features`` and wrapped in a one-row array for
    ``self.svm.predict_all``.  The full result is printed before its first
    element is returned.
    """
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    inputs = np.array([fe.get_features(gray)])
    result = self.svm.predict_all(inputs)
    # Parenthesised single-argument print works on Python 2 and Python 3;
    # the bare statement form is a SyntaxError on Python 3.
    print(result)
    return result[0]
def lineparser(line):
    """Inspect one tab-separated log line and score downloaded PE files.

    Lines that are empty, start with ``#`` or have fewer than eight fields
    are ignored.  When field 7 contains ``"PE"``, the file named by field 22
    under ``extract_files/`` is opened with ``pefile``; if it is an EXE, a
    notice built from fields 2 and 3 is printed, the file's features are
    POSTed to the local ML service and the JSON reply is printed.

    Any failure while opening or submitting the file is reported as
    "Unable to open the file" and otherwise ignored (best effort).

    Args:
        line: One raw log line.

    Returns:
        None.
    """
    # ``line[0] is not "#"`` compared object identity and crashed on an
    # empty line; compare by value instead.
    if not line or line.startswith("#"):
        return

    line_fields = line.split("\t")
    # Blank or truncated lines have no field 7 to inspect.
    if len(line_fields) < 8 or "PE" not in line_fields[7]:
        return

    try:
        sample_path = "extract_files/" + line_fields[22]
        # just another way to validate
        if pefile.PE(sample_path, fast_load=True).is_exe():
            print("EXE FILE DOWNLOADED FROM %s BY %s"
                  % (line_fields[2], line_fields[3]))
            print(requests.post(
                'http://localhost:8080/ML',
                json=feature_extractor.get_features(sample_path)).json())
    except Exception:
        # Deliberately best effort, but no longer swallows
        # KeyboardInterrupt / SystemExit like the bare ``except:`` did.
        print("Unable to open the file")
def run(self):
    """Submit the file's features to the local ML service.

    Sets ``self.key`` to ``"mlmd"``.  If ``self.file_path`` exists, its
    features are extracted with ``feature_extractor.get_features``, POSTed
    as JSON to ``http://localhost:8080/ML`` and the decoded JSON reply is
    returned.

    Returns:
        The service's JSON reply, or ``None`` when ``self.file_path`` does
        not exist.

    Raises:
        CuckooProcessingError: When ``SomethingFailed`` is raised while
            processing.
    """
    self.key = "mlmd"
    # Pre-bind the result: previously a missing file skipped the assignment
    # below and ``return data`` raised UnboundLocalError.
    data = None
    try:
        if os.path.exists(self.file_path):
            # extract PE data, send to MLMD server, set data=reply
            features = feature_extractor.get_features(self.file_path)
            res = requests.post("http://localhost:8080/ML", json=features)
            data = res.json()
    # NOTE(review): ``SomethingFailed`` is not defined in the visible source;
    # if it is a leftover template placeholder, any real exception raised
    # above would surface as NameError here — confirm and replace with the
    # concrete exception types.
    except SomethingFailed:
        raise CuckooProcessingError("Failed")
    return data
def main(argv):
    """Print the top-5 zero-shot class predictions for one image.

    Args:
        argv: Command-line arguments without the program name; must contain
            exactly one element, the path of the input image.

    The image is resized to 224x224, passed through the feature model
    returned by ``get_model()``, L2-normalised and fed to the Keras model
    loaded from ``MODELPATH``.  The model output is matched against the class
    vectors stored in ``WORD2VECPATH`` with a KD-tree, and the five nearest
    class names are printed.

    Raises:
        SystemExit: If ``argv`` does not hold exactly one argument (after
            printing the usage message).
    """
    import sys

    if len(argv) != 1:
        print("Usage: python3 detect_object.py input-image-path")
        # ``exit()`` is injected by the ``site`` module for interactive use
        # and is not guaranteed to exist; sys.exit() is the program-safe
        # equivalent with the same exit status.
        sys.exit()

    # READ IMAGE
    image_path = argv[0]
    img = Image.open(image_path).resize((224, 224))

    # LOAD PRETRAINED VGG16 MODEL FOR FEATURE EXTRACTION
    vgg_model = get_model()

    # EXTRACT IMAGE FEATURE
    img_feature = get_features(vgg_model, img)

    # L2 NORMALIZE FEATURE
    img_feature = normalize(img_feature, norm='l2')

    # LOAD ZERO-SHOT MODEL
    model = load_keras_model(model_path=MODELPATH)

    # MAKE PREDICTION
    pred = model.predict(img_feature)

    # LOAD CLASS WORD2VECS
    # NOTE: allow_pickle=True unpickles arbitrary objects; only load
    # WORD2VECPATH files that come from a trusted source.
    class_vectors = sorted(np.load(WORD2VECPATH, allow_pickle=True),
                           key=lambda x: x[0])
    classnames, vectors = zip(*class_vectors)
    classnames = list(classnames)
    # ``np.float`` was only an alias of the builtin ``float`` and was removed
    # in NumPy 1.24; the builtin yields the same float64 dtype.
    vectors = np.asarray(vectors, dtype=float)

    # PLACE WORD2VECS IN KDTREE
    tree = KDTree(vectors)

    # FIND CLOSEST WORD2VEC and GET PREDICTION RESULT
    dist, index = tree.query(pred, k=5)
    pred_labels = [classnames[idx] for idx in index[0]]

    # PRINT RESULT
    print()
    print("--- Top-5 Prediction ---")
    for i, classname in enumerate(pred_labels):
        print("%d- %s" % (i + 1, classname))
    print()
    return
def get_similarity_list(source_id):
    """Return the ``num_similar`` top-ranked dataset images for one source.

    The image ``test/generated/<source_id>.jpg`` is loaded, its features are
    extracted with ``intermediate_model`` and compared against
    ``feat_matrix`` via ``feat_ex.compute_cosine_distance_matrix``.  Entries
    are ranked by descending value of that matrix.

    Side effects:
        Appends every not-yet-seen dataset index to the module-level
        ``unique_images`` list and prints a progress line.

    Returns:
        A list of dicts with the keys ``id``, ``score`` (rounded to four
        decimals), ``layer`` and ``sourceid``, in ranking order.
    """
    global unique_images

    source_path = "test/generated/" + str(source_id) + ".jpg"
    source_image_data = feat_ex.get_np_array_from_image(source_path)

    # Features of the source image, then its comparison with the dataset.
    feat = feat_ex.get_features(intermediate_model, source_image_data)
    cosine_dist_matrix = feat_ex.compute_cosine_distance_matrix(
        feat, feat_matrix)

    descending = cosine_dist_matrix.flatten().argsort()[::-1]
    similarity_score_index = descending[:num_similar]
    similarity_scores = cosine_dist_matrix[similarity_score_index].flatten()
    similar_images = np_original[similarity_score_index]

    similarity_details = []
    # similar_images only drives the iteration count, as in the index loop
    # this replaces.
    for _image, dataset_index, score in zip(
            similar_images, similarity_score_index, similarity_scores):
        # keep track of images we haven't seen
        if dataset_index not in unique_images:
            unique_images.append(dataset_index)
        similarity_details.append({
            "id": int(dataset_index),
            "score": (round(score, 4)),
            "layer": layer_name,
            "sourceid": source_id
        })

    print(source_id, " > processing similarity details using VGG layer ",
          layer_name)
    return similarity_details
def apply_pca():
    """Reduce the features of every dataset image with PCA and index them.

    Features are extracted with ``get_features`` for every file matching
    ``<dataset>/*.*``, stacked into one matrix, reduced with PCA and written
    to the HDF5 file ``index_file`` (opened in append mode), one dataset per
    image.

    The number of rows is taken from the files actually found.  The former
    hard-coded size of 112 raised IndexError for larger directories and
    mixed all-zero rows into the PCA fit for smaller ones.  Nothing is
    written when the directory has no matching files.
    """
    dim = 100352       # length of one feature vector from get_features
    reduced_dim = 100  # requested number of principal components

    # Glob once so the matrix rows and the dataset keys written below are
    # guaranteed to refer to the same files in the same order.
    image_paths = glob.glob(dataset + os.path.sep + "*.*")
    if not image_paths:
        return

    matrix = np.zeros((len(image_paths), dim))
    for row, image_path in enumerate(image_paths):
        matrix[row] = get_features(image_path)
    print(matrix.shape)

    # PCA cannot return more components than there are samples or features.
    n_components = min(reduced_dim, len(image_paths), dim)
    pca = PCA(n_components=n_components)
    principal_comp = pca.fit_transform(matrix)
    print(principal_comp.shape)

    # One open/close for the whole batch, not one per image.
    with h5py.File(index_file, 'a') as h:
        for image_path, components in zip(image_paths, principal_comp):
            # NOTE(review): the key is everything after the LAST 'h' in the
            # path, although the original comment calls it "the filename" —
            # confirm this is the intended image ID.
            k = image_path[image_path.rfind('h') + 1:]
            h.create_dataset(k, data=components)
# Test fold number ``testset``: a slice of int(all_img_num * test_ratio)
# consecutive entries of shuffled_ids.
# NOTE(review): train_ids is defined outside this chunk.
test_ids = shuffled_ids[int(all_img_num * test_ratio) * testset:
                        int(all_img_num * test_ratio) * (testset + 1)]
train_img_num = len(train_ids)
# print(...) with a single pre-formatted argument behaves the same on
# Python 2 and Python 3; the bare statement form is a SyntaxError on Python 3.
print(">> train-test split (%d, %d)" % (len(train_ids), len(test_ids)))

##################################################
# EXTRACT FEATURES
##################################################
# Features are cached as CSV; they are only recomputed when the cache file
# does not exist.
fdir = hp.cur_dir + "tmp/features_colbow.csv"
if os.path.isfile(fdir):
    print(">> use existing features")
    features = np.genfromtxt(fdir, delimiter=',')
else:
    print(">> extract features")
    features = fe.get_features(images, hp.FeatureType.COL_BOW)  # imgNum*featureLen
    np.savetxt(fdir, features, delimiter=',')
print(">> features extracted")

##################################################
# FIT A CLASSIFIER USING RANDOM DATA
##################################################
all_data = train_ids
# Only the first hp.sampling_ratio share of the training ids is used as
# labeled data for the initial fit.
sampled_num = int(train_img_num * hp.sampling_ratio)
sampled_ids = all_data[:sampled_num]
lbs = [labels[x] for x in sampled_ids]
fts = [features[x] for x in sampled_ids]
classifier = cf.get_classifier(fts, lbs, hp.ClassifierType.RANDOM_FOREST)
print(">> fit classifier with %d labeled samples" % sampled_num)
train_features = [features[x] for x in train_ids]
def get_data(use_precomputed=False):
    """Load the pickled data set or rebuild it from the raw recordings.

    Args:
        use_precomputed: When true, return the contents of ``all_data.pkl``
            if that file exists; otherwise fall back to recomputing.

    Recomputing reads the ``data_chunk`` array of every (subject, label)
    ``.mat`` file, normalises the raw windows, saves them, extracts features
    per window with ``get_features`` and pickles the result.

    Returns:
        The tuple ``(X_all, y_all, groups, feature_names, subjects, labels,
        class_names, is_moving_data, include_eog, include_imu)`` — or
        whatever ``load_obj`` returns on the precomputed path.
    """
    if use_precomputed:
        filename = "all_data.pkl"
        if isfile(filename):
            print("loading pickled data")
            return load_obj(filename)
        print("couldn't load pickle file. recomputing features")
        return get_data(use_precomputed=False)

    X_all_raw = None
    # list of tuples containing (subject number, label number, trial index)
    raw_index = []
    X_all = None
    y_all = []
    groups = []

    # accumulate the data for all the subjects
    print("reading raw data into memory")
    for subject in subjects:
        for label in labels:
            path = get_path(subject, label, is_moving_data)
            # [ trial * window frames * sensor channels ]
            subject_matrix = scipy.io.loadmat(path)['data_chunk']
            n_trials = subject_matrix.shape[0]
            groups.extend([subject] * n_trials)
            y_all.extend([label] * n_trials)

            for trial in range(n_trials):
                raw_window = subject_matrix[trial, :, :]
                if X_all_raw is None:
                    # Empty float stack sized from the first window seen.
                    X_all_raw = np.empty(
                        shape=(0, len(raw_window), 10), dtype=float)
                X_all_raw = np.concatenate(
                    (X_all_raw, raw_window[np.newaxis, :, :]), axis=0)
                raw_index.append((subject, label, trial))

    print("normalizing data")
    # Accelerometer (channels 0:3) and gyroscope (channels 3:6) signals are
    # shifted and scaled with one global mean / std figure per sensor.
    for channels in (slice(0, 3), slice(3, 6)):
        a = np.mean(np.std(X_all_raw[:, :, channels], axis=2))
        b = np.mean(np.mean(X_all_raw[:, :, channels], axis=2))
        X_all_raw[:, :, channels] = (X_all_raw[:, :, channels] - b) / a

    # EOG signals (channels 6:10) are only mean-centred per channel; they
    # are not scaled.
    mean_eog_signals = np.mean(
        np.mean(X_all_raw[:, :, 6:10], axis=1), axis=0)
    X_all_raw[:, :, 6:10] = X_all_raw[:, :, 6:10] - mean_eog_signals

    print("saving raw data")
    raw_index = np.array(raw_index)
    save_obj((X_all_raw, raw_index), "../../res/all_data_raw.pkl",
             sanitized=False)

    print("extracting features")
    for trial in tqdm(range(X_all_raw.shape[0])):
        feature_extracted_window, feature_names = get_features(
            X_all_raw[trial, :, :], include_eog, include_imu)
        feature_extracted_window = np.array(feature_extracted_window)
        if X_all is None:
            X_all = np.empty(
                shape=(0, len(feature_extracted_window)), dtype=float)
        X_all = np.concatenate(
            (X_all, feature_extracted_window[np.newaxis, :]), axis=0)

    y_all = np.array(y_all)
    groups = np.array(groups)

    data_blob = (X_all, y_all, groups, feature_names, subjects, labels,
                 class_names, is_moving_data, include_eog, include_imu)

    print("pickling data")
    # NOTE(review): this call passes (filename, object) while the raw-data
    # save above passes (object, filename) — check save_obj's signature; one
    # of the two orders is presumably wrong.
    save_obj("all_data.pkl", data_blob)
    return data_blob
# Script entry point: load the prepared features, score the XGBoost model and
# write its test-set predictions.
from feature_extractor import get_features
from xg_model import xgb_score
from xg_model import xgb_model
from xg_model import save_result

if __name__ == '__main__':
    # regenerate=False is forwarded to get_features — presumably this reuses
    # previously generated features; confirm in feature_extractor.
    # feature_types is unpacked but not used below.
    train, test, feature_types = get_features(regenerate=False)
    # Preview of the first element of ``train``.
    print(train[0].head())
    # Get the model score.
    # Per the original note the default cv is 5 (defined in xg_model, not
    # visible here).
    xgb_score(train)
    # Get the model prediction (currently disabled):
    #predict = xgb_model(train[0], train[1], test[0])
    # Save the model prediction to prediction/.
    # Per the original note the return value is the CSV content as a
    # DataFrame and the default CSV name is test_result.
    result = save_result(train, test)
    print(result.head())
def test(self, sample_img):
    """Query ``self.knn`` for the 10 nearest neighbours of ``sample_img``.

    The BGR image is converted to grayscale and its features are extracted
    with ``fe.get_features``.

    Returns:
        The first element of the value returned by ``self.knn.find_nearest``.
    """
    neighbours = 10
    gray = cv2.cvtColor(sample_img, cv2.COLOR_BGR2GRAY)
    features = fe.get_features(gray)
    # find_nearest is given a single float32 row holding all features.
    query = np.array(features, np.float32).reshape((1, len(features)))
    result = self.knn.find_nearest(query, neighbours)
    return result[0]
def lbp_pipeline(gray_image, **kwargs):
    """Preprocess ``gray_image`` and return the features of the result.

    The values returned by ``image_preprocessing`` are unpacked as the
    positional arguments of ``get_features``.  Extra keyword arguments are
    accepted but ignored.
    """
    preprocessed = image_preprocessing(gray_image)
    return get_features(*preprocessed)
dim = 100352 i = 0 matrix = np.zeros((dataset_size, dim)) for imagePath in glob.glob(dataset + os.path.sep + "*.*"): # extract our unique image ID (i.e. the filename) features = get_features(imagePath) matrix[i] = features i += 1 print(matrix.shape) reduced_dim = 100 pca = PCA(n_components=reduced_dim) principal_comp = pca.fit_transform(matrix) print(principal_comp.shape) # print() i = 0 for imagePath in glob.glob(dataset + os.path.sep + "*.*"): with h5py.File(index_file, 'a') as h: k = imagePath[imagePath.rfind('h') + 1:] h.create_dataset(k, data=principal_comp[i]) i += 1 # apply_pca() for imagePath in glob.glob(dataset + os.path.sep + "*.*"): # extract our unique image ID (i.e. the filename) k = imagePath[imagePath.rfind('h') + 1:] features = get_features(imagePath) with h5py.File(index_file, 'a') as h: h.create_dataset(k, data=features)