def get_prediction(image, trained_model, probability_limit):
    """
    Score a single image with a trained model and classify the outcome.

    :param image: image to evaluate; it is passed to
        ``features.get_feature_vector`` and echoed back as the first element
        of the result
    :param trained_model: trained model exposing ``predict_proba``
    :param probability_limit: value forwarded to ``get_classification``
        (presumably a probability threshold -- confirm against that helper)
    :return: tuple ``(image, p0, p1, p2, classification)`` where ``p0``..``p2``
        are the first three entries of the first row returned by
        ``predict_proba``
    """
    feature_vector = features.get_feature_vector(image)
    probabilities = trained_model.predict_proba(feature_vector)
    label = get_classification(probabilities, probability_limit)

    # Only the first row of the prediction is reported, and exactly three
    # class probabilities are read from it.
    first_row = probabilities[0]
    return image, first_row[0], first_row[1], first_row[2], label
def get_prediction(image, trained_model, probability_limit):
    """
    Return the per-class probabilities and the overall classification.

    :param image: image to evaluate (returned unchanged in the result)
    :param trained_model: trained model; its ``predict_proba`` is called on
        the feature vector of ``image``
    :param probability_limit: passed through to ``get_classification``
    :return: ``(image, first, second, third, classification)`` -- the three
        probabilities come from positions 0, 1 and 2 of the first prediction
        row
    """
    probs = trained_model.predict_proba(features.get_feature_vector(image))
    verdict = get_classification(probs, probability_limit)
    p_first, p_second, p_third = probs[0][0], probs[0][1], probs[0][2]
    return image, p_first, p_second, p_third, verdict
def get_training_data(training_data_directory):
    """
    Build a training set from a directory of labelled image folders.

    Every sub-directory of ``training_data_directory`` is treated as one
    class: each file in it whose name does not start with ``.`` is featurized
    with ``features.get_feature_vector`` and labelled with the sub-directory
    name.

    Paths are assembled with ``os.path.join``, so the directory may be given
    with or without a trailing separator (plain string concatenation silently
    produced an empty result when the separator was missing).

    :param training_data_directory: directory where the training data is stored
    :return: X (list of feature vectors) and y (list of labels, parallel to X)
    """
    X = []
    y = []
    for label in os.listdir(training_data_directory):
        label_directory = os.path.join(training_data_directory, label)
        if not os.path.isdir(label_directory):
            continue
        for file_name in os.listdir(label_directory):
            # Dot-prefixed (hidden) entries are not training images.
            if file_name.startswith('.'):
                continue
            image_path = os.path.join(label_directory, file_name)
            X.append(features.get_feature_vector(image_path))
            y.append(label)
    return X, y
def get_training_data(training_data_directory):
    """
    Takes labelled folders of images, featurizes them and returns training
    data that can be used to train models.

    Each immediate sub-directory is one label. Files whose names start with
    ``.`` are ignored, and non-directory entries at the top level are skipped.
    Paths are joined with ``os.path.join`` so that a missing trailing
    separator on ``training_data_directory`` no longer causes every
    sub-directory to be missed.

    :param training_data_directory: directory where the training data is stored
    :return: X and y -- feature vectors and their sub-directory names, in
        matching order
    """
    X, y = [], []
    for class_name in os.listdir(training_data_directory):
        class_path = os.path.join(training_data_directory, class_name)
        if os.path.isdir(class_path):
            visible_files = [
                name for name in os.listdir(class_path)
                if not name.startswith('.')
            ]
            for name in visible_files:
                X.append(
                    features.get_feature_vector(os.path.join(class_path, name)))
                y.append(class_name)
    return X, y
def extract_feature_vector(image):
    """Return the feature vector of ``image``.

    The image is first converted with ``utils.convert_color_space`` to the
    colour space configured in ``s.color_space``; the converted image is then
    passed to ``f.get_feature_vector``.
    """
    return f.get_feature_vector(
        utils.convert_color_space(image, color_space=s.color_space))
def worker(nproc):
    """Compute and dump feature vectors for one worker's share of the triples.

    The whole training file is read to build the graph and the per-relation
    target lists, but only the lines whose index falls in this worker's slice
    ``[nproc * n / N_THREADS, (nproc + 1) * n / N_THREADS)`` are featurized
    (the same slicing is applied to the testing file).

    Side effects:
      * appends one line per triple to ``output/{DATASET}/train.csv.{nproc}``
        and ``output/{DATASET}/test.csv.{nproc}``;
      * appends the thread CPU time spent in the two feature loops to
        ``compute_times.txt``.

    :param nproc: zero-based index of this worker; worker 0 is the only one
        that prints progress messages and shows a ``tqdm`` progress bar.
    """
    def _print(*args, **kwargs):
        # Avoid printing the same stuff multiple times
        if nproc == 0:
            print(*args, **kwargs)

    # Only the first worker wraps its loops in a progress bar; the others
    # iterate over the list directly.
    iterator = tqdm if nproc == 0 else iter

    graph = nx.MultiDiGraph() if DIRECTIONAL_GRAPH else nx.MultiGraph()
    possible_targets = {}
    positive_train_triples = []

    train_lines = count_file_lines(PATH_TRAIN)
    test_lines = count_file_lines(PATH_TEST)

    # Start and end ranges for the triples that this worker will process
    start_range_train = int(nproc * train_lines / N_THREADS)
    end_range_train = int((nproc + 1) * train_lines / N_THREADS)
    start_range_test = int(nproc * test_lines / N_THREADS)
    end_range_test = int((nproc + 1) * test_lines / N_THREADS)

    # Optional whitelist of relations; None means "no restriction" while the
    # training split is being loaded.
    rels_to_study = None
    rels_study_path = f"datasets/{DATASET}/relations_to_study.txt"
    if isfile(rels_study_path):
        rels_to_study = []
        with open(rels_study_path, "r") as rels_file:
            for line in rels_file:
                # Lines read from a file keep their newline, so test the
                # stripped text; otherwise blank lines add an empty relation.
                stripped = line.strip()
                if stripped:
                    rels_to_study.append(stripped.split("\t")[0])

    # Load the data from the training split
    _print("Loading training data")
    with open(PATH_TRAIN, "r") as train_file:
        for i, line in enumerate(train_file):
            spl = line.strip().split("\t")
            # Skip negative examples in the training split, since we generate
            # our own negatives
            if len(spl) >= 4 and spl[3] != "1":
                continue

            s, r, t = spl[:3]
            possible_targets.setdefault(r, []).append(t)
            graph.add_edge(s, t, rel=r, key=r)

            in_slice = start_range_train <= i < end_range_train
            if in_slice and (rels_to_study is None or r in rels_to_study):
                positive_train_triples.append((s, r, t))

    _print("Removing duplicate targets")
    # Remove duplicates from the possible targets dict
    possible_targets = {r: list(set(ls)) for r, ls in possible_targets.items()}

    with open(PATH_RELS, "r") as rels_file:
        relations = [x.strip().split("\t")[0] for x in rels_file]

    # Generate the negatives by replacing the target entity with a random one
    # from the same range
    _print("Generating negatives")
    negative_train_triples = generate_negatives(positive_train_triples,
                                                possible_targets)
    labelled_triples_train = [
        ((s, r, t, 1), None) for s, r, t in positive_train_triples
    ] + negative_train_triples

    _print("Computing features for the training split")
    centrality_indices = degree_centrality(graph)
    # With no (or an empty) whitelist, every known relation is studied.
    if not rels_to_study:
        rels_to_study = relations

    # `with` guarantees the CSV is flushed and closed even if featurization
    # raises part-way through.
    with open(f"output/{DATASET}/train.csv.{nproc}", "a") as training_csv:
        t1 = time.thread_time()
        for (s, r, t, label), orig in iterator(labelled_triples_train):
            fvec = get_feature_vector(graph, (s, r, t), relations,
                                      bool(label), orig,
                                      centrality_indices=centrality_indices,
                                      rels_to_study=rels_to_study)
            training_csv.write(
                f"{s},{r},{t};{label};{';'.join(str(x) for x in fvec)}\n")
        t2 = time.thread_time()

    _print("Loading testing data")
    labelled_triples_test = []
    with open(PATH_TEST, "r") as test_file:
        for i, line in enumerate(test_file):
            if not start_range_test <= i < end_range_test:
                continue
            spl = line.strip().split("\t")
            s, r, t, lbl = spl[:4]
            if rels_to_study is None or r in rels_to_study:
                labelled_triples_test.append((s, r, t, 1 if lbl == "1" else 0))

    _print("Computing features for the testing split")
    with open(f"output/{DATASET}/test.csv.{nproc}", "a") as testing_csv:
        t3 = time.thread_time()
        for s, r, t, label in iterator(labelled_triples_test):
            try:
                fvec = get_feature_vector(
                    graph, (s, r, t), relations,
                    centrality_indices=centrality_indices,
                    rels_to_study=rels_to_study)
            except NodeNotFound:
                # Since the testing data does not appear in the training
                # split, an entity present in the testing split may not appear
                # in the graph generated by the training split.
                continue
            testing_csv.write(
                f"{s},{r},{t};{label};{';'.join(str(x) for x in fvec)}\n")
        t4 = time.thread_time()

    # Only the two feature-computation loops are timed; loading and negative
    # generation are excluded.
    elapsed_seconds = (t2 - t1) + (t4 - t3)
    with open("compute_times.txt", "a") as times_file:
        times_file.write(
            f"{DATASET};c{MAX_CONTEXT_SIZE};thread{nproc};{elapsed_seconds}\n")
def search_for_matches(self, image, region_of_interest=None, scale=1.0, visualize=False):
    """Run a sliding-window classifier search over (part of) an image.

    HOG features are computed once for the whole search region with
    ``f.get_hog`` and then sub-sampled per window; each window's feature
    vector is scaled with ``self.scaler`` and classified with
    ``self.classifier``. Windows predicted as ``1`` are reported.

    :param image: the image the search is run on.
    :param region_of_interest: region the search is limited to, in the format
        `((top-left-x, top-left-y), (bottom-right-x, bottom-right-y))`.
        If unspecified (None), defaults to the full image.
    :param scale: search window scale. The search region is shrunk by this
        factor, so a fixed 64-pixel window covers ``64 * scale`` pixels of the
        input image.
    :param visualize: If True, also returns a copy of ``image`` with every
        matching window drawn on it.
    :return: list of boxes ``((x1, y1), (x2, y2))`` in input-image
        coordinates; when ``visualize`` is True, a tuple
        ``(boxes, annotated_image)``.
    """
    if visualize:
        # Original author's note: the image is expected in BGR format.
        annotated = np.copy(image)

    if region_of_interest is None:
        region_of_interest = ((0, 0), (image.shape[1], image.shape[0]))
    top_left, bottom_right = region_of_interest[0], region_of_interest[1]
    x_start, y_start = top_left[0], top_left[1]
    x_stop, y_stop = bottom_right[0], bottom_right[1]

    region = image[y_start:y_stop, x_start:x_stop, :]
    region = utils.convert_color_space(region, s.color_space)

    # Shrink the region instead of growing the window.
    if scale != 1:
        scaled_width = int(region.shape[1] / scale)
        scaled_height = int(region.shape[0] / scale)
        region = cv2.resize(region, (scaled_width, scaled_height))

    # Original author's rationale: cars that look smaller are closer to the
    # horizon, so for small scales only the upper part of the region is
    # searched. The kept fraction of rows is 0.5 * scale, capped at 1.
    keep_fraction = min(0.5 * scale, 1)
    region = region[:int(keep_fraction * region.shape[0]), :]

    # Window side length in pixels (measured in the scaled region).
    window_px = 64

    # HOG parameters from the settings module.
    cell_px = s.hog_pixels_per_cell
    block_cells = s.hog_cells_per_block
    hog_channel = s.hog_channel
    n_orientations = s.hog_orientations

    # Number of HOG blocks covered by one window.
    blocks_per_window = (window_px // cell_px) - block_cells + 1
    # How many cells the window advances per step.
    cell_step = s.sliding_window_cells_increment

    # Complete blocks available along x and along y.
    blocks_across = (region.shape[1] // cell_px) - block_cells + 1
    blocks_down = (region.shape[0] // cell_px) - block_cells + 1
    # Window positions along x and along y.
    windows_across = (blocks_across - blocks_per_window) // cell_step + 1
    windows_down = (blocks_down - blocks_per_window) // cell_step + 1

    # HOG features for the whole search region, computed once.
    hog_features = f.get_hog(region,
                             pixels_per_cell=cell_px,
                             cells_per_block=block_cells,
                             orientations=n_orientations,
                             channel=hog_channel)

    rects = []
    for col in range(windows_across):
        cell_x = col * cell_step
        px_x = cell_x * cell_px
        for row in range(windows_down):
            cell_y = row * cell_step
            px_y = cell_y * cell_px

            window = region[px_y:px_y + window_px, px_x:px_x + window_px]
            # The first axis is taken whole; axes 1 and 2 are sliced by the
            # window's block position along y and x.
            window_hog = hog_features[:,
                                      cell_y:cell_y + blocks_per_window,
                                      cell_x:cell_x + blocks_per_window].ravel()

            feature_vec = f.get_feature_vector(
                window, subsampled_hog_features=window_hog)
            scaled_vec = self.scaler.transform(feature_vec.reshape(1, -1))

            if self.classifier.predict(scaled_vec) == 1:
                # Map the window back to input-image coordinates: undo the
                # scaling, then add the region-of-interest offset.
                left = scale * px_x
                top = scale * px_y
                side = scale * window_px
                box = ((int(left + x_start), int(top + y_start)),
                       (int(left + x_start + side),
                        int(top + y_start + side)))
                rects.append(box)
                if visualize:
                    cv2.rectangle(annotated, box[0], box[1], (255, 0, 0), 3)

    if visualize:
        return rects, annotated
    return rects
def main():
    """Run the receive -> classify -> send loop.

    After the handshake, each iteration reads one window of data with
    ``comm.getData2(window=45)``, classifies it and records the lower-cased
    prediction both in ``freqPredict`` and in a short queue. A final
    prediction is sent when either

      * two consecutive predictions agree (the queue holds two entries and
        the new one equals the oldest), or
      * ``freqPredict.get_hist_count()`` reaches 5, in which case
        ``freqPredict.get_predict()`` supplies the answer.

    After sending, both the queue and the history are cleared. Otherwise,
    when the new prediction differs from the oldest queued one, the queue is
    restarted from the new prediction.

    The loop ends only when ``getData2`` returns ``None``.
    """
    comm = Communicate(IP_ADDR)
    print('Handshake started')
    comm.get_handshake()
    print('Handshake completed')

    classifier = Classifier(FILE_PATH)
    print(classifier)
    freqPredict = FreqPredictor()

    if comm.has_handshake():
        print("starting a new iteration: ")
        input("Press any key to continue")

    state_queue = deque()
    while True:
        if not comm.has_handshake():
            # NOTE(review): this branch keeps looping and printing until the
            # handshake comes back; there is no sleep or exit here.
            print('Handshake broken')
            continue

        # Get data from IMU. Earlier experiments (previously left as
        # commented-out code) used getData(duration=1) and
        # getData2(window=60).
        raw_data = comm.getData2(window=45)
        # Identity check: `== None` misbehaves on array-like data.
        if raw_data is None:
            print("Comms Error: None Type")
            break

        # Process data
        feature_vector = get_feature_vector(raw_data)
        predict = classifier.predict_once(feature_vector).lower()
        freqPredict.store_moves(predict)
        state_queue.append(predict)

        if len(state_queue) == 2 and predict == state_queue[0]:
            # Two consecutive identical predictions.
            final_predict = state_queue.popleft()
            print('Final Prediction (Queue):', final_predict)
        elif freqPredict.get_hist_count() == 5:
            # No consecutive agreement: fall back to the history's answer.
            final_predict = freqPredict.get_predict()
            print('Final Prediction (Hist):', final_predict)
        else:
            # Nothing to send yet. If the streak was broken, restart the
            # queue from the newest prediction.
            if predict != state_queue[0]:
                state_queue.clear()
                state_queue.append(predict)
            continue

        send_prediction(final_predict, comm)
        state_queue.clear()
        freqPredict.clear_hist()
def load_dataset(files_directory, pickle_directory, force_rebuild=True):
    """Build (or reload) the mention-pair dataset and split it 60/40.

    For every file matched by ``files_directory + "*"`` the mention
    dictionary and its ``<basename>_classes.p`` companion (looked up in
    ``pickle_directory``) are loaded with ``load_dict``. Mentions are visited
    in sorted key order and each one is paired with up to the next 19
    mentions; a pair is labelled 1 when both mentions share the same ``'ID'``
    and 0 otherwise. The resulting lists are pickled to ``x_all.pkl`` and
    ``y_all.pkl`` inside ``pickle_directory``.

    Both directory arguments are concatenated directly with file names, so
    they must end with a path separator.

    :param files_directory: directory (with trailing separator) holding the
        mention dictionary files.
    :param pickle_directory: directory (with trailing separator) holding the
        ``*_classes.p`` files; also where the ``x_all.pkl`` / ``y_all.pkl``
        cache is written.
    :param force_rebuild: when True (the default, matching the previous
        hard-coded behaviour) the dataset is always recomputed. When False,
        the cached pickles are used if they can be read, and the dataset is
        rebuilt otherwise.
    :return: ``(x_train, y_train, x_test, y_test, rule_based_wrong_count,
        rule_based_correct_count)``. The two counters are always 0: the
        rule-based pre-filter that updated them is disabled.
    """
    files = glob.glob(files_directory + "*")
    x_cache_path = pickle_directory + 'x_all.pkl'
    y_cache_path = pickle_directory + 'y_all.pkl'

    # Kept only so the return shape stays stable for callers.
    rule_based_wrong_count = 0
    rule_based_correct_count = 0

    cached = None
    if not force_rebuild:
        try:
            # NOTE: pickle executes arbitrary code on load; only point this
            # at cache files this project wrote itself.
            with open(x_cache_path, 'rb') as x_file:
                cached_x = pkl.load(x_file)
            with open(y_cache_path, 'rb') as y_file:
                cached_y = pkl.load(y_file)
            cached = (cached_x, cached_y)
        except (IOError, OSError, EOFError, pkl.UnpicklingError):
            # Missing or unreadable cache: fall through and rebuild.
            cached = None

    if cached is not None:
        x, y = cached
    else:
        x = []
        y = []
        for fnum, fname in enumerate(files, 1):
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('processing file number %s of %s files' % (fnum, len(files)))
            mentionDictionary = load_dict(fname)
            classes = load_dict(
                pickle_directory + fname.split('/')[-1].split('.')[0] +
                '_classes.p')
            keys = sorted(mentionDictionary.keys())
            len_keys = len(keys)
            for i in range(len_keys):
                print('%s of %s' % (i, len_keys))
                mention1 = mentionDictionary[keys[i]]
                # Pair each mention with the following ones, up to index
                # i + 19.
                for j in range(i + 1, min(len_keys, i + 20)):
                    mention2 = mentionDictionary[keys[j]]
                    x.append(
                        features.get_feature_vector(mention1, mention2,
                                                    classes))
                    y.append(1 if mention1['ID'] == mention2['ID'] else 0)

        with open(x_cache_path, 'wb') as x_file:
            pkl.dump(x, x_file)
        with open(y_cache_path, 'wb') as y_file:
            pkl.dump(y, y_file)

    # A class-balanced sampling step used to precede the split; it survived
    # only as commented-out code, so the split is a plain 60/40 split.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=.4, random_state=42)
    print('%s %s %s %s' % (len(x_train), len(y_train), len(x_test),
                           len(y_test)))
    return (x_train, y_train, x_test, y_test, rule_based_wrong_count,
            rule_based_correct_count)