def test(self, test_dataset):
    """Evaluate landmark detection and localisation on ``test_dataset``.

    Every element of ``test_dataset`` is unpacked as ``(image, visible_objects)``.
    For each image the model's landmark, distance and theta log-probabilities
    are exponentiated, a ``{landmark index: (distance, theta)}`` prediction is
    built for every landmark whose argmax landmark action is 1, and the
    prediction is scored with
    ``SupervisedLearningDetectSymbolicEnvironment.get_f1_score``.

    Detection scores are averaged over data points; distance precision and
    theta regression are averaged over the number of cases reported by the
    scorer. Results are only logged; nothing is returned.
    """
    f1_total, precision_total, recall_total = 0, 0, 0
    distance_precision_total, theta_regression_total = 0, 0
    example_count = 0
    located_case_count = 0

    for image, visible_objects in test_dataset:
        # The model is queried with a batch holding this single image.
        log_prob_landmark, log_prob_distance, log_prob_theta = self.model.get_probs([[image]])
        landmark_probs = list(torch.exp(log_prob_landmark.data)[0])
        distance_probs = list(torch.exp(log_prob_distance.data)[0])
        theta_probs = list(torch.exp(log_prob_theta.data)[0])

        predicted = {}
        for landmark_ix in range(63):
            if gp.get_argmax_action(landmark_probs[landmark_ix]) != 1:
                continue
            # Landmark predicted: record its argmax distance and angle.
            predicted[landmark_ix] = (
                gp.get_argmax_action(distance_probs[landmark_ix]),
                gp.get_argmax_action(theta_probs[landmark_ix]),
            )

        scores = SupervisedLearningDetectSymbolicEnvironment.get_f1_score(visible_objects, predicted)
        f1, precision, recall, distance_precision, theta_regression, located_cases = scores

        example_count += 1
        f1_total += f1
        precision_total += precision
        recall_total += recall
        distance_precision_total += distance_precision
        theta_regression_total += theta_regression
        located_case_count += located_cases

    # Denominators are clamped to 1 so an empty dataset yields zeros.
    example_norm = float(max(example_count, 1))
    located_norm = float(max(located_case_count, 1))

    logging.info(
        "Object detection accuracy on a dataset of size %r the mean f1 score is %r, precision %r, recall %r",
        example_count,
        f1_total / example_norm,
        precision_total / example_norm,
        recall_total / example_norm)
    # The 7.5 factor presumably converts theta buckets to degrees -- TODO confirm.
    logging.info(
        "Object location accuracy on %r cases the mean distance precision is %r and theta regression is %r angle",
        located_case_count,
        distance_precision_total / located_norm,
        (theta_regression_total / located_norm) * 7.5)
def test1(self, test_dataset, test_real_dataset):
    """Measure theta prediction accuracy and log the results.

    ``test_dataset`` elements are unpacked as ``(image, visible_objects)``;
    ``test_real_dataset`` is indexed in parallel to obtain the gold symbolic
    segment for the same data point.

    Three counters are kept:
      * exact theta matches over visible landmarks,
      * matches within one bucket (circular, modulo 12),
      * matches for the landmark named in the instruction, compared against
        the gold theta shifted by 6 modulo 12.

    Returns the mean exact theta accuracy over visible landmarks.
    """
    example_count = 0
    visible_count = 0
    exact_hits = 0
    near_hits = 0
    mentioned_hits = 0

    for ix, (image, visible_objects) in enumerate(test_dataset):
        # The model is queried with a batch holding this single image.
        log_prob_theta = self.model.get_probs([[image]])
        theta_probs = list(torch.exp(log_prob_theta.data)[0])

        gold_landmark, gold_theta_1, gold_theta_2, gold_r = \
            nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(test_real_dataset[ix])

        # The gold angle is shifted by half a turn (6 of 12 buckets) before comparing.
        shifted_gold_theta = (gold_theta_1 + 6) % 12
        if gp.get_argmax_action(theta_probs[gold_landmark]) == shifted_gold_theta:
            mentioned_hits += 1

        for landmark_ix in range(self.num_landmark):
            if landmark_ix not in visible_objects:
                continue
            predicted_angle = gp.get_argmax_action(theta_probs[landmark_ix])
            gold_angle = visible_objects[landmark_ix][1]
            if predicted_angle == gold_angle:
                exact_hits += 1
            # Circular distance between the two buckets.
            clockwise = (predicted_angle - gold_angle) % 12
            counter_clockwise = (gold_angle - predicted_angle) % 12
            if min(clockwise, counter_clockwise) <= 1:
                near_hits += 1
            visible_count += 1

        example_count += 1

    visible_norm = float(max(visible_count, 1))
    theta_accuracy = exact_hits / visible_norm
    neighbouring_accuracy = near_hits / visible_norm
    symbolic_landmark_accuracy = mentioned_hits / float(max(len(test_real_dataset), 1))

    logging.info(
        "Num datapoints %r, num visible landmarks %r and mean theta accuracy %r and neigbhouring accuracy %r",
        example_count, visible_count, theta_accuracy, neighbouring_accuracy)
    logging.info("Accuracy for landmark mentioned in the text is %r", symbolic_landmark_accuracy)
    return theta_accuracy
def predict_action(self, batch_replay_items):
    """Predict the action between consecutive image embeddings and tally hits.

    For every replay item that has a next image embedding, the current and
    next embeddings are concatenated along dimension 2 and fed to
    ``self.model.action_prediction_log_prob``. The argmax prediction is
    compared with the action stored in the item, and ``self.correct`` /
    ``self.wrong`` (indexed by the gold action) are incremented accordingly.

    Items whose next embedding is ``None`` are skipped. Scoring iterates over
    the items that were actually collected, so it stays aligned with the model
    output no matter how many items were skipped.

    Args:
        batch_replay_items: sequence of replay items exposing
            ``get_next_image_emb()``, ``get_image_emb()`` and ``get_action()``.

    Returns:
        None. Batches with at most one item, or with no usable item, are
        ignored.
    """
    if len(batch_replay_items) <= 1:
        return None

    action_batch = []
    batch_input = []
    for replay_item in batch_replay_items:
        next_image_emb = replay_item.get_next_image_emb()
        if next_image_emb is None:
            # Can be None for the last item in a rollout.
            continue
        action_batch.append(replay_item.get_action())
        image_emb = replay_item.get_image_emb()
        batch_input.append(torch.cat([image_emb, next_image_emb], 2))

    if not batch_input:
        # Every item was skipped; torch.cat would raise on an empty list.
        return None

    model_log_prob_batch = self.model.action_prediction_log_prob(torch.cat(batch_input))
    log_prob = list(model_log_prob_batch.data)

    # Assumes each concatenated input contributes exactly one row to the model
    # output (leading dimension of 1 per item) -- TODO confirm.
    for gold_action, action_log_prob in zip(action_batch, log_prob):
        predicted_action = gp.get_argmax_action(action_log_prob)
        if gold_action != predicted_action:
            self.wrong[gold_action] += 1
        else:
            self.correct[gold_action] += 1
        logging.info("Was %r and predicted %r, wrong %r, correct %r",
                     gold_action, predicted_action, self.wrong, self.correct)
def predict_angle_from_resnet(self, test_dataset, test_images):
    """Log how often the detector's theta for the gold landmark matches the gold theta.

    For each data point the gold symbolic segment is extracted, the detection
    model is run on the matching entry of ``test_images``, and -- when the
    gold landmark index is below 63 and the gold theta is below 4.0 -- the
    argmax theta predicted for that landmark is compared with the gold theta.

    The accuracy is reported as a percentage of *all* data points (including
    those filtered out by the condition above). An empty dataset yields 0.0
    instead of raising ``ZeroDivisionError``.

    Args:
        test_dataset: sequence of data points accepted by
            ``get_nav_drone_symbolic_instruction_segment``.
        test_images: images indexed in parallel with ``test_dataset``.

    Returns:
        None. The result is only logged.
    """
    angle_accuracy = 0
    for data_point_ix, data_point in enumerate(test_dataset):
        gold_landmark, gold_theta_1, gold_theta_2, gold_r = \
            nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(data_point)

        # Only the theta distribution is needed; the landmark and distance
        # outputs of the detector are ignored here.
        _, _, log_prob_theta = self.resnet_detection_model.get_probs(
            [[test_images[data_point_ix]]])
        prob_theta = list(torch.exp(log_prob_theta.data)[0])

        # Find the angle of the gold landmark and compare.
        if gold_landmark < 63 and gold_theta_1 < 4.0:
            # Single-argument print(...) behaves identically on Python 2 and 3.
            print("GOLD LANDMARK is " + str(gold_landmark))
            argmax_theta_val = gp.get_argmax_action(prob_theta[gold_landmark])
            print("ARGMAX THETA VAL " + str(argmax_theta_val) + " and " + str(gold_theta_1))
            if argmax_theta_val == gold_theta_1:
                angle_accuracy += 1

    # Clamp the denominator so an empty dataset does not divide by zero.
    angle_accuracy = (angle_accuracy * 100.0) / float(max(len(test_dataset), 1))
    logging.info("Angle accuracy of gold landmark is %r", angle_accuracy)
def test(self, test_dataset):
    """Measure theta prediction accuracy over visible landmarks and log it.

    ``test_dataset`` elements are unpacked as ``(image, visible_objects)``.
    For every landmark index below ``self.num_landmark`` that is present in
    ``visible_objects``, the argmax theta is compared with the gold angle
    stored at position 1 of that landmark's entry. Both exact matches and
    matches within one bucket (circular, modulo 12) are counted.

    Returns the mean exact theta accuracy over visible landmarks.
    """
    example_count = 0
    visible_count = 0
    exact_hits = 0
    near_hits = 0

    for image, visible_objects in test_dataset:
        # The model is queried with a batch holding this single image.
        log_prob_theta = self.model.get_probs([[image]])
        theta_probs = list(torch.exp(log_prob_theta.data)[0])

        for landmark_ix in range(self.num_landmark):
            if landmark_ix not in visible_objects:
                continue
            predicted_angle = gp.get_argmax_action(theta_probs[landmark_ix])
            gold_angle = visible_objects[landmark_ix][1]
            if predicted_angle == gold_angle:
                exact_hits += 1
            # Circular distance between the two buckets.
            clockwise = (predicted_angle - gold_angle) % 12
            counter_clockwise = (gold_angle - predicted_angle) % 12
            if min(clockwise, counter_clockwise) <= 1:
                near_hits += 1
            visible_count += 1

        example_count += 1

    visible_norm = float(max(visible_count, 1))
    theta_accuracy = exact_hits / visible_norm
    neighbouring_accuracy = near_hits / visible_norm

    logging.info(
        "Num datapoints %r, num visible landmarks %r and mean theta accuracy %r and neigbhouring accuracy %r",
        example_count, visible_count, theta_accuracy, neighbouring_accuracy)
    return theta_accuracy
def test_classifier(self, agent, test_dataset):
    """Evaluate the segmentation classifier against oracle segment boundaries.

    For each data point the instruction is read token by token along the
    oracle segmentation. After every read the model's segmentation argmax is
    compared with the oracle: action 1 exactly at the end of a segment is a
    true positive, action 1 earlier is a false positive, action 0 at the end
    is a false negative and action 0 earlier is a true negative. The state is
    advanced with ``update_on_act_halt`` after each oracle segment regardless
    of what the model predicted.

    Precision, recall and F1 are logged together with up to 20 shuffled
    false-positive, false-negative and perfectly segmented examples.
    ``agent`` is not used. Nothing is returned.
    """
    fp, fn, tp, tn = 0, 0, 0, 0
    fp_examples = []
    fn_examples = []
    perfect_segmented_examples = []

    def _shuffle_and_log(header, examples):
        # Shuffle in place, then log the header and at most 20 examples.
        random.shuffle(examples)
        logging.info(header)
        for example in examples[:20]:
            logging.info(example)

    for data_point in test_dataset:
        # No image is supplied to the state for this evaluation.
        state = AgentObservedState(
            instruction=data_point.instruction,
            config=self.config,
            constants=self.constants,
            start_image=None,
            previous_action=None)
        segments = data_point.get_instruction_oracle_segmented()
        segment_lens = [len(segment) for segment in segments]
        num_mistakes = 0

        for segment, seg_len in zip(segments, segment_lens):
            segment_instruction = debug.instruction_to_string(segment, self.config)

            for num_read in range(1, seg_len + 1):
                state = state.update_on_read()
                candidate_instruction = debug.instruction_to_string(
                    segment[:num_read], self.config)
                model_log_probs = list(
                    self.model.get_segmentation_probs([state]).view(-1).data)
                pred_action = gp.get_argmax_action(model_log_probs)
                at_segment_end = num_read == seg_len

                if pred_action == 0:
                    if at_segment_end:
                        fn += 1
                        num_mistakes += 1
                        fn_examples.append((candidate_instruction, segment_instruction))
                    else:
                        tn += 1
                elif pred_action == 1:
                    if at_segment_end:
                        tp += 1
                    else:
                        fp += 1
                        num_mistakes += 1
                        fp_examples.append((candidate_instruction, segment_instruction))

            # Follow the oracle: halt after the whole segment has been read.
            state = state.update_on_act_halt()

        if num_mistakes == 0:
            instruction_strings = [
                debug.instruction_to_string(segment, self.config) for segment in segments]
            perfect_segmented_examples.append(" ----- ".join(instruction_strings))

    # Precision and recall default to 1.0 when their denominator is empty.
    precision = (tp * 1.0) / (fp + tp) if fp + tp > 0 else 1.0
    recall = (tp * 1.0) / (fn + tp) if fn + tp > 0 else 1.0
    if precision + recall > 0:
        f1 = (2.0 * precision * recall) / (precision + recall)
    else:
        f1 = 0.0

    _shuffle_and_log("FP EXAMPLES:", fp_examples)
    _shuffle_and_log("FN EXAMPLES:", fn_examples)
    _shuffle_and_log("PERFECT SEGMENTED EXAMPLES:", perfect_segmented_examples)

    logging.info("testing results: precision=%.2f; recall=%f; f1=%.2f" % (precision, recall, f1))
def test_classifier(self, agent, test_dataset):
    """Evaluate symbolic text predictions (landmark, theta1, theta2, r) and log accuracy.

    For each data point the model's four log-probability outputs are
    exponentiated and their argmax compared with the gold symbolic segment.
    Exact-match counts are kept per component and for all four jointly; the
    theta errors are additionally accumulated as circular bucket distances
    (modulo ``NO_BUCKETS``) and reported scaled by ``BUCKET_WIDTH``.

    When theta2 is wrong *and* falls into a different coarse direction than
    the gold theta2, the sentence and the mismatch are printed.

    Confusion matrices are updated per data point but not reported here.
    ``agent`` is not used. Nothing is returned.
    """
    def _direction_of(angle_binned):
        # Coarse direction label for a binned angle; "FAIL" for negative bins.
        if angle_binned >= 42 or 0 <= angle_binned < 6:
            return "BEHIND OF"
        if 6 <= angle_binned < 18:
            return "RIGHT OF"
        if 18 <= angle_binned < 30:
            return "FRONT OF"
        if 30 <= angle_binned < 42:
            return "LEFT OF"
        return "FAIL"

    def _circular_error(gold, predicted):
        # Shortest distance between two buckets on a ring of NO_BUCKETS.
        offset = (gold - predicted) % NO_BUCKETS
        return min(offset, NO_BUCKETS - offset)

    full_hits = 0
    landmark_hits = 0
    theta_1_hits = 0
    theta_2_hits = 0
    theta_1_error = 0
    theta_2_error = 0
    r_hits = 0

    cmatrix_landmark = np.zeros((67, 67))
    cmatrix_theta1 = np.zeros((NO_BUCKETS, NO_BUCKETS))
    cmatrix_theta2 = np.zeros((NO_BUCKETS, NO_BUCKETS))
    cmatrix_range = np.zeros((15, 15))

    for data_point in test_dataset:
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=None,
                                   previous_action=None)
        prob_landmark, prob_theta_1, prob_theta_2, prob_r = self.model.get_symbolic_text_batch(
            [state])
        prob_landmark_float = list(torch.exp(prob_landmark.data)[0])
        prob_theta_1_float = list(torch.exp(prob_theta_1.data)[0])
        prob_theta_2_float = list(torch.exp(prob_theta_2.data)[0])
        prob_r_float = list(torch.exp(prob_r.data)[0])

        landmark = gp.get_argmax_action(prob_landmark_float)
        theta_1 = gp.get_argmax_action(prob_theta_1_float)
        theta_2 = gp.get_argmax_action(prob_theta_2_float)
        r = gp.get_argmax_action(prob_r_float)

        gold_landmark, gold_theta_1, gold_theta_2, gold_r = \
            nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(data_point)

        plaintext_sentence = self.get_sentence(data_point.instruction)

        landmark_ok = gold_landmark == landmark
        theta_1_ok = gold_theta_1 == theta_1
        theta_2_ok = gold_theta_2 == theta_2
        r_ok = gold_r == r

        if landmark_ok:
            landmark_hits += 1
        if theta_1_ok:
            theta_1_hits += 1
        if theta_2_ok:
            theta_2_hits += 1
        else:
            predicted_direction = _direction_of(theta_2)
            gold_direction = _direction_of(gold_theta_2)
            if predicted_direction != gold_direction:
                # Single-argument print(...) emits the same text on Python 2 and 3.
                print("SENTENCE IS: " + plaintext_sentence)
                print("INCORRECT THETA: " + predicted_direction + "(" + str(
                    theta_2) + ")" + " instead of " + gold_direction + "(" + str(gold_theta_2) + ")")

        theta_1_error += _circular_error(gold_theta_1, theta_1)
        theta_2_error += _circular_error(gold_theta_2, theta_2)

        if r_ok:
            r_hits += 1
        if landmark_ok and theta_1_ok and theta_2_ok and r_ok:
            full_hits += 1

        # Update confusion matrices (rows: gold, columns: predicted).
        cmatrix_landmark[gold_landmark][landmark] += 1
        cmatrix_theta1[gold_theta_1][theta_1] += 1
        cmatrix_theta2[gold_theta_2][theta_2] += 1
        cmatrix_range[gold_r][r] += 1

    dataset_size = len(test_dataset)
    denominator = float(max(1, dataset_size))

    landmark_accuracy = (landmark_hits * 100) / denominator
    theta_1_accuracy = (theta_1_hits * 100) / denominator
    theta_2_accuracy = (theta_2_hits * 100) / denominator
    theta_1_regression_accuracy = (BUCKET_WIDTH * theta_1_error) / denominator
    theta_2_regression_accuracy = (BUCKET_WIDTH * theta_2_error) / denominator
    r_accuracy = (r_hits * 100) / denominator
    accuracy = (full_hits * 100) / denominator

    logging.info(
        "Test accuracy on dataset of size %r is landmark %r, theta1 %r %r angle, theta2 %r %r angle, "
        "r %r, total acc %r",
        dataset_size, landmark_accuracy, theta_1_accuracy, theta_1_regression_accuracy,
        theta_2_accuracy, theta_2_regression_accuracy, r_accuracy, accuracy)
def test(self, agent, test_dataset):
    """Roll out the stored trajectories and score visible-object detection.

    For each data point the environment is reset through ``agent.server`` and
    the data point's trajectory is replayed. Before every action the model's
    per-landmark probabilities are turned into a predicted set (landmarks
    whose argmax action is 1), which is scored against the visible objects
    returned by the model and recorded in the confusion matrix. After the
    trajectory a halt is sent to the server.

    The mean F1 over all scored steps is logged; nothing is returned.
    """
    def _pose_and_position(metadata):
        # Pose is the y angle divided by 15.0 and truncated to int.
        y_angle = metadata["y_angle"]
        return int(y_angle / 15.0), (metadata["x_pos"], metadata["z_pos"], y_angle)

    f1_total = 0
    step_count = 0

    for data_point in test_dataset:
        image, metadata = agent.server.reset_receive_feedback(data_point)
        pose, position_orientation = _pose_and_position(metadata)
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   pose=pose,
                                   position_orientation=position_orientation,
                                   data_point=data_point)

        for action in data_point.get_trajectory():
            log_prob, visible_objects = self.model.get_probs_and_visible_objects([state])
            prob = list(torch.exp(log_prob.data)[0])

            # Landmarks the model marks as visible (argmax action 1).
            predicted_set = set()
            for landmark_ix in range(63):
                if gp.get_argmax_action(prob[landmark_ix]) == 1:
                    predicted_set.add(landmark_ix)

            gold_visible = visible_objects[0]
            f1_total += SupervisedLearningDetectVisibleObject.get_f1_score(gold_visible, predicted_set)
            self.update_confusion_matrix(gold_visible, predicted_set)
            step_count += 1

            # Execute the action and refresh the agent state from the feedback.
            image, reward, metadata = agent.server.send_action_receive_feedback(action)
            pose, position_orientation = _pose_and_position(metadata)
            state = state.update(
                image, action, pose=pose,
                position_orientation=position_orientation,
                data_point=data_point)

        # Send the final STOP action and receive feedback.
        image, reward, metadata = agent.server.halt_and_receive_feedback()

    mean_f1_score = f1_total / float(max(step_count, 1))
    logging.info("Object detection accuracy on a dataset of size %r the mean f1 score is %r",
                 step_count, mean_f1_score)
def test_classifier(self, agent, test_dataset, test_images):
    """Evaluate symbolic text predictions with the landmark restricted to existing ones.

    For each data point the model's landmark / theta1 / theta2 / r
    log-probabilities are exponentiated. The landmark is chosen as the
    highest-probability index among those returned by
    ``self.get_existing_landmarks``; the other three components use a plain
    argmax. Predictions are compared with the gold symbolic segment.

    Logged metrics (percentages over the dataset size unless noted):
      * joint accuracy of all four components,
      * landmark accuracy and "landmark bucket" accuracy (element 1 of the
        predicted and gold landmarks' entries in the r/theta dictionary agree),
      * theta1 / theta2 accuracy plus their mean circular bucket error scaled
        by ``self.discretize``,
      * distance (r) accuracy.

    Confusion matrices are updated per data point but not reported here.
    ``agent`` is not used.

    Args:
        agent: unused.
        test_dataset: sequence of data points.
        test_images: start images indexed in parallel with ``test_dataset``.

    Returns:
        None. Results are only logged.

    Raises:
        AssertionError: if no existing landmark has a probability above 0.
    """
    accuracy = 0
    landmark_accuracy = 0
    landmark_bucket_accuracy = 0
    theta_1_accuracy = 0
    theta_2_accuracy = 0
    theta_1_regression_accuracy = 0
    theta_2_regression_accuracy = 0
    r_accuracy = 0

    cmatrix_landmark = np.zeros((67, 67))
    cmatrix_theta1 = np.zeros((self.num_buckets, self.num_buckets))
    cmatrix_theta2 = np.zeros((self.num_buckets, self.num_buckets))
    cmatrix_range = np.zeros((15, 15))

    for data_point_ix, data_point in enumerate(test_dataset):
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   data_point=data_point,
                                   start_image=test_images[data_point_ix],
                                   previous_action=None)

        prob_landmark, prob_theta_1, prob_theta_2, prob_r = self.model.get_symbolic_text_batch([state])
        prob_landmark_float = list(torch.exp(prob_landmark.data)[0])
        prob_theta_1_float = list(torch.exp(prob_theta_1.data)[0])
        prob_theta_2_float = list(torch.exp(prob_theta_2.data)[0])
        prob_r_float = list(torch.exp(prob_r.data)[0])

        # Heuristic: take the argmax over landmarks, but only among the
        # landmarks reported by get_existing_landmarks.
        landmark_pos_dict = state.get_landmark_pos_dict()
        visible_objects = self.get_existing_landmarks(landmark_pos_dict)
        max_score = 0
        max_scoring_visible_object = -1
        for i in visible_objects:
            if prob_landmark_float[i] > max_score:
                max_score = prob_landmark_float[i]
                max_scoring_visible_object = i
        landmark = max_scoring_visible_object
        assert landmark != -1

        theta_1 = gp.get_argmax_action(prob_theta_1_float)
        theta_2 = gp.get_argmax_action(prob_theta_2_float)
        r = gp.get_argmax_action(prob_r_float)

        gold_landmark, gold_theta_1, gold_theta_2, gold_r = \
            nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(data_point)

        plaintext_sentence = self.get_sentence(data_point.instruction)

        x_pos, z_pos, y_angle = data_point.get_start_pos()
        landmark_r_theta_dict = self.get_all_landmark_r_theta(x_pos, z_pos, y_angle, landmark_pos_dict)
        # Element 1 of each entry is compared; presumably the theta bucket -- TODO confirm.
        if landmark_r_theta_dict[LANDMARK_NAMES[landmark]][1] == \
                landmark_r_theta_dict[LANDMARK_NAMES[gold_landmark]][1]:
            landmark_bucket_accuracy += 1

        if gold_landmark == landmark:
            landmark_accuracy += 1
        else:
            logging.info("Sentence is %r, predicts landmark %r instead of %r",
                         plaintext_sentence, LANDMARK_NAMES[landmark], LANDMARK_NAMES[gold_landmark])

        if gold_theta_1 == theta_1:
            theta_1_accuracy += 1
        if gold_theta_2 == theta_2:
            theta_2_accuracy += 1

        # Circular distance between gold and predicted buckets.
        theta_1_offset = (gold_theta_1 - theta_1) % self.num_buckets
        theta_1_regression_accuracy += min(theta_1_offset, self.num_buckets - theta_1_offset)
        theta_2_offset = (gold_theta_2 - theta_2) % self.num_buckets
        theta_2_regression_accuracy += min(theta_2_offset, self.num_buckets - theta_2_offset)

        if gold_r == r:
            r_accuracy += 1
        if gold_landmark == landmark and gold_theta_1 == theta_1 and gold_theta_2 == theta_2 and gold_r == r:
            accuracy += 1

        # Update confusion matrices (rows: gold, columns: predicted).
        cmatrix_landmark[gold_landmark][landmark] += 1
        cmatrix_theta1[gold_theta_1][theta_1] += 1
        cmatrix_theta2[gold_theta_2][theta_2] += 1
        cmatrix_range[gold_r][r] += 1

    dataset_size = len(test_dataset)
    # Clamp so an empty dataset reports zeros instead of dividing by zero.
    denominator = float(max(1, dataset_size))

    landmark_accuracy = (landmark_accuracy * 100) / denominator
    landmark_bucket_accuracy = (landmark_bucket_accuracy * 100) / denominator
    theta_1_accuracy = (theta_1_accuracy * 100) / denominator
    theta_2_accuracy = (theta_2_accuracy * 100) / denominator
    theta_1_regression_accuracy = (self.discretize * theta_1_regression_accuracy) / denominator
    theta_2_regression_accuracy = (self.discretize * theta_2_regression_accuracy) / denominator
    r_accuracy = (r_accuracy * 100) / denominator
    accuracy = (accuracy * 100) / denominator

    # The format string has two placeholders, so both the dataset size and
    # the accuracy must be passed; with one argument the record fails to format.
    logging.info("Test accuracy on dataset of size %r is %r percentage", dataset_size, accuracy)
    logging.info("Landmark accuracy is %r, landmark bucket accuracy is %r",
                 landmark_accuracy, landmark_bucket_accuracy)
    logging.info("Theta 1 accuracy is %r and regression accuracy is %r degree",
                 theta_1_accuracy, theta_1_regression_accuracy)
    logging.info("Theta 2 accuracy is %r and regression accuracy is %r degree",
                 theta_2_accuracy, theta_2_regression_accuracy)
    logging.info("Distance accuracy is %r", r_accuracy)