def produce_concatenated_features(old_score_folder, filename, model_list,
                                  feature_folder_dirs, modalities_with_new_features,
                                  modalities_to_regenerate):
    new_input_data = []

    # Convert the shared feature filename into the individual feature file naming format,
    # i.e. from act_0_index_2_combo_010.features.npy into act_0_index_2.features.npy
    feature_filename = filename[:filename.index("_combo")] + ".features.npy"

    # Convert the shared feature filename into the score naming format,
    # i.e. from act_0_index_2_combo_010.features.npy into act_0_index_2_combo_010.scores.npy
    score_filename = filename.replace("features", "scores")

    # Convert the score name into the new score file name,
    # i.e. from act_0_index_2_combo_010.scores.npy into act_0_index_2_combo_010_newcombo_111.scores.npy
    new_score_filename = score_filename[:score_filename.index(".scores")]

    # This produces the new combination of modalities that we are fusing together
    # (both regenerated and new).
    new_combination = [
        modalities_with_new_features[i] or modalities_to_regenerate[i]
        for i in range(len(modalities_with_new_features))
    ]
    new_score_filename += ("_newcombo_"
                           + "".join(str(int(i)) for i in new_combination)
                           + ".scores.npy")

    # Iterate through each modality.
    for i in range(len(model_list)):
        if modalities_with_new_features[i]:
            # A new feature arrived for this modality; load it directly.
            input_data = loadArrayFromFile(feature_folder_dirs[i] + "/" + feature_filename)
            new_input_data.append(input_data)
        elif modalities_to_regenerate[i]:
            # Regenerate the feature for this modality: append the regeneration
            # mask to the old score vector and feed it to this modality's model.
            score_data = loadArrayFromFile(old_score_folder + "/" + score_filename)
            score_data = np.hstack([
                score_data,
                np.expand_dims(np.array(modalities_to_regenerate), axis=0)
            ])
            input_data = model_list[i].predict(score_data, batch_size=1)
            new_input_data.append(input_data)
        else:
            # We have no new input and nothing to regenerate, so this modality
            # contributes a zero array of its feature size.
            input_data = np.zeros_like(
                loadArrayFromFile(feature_folder_dirs[i] + "/" + feature_filename))
            new_input_data.append(input_data)

    new_input_data = np.hstack(new_input_data)
    return new_input_data, new_score_filename
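# Example call (a minimal sketch; the folder names, model list, and modality
# masks below are hypothetical placeholders, not values taken from this repo):
#
#   fused, new_name = produce_concatenated_features(
#       "scores/augmented_testing",              # folder holding the old .scores.npy files
#       "act_0_index_2_combo_010.features.npy",  # shared feature filename
#       model_list,                              # one regeneration model per modality
#       ["features/image", "features/imu", "features/audio"],
#       modalities_with_new_features=[0, 1, 0],  # modality 1 arrived with real features
#       modalities_to_regenerate=[1, 0, 0])      # regenerate modality 0 from old scores
#   # new_name == "act_0_index_2_combo_010_newcombo_110.scores.npy"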
def train_feature_fusion_model(training_dir, validation_dir,
                               model_save_path="saved_model/fusion/fusion_model",
                               total_epochs=10):
    if not os.path.exists(model_save_path):
        os.makedirs(model_save_path)  # Create intermediate directories as needed.

    training_filelist = os.listdir(training_dir)
    validation_files = os.listdir(validation_dir)
    batch_size = 64  # Number of chunk files per batch.

    # Load one example input to determine the model's input length.
    example_input = loadArrayFromFile(training_dir + "/" + training_filelist[0])
    input_length = example_input.shape[-1:]

    steps_per_epoch = ceil(len(training_filelist) / batch_size)
    validation_steps = ceil(len(validation_files) / batch_size)

    model = FusionClassifier(input_length, 6)

    tensorboard_folder = "fusion_model"
    save_model_callback = ModelCheckpoint(model_save_path + "/epoch_{epoch:05d}.h5",
                                          period=1)

    model.fit_generator(
        generate_chunks(parent_dir=training_dir, files=training_filelist,
                        batch_size=batch_size),
        epochs=total_epochs,
        steps_per_epoch=steps_per_epoch,
        validation_data=generate_chunks(parent_dir=validation_dir,
                                        files=validation_files,
                                        batch_size=batch_size),
        validation_steps=validation_steps,
        callbacks=[TensorBoard(log_dir="logs/" + tensorboard_folder),
                   save_model_callback])
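# generate_chunks is defined elsewhere in this repo. For reference, a minimal
# sketch of the generator shape fit_generator expects above (the body here is
# an assumption, not the repo's actual implementation):
#
#   def generate_chunks(parent_dir, files, batch_size):
#       while True:  # Keras generators must yield indefinitely.
#           random.shuffle(files)
#           for start in range(0, len(files), batch_size):
#               batch = files[start:start + batch_size]
#               data, labels = zip(*(load_data_and_label(parent_dir, f) for f in batch))
#               yield np.array(data), keras.utils.to_categorical(labels, num_classes=6)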
def load_data_and_label(parent_dir, filename):
    data = loadArrayFromFile(parent_dir + "/" + filename)
    # The activity label is encoded in the filename, e.g. act_3_index_7...
    label = int(filename[filename.index("act_") + 4:filename.index("_index")])
    data = data.flatten()
    return data, label
def load_data_from_file(filepath, isScore=False):
    data = loadArrayFromFile(filepath)
    data = data.flatten()
    # If this is a score file, we need to append the modality list information
    # encoded in the file path.
    if isScore:
        modality_list = convert_scorepath_to_combolist(filepath)
        data = np.append(data, modality_list)
    return data
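# convert_scorepath_to_combolist is defined elsewhere. Judging by the filename
# convention used throughout this file, it likely parses the combo bits out of
# the score path; a minimal sketch under that assumption:
#
#   def convert_scorepath_to_combolist(filepath):
#       # e.g. ".../act_0_index_5_combo_010.scores.npy" -> [0, 1, 0]
#       combo = filepath[filepath.index("_combo_") + 7:filepath.index(".scores")]
#       return [int(c) for c in combo]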
def createAugmentedCombo(permutation_list, modality_feature_files, new_feature_top_dir):
    # Load the numpy array for each modality.
    modality_features_data = []
    for feature_file in modality_feature_files:
        modality_features_data.append(loadArrayFromFile(feature_file))

    # Build a matching list of all-zero arrays, one per modality.
    modality_zeroed_features_data = []
    for feature_data in modality_features_data:
        modality_zeroed_features_data.append(np.zeros_like(feature_data))

    # Now we have two lists - one contains the data, the other contains
    # same-sized arrays of all zeroes. We can now apply our combinations.
    for permutation in permutation_list:
        current_data_combination = []
        for i in range(len(permutation)):
            # Each entry of the permutation is truthy if the modality is
            # present in this combination.
            if permutation[i]:
                current_data_combination.append(modality_features_data[i])
            else:
                current_data_combination.append(modality_zeroed_features_data[i])

        # Create a concatenated feature vector.
        current_data_combination = np.hstack(current_data_combination)

        # Create the name for this combined feature vector,
        # e.g. act_0_index_2_combo_010.features
        data_filename = modality_feature_files[0][
            modality_feature_files[0].index("ks/") + 3:
            modality_feature_files[0].index(".features")]
        data_filename += "_combo_" + "".join(str(int(e)) for e in permutation) + ".features"

        # Save the concatenated data to the file.
        saveArrayToFile(current_data_combination, new_feature_top_dir + "/" + data_filename)
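# Example of building a permutation_list for createAugmentedCombo: every
# non-empty presence/absence mask over the three modalities. This is a sketch;
# the repo may generate these masks elsewhere, and the file paths are
# hypothetical (though the "ks/" slice above implies a parent folder ending in
# "ks", such as "..._chunks/"):
#
#   from itertools import product
#   permutation_list = [p for p in product([0, 1], repeat=3) if any(p)]
#   # [(0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 0, 0), ...]
#   createAugmentedCombo(permutation_list,
#                        ["image_chunks/act_0_index_2.features.npy",
#                         "imu_chunks/act_0_index_2.features.npy",
#                         "audio_chunks/act_0_index_2.features.npy"],
#                        "features/augmented_training")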
def evaluate():
    # Files in this folder are of the format: act_0_index_5_combo_010.scores.npy
    old_score_dir = "scores/augmented_testing"
    # Files in this folder are of the format: act_0_index_5_combo_010_newcombo_111.scores.npy
    new_score_dir = "scores/reclassified_testing"

    new_score_files = os.listdir(new_score_dir)
    old_score_files = os.listdir(old_score_dir)
    print("Num Classifications: " + str(len(old_score_files)))

    # MAKE SURE THIS MATCHES WITH THE TRAINING AND PREDICTION
    modality_dirs = ["image", "imu", "audio"]

    # These are the metrics for reclassification.
    num_incorrect_to_correct = 0
    num_correct_to_incorrect = 0
    num_incorrect_to_incorrect = 0
    num_correct_to_correct = 0
    correct_to_incorrect_dicts = [{} for i in range(5)]
    incorrect_to_correct_dicts = [{} for i in range(5)]

    # These are the metrics for no reclassification.
    num_correct = 0
    num_incorrect = 0

    # Iterate through each new score and compare it with the old score.
    # For every new score, remove the corresponding old score from the file list.
    for new_score_file in new_score_files:
        # Get the true activity label from the filename.
        true_prediction = int(new_score_file[new_score_file.index("act_") + 4:
                                             new_score_file.index("_index")])

        # Find the corresponding old score filename.
        old_score_filename = new_score_file[:new_score_file.index("_newcombo")] + ".scores.npy"

        # Remove the old score from the file list.
        if old_score_filename in old_score_files:
            old_score_files.remove(old_score_filename)

        # Build the filepaths for the old and new score files and load the arrays.
        new_score_filepath = new_score_dir + "/" + new_score_file
        old_score_filepath = old_score_dir + "/" + old_score_filename
        new_scores = loadArrayFromFile(new_score_filepath)
        old_scores = loadArrayFromFile(old_score_filepath)

        # Parse the old and new modality combinations out of the filename.
        old_modality_list = new_score_file[new_score_file.index("combo_") + 6:
                                           new_score_file.index("_newcombo")]
        old_modality_list = [int(c) for c in old_modality_list]
        new_modality_list = new_score_file[new_score_file.index("newcombo_") + 9:
                                           new_score_file.index(".scores")]
        new_modality_list = [int(c) for c in new_modality_list]

        # Which modalities were added in the new combination.
        added_modality_list = [
            new_modality_list[i] - old_modality_list[i]
            for i in range(len(old_modality_list))
        ]

        # Get the top activity from both score arrays.
        top_generated_prediction = np.argmax(new_scores)
        top_old_prediction = np.argmax(old_scores)

        if top_old_prediction != true_prediction and top_generated_prediction == true_prediction:
            # Old prediction was wrong, new prediction is right.
            num_incorrect_to_correct += 1
            incorrect_to_correct_dicts = addToDictionaries(
                incorrect_to_correct_dicts, true_prediction, old_modality_list,
                added_modality_list, modality_dirs, num_classes=6).copy()
        elif top_old_prediction == true_prediction and top_generated_prediction != true_prediction:
            # Old prediction was right, new prediction is wrong.
            num_correct_to_incorrect += 1
            correct_to_incorrect_dicts = addToDictionaries(
                correct_to_incorrect_dicts, true_prediction, old_modality_list,
                added_modality_list, modality_dirs, num_classes=6)
        elif top_old_prediction != true_prediction and top_generated_prediction != true_prediction:
            # Both old and new predictions are wrong.
            num_incorrect_to_incorrect += 1
        elif top_old_prediction == true_prediction and top_generated_prediction == true_prediction:
            # Both old and new predictions are right.
            num_correct_to_correct += 1

    print("Num Ignored Classifications " + str(len(old_score_files)))

    # Now iterate through the remaining old files and see how many
    # opportunities we missed with reclassification.
    for old_score_file in old_score_files:
        # Get the true activity label from the filename.
        true_prediction = int(old_score_file[old_score_file.index("act_") + 4:
                                             old_score_file.index("_index")])

        old_score_filepath = old_score_dir + "/" + old_score_file
        old_scores = loadArrayFromFile(old_score_filepath)
        top_old_prediction = np.argmax(old_scores)

        if top_old_prediction == true_prediction:
            num_correct += 1
        else:
            num_incorrect += 1

    print("Num Correct without Reclassification: " + str(num_correct))
    print("Num Incorrect without Reclassification: " + str(num_incorrect))

    # Remember, this may be more than the actual number of classifications,
    # because each old classification may have multiple possible modality
    # combinations added to it for reclassification.
    print("\n\nNum Reclassifications: " + str(len(new_score_files)))
    print("\tIncorrect to Correct: " + str(num_incorrect_to_correct))
    print("\tCorrect to Incorrect: " + str(num_correct_to_incorrect))
    print("\tIncorrect to Incorrect: " + str(num_incorrect_to_incorrect))
    print("\tCorrect to Correct: " + str(num_correct_to_correct))

    # Metrics for incorrect-to-correct reclassifications.
    pprint(incorrect_to_correct_dicts)

    return [num_incorrect_to_correct, num_correct_to_incorrect,
            num_incorrect_to_incorrect, num_correct_to_correct,
            len(new_score_files), num_incorrect, len(old_score_files),
            incorrect_to_correct_dicts]
def loadChunkAndLabelFromFile(filepath):
    # The sequence ID is encoded in the filename, e.g. seq_12_ci_3_cs_90.npy
    sequence = int(filepath[filepath.index("seq_") + 4:filepath.index("_ci")])
    label = findLabelFromSequence(sequence)
    return loadArrayFromFile(filepath), label
def loadChunkAndLabelFromID(parent_dir, sequence, chunk_index, chunk_size):
    # Reconstruct the chunk filename from its IDs, e.g. seq_12_ci_3_cs_90.npy
    filename = ("seq_" + str(sequence) + "_ci_" + str(chunk_index)
                + "_cs_" + str(chunk_size) + ".npy")
    label = findLabelFromSequence(sequence)
    return loadArrayFromFile(parent_dir + "/" + filename), label
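# findLabelFromSequence is assumed to map a recording sequence ID to its
# activity label, e.g. via a lookup table built from the dataset's metadata.
# A minimal sketch under that assumption (the mapping values are hypothetical):
#
#   SEQUENCE_TO_LABEL = {0: 0, 1: 0, 2: 1}  # sequence ID -> activity class
#
#   def findLabelFromSequence(sequence):
#       return SEQUENCE_TO_LABEL[sequence]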