def _parse_labels(keys):
    """Return the integer label stored after the last comma of each key string."""
    return [int(key.strip().split(',')[-1]) for key in keys]


def main(adjustable, h5_data_list, all_ranking, merged_training_pos, merged_training_neg):
    """Run the whole training and testing phase.

    A model is either loaded from ``pc.SAVE_LOCATION_MODEL_WEIGHTS`` (when
    ``adjustable.load_model_name`` is set) or created and compiled from scratch.
    It is then trained for ``adjustable.epochs`` epochs on all positive keys plus
    an equally sized, freshly drawn sample of negative keys, and finally
    evaluated on the ranking keys of every dataset in ``adjustable.datasets``.

    :param adjustable:          object holding the experiment settings read here (``load_model_name``,
                                ``cost_module_type``, ``epochs``, ``datasets``, ``save_inbetween``, ...)
    :param h5_data_list:        passed through to ``train_network_light`` and ``ddl.grab_em_by_the_keys``
    :param all_ranking:         sequence indexed like ``adjustable.datasets``; each entry is a list of key strings
                                whose last comma-separated field is the integer label
    :param merged_training_pos: list of positive training key strings (label after the last comma)
    :param merged_training_neg: list of negative training key strings; NOTE: shuffled in place every epoch
    :return:    array of dataset names, array containing the confusion matrix for each dataset, array containing the
                ranking for each dataset
    """
    # The cost module decides both the label encoding and how predictions are reported.
    categorical_labels = adjustable.cost_module_type in ('neural_network', 'euclidean_fc')
    distance_output = adjustable.cost_module_type in ('euclidean', 'cosine')

    if adjustable.load_model_name is not None:
        # NOTE(review): a loaded model is used exactly as restored by load_model and is not
        # recompiled here - confirm this matches the intended layout of the original file.
        model = load_model(os.path.join(pc.SAVE_LOCATION_MODEL_WEIGHTS, adjustable.load_model_name))
    else:
        model = create_siamese_network(adjustable)
        if categorical_labels:
            nadam = optimizers.Nadam(lr=adjustable.learning_rate, schedule_decay=pc.DECAY_RATE)
            model.compile(loss=adjustable.loss_function, optimizer=nadam, metrics=['accuracy'])
        elif distance_output:
            rms = keras.optimizers.RMSprop()
            model.compile(loss=contrastive_loss, optimizer=rms, metrics=[absolute_distance_difference])

    for epoch in range(adjustable.epochs):
        print('epoch %d/%d' % (epoch, adjustable.epochs))
        # sample from the big set of negative training instances
        # (in-place shuffle: the caller's list is reordered)
        random.shuffle(merged_training_neg)
        training_neg_sample = merged_training_neg[0:len(merged_training_pos)]

        # now we have the final list of keys to the instances we use for training
        final_training_data = merged_training_pos + training_neg_sample
        random.shuffle(final_training_data)
        final_training_data = pu.sideways_shuffle(final_training_data)

        final_training_labels = _parse_labels(final_training_data)
        if categorical_labels:
            final_training_labels = keras.utils.to_categorical(final_training_labels, pc.NUM_CLASSES)

        train_network_light(adjustable, model, final_training_data, final_training_labels, h5_data_list)

        if adjustable.save_inbetween and adjustable.iterations == 1 and epoch + 1 in adjustable.save_points:
            time_stamp = time.strftime('scnn_%d%m%Y_%H%M')
            if adjustable.name_indication == 'dataset_name' and len(adjustable.datasets) == 1:
                tag = adjustable.datasets[0]
            else:
                if adjustable.name_indication != 'epoch':
                    # Previously this case passed None to os.path.join and crashed after the
                    # epoch had already been trained; fall back to epoch-based names instead.
                    print('Warning: cannot name the model by %r, falling back to epoch naming'
                          % (adjustable.name_indication,))
                tag = 'epoch_%s' % str(epoch + 1)

            model.save(os.path.join(pc.SAVE_LOCATION_MODEL_WEIGHTS, '%s_%s_model.h5' % (time_stamp, tag)))
            model.save_weights(os.path.join(pc.SAVE_LOCATION_MODEL_WEIGHTS, '%s_%s_weights.h5' % (time_stamp, tag)))

    confusion_matrices = []
    ranking_matrices = []
    names = []

    for index, name in enumerate(adjustable.datasets):
        names.append(name)
        this_ranking = all_ranking[index]
        test_data = np.asarray(ddl.grab_em_by_the_keys(this_ranking, h5_data_list))

        # make a record of the ranking selection for each dataset
        # for priming
        if adjustable.save_inbetween and adjustable.iterations == 1:
            file_name = '%s_ranking_%s.txt' % (name, adjustable.ranking_time_name)
            file_name = os.path.join(pc.SAVE_LOCATION_RANKING_FILES, file_name)
            with open(file_name, 'w') as my_file:
                # keys are written verbatim, no separator is added
                my_file.writelines(this_ranking)

        final_testing_labels = _parse_labels(this_ranking)
        if categorical_labels:
            final_testing_labels = keras.utils.to_categorical(final_testing_labels, pc.NUM_CLASSES)

        # the two halves of every test pair are fed to the two network inputs
        predictions = model.predict([test_data[0, :], test_data[1, :]])

        if distance_output:
            # zip() is a lazy iterator on Python 3 and cannot be sliced, so slice the inputs
            # and materialise the (prediction, label) pairs explicitly.
            print(list(zip(predictions[0:50], final_testing_labels[0:50])))

        matrix = pu.make_confusion_matrix(adjustable, predictions, final_testing_labels)
        # NOTE(review): the meaning of the matrix entries is defined by pu.make_confusion_matrix;
        # the formulas below are kept exactly as they were.
        accuracy = (matrix[0] + matrix[2]) * 1.0 / (sum(matrix) * 1.0)
        if matrix[0] != 0:
            precision = matrix[0] * 1.0 / (matrix[0] + matrix[1] * 1.0)
        else:
            # avoids 0/0 when both matrix[0] and matrix[1] are zero
            precision = 0
        confusion_matrices.append(matrix)

        ranking = pu.calculate_CMC(adjustable, predictions)
        ranking_matrices.append(ranking)

        print('%s accuracy: %0.2f   precision: %0.2f   confusion matrix: %s \n CMC: \n %s'
              % (name, accuracy, precision, str(matrix), str(ranking)))

    del model
    return names, confusion_matrices, ranking_matrices
def _flatten_partitions(data):
    """Return the entries of ``data`` as one flat list, expanding nested lists by one level."""
    flat = []
    for entry in data:
        if isinstance(entry, list):
            flat.extend(entry)
        else:
            flat.append(entry)
    return flat


def _shuffle_training_keys(adjustable, keys):
    """Shuffle ``keys`` in place, optionally apply the sideways shuffle, and return the result."""
    random.shuffle(keys)
    if adjustable.sideways_shuffle:
        keys = pu.sideways_shuffle(keys)
        # shuffle once more after the sideways shuffle
        random.shuffle(keys)
    return keys


def get_final_training_data(adjustable, train_pos, train_neg):
    """
    Merges the positive and negative training data together accordingly.

    Depending on the settings, the data is either mixed into one pool and
    shuffled as a whole, or shuffled per dataset partition and concatenated in
    partition order. The input lists themselves are never modified.

    :param adjustable:      object of class ProjectVariable; reads ``only_test``, ``sideways_shuffle``, ``mix``,
                            ``dataset_test`` and ``mix_with_test``
    :param train_pos:       list of string pairs containing keys and labels of the positive training data, or a list
                            of such lists (one per dataset)
    :param train_neg:       list of string pairs containing keys and labels of the negative training data, laid out
                            like ``train_pos``
    :return:                a single flat list of string pairs containing keys and labels of the training data, or
                            None when only testing or when the input is invalid (a message is printed)
    """
    if not isinstance(train_pos, list):
        print('Error: train_pos must be a list')
        return None

    if not train_pos:
        # Nothing to inspect; previously train_pos[0] raised IndexError here, which made the
        # "no training datasets" error below unreachable.
        number_of_datasets = 0
    elif isinstance(train_pos[0], list):
        number_of_datasets = len(train_pos)
    elif isinstance(train_pos[0], str):
        number_of_datasets = 1
    else:
        print('Warning: something weird is happening')
        return None

    if adjustable.only_test:
        # only test, nothing to do
        print('Only testing, nothing to train here.')
        return None

    if number_of_datasets == 0:
        print('Error: no training datasets have been specified')
        return None

    # Everything goes into one pool when there is a single dataset, or when mixing is requested
    # and either there is no test dataset or mixing with the test dataset is allowed.
    mix_everything = (
        number_of_datasets == 1
        or (adjustable.mix and (adjustable.dataset_test is None or adjustable.mix_with_test))
    )

    if mix_everything:
        # Flatten per-dataset partitions so the result is one list of key strings as documented;
        # plain concatenation of nested lists would return a list of partitions instead.
        merged = _flatten_partitions(train_pos) + _flatten_partitions(train_neg)
        return _shuffle_training_keys(adjustable, merged)

    # train in order (or keep the test partition, which is at the end, unmixed):
    # shuffle inside every partition and append the partitions in their original order
    final_training_data = []
    for index, negatives in enumerate(train_neg):
        partition = train_pos[index] + negatives
        final_training_data += _shuffle_training_keys(adjustable, partition)
    return final_training_data