def test_load_test_data(self):
    """Check that ``load_test_data`` fills the test attributes and labels.

    A fresh ``HAPT`` object is expected to hold ``None`` for both private
    test containers; after loading, both must be non-empty and of equal
    length, and the public getters must agree on that length.
    """
    hapt = data.HAPT()

    # nothing is loaded right after construction
    assert hapt._test_attrs is None
    assert hapt._test_labels is None

    hapt.load_test_data()

    # both containers are populated ...
    assert len(hapt._test_attrs) > 0
    assert len(hapt._test_labels) > 0
    # ... and there is exactly one label per sample
    assert len(hapt._test_attrs) == len(hapt._test_labels)
    assert len(hapt.get_test_data()) == len(hapt.get_test_labels())
def test_get_aggr2initial_labs_map(self):
    """Check the mapping from aggregated group names to original label names."""
    expected_groups = {
        'WALKING': ['WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS'],
        'STATIC': ['SITTING', 'STANDING', 'LAYING'],
        'TRANSITION': [
            'STAND_TO_SIT',
            'SIT_TO_STAND',
            'SIT_TO_LIE',
            'LIE_TO_SIT',
            'STAND_TO_LIE',
            'LIE_TO_STAND',
        ],
    }

    hapt = data.HAPT()
    hapt.load_all_data()
    hapt.aggregate_groups()

    assert hapt.get_aggr2initial_labs_map() == expected_groups
def test_get_labels_map(self):
    """Check that ``get_labels_map`` fills ``_labels`` and returns the id -> name map."""
    activity_names = [
        "WALKING",
        "WALKING_UPSTAIRS",
        "WALKING_DOWNSTAIRS",
        "SITTING",
        "STANDING",
        "LAYING",
        "STAND_TO_SIT",
        "SIT_TO_STAND",
        "SIT_TO_LIE",
        "LIE_TO_SIT",
        "STAND_TO_LIE",
        "LIE_TO_STAND",
    ]
    # label ids are 1-based: 1 -> "WALKING", ..., 12 -> "LIE_TO_STAND"
    expected_map = dict(enumerate(activity_names, start=1))

    hapt = data.HAPT()
    # the map is empty until it is requested for the first time
    assert hapt._labels == {}

    hapt.get_labels_map()

    assert hapt._labels == expected_map
    assert hapt.get_labels_map() == expected_map
def test_get_aggregated_train_labels(self):
    """Check aggregated train labels before and after ``aggregate_groups``.

    The object is fed hand-made labels instead of loaded data. Before the
    aggregation the getter must hand back the raw train labels; afterwards
    the twelve original ids must be replaced by the expected group ids.
    """
    activity_names = [
        "WALKING",
        "WALKING_UPSTAIRS",
        "WALKING_DOWNSTAIRS",
        "SITTING",
        "STANDING",
        "LAYING",
        "STAND_TO_SIT",
        "SIT_TO_STAND",
        "SIT_TO_LIE",
        "LIE_TO_SIT",
        "STAND_TO_LIE",
        "LIE_TO_STAND",
    ]

    hapt = data.HAPT()
    hapt._labels = dict(enumerate(activity_names, start=1))
    hapt._test_labels = list(range(1, 13))      # 1, 2, ..., 12
    hapt._train_labels = list(range(12, 0, -1))  # 12, 11, ..., 1

    # no aggregation performed yet -> original labels come back
    assert hapt.get_aggregated_train_labels() == hapt._train_labels

    hapt.aggregate_groups()

    expected_groups = np.array([2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0])
    assert np.array_equal(hapt.get_aggregated_train_labels(), expected_groups)
def get_samples_labels_datasets(dataset, aggregate):
    """Load the HAPT data and pick the requested subset(s).

    dataset:   "all" selects the test and the train set (in that order),
               "train" selects the train set only, any other value selects
               the test set only.
    aggregate: when truthy, labels are taken from the aggregated groups
               instead of the original labels.

    return: [tuple of lists] (samples, labels, datasets) - parallel lists
            with one entry per selected set; ``datasets`` holds the set
            names ("test_set" / "train_set").
    """
    hapt = data.HAPT()
    hapt.load_all_data()

    test_labels = hapt.get_test_labels()
    train_labels = hapt.get_train_labels()
    if aggregate:
        print("data aggregation...")
        hapt.aggregate_groups()
        test_labels = hapt.get_aggregated_test_labels()
        train_labels = hapt.get_aggregated_train_labels()

    test_samples = hapt.get_test_data()
    train_samples = hapt.get_train_data()

    test_only = ([test_samples], [test_labels], ["test_set"])
    selections = {
        "all": ([test_samples, train_samples],
                [test_labels, train_labels],
                ["test_set", "train_set"]),
        "train": ([train_samples], [train_labels], ["train_set"]),
    }
    # every unrecognised value falls back to the test set
    return selections.get(dataset, test_only)
def main():
    """Run the evolutionary clustering experiment from the command line.

    Steps:
      1. parse the command line and echo the parameters,
      2. load the HAPT data and select the train or test set, with original
         or aggregated labels,
      3. with ``args.showdata`` only plot the selected data and return,
      4. otherwise call ``evolution.run_SGA`` ``args.repeat`` times and save
         a score plot into the log directory of every run,
      5. when more than one run was made, save the averaged score plot.
    """
    args = parse_arguments(sys.argv[1:])

    print("Parameters:")
    for sys_arg in args.sys_args:
        print(sys_arg)
    print()

    # read data
    # =========
    hapt_data = data.HAPT()
    hapt_data.load_all_data()
    hapt_data.aggregate_groups()

    use_test_set = args.data == "test"

    # defaults: train set with the original (non aggregated) labels
    exp_data = hapt_data.get_train_data()
    exp_labs = hapt_data.get_train_labels()
    exp_labels_map = hapt_data.get_labels_map()
    exp_centroids_num = len(hapt_data.get_labels_map())
    if use_test_set:
        exp_data = hapt_data.get_test_data()
        exp_labs = hapt_data.get_test_labels()
        exp_centroids_num = len(hapt_data.get_labels_map())

    if args.aggregate:
        exp_labs = hapt_data.get_aggregated_train_labels()
        exp_labels_map = hapt_data.get_aggregated_labels_map()
        exp_centroids_num = len(hapt_data.get_aggregated_labels_map())
        if use_test_set:
            exp_labs = hapt_data.get_aggregated_test_labels()

    # Show experiment data
    # ====================
    if args.showdata:
        utils.plot_clusters(exp_data, exp_labs, exp_labels_map, True)
        return

    def describe_experiment():
        # caption entries shared by the per-run and the averaged plot
        return ("pop:" + str(args.pop_num),
                "p_c:" + str(args.prob_cross),
                "p_m:" + str(args.prob_mutation),
                "data size:" + str(len(exp_labs)),
                args.adapt_function,
                args.dist_measure)

    # evolution
    # =========
    iterations_list = []
    scores_list = []
    populations_list = []
    total_time_list = []
    log_dir_list = []
    best_indiv_idx_list = []
    # score, experiment, generation (iteration), individual
    # (kept up to date below, not read again inside this function)
    best_overall = (-1, 0, 0, 0)

    for exp_i in range(args.repeat):
        run_result = evolution.run_SGA(
            args.iter_num,
            exp_data,
            exp_labs,
            args.pop_num,
            args.prob_cross,
            args.prob_mutation,
            exp_centroids_num,
            args.adapt_function,
            args.dist_measure,
            log_dir="logs",
            loggin_pref="exp {}/{}: ".format(exp_i + 1, args.repeat))
        (iterations, scores, populations,
         total_time, log_dir, best_indiv_idx) = run_result

        best_generation, best_individual = best_indiv_idx[0], best_indiv_idx[1]
        cur_best_score = scores[best_generation, best_individual]
        if cur_best_score > best_overall[0]:
            best_overall = (cur_best_score, exp_i,
                            best_generation, best_individual)

        iterations_list.append(iterations)
        scores_list.append(scores)
        populations_list.append(populations)
        total_time_list.append(total_time)
        log_dir_list.append(log_dir)
        best_indiv_idx_list.append(best_indiv_idx)

        # save plot
        plot_tuple = describe_experiment()
        utils.plot_scores(iterations, scores, args.adapt_function,
                          plot_tuple, to_file=True, out_dir=log_dir)

    # visualize
    # =========
    if args.repeat > 1:
        plot_tuple = describe_experiment()
        utils.plot_avg_scores(iterations_list, scores_list,
                              args.adapt_function, best_indiv_idx_list,
                              plot_tuple, to_file=True, out_dirs=log_dir_list)
def main():
    """Evaluate the best individual of a finished experiment on the HAPT data.

    Steps:
      1. read the experiment parameters from the first ``*.txt`` file found
         in ``args.path``,
      2. load ``generations.npy``, ``iterations.npy`` and ``scores.npy`` from
         the same directory and pick the centers stored at the recorded
         best (generation, individual) index,
      3. plot the score history,
      4. cluster the train and the test set with those centers, print the
         silhouette / information gain and build confusion matrices,
      5. plot the original and obtained clusters and the confusion matrices.

    Raises:
        IndexError: when ``args.path`` contains no ``*.txt`` file.
        KeyError: when a required parameter is missing from the text file.
        AssertionError: when the stored ``centers_num`` does not match the
            number of labels in the selected labels map.
    """
    args = parse_arguments(sys.argv[1:])

    # read params
    # ===========
    # possible params:
    # iter_num, pop_num, centers_num, prob_cross, prob_mutation, data shape, labs shape,
    # adapt_function, dist_measure, log_dir, best score, best score (index), total_time
    exp_params = {}
    text_file = [f for f in os.listdir(args.path) if f.endswith(".txt")][0]
    with open(os.path.join(args.path, text_file), "r") as text_f:
        for line in text_f:
            fields = line.replace("\t", "").strip().split(":")
            # only "<key>:<value>" lines with both parts present are parameters
            if len(fields) != 2 or "" in fields:
                continue
            key, value = fields
            param_name = key.replace(" ", "_")
            if key in ("iter_num", "pop_num", "centers_num"):
                exp_params[param_name] = int(value)
            elif key in ("prob_cross", "prob_mutation", "best score"):
                exp_params[param_name] = float(value)
            elif key in ("data shape", "labs shape"):
                exp_params[param_name] = make_tuple(value)
            elif key == "best score (index)":
                # best score (index): generation 95, individual 99
                index_parts = value.strip().split(",")
                exp_params["best_index"] = (
                    int(index_parts[0].strip().split(" ")[1]),
                    int(index_parts[1].strip().split(" ")[1]))
            else:
                exp_params[param_name] = value

    print("\nexperiment parameters were:")
    for k, v in exp_params.items():
        print("{:20}: {}".format(k, v))

    # read results
    # ============
    generations = np.load(os.path.join(args.path, "generations.npy"))
    iterations = np.load(os.path.join(args.path, "iterations.npy"))
    scores = np.load(os.path.join(args.path, "scores.npy"))
    best_generation, best_individual = (exp_params["best_index"][0],
                                        exp_params["best_index"][1])
    best_centers = generations[best_generation, best_individual]

    print("\nobtained results are:")
    print(
        "generations (total num, pop size, centrs num, feats num): {}".format(
            generations.shape))
    print(
        "iterations (iterations num, ): {}".format(
            iterations.shape))
    print(
        "scores (total num, pop size): {}".format(
            scores.shape))
    print(
        "generations total num, iterations num and scores total num must be equal!"
    )
    print("generations pop size and scores pop size must be equal too!")

    # "data size" is the number of samples, i.e. the first entry of the
    # stored shape; len() of the shape tuple would only give the number of
    # dimensions.
    plot_tuple = ("pop:" + str(exp_params["pop_num"]),
                  "p_c:" + str(exp_params["prob_cross"]),
                  "p_m:" + str(exp_params["prob_mutation"]),
                  "data size:" + str(exp_params["data_shape"][0]),
                  exp_params["adapt_function"],
                  exp_params["dist_measure"],
                  "best score:" + str(exp_params["best_score"])[:9] +
                  " at " + str(exp_params["best_index"]))
    utils.plot_scores(iterations, scores, exp_params["adapt_function"],
                      plot_tuple, not args.nooutput, out_dir=args.outdir)

    # read data
    # =========
    print("reading data...")
    hapt_data = data.HAPT()
    hapt_data.load_all_data()
    hapt_data.aggregate_groups()
    test_data = hapt_data.get_test_data()
    test_labs = hapt_data.get_test_labels()
    train_data = hapt_data.get_train_data()
    train_labs = hapt_data.get_train_labels()
    labs_map = hapt_data.get_labels_map()
    # three centers mean the experiment was run on the aggregated groups
    if exp_params["centers_num"] == 3:
        test_labs = hapt_data.get_aggregated_test_labels()
        train_labs = hapt_data.get_aggregated_train_labels()
        labs_map = hapt_data.get_aggregated_labels_map()
    centroids_num = len(labs_map)
    assert exp_params["centers_num"] == centroids_num, (
        "centers_num ({}) does not match the number of labels ({})".format(
            exp_params["centers_num"], centroids_num))

    # do clusterizations
    # ==================
    print("clustering...")
    labels_names = list(labs_map.values())

    # train data
    train_clust_labs = cluster.Centroids.cluster(
        train_data, best_centers, dist_func=exp_params["dist_measure"])
    train_clust_labs = cluster.Utils.adjust_labels(train_clust_labs,
                                                   train_labs)
    train_silh = cluster.Evaluate.silhouette(train_data, train_clust_labs,
                                             exp_params["dist_measure"])
    train_silh_normalized = (train_silh + 1) / 2
    train_info_gain = cluster.Evaluate.information_gain(
        train_labs, train_clust_labs)
    mapped_train_clust_labs = [labs_map[l] for l in train_clust_labs]
    mapped_train_labs = [labs_map[l] for l in train_labs]
    train_conf_mtx = confusion_matrix(mapped_train_labs,
                                      mapped_train_clust_labs,
                                      labels=labels_names)
    print("train set\tsilh: {:.6}, silh normalized: {:.6}, info gain: {:.6}".
          format(train_silh, train_silh_normalized, train_info_gain))

    # test data
    test_clust_labs = cluster.Centroids.cluster(
        test_data, best_centers, dist_func=exp_params["dist_measure"])
    test_clust_labs = cluster.Utils.adjust_labels(test_clust_labs, test_labs)
    test_silh = cluster.Evaluate.silhouette(test_data, test_clust_labs,
                                            exp_params["dist_measure"])
    test_silh_normalized = (test_silh + 1) / 2
    test_info_gain = cluster.Evaluate.information_gain(test_labs,
                                                       test_clust_labs)
    mapped_test_clust_labs = [labs_map[l] for l in test_clust_labs]
    mapped_test_labs = [labs_map[l] for l in test_labs]
    test_conf_mtx = confusion_matrix(mapped_test_labs,
                                     mapped_test_clust_labs,
                                     labels=labels_names)
    print("test set\tsilh: {:.6}, silh normalized: {:.6}, info gain: {:.6}".
          format(test_silh, test_silh_normalized, test_info_gain))

    # Show data
    # =========
    print("creating plots...")

    # clusters
    utils.plot_clusters(train_data, train_labs, labs_map, True,
                        out_dir=args.outdir,
                        filename="train_orig_clusters")
    utils.plot_clusters(train_data, train_clust_labs, labs_map, True,
                        out_dir=args.outdir,
                        filename="train_obtained_clusters")
    utils.plot_clusters(test_data, test_labs, labs_map, True,
                        out_dir=args.outdir,
                        filename="test_orig_clusters")
    utils.plot_clusters(test_data, test_clust_labs, labs_map, True,
                        out_dir=args.outdir,
                        filename="test_obtained_clusters")

    # confusion matrices
    utils.plot_confusion_matrix(
        train_conf_mtx,
        labels_names,
        normalize=False,
        title=
        'Confusion matrix\ntrain set\n(silh: {:.6}, silh normalized: {:.6}, info gain: {:.6})'
        .format(train_silh, train_silh_normalized, train_info_gain),
        cmap=plt.cm.Blues,
        out_dir=args.outdir,
        filename="train_conf_matr_silh_info_gain")
    utils.plot_confusion_matrix(
        test_conf_mtx,
        labels_names,
        normalize=False,
        title=
        'Confusion matrix\ntest set\n(silh: {:.6}, silh normalized: {:.6}, info gain: {:.6})'
        .format(test_silh, test_silh_normalized, test_info_gain),
        cmap=plt.cm.Blues,
        out_dir=args.outdir,
        filename="test_conf_matr_silh_info_gain")

    print("inference ended")
def test_get_train_data(self):
    """Check that ``get_train_data`` populates ``_train_attrs`` and returns data."""
    hapt = data.HAPT()
    # nothing is loaded right after construction
    assert hapt._train_attrs is None

    hapt.get_train_data()

    assert len(hapt._train_attrs) > 0
    assert len(hapt.get_train_data()) > 0
def test_get_test_labels(self):
    """Check that ``get_test_labels`` populates ``_test_labels`` and returns labels."""
    hapt = data.HAPT()
    # nothing is loaded right after construction
    assert hapt._test_labels is None

    hapt.get_test_labels()

    assert len(hapt._test_labels) > 0
    assert len(hapt.get_test_labels()) > 0
def test_get_aggregated_labels_map(self):
    """Check the group id -> group name map of a freshly created object."""
    expected_map = {
        0: "WALKING",
        1: "STATIC",
        2: "TRANSITION",
    }
    hapt = data.HAPT()
    assert hapt.get_aggregated_labels_map() == expected_map