Ejemplo n.º 1
0
 def test_load_test_data(self):
     """Verify that load_test_data() populates test attributes and labels.

     Before loading, both private containers are expected to be None;
     afterwards they must be non-empty and of equal length, and the public
     getters must agree on the length as well.
     """
     hapt = data.HAPT()

     # Nothing is loaded right after construction.
     assert hapt._test_attrs is None
     assert hapt._test_labels is None

     hapt.load_test_data()

     attrs_count = len(hapt._test_attrs)
     assert attrs_count > 0
     labels_count = len(hapt._test_labels)
     assert labels_count > 0
     assert attrs_count == labels_count

     # The public accessors must be consistent with each other too.
     assert len(hapt.get_test_data()) == len(hapt.get_test_labels())
Ejemplo n.º 2
0
 def test_get_aggr2initial_labs_map(self):
     """Verify the mapping from aggregated group names to the initial labels."""
     expected_groups = {
         'WALKING': ['WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS'],
         'STATIC': ['SITTING', 'STANDING', 'LAYING'],
         'TRANSITION': [
             'STAND_TO_SIT',
             'SIT_TO_STAND',
             'SIT_TO_LIE',
             'LIE_TO_SIT',
             'STAND_TO_LIE',
             'LIE_TO_STAND',
         ],
     }

     hapt = data.HAPT()
     hapt.load_all_data()
     hapt.aggregate_groups()

     assert hapt.get_aggr2initial_labs_map() == expected_groups
Ejemplo n.º 3
0
 def test_get_labels_map(self):
     """Verify that get_labels_map() fills and returns the id -> name map."""
     # Label ids start at 1 and follow the order of the names below.
     label_names = (
         "WALKING",
         "WALKING_UPSTAIRS",
         "WALKING_DOWNSTAIRS",
         "SITTING",
         "STANDING",
         "LAYING",
         "STAND_TO_SIT",
         "SIT_TO_STAND",
         "SIT_TO_LIE",
         "LIE_TO_SIT",
         "STAND_TO_LIE",
         "LIE_TO_STAND",
     )
     expected_map = dict(enumerate(label_names, start=1))

     hapt = data.HAPT()
     # The map is empty until it is requested for the first time.
     assert hapt._labels == {}

     hapt.get_labels_map()
     assert hapt._labels == expected_map
     assert hapt.get_labels_map() == expected_map
Ejemplo n.º 4
0
 def test_get_aggregated_train_labels(self):
     """Verify aggregated train labels before and after aggregate_groups().

     The instance is populated by hand (label map plus train/test labels).
     Before aggregation the getter must return the raw train labels; after
     aggregation every label must be replaced by its group index.
     """
     label_names = (
         "WALKING",
         "WALKING_UPSTAIRS",
         "WALKING_DOWNSTAIRS",
         "SITTING",
         "STANDING",
         "LAYING",
         "STAND_TO_SIT",
         "SIT_TO_STAND",
         "SIT_TO_LIE",
         "LIE_TO_SIT",
         "STAND_TO_LIE",
         "LIE_TO_STAND",
     )

     hapt = data.HAPT()
     hapt._labels = dict(enumerate(label_names, start=1))
     hapt._test_labels = list(range(1, 13))  # 1, 2, ..., 12
     hapt._train_labels = list(range(12, 0, -1))  # 12, 11, ..., 1

     # Without aggregation the raw labels are returned.
     assert hapt.get_aggregated_train_labels() == hapt._train_labels

     hapt.aggregate_groups()

     # Train labels run from 12 down to 1: six labels of group 2, then
     # three of group 1, then three of group 0.
     expected = np.array([2] * 6 + [1] * 3 + [0] * 3)
     assert np.array_equal(hapt.get_aggregated_train_labels(), expected)
Ejemplo n.º 5
0
def get_samples_labels_datasets(dataset, aggregate):
    """
    Load the HAPT data and select the requested split(s).

    dataset: "all" selects the test and the train set (in that order),
        "train" selects only the train set, any other value selects only
        the test set
    aggregate: when truthy, labels are replaced by the aggregated labels

    return: [tuple of lists] (samples, labels, datasets), where datasets
        holds the names ("test_set" / "train_set") of the selected splits
    """
    hapt = data.HAPT()
    hapt.load_all_data()

    labels_by_name = {
        "test_set": hapt.get_test_labels(),
        "train_set": hapt.get_train_labels(),
    }

    if aggregate:
        print("data aggregation...")
        hapt.aggregate_groups()
        labels_by_name = {
            "test_set": hapt.get_aggregated_test_labels(),
            "train_set": hapt.get_aggregated_train_labels(),
        }

    samples_by_name = {
        "test_set": hapt.get_test_data(),
        "train_set": hapt.get_train_data(),
    }

    # Pick which splits to hand back; the test set is the fallback.
    if dataset == "all":
        selected = ["test_set", "train_set"]
    elif dataset == "train":
        selected = ["train_set"]
    else:
        selected = ["test_set"]

    samples = [samples_by_name[name] for name in selected]
    labels = [labels_by_name[name] for name in selected]
    return samples, labels, selected
Ejemplo n.º 6
0
def main():
    """Run the evolutionary clustering experiment configured on the command line.

    Steps:
      1. parse the arguments and echo them,
      2. load the HAPT data and choose the split ("test" or, by default,
         train) and the label set (original or aggregated),
      3. if ``args.showdata`` is set, only plot the chosen data and return,
      4. otherwise run ``evolution.run_SGA`` ``args.repeat`` times, saving a
         score plot per run into that run's log directory,
      5. if more than one run was made, save a plot of the averaged scores.
    """
    args = parse_arguments(sys.argv[1:])

    print("Parameters:")
    for cli_arg in args.sys_args:
        print(cli_arg)
    print()

    # read data
    # =========

    hapt = data.HAPT()
    hapt.load_all_data()
    hapt.aggregate_groups()

    # Defaults: train split with the original (non-aggregated) labels.
    samples = hapt.get_train_data()
    labels = hapt.get_train_labels()
    labels_map = hapt.get_labels_map()
    centroids_num = len(hapt.get_labels_map())

    use_test_split = args.data == "test"
    if use_test_split:
        samples = hapt.get_test_data()
        labels = hapt.get_test_labels()
        centroids_num = len(hapt.get_labels_map())

    if args.aggregate:
        labels = hapt.get_aggregated_train_labels()
        labels_map = hapt.get_aggregated_labels_map()
        centroids_num = len(hapt.get_aggregated_labels_map())
        if use_test_split:
            labels = hapt.get_aggregated_test_labels()

    # Show experiment data
    # ====================

    if args.showdata:
        utils.plot_clusters(samples, labels, labels_map, True)
        return

    def describe_experiment():
        # Text fragments describing the experiment setup, passed to the plots.
        return (
            "pop:" + str(args.pop_num),
            "p_c:" + str(args.prob_cross),
            "p_m:" + str(args.prob_mutation),
            "data size:" + str(len(labels)),
            args.adapt_function,
            args.dist_measure,
        )

    # evolution
    # =========

    # One record per run, in the order returned by run_SGA:
    # (iterations, scores, populations, total_time, log_dir, best_indiv_idx)
    runs = []
    # score, experiment, generation (iteration), individual
    # NOTE: tracked across runs but not used further inside this function.
    best_overall = (-1, 0, 0, 0)

    for run_idx in range(args.repeat):
        (iterations, scores, populations, total_time, run_log_dir,
         best_idx) = evolution.run_SGA(
             args.iter_num,
             samples,
             labels,
             args.pop_num,
             args.prob_cross,
             args.prob_mutation,
             centroids_num,
             args.adapt_function,
             args.dist_measure,
             log_dir="logs",
             loggin_pref="exp {}/{}: ".format(run_idx + 1, args.repeat))

        best_generation, best_individual = best_idx[0], best_idx[1]
        run_best_score = scores[best_generation, best_individual]
        if best_overall[0] < run_best_score:
            best_overall = (run_best_score, run_idx, best_generation,
                            best_individual)

        runs.append((iterations, scores, populations, total_time,
                     run_log_dir, best_idx))

        # save plot
        utils.plot_scores(iterations,
                          scores,
                          args.adapt_function,
                          describe_experiment(),
                          to_file=True,
                          out_dir=run_log_dir)

    # visualize
    # =========
    if args.repeat > 1:
        utils.plot_avg_scores([run[0] for run in runs],
                              [run[1] for run in runs],
                              args.adapt_function,
                              [run[5] for run in runs],
                              describe_experiment(),
                              to_file=True,
                              out_dirs=[run[4] for run in runs])
Ejemplo n.º 7
0
def main():
    """Evaluate the best individual of a finished experiment and plot results.

    Steps:
      1. read the experiment parameters from the first ``.txt`` file found
         in ``args.path``,
      2. load ``generations.npy``, ``iterations.npy`` and ``scores.npy`` from
         the same directory and pick the centers of the best individual,
      3. plot the score history,
      4. load the HAPT data, cluster the train and test sets with the best
         centers and compute silhouette, information gain and confusion
         matrices for both,
      5. plot the original/obtained clusters and the confusion matrices.

    Raises:
        IndexError: if ``args.path`` contains no ``.txt`` file.
        KeyError: if a required parameter is missing from the text file.
        AssertionError: if the stored ``centers_num`` does not match the
            size of the labels map that was selected.
    """
    args = parse_arguments(sys.argv[1:])

    # read params
    # ===========
    # possible params:
    # iter_num, pop_num, centers_num, prob_cross, prob_mutation, data shape, labs shape,
    # adapt_function, dist_measure, log_dir, best score, best score (index), total_time

    int_keys = ("iter_num", "pop_num", "centers_num")
    float_keys = ("prob_cross", "prob_mutation", "best score")
    tuple_keys = ("data shape", "labs shape")

    exp_params = {}
    text_file = [f for f in os.listdir(args.path) if f.endswith(".txt")][0]
    with open(os.path.join(args.path, text_file), "r") as text_f:
        for line in text_f:
            fields = line.replace("\t", "").strip().split(":")
            # Only "key:value" lines with both parts non-empty are parameters.
            if len(fields) != 2 or fields[0] == "" or fields[1] == "":
                continue
            key, value = fields
            param_name = key.replace(" ", "_")
            if key in int_keys:
                exp_params[param_name] = int(value)
            elif key in float_keys:
                exp_params[param_name] = float(value)
            elif key in tuple_keys:
                exp_params[param_name] = make_tuple(value)
            elif key == "best score (index)":
                # value looks like: "generation 95, individual 99"
                generation_part, individual_part = value.strip().split(
                    ",")[:2]
                exp_params["best_index"] = (
                    int(generation_part.strip().split(" ")[1]),
                    int(individual_part.strip().split(" ")[1]))
            else:
                # Everything else is kept as a plain string.
                exp_params[param_name] = value

    print("\nexperiment parameters were:")
    for k, v in exp_params.items():
        print("{:20}: {}".format(k, v))

    # read results
    # ============

    generations = np.load(os.path.join(args.path, "generations.npy"))
    iterations = np.load(os.path.join(args.path, "iterations.npy"))
    scores = np.load(os.path.join(args.path, "scores.npy"))

    # Centers of the best individual: indexed by (generation, individual).
    best_centers = generations[exp_params["best_index"][0],
                               exp_params["best_index"][1]]

    print("\nobtained results are:")
    print(
        "generations (total num, pop size, centrs num, feats num): {}".format(
            generations.shape))
    print(
        "iterations (iterations num, ):                            {}".format(
            iterations.shape))
    print(
        "scores (total num, pop size):                             {}".format(
            scores.shape))
    print(
        "generations total num, iterations num and scores total num must be equal!"
    )
    print("generations pop size and scores pop size must be equal too!")

    # "data_shape" is a shape tuple, so len() of it would only give the
    # number of dimensions. The data size is taken from its first entry
    # (presumably the number of samples -- confirm against the code that
    # writes the parameters file).
    data_size = exp_params["data_shape"][0]
    plot_tuple = ("pop:" + str(exp_params["pop_num"]),
                  "p_c:" + str(exp_params["prob_cross"]),
                  "p_m:" + str(exp_params["prob_mutation"]),
                  "data size:" + str(data_size),
                  exp_params["adapt_function"], exp_params["dist_measure"],
                  "best score:" + str(exp_params["best_score"])[:9] + " at " +
                  str(exp_params["best_index"]))
    utils.plot_scores(iterations,
                      scores,
                      exp_params["adapt_function"],
                      plot_tuple,
                      not args.nooutput,
                      out_dir=args.outdir)

    # read data
    # =========
    print("reading data...")
    hapt_data = data.HAPT()
    hapt_data.load_all_data()
    hapt_data.aggregate_groups()

    test_data = hapt_data.get_test_data()
    test_labs = hapt_data.get_test_labels()
    train_data = hapt_data.get_train_data()
    train_labs = hapt_data.get_train_labels()
    labs_map = hapt_data.get_labels_map()
    # Three centers means the experiment was run on the aggregated labels.
    if exp_params["centers_num"] == 3:
        test_labs = hapt_data.get_aggregated_test_labels()
        train_labs = hapt_data.get_aggregated_train_labels()
        labs_map = hapt_data.get_aggregated_labels_map()
    centroids_num = len(labs_map)

    assert exp_params["centers_num"] == centroids_num

    # do clusterizations
    # ==================
    print("clustering...")
    labels_names = list(labs_map.values())
    # train data
    train_clust_labs = cluster.Centroids.cluster(
        train_data, best_centers, dist_func=exp_params["dist_measure"])
    train_clust_labs = cluster.Utils.adjust_labels(train_clust_labs,
                                                   train_labs)
    train_silh = cluster.Evaluate.silhouette(train_data, train_clust_labs,
                                             exp_params["dist_measure"])
    # Shift and scale the silhouette by (s + 1) / 2.
    train_silh_normalized = (train_silh + 1) / 2
    train_info_gain = cluster.Evaluate.information_gain(
        train_labs, train_clust_labs)
    # The confusion matrix is built on label names rather than label ids.
    mapped_train_clust_labs = [labs_map[l] for l in train_clust_labs]
    mapped_train_labs = [labs_map[l] for l in train_labs]
    train_conf_mtx = confusion_matrix(mapped_train_labs,
                                      mapped_train_clust_labs,
                                      labels=labels_names)
    print("train set\tsilh: {:.6}, silh normalized: {:.6}, info gain: {:.6}".
          format(train_silh, train_silh_normalized, train_info_gain))
    # test data
    test_clust_labs = cluster.Centroids.cluster(
        test_data, best_centers, dist_func=exp_params["dist_measure"])
    test_clust_labs = cluster.Utils.adjust_labels(test_clust_labs, test_labs)
    test_silh = cluster.Evaluate.silhouette(test_data, test_clust_labs,
                                            exp_params["dist_measure"])
    test_silh_normalized = (test_silh + 1) / 2
    test_info_gain = cluster.Evaluate.information_gain(test_labs,
                                                       test_clust_labs)
    mapped_test_clust_labs = [labs_map[l] for l in test_clust_labs]
    mapped_test_labs = [labs_map[l] for l in test_labs]
    test_conf_mtx = confusion_matrix(mapped_test_labs,
                                     mapped_test_clust_labs,
                                     labels=labels_names)
    print("test set\tsilh: {:.6}, silh normalized: {:.6}, info gain: {:.6}".
          format(test_silh, test_silh_normalized, test_info_gain))

    # Show data
    # =========
    print("creating plots...")
    # clusters
    utils.plot_clusters(train_data,
                        train_labs,
                        labs_map,
                        True,
                        out_dir=args.outdir,
                        filename="train_orig_clusters")
    utils.plot_clusters(train_data,
                        train_clust_labs,
                        labs_map,
                        True,
                        out_dir=args.outdir,
                        filename="train_obtained_clusters")
    utils.plot_clusters(test_data,
                        test_labs,
                        labs_map,
                        True,
                        out_dir=args.outdir,
                        filename="test_orig_clusters")
    utils.plot_clusters(test_data,
                        test_clust_labs,
                        labs_map,
                        True,
                        out_dir=args.outdir,
                        filename="test_obtained_clusters")

    # confusion matrices
    utils.plot_confusion_matrix(
        train_conf_mtx,
        labels_names,
        normalize=False,
        title=
        'Confusion matrix\ntrain set\n(silh: {:.6}, silh normalized: {:.6}, info gain: {:.6})'
        .format(train_silh, train_silh_normalized, train_info_gain),
        cmap=plt.cm.Blues,
        out_dir=args.outdir,
        filename="train_conf_matr_silh_info_gain")
    utils.plot_confusion_matrix(
        test_conf_mtx,
        labels_names,
        normalize=False,
        title=
        'Confusion matrix\ntest set\n(silh: {:.6}, silh normalized: {:.6}, info gain: {:.6})'
        .format(test_silh, test_silh_normalized, test_info_gain),
        cmap=plt.cm.Blues,
        out_dir=args.outdir,
        filename="test_conf_matr_silh_info_gain")
    print("inference ended")
Ejemplo n.º 8
0
 def test_get_train_data(self):
     """Verify that get_train_data() loads the train attributes on demand."""
     hapt = data.HAPT()
     # Nothing is loaded right after construction.
     assert hapt._train_attrs is None

     # The first call is expected to populate the private container ...
     hapt.get_train_data()
     assert len(hapt._train_attrs) > 0

     # ... and the getter itself must return non-empty data.
     train_samples = hapt.get_train_data()
     assert len(train_samples) > 0
Ejemplo n.º 9
0
 def test_get_test_labels(self):
     """Verify that get_test_labels() loads the test labels on demand."""
     hapt = data.HAPT()
     # Nothing is loaded right after construction.
     assert hapt._test_labels is None

     # The first call is expected to populate the private container ...
     hapt.get_test_labels()
     assert len(hapt._test_labels) > 0

     # ... and the getter itself must return non-empty labels.
     test_labels = hapt.get_test_labels()
     assert len(test_labels) > 0
Ejemplo n.º 10
0
 def test_get_aggregated_labels_map(self):
     """Verify the index -> name map of the aggregated label groups."""
     # Group indices start at 0 and follow the order of the names below.
     expected_map = dict(enumerate(("WALKING", "STATIC", "TRANSITION")))

     hapt = data.HAPT()
     assert hapt.get_aggregated_labels_map() == expected_map