Example #1
    def __init__(self, parameters):
        super().__init__(parameters)
        print("Bag of Temporal Words being initiated...")

        self.codebook_name = None
        self.all_codebooks_dir = utils.create_dir(utils.get_root_path("saved_objects") + "/codebooks")

        self.codebook_plot_path = utils.get_root_path("Results") + "/" + parameters.study_name + "/codebook plots/"
        self.codebook_plots = utils.create_dir(self.codebook_plot_path)
        self.features = [self.compute_histogram]
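
This constructor relies on two small helpers, utils.get_root_path and utils.create_dir, whose implementations are not part of the snippet. A minimal sketch of what they might look like, assuming get_root_path resolves a named top-level directory (e.g. "Results", "saved_objects") relative to the project root and returns None if it does not exist, and create_dir creates a directory if needed and returns its path:

import os

def get_root_path(dir_name):
    # hypothetical helper: resolve a named top-level directory relative to the project root
    project_root = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(project_root, dir_name)
    return path if os.path.exists(path) else None

def create_dir(path):
    # hypothetical helper: create the directory if missing and return its path so calls can be chained
    os.makedirs(path, exist_ok=True)
    return path
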
Example #2
    def __init__(self,
                 desc_type: DescType,
                 subject_dataset: types.subj_dataset,
                 parameters: StudyParameters,
                 seq_len: SeqLen,
                 extra_name: str = "") -> None:

        print("\nProducing dataset descriptors...\n")
        self.desc_type = desc_type
        self.__dataset_descriptors = None

        self.parameters = parameters
        self.extra_name = extra_name
        self.seq_len = seq_len

        # self.__dataset_desc_root_path = utils.get_root_path("dataset_desc")
        # if there is no root directory for dataset descriptors, create it
        saved_obj_subdir = self.parameters.study_name + "/dataset_descriptors"
        self.__saved_desc_dir = utils.create_dir(
            join(utils.get_root_path("saved_objects"), saved_obj_subdir))

        # build the full name of the dataset descriptor, without the path leading to it
        self.__dataset_desc_name = self.__produce_dataset_desc_name()
        self.__desc_obj_path = join(self.__saved_desc_dir,
                                    self.dataset_desc_name)

        # remove any files remaining from previous tests
        utils.cleanup(self.saved_desc_dir, "_test")
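
utils.cleanup is used above to delete leftovers from previous test runs. Its implementation is not shown; a plausible sketch, assuming it removes every file in the given directory whose name contains the given substring:

import os

def cleanup(dir_path, substring):
    # hypothetical helper: delete files in dir_path whose names contain substring
    for fname in os.listdir(dir_path):
        full_path = os.path.join(dir_path, fname)
        if substring in fname and os.path.isfile(full_path):
            os.remove(full_path)
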
Example #3
    def compute_dataset_stats(self, feature_dataset, filename):
        # create a copy of the feature dataset with all dimensions of size 1 squeezed out
        squeezed_feature_dataset = {}
        for subj_name, subj in feature_dataset.items():
            new_data = np.squeeze(subj.data)
            new_subj = copy(subj)
            new_subj.data = new_data

            squeezed_feature_dataset[subj_name] = new_subj

        dataframe_dict = self.__create_dataframe_dict(squeezed_feature_dataset)
        all_subj_dataframe = self.__create_allsubj_dataframe(dataframe_dict)

        groups = all_subj_dataframe.groupby("category")
        for name, group in groups:
            # note: np.linalg.norm with the default ord computes the L2 (Euclidean) norm
            # of each row, despite the variable name
            l1norm = np.linalg.norm(group[[0, 1, 2, 3, 4]].values.astype(float), axis=1)
            print(":/")
        # group_desc = groups.describe()
        group_cov = groups.cov()
        print(group_cov)

        filepath = utils.get_root_path("Results") + "/" + parameters.study_name + "/codebook results/"
        file_name = utils.create_dir(filepath) + "dataset_covariance_" + filename + ".csv"
        group_cov.to_csv(file_name, sep='\t')

        # corr_aggr = corr.aggregate()
        # seaborn scatter matrix
        # g = sns.pairplot(all_subj_dataframe, hue='category', diag_kind='hist')
        # plt.show()

        # pandas correlation matrix
        # plt.matshow(all_subj_dataframe.corr())
        # plt.show()

        return all_subj_dataframe
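
The per-category covariance above uses the standard pandas groupby pattern. A self-contained illustration on toy data (column names and the output filename are only illustrative):

import numpy as np
import pandas as pd

# toy frame with two feature columns and a category label, mirroring all_subj_dataframe above
df = pd.DataFrame({
    0: np.random.rand(12),
    1: np.random.rand(12),
    "category": ["button001", "button002", "button003"] * 4,
})

group_cov = df.groupby("category").cov()   # one covariance matrix per category
group_cov.to_csv("dataset_covariance_example.csv", sep="\t")
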
Example #4
def main():
    config_dir = "config_files"
    config = StudyConfig(config_dir)

    # the object with variable definitions based on the specified configuration file; it includes the data
    # description and the run-parameter definitions (whether or not deep learning is used)
    parameters = config.populate_study_parameters("CTS_UbiComp2020_1sample.toml")
    print(parameters)

    data = DataConstructor(parameters)
    test_data = data.test_subj_dataset

    all_categories = [str(i) for i in range(0, 36)]
    for i, cat in enumerate(all_categories):
        if len(cat) == 1:
            all_categories[i] = "button00" + cat
        elif len(cat) == 2:
            all_categories[i] = "button0" + cat

    model_subdir = join(parameters.study_name, "trained_models")
    saved_model_dir = utils.create_dir(join(utils.get_root_path("saved_objects"), model_subdir))

    model_name = "LSTM-batch-128-CTS_UbiComp2020_DescType.RawData_SeqLen.ExtendEdge_lstm_stat_2000e-fold-2-10.pt"
    model_path = join(saved_model_dir, model_name)

    predicted_val = sample_val(test_data, model_path)

    print(f"Predicted Category is {predicted_val}.")
Example #5
    def get_conditions_across_datasets(self, path_list, label_list, save_dir):
        """
        Combines images across datasets and saves them in a new directory under Results.

        Args:
            path_list: a list of paths of figures to combine
            label_list: a list of labels to name figures by
            save_dir: the path to the directory to save the images

        Returns:

        """
        plot_dir_path = util.get_root_path("Results")
        if plot_dir_path is not None:
            path_files = {}
            for path in path_list:
                path = plot_dir_path + "/" + path
                assert os.path.exists(
                    path), "Path " + path + " does not exist."
                file_ls = util.get_files_in_dir(path)
                path_files[path] = file_ls

            for label in label_list:
                img_sub_list = []
                pattern = "_" + str(label) + ".png"
                for paths, files in path_files.items():
                    for f in files:
                        fname = os.path.basename(f)
                        if fname.endswith(pattern):
                            img_sub_list.append(f)
                fig_name = os.path.join(save_dir,
                                        "p1_all_cond_" + str(label) + ".png")
                self.create_figure(img_sub_list, fig_name, 1, 3, save_dir)
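
self.create_figure is not included in this snippet. A minimal stand-in with a compatible signature, assuming it tiles the given images into a rows x cols grid with matplotlib and saves the result under save_dir:

import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

def create_figure(img_list, fig_name, rows, cols, save_dir):
    # hypothetical stand-in: tile the images in a grid and save the combined figure
    fig, axes = plt.subplots(rows, cols, figsize=(4 * cols, 4 * rows), squeeze=False)
    for ax in axes.flat:
        ax.axis("off")                       # hide axes, including any unused cells
    for ax, img_path in zip(axes.flat, img_list):
        ax.imshow(mpimg.imread(img_path))
        ax.set_title(os.path.basename(img_path), fontsize=8)
    fig.savefig(os.path.join(save_dir, fig_name), bbox_inches="tight")
    plt.close(fig)
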
Example #6
    def __init__(self,
                 data_splitter: DataSplitter,
                 feature_constructor: FeatureConstructor,
                 category_balancer: CategoryBalancer,
                 parameters: StudyParameters,
                 learning_def: LearningDefinition,
                 all_categories: List[str],
                 extra_model_name: str = ""):
        self.__all_categories = all_categories
        self.__data_splitter = data_splitter
        self.__feature_constructor = feature_constructor
        self.__category_balancer = category_balancer
        self.__category_map = utils.map_categories(all_categories)
        self.__learning_def = learning_def
        self.__parameters = parameters
        self.__num_folds = parameters.num_folds
        self.__extra_model_name = extra_model_name
        self.__classification = parameters.classification

        self.__model_name = ""
        self.__model_path = ""

        tbx_name = parameters.study_name + "/tensorboardX_runs"
        self.__tbx_path = utils.create_dir(
            join(utils.get_root_path("Results"), tbx_name))

        results_log_subdir = parameters.study_name + "/learning_logs"
        self.__results_log_path = utils.create_dir(
            join(utils.get_root_path("Results"), results_log_subdir))
        self._result_logger = self.define_result_logger()

        model_subdir = parameters.study_name + "/trained_models"
        self.__saved_model_dir = utils.create_dir(
            join(utils.get_root_path("saved_objects"), model_subdir))

        confusion_matrix_subdir = parameters.study_name + "/confusion_matrices"
        self.__confusion_matrix_obj_dir = utils.create_dir(
            join(utils.get_root_path("saved_objects"),
                 confusion_matrix_subdir))
        self.__cv_confusion_matrix = np.zeros(
            (len(all_categories), len(all_categories)))
        self.__test_confusion_matrix = np.zeros(
            (len(all_categories), len(all_categories)))
Example #7
    def set_data_path(dir_path):
        if dir_path == "None":
            attr = None
        else:
            project_root_path = util.get_root_path("Resources")
            path = os.path.abspath(os.path.join(project_root_path, dir_path))

            assert (os.path.exists(
                path)), "The directory \'" + path + "\' does not exist. Ensure the dataset is properly placed."
            attr = path
        return attr
Example #8
    def __init__(self, descriptor_computer: DescriptorComputer, all_dataset_categories: List[str],
                 heatmap_global: np.ndarray) -> None:

        # always use heatmap_global, not self.__heatmap in the parallelized section when writing to the array. Each
        # process has its own copy of a class and its variables, so self.__heatmap would not reflect all changes if
        # it was written to by several processes. For each process, its self.__heatmap is set to the same memory
        # location as heatmap_global.

        print(f"\nPerforming descriptor dataset evaluation...\n")

        self.__distance = ELD()
        self.__heatmap = heatmap_global
        self.descriptor_computer = descriptor_computer

        # if there is no directory for descriptor evaluation objects, create it
        eval_subdir = self.descriptor_computer.parameters.study_name + "/descriptor_evaluation"
        self.__dataset_eval_dir = utils.create_dir(join(utils.get_root_path("saved_objects"), eval_subdir))

        # create the full path of the dataset evaluation object
        self.__eval_obj_path = join(self.dataset_eval_dir, self.dataset_eval_name)

        # create a directory under Results to save the resulting heatmap figure and result logs.
        self.__results_eval_dir = utils.create_dir(join(utils.get_root_path("Results"), eval_subdir))

        # Removes any files that contain the string "_test" in the dataset evaluation directory under saved_objects,
        # as well as any saved heatmaps or statistic text files with that name under Results directory.
        utils.cleanup(self.dataset_eval_dir, "_test")
        utils.cleanup(self.results_eval_dir, "_test")

        self.__num_processes = multiprocessing.cpu_count() * 2
        self.compute_heatmap(all_dataset_categories)

        # the logger is defined after the multiprocessing task: defining it earlier causes a
        # "cannot pickle RLock" error, since the logger holds a lock on the log file.
        self.__result_logger = self.define_result_logger()
        print("")
Example #9
    def get_CTS_column_view(self, rel_path):
        """
        Creates combined figure of images per column of a 3 x 12 button-pad, where the label is the identity of the
        button. These plots are saved in "combined_plots".

        Args:
            rel_path: the study name from where to get the subject_view plots

        Returns:

        """
        assert "CTS" in rel_path, "This method is only valid for CTS dataset"

        plot_dir_path = util.get_root_path("Results")
        if plot_dir_path is not None:
            # plot_dir_path = plot_dir_path + "/" + study_name + "/dataset plots/subject_view"
            plot_dir_path = plot_dir_path + rel_path
            img_list = util.get_files_in_dir(plot_dir_path)

            # map each column of the 3 x 12 button-pad to the buttons it contains
            columns = {'col1': (1, 13, 25), 'col2': (2, 14, 26), 'col3': (3, 15, 27),
                       'col4': (4, 16, 28), 'col5': (5, 17, 29), 'col6': (6, 18, 30),
                       'col7': (7, 19, 31), 'col8': (8, 20, 32), 'col9': (9, 21, 33),
                       'col10': (10, 22, 34), 'col11': (11, 23, 35), 'col12': (12, 24, 36)}

            print(len(img_list))
            for col, buttons in columns.items():
                img_sub_list = []
                for elem in buttons:
                    pattern = '_' + str(elem) + '.png'
                    for entry in img_list:
                        if entry.endswith(pattern):
                            img_sub_list.append(entry)
                print(col)
                # name the combined figure after the column it shows
                self.create_figure(img_sub_list, col + "_spec.png", 1, 3)
Example #10
    def log_kmeans_score(self, nclusters_list, interval_size, name_extra_str=""):

        # parameters, feature_constructor, and subject_dict are not defined in this snippet;
        # they are expected to exist at module level in the original file
        filepath = utils.get_root_path("Results") + "/" + parameters.study_name + "/codebook results/"
        filepath = utils.create_dir(filepath) + "cluster_eval.txt"

        with open(filepath, 'a') as the_file:
            for nclust in nclusters_list:
                codebook_alg_name = "_kmeans_" + str(nclust)
                dataset_desc_name = "CTS_firm_chunk_" + str(parameters.samples_per_chunk) + "_interval_" + str(
                    interval_size) + name_extra_str
                feature_constructor.generate_codebook(subject_dict, dataset_desc_name, nclust)
                silhouette, calinski_harabasz = feature_constructor.score_kmeans(
                    dataset_desc_name, dataset_desc_name + codebook_alg_name)
                the_file.write(
                    "Number of clusters: " + str(nclust) + "; Interval size: " + str(interval_size) +
                    ";  Silhouette Score: " + str(silhouette) +
                    ";  Calinski-Harabasz Score: " + str(calinski_harabasz) + "\n")
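
feature_constructor.score_kmeans is not shown here, but the two scores it returns correspond to standard clustering metrics available in scikit-learn. A brief sketch of computing them, with X standing in for the descriptor vectors being clustered:

import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, calinski_harabasz_score

X = np.random.rand(200, 8)                        # stand-in for the descriptor vectors
labels = KMeans(n_clusters=10, n_init=10).fit_predict(X)

silhouette = silhouette_score(X, labels)
calinski_harabasz = calinski_harabasz_score(X, labels)
print(f"Silhouette: {silhouette:.3f}; Calinski-Harabasz: {calinski_harabasz:.3f}")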