def __init__(self, parameters):
    """Set up the Bag of Temporal Words feature constructor.

    Creates the directories where codebooks and codebook plots are
    persisted, and registers the histogram computation as the feature.
    """
    super().__init__(parameters)
    print("Bag of Temporal Words being initiated...")

    # name of the currently selected codebook; set later, when one is built/loaded
    self.codebook_name = None

    # directory where all saved codebooks live
    codebook_root = utils.get_root_path("saved_objects") + "/codebooks"
    self.all_codebooks_dir = utils.create_dir(codebook_root)

    # directory under Results where codebook figures are written
    self.codebook_plot_path = (
        utils.get_root_path("Results") + "/" + parameters.study_name + "/codebook plots/"
    )
    self.codebook_plots = utils.create_dir(self.codebook_plot_path)

    # the single feature this constructor produces
    self.features = [self.compute_histogram]
def __init__(self, desc_type: DescType, subject_dataset: types.subj_dataset,
             parameters: StudyParameters, seq_len: SeqLen, extra_name: str = "") -> None:
    """Prepare names and directories for computing and saving dataset descriptors.

    Args:
        desc_type: the type of descriptor to produce.
        subject_dataset: the per-subject dataset the descriptors are built from.
        parameters: study-wide configuration parameters.
        seq_len: sequence-length handling strategy for the descriptors.
        extra_name: optional suffix appended to the descriptor dataset name.
    """
    print("\nProducing dataset descriptors...\n")

    self.desc_type = desc_type
    self.__dataset_descriptors = None  # computed lazily, not in the constructor
    self.parameters = parameters
    self.extra_name = extra_name
    self.seq_len = seq_len

    # if there is no root directory for dataset descriptors, create it
    saved_obj_subdir = self.parameters.study_name + "/dataset_descriptors"
    self.__saved_desc_dir = utils.create_dir(
        join(utils.get_root_path("saved_objects"), saved_obj_subdir))

    # full name of the descriptor dataset, without the path leading to it
    self.__dataset_desc_name = self.__produce_dataset_desc_name()
    self.__desc_obj_path = join(self.__saved_desc_dir, self.dataset_desc_name)

    # remove any files remaining from previous tests
    utils.cleanup(self.saved_desc_dir, "_test")
def compute_dataset_stats(self, feature_dataset, filename):
    """Compute per-category covariance statistics over a feature dataset.

    Builds a squeezed copy of the dataset (singleton dimensions removed),
    combines all subjects into a single DataFrame, groups it by category,
    and writes each group's covariance matrix to a tab-separated CSV under
    the study's "codebook results" directory.

    Args:
        feature_dataset: dict mapping subject name -> subject object with a
            ``.data`` ndarray attribute.
        filename: string appended to the output covariance file name.

    Returns:
        The combined all-subject pandas DataFrame the statistics were
        computed from.
    """
    # Copy each subject so the caller's dataset is not mutated, and drop
    # dimensions of size 1 from the data arrays.
    squeezed_feature_dataset = {}
    for subj_name, subj in feature_dataset.items():
        new_subj = copy(subj)
        new_subj.data = np.squeeze(subj.data)
        squeezed_feature_dataset[subj_name] = new_subj

    dataframe_dict = self.__create_dataframe_dict(squeezed_feature_dataset)
    all_subj_dataframe = self.__create_allsubj_dataframe(dataframe_dict)

    groups = all_subj_dataframe.groupby("category")
    group_cov = groups.cov()
    print(group_cov)

    # BUG FIX: the original read the unbound name ``parameters`` here, which
    # would raise NameError at runtime; use the parameters stored on the
    # instance instead (assumes the instance stores ``self.parameters`` as the
    # descriptor-computer constructor does — confirm against this class).
    # Also removed a dead per-group debug loop that computed an unused L1 norm
    # over hard-coded columns [0..4] and printed ":/".
    filepath = utils.get_root_path("Results") + "/" + self.parameters.study_name + "/codebook results/"
    file_name = utils.create_dir(filepath) + "dataset_covariance_" + filename + ".csv"
    group_cov.to_csv(file_name, sep='\t')

    return all_subj_dataframe
def main():
    """Load the study configuration, build the test dataset, and run one
    sample through a previously trained LSTM model."""
    # the object with variable definitions based on the specified configuration
    # file; includes the data description and run-parameter definitions
    config = StudyConfig("config_files")
    parameters = config.populate_study_parameters("CTS_UbiComp2020_1sample.toml")
    print(parameters)

    data = DataConstructor(parameters)
    test_data = data.test_subj_dataset

    # category names are zero-padded to three digits: "button000" .. "button035"
    all_categories = ["button" + str(i).zfill(3) for i in range(0, 36)]

    # locate the saved model inside the study's trained_models directory
    model_subdir = join(parameters.study_name, "trained_models")
    saved_model_dir = utils.create_dir(join(utils.get_root_path("saved_objects"), model_subdir))
    model_name = "LSTM-batch-128-CTS_UbiComp2020_DescType.RawData_SeqLen.ExtendEdge_lstm_stat_2000e-fold-2-10.pt"
    model_path = join(saved_model_dir, model_name)

    predicted_val = sample_val(test_data, model_path)
    print(f"Predicted Category is {predicted_val}.")
def get_conditions_across_datasets(self, path_list, label_list, save_dir):
    """
    Combines images across datasets and saves them in a new directory under Results.

    Args:
        path_list: a list of paths of figures to combine
        label_list: a list of labels to name figures by
        save_dir: the path to the directory to save the images

    Returns:

    """
    plot_dir_path = util.get_root_path("Results")
    if plot_dir_path is None:
        return

    # map each absolute dataset directory to the files it contains
    path_files = {}
    for rel_path in path_list:
        abs_path = plot_dir_path + "/" + rel_path
        assert os.path.exists(abs_path), "Path " + abs_path + " does not exist."
        path_files[abs_path] = util.get_files_in_dir(abs_path)

    for label in label_list:
        suffix = "_" + str(label) + ".png"
        # gather, across every dataset, the files belonging to this label
        img_sub_list = [f for files in path_files.values()
                        for f in files
                        if os.path.basename(f).endswith(suffix)]
        fig_name = os.path.join(save_dir, "p1_all_cond_" + str(label) + ".png")
        self.create_figure(img_sub_list, fig_name, 1, 3, save_dir)
def __init__(self, data_splitter: DataSplitter, feature_constructor: FeatureConstructor,
             category_balancer: CategoryBalancer, parameters: StudyParameters,
             learning_def: LearningDefinition, all_categories: List[str],
             extra_model_name: str = ""):
    """Set up directories, result logging, and confusion matrices for a learning run.

    Args:
        data_splitter: strategy used to split the dataset into folds.
        feature_constructor: object that builds features from raw data.
        category_balancer: strategy used to balance categories.
        parameters: study-wide configuration parameters.
        learning_def: the learning definition (model and hyperparameters).
        all_categories: list of all category names in the dataset.
        extra_model_name: optional suffix appended to the model name.
    """
    self.__all_categories = all_categories
    self.__data_splitter = data_splitter
    self.__feature_constructor = feature_constructor
    self.__category_balancer = category_balancer
    self.__category_map = utils.map_categories(all_categories)
    self.__learning_def = learning_def
    self.__parameters = parameters
    self.__num_folds = parameters.num_folds
    self.__extra_model_name = extra_model_name
    self.__classification = parameters.classification

    # filled in once a model is actually trained/saved
    self.__model_name = ""
    self.__model_path = ""

    # tensorboardX run logs under Results
    tbx_name = parameters.study_name + "/tensorboardX_runs"
    self.__tbx_path = utils.create_dir(
        join(utils.get_root_path("Results"), tbx_name))

    # CONSISTENCY FIX: use the local ``parameters`` like every other line of
    # this constructor; the original read ``self.parameters`` here, which
    # only works if the class defines a property over the name-mangled
    # ``__parameters`` attribute.
    results_log_subdir = parameters.study_name + "/learning_logs"
    self.__results_log_path = utils.create_dir(
        join(utils.get_root_path("Results"), results_log_subdir))
    self._result_logger = self.define_result_logger()

    # directory where trained models are persisted
    model_subdir = parameters.study_name + "/trained_models"
    self.__saved_model_dir = utils.create_dir(
        join(utils.get_root_path("saved_objects"), model_subdir))

    # directory where confusion-matrix objects are persisted
    confusion_matrix_subdir = parameters.study_name + "/confusion_matrices"
    self.__confusion_matrix_obj_dir = utils.create_dir(
        join(utils.get_root_path("saved_objects"), confusion_matrix_subdir))

    # cross-validation and test confusion matrices, accumulated across folds
    num_cats = len(all_categories)
    self.__cv_confusion_matrix = np.zeros((num_cats, num_cats))
    self.__test_confusion_matrix = np.zeros((num_cats, num_cats))
def set_data_path(dir_path):
    """Resolve ``dir_path`` relative to the Resources root directory.

    Args:
        dir_path: a path relative to the Resources root, or the literal
            string "None" to indicate that no dataset directory is set.

    Returns:
        The absolute path to the dataset directory, or None when
        ``dir_path`` is the string "None".
    """
    # BUG FIX: the original used ``dir_path is "None"`` — an identity
    # comparison against a string literal, whose result is interning- and
    # implementation-dependent (CPython emits a SyntaxWarning). Compare by
    # value instead.
    if dir_path == "None":
        return None
    project_root_path = util.get_root_path("Resources")
    path = os.path.abspath(os.path.join(project_root_path, dir_path))
    assert os.path.exists(path), \
        "The directory '" + path + "' does not exist. Ensure the dataset is properly placed."
    return path
def __init__(self, descriptor_computer: DescriptorComputer, all_dataset_categories: List[str],
             heatmap_global: np.ndarray) -> None:
    """Evaluate a descriptor dataset, producing a pairwise-distance heatmap.

    Args:
        descriptor_computer: supplies the descriptors and study parameters.
        all_dataset_categories: list of all category names in the dataset.
        heatmap_global: shared array that parallel workers write results into.
    """
    # always use heatmap_global, not self.__heatmap in the parallelized section when writing to the
    # array. Each process has its own copy of a class and its variables, so self.__heatmap would not
    # reflect all changes if it was written to by several processes. For each process, its
    # self.__heatmap is set to the same memory location as heatmap_global.
    print(f"\nPerforming descriptor dataset evaluation...\n")
    self.__distance = ELD()
    self.__heatmap = heatmap_global
    self.descriptor_computer = descriptor_computer

    # BUG FIX: the original referenced the unbound name ``parameters`` (not a
    # parameter of this constructor), which would raise NameError; the study
    # parameters are publicly available on the descriptor computer.
    eval_subdir = descriptor_computer.parameters.study_name + "/descriptor_evaluation"

    # if there is no root directory for dataset descriptor evaluations, create it
    self.__dataset_eval_dir = utils.create_dir(join(utils.get_root_path("saved_objects"), eval_subdir))
    # create the full path of the dataset evaluation object
    self.__eval_obj_path = join(self.dataset_eval_dir, self.dataset_eval_name)
    # create a directory under Results to save the resulting heatmap figure and result logs
    self.__results_eval_dir = utils.create_dir(join(utils.get_root_path("Results"), eval_subdir))

    # Removes any files that contain the string "_test" in the dataset evaluation directory under
    # saved_objects, as well as any saved heatmaps or statistic text files with that name under
    # the Results directory.
    utils.cleanup(self.dataset_eval_dir, "_test")
    utils.cleanup(self.results_eval_dir, "_test")

    self.__num_processes = multiprocessing.cpu_count() * 2
    self.compute_heatmap(all_dataset_categories)

    # defining the logger before the multiprocessing task causes a "cannot pickle RLock" error
    # since the logger holds a lock to the file.
    self.__result_logger = self.define_result_logger()
    print("")
def get_CTS_column_view(self, rel_path):
    """
    Creates combined figure of images per column of a 3 x 12 button-pad, where the label is the
    identity of the button. These plots are saved in "combined_plots".

    Args:
        rel_path: the study name from where to get the subject_view plots

    Returns:

    """
    assert "CTS" in rel_path, "This method is only valid for CTS dataset"

    results_root = util.get_root_path("Results")
    if results_root is None:
        return

    plot_dir_path = results_root + rel_path
    img_list = util.get_files_in_dir(plot_dir_path)

    # each of the 12 pad columns holds three buttons: c, c+12, c+24
    # (renamed from the original local ``dict``, which shadowed the builtin)
    columns = {"col%d" % c: (c, c + 12, c + 24) for c in range(1, 13)}

    # NOTE(review): figure_names is sorted lexicographically (col1, col10,
    # col11, ...) while the loop below follows insertion order (col1, col2,
    # ...), so figure names may not match their columns — behavior preserved
    # as-is; confirm whether this pairing is intended.
    figure_names = sorted(columns.keys())
    print(figure_names)
    print(len(img_list))

    for j, (col, buttons) in enumerate(columns.items()):
        img_sub_list = []
        for button in buttons:
            suffix = '_' + str(button) + '.png'
            for entry in img_list:
                if entry.endswith(suffix):
                    img_sub_list.append(entry)
        print(figure_names[j])
        self.create_figure(img_sub_list, figure_names[j] + "_spec.png", 1, 3)
def log_kmeans_score(self, nclusters_list, interval_size, name_extra_str=""):
    """Evaluate k-means codebooks for several cluster counts and append the
    silhouette / Calinski-Harabasz scores to "cluster_eval.txt" under the
    study's "codebook results" directory.

    Args:
        nclusters_list: iterable of cluster counts to evaluate.
        interval_size: interval size embedded in the dataset descriptor name.
        name_extra_str: optional suffix appended to the descriptor name.

    NOTE(review): ``parameters``, ``feature_constructor`` and ``subject_dict``
    are not defined in this method, its parameter list, or on ``self`` here —
    presumably they are module-level globals; verify before relying on this
    method, otherwise it raises NameError at runtime.
    """
    filepath = utils.get_root_path("Results") + "/" + parameters.study_name + "/codebook results/"
    filepath = utils.create_dir(filepath) + "cluster_eval.txt"
    # open in append mode so repeated runs accumulate into the same log file
    with open(filepath, 'a') as the_file:
        for nclust in nclusters_list:
            codebook_alg_name = "_kmeans_" + str(nclust)
            dataset_desc_name = "CTS_firm_chunk_" + str(parameters.samples_per_chunk) + "_interval_" + str(
                interval_size) + name_extra_str
            # build the codebook, then score the resulting clustering
            feature_constructor.generate_codebook(subject_dict, dataset_desc_name, nclust)
            silouhette, calinski_harabaz = feature_constructor.score_kmeans(dataset_desc_name,
                                                                            dataset_desc_name + codebook_alg_name)
            the_file.write(
                'Number of clusters: ' + str(nclust) + "; Interval size: " + str(interval_size) + ": Silouhette " "Score: " + str(
                    silouhette) + "; Calinksi-Harabaz Score: " + str(calinski_harabaz) + "\n")