Example #1
    def get_train_dataloader(self, subj_dataset):
        if self.learning_def.nn_name == "MLP":
            data, cat = self.get_all_subj_data(subj_dataset, seq=False)
        else:
            data, cat = self.get_all_subj_data(subj_dataset)

        # convert numpy ndarray to PyTorch tensor
        np_data = np.asarray(data, dtype=np.float32)
        data = torch.from_numpy(np_data)

        # convert categories from string to integer
        labels = utils.convert_categories(self.category_map, cat)

        button_labels = torch.from_numpy(labels)
        # map each integer button label to its (row, column) position on the
        # knitted component; only the row labels are used to build this loader
        row_labels, column_labels = self.knitted_component.get_row_column_labels(
            button_labels)
        row_labels = torch.from_numpy(row_labels)

        # standardized_data = self.standardize(data)
        dataset = TensorDataset(data, row_labels)
        data_loader = DataLoader(dataset,
                                 batch_size=self.learning_def.batch_size,
                                 num_workers=self.parameters.num_threads,
                                 shuffle=False,
                                 pin_memory=False)

        return data_loader
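
The loader above assumes utils.convert_categories maps each string category to an integer index through the precomputed self.category_map. That helper is not shown here; a minimal sketch of the behavior it is assumed to have:

    import numpy as np

    def convert_categories(category_map, categories):
        # hypothetical sketch: look up each string category in a prebuilt
        # {name: index} dictionary; returning int64 means the subsequent
        # torch.from_numpy() call yields a LongTensor, which is what
        # classification losses such as CrossEntropyLoss expect
        return np.asarray([category_map[c] for c in categories], dtype=np.int64)
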
    def compute_distance_seq(self, all_dataset_categories: List[str]):
        """Sanity check: verify that the configured distance is symmetric
        (d(a, b) == d(b, a)) over a small slice of keypresses, and report
        whether any pair yields a zero distance."""
        for subj_name, subj in self.dataset_descriptors.items():
            subj_data = subj.data
            subj_cat = subj.categories
            cat_map = utils.map_categories(all_dataset_categories)
            subj_int_cat = utils.convert_categories(cat_map, subj_cat)

            # for testing purposes
            l1 = []
            l2 = []

            # for i in range(0, len(subj_data)):
            #     for j in range(0, len(subj_data)):
            for i in range(4, 17):
                for j in range(4, 17):
                    keypress1 = subj_data[i]
                    cat1 = subj_int_cat[i]

                    keypress2 = subj_data[j]
                    cat2 = subj_int_cat[j]

                    # for testing purposes
                    lev_dist_1 = self.distance.compute_distance(keypress1, keypress2)
                    l1.append(lev_dist_1)
                    lev_dist_2 = self.distance.compute_distance(keypress2, keypress1)
                    l2.append(lev_dist_2)

            lists_same = (l1 == l2)
            zero_in_l1 = (0 in l1)
            print(f"Are lists the same: {lists_same}")
            print(f"Does l1 contain a 0? {zero_in_l1}")
            print()
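
compute_distance_seq is a debugging aid: it checks that the configured distance is symmetric and reports whether any pair collapses to zero. A self-contained sketch of the same check, substituting a plain dynamic-programming Levenshtein distance for self.distance.compute_distance (an assumption; the actual distance class is not shown):

    def edit_distance(a, b):
        # classic O(len(a) * len(b)) Levenshtein distance
        m, n = len(a), len(b)
        dp = [[0] * (n + 1) for _ in range(m + 1)]
        for i in range(m + 1):
            dp[i][0] = i
        for j in range(n + 1):
            dp[0][j] = j
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                cost = 0 if a[i - 1] == b[j - 1] else 1
                dp[i][j] = min(dp[i - 1][j] + 1,         # deletion
                               dp[i][j - 1] + 1,         # insertion
                               dp[i - 1][j - 1] + cost)  # substitution
        return dp[m][n]

    seqs = ["abc", "abd", "bcd"]
    l1 = [edit_distance(x, y) for x in seqs for y in seqs]
    l2 = [edit_distance(y, x) for x in seqs for y in seqs]
    print(f"Are lists the same: {l1 == l2}")  # True: Levenshtein is symmetric
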
    def compute_heatmap(self, all_dataset_categories: List[str]) -> None:
        """
        Computes pairwise distances of all tensors in the descriptors (internal to the class) and accumulates the sum
        of the distances among pairs of all categories into a heatmap.

        Args:
            all_dataset_categories (list): a list of all categories of the subject dataset.

        Returns: None

        """
        print("\nComputing evaluation matrix...")

        if not os.path.exists(self.get_heatmap_obj_path()):
            start_time = time.time()
            for subj_name, subj in self.dataset_descriptors.items():
                subj_data = subj.data
                subj_cat = subj.categories
                cat_map = utils.map_categories(all_dataset_categories)
                subj_int_cat = utils.convert_categories(cat_map, subj_cat)

                tuple_list = []
                # reduced ranges used during testing:
                # for i in range(4, 7):
                #     for j in range(4, 7):
                for i in range(0, len(subj_data)):
                    for j in range(0, len(subj_data)):
                        keypress1 = subj_data[i]
                        cat1 = subj_int_cat[i]

                        keypress2 = subj_data[j]
                        cat2 = subj_int_cat[j]

                        tuple_list.append((keypress1, cat1, keypress2, cat2))

                print(f"Id of heatmap as seen by main: {hex(id(heatmap_global))}")
                print(f"Id of heatmap as seen by compute_heatmap() method: {hex(id(self.heatmap))}")
                print(f"Total number of tensors to compare is {len(tuple_list)}")

                with multiprocessing.Pool(processes=self.num_processes) as pool:
                    # map() blocks until every pair has been processed; the
                    # context manager then terminates the pool, so no explicit
                    # close()/join() calls are needed afterwards
                    pool.map(self.compute_distance_parallelized, tuple_list)
            duration_with_pool = utils.time_since(start_time)

            print(f"\nComputed dataset evaluation heatmap for {len(subject_dataset)} subject(s), "
                  f"using {self.num_processes} processes, for a duration of {duration_with_pool}\n")

            self.save(self.heatmap, ".pkl")
        else:
            print("Opening existing heatmap...")
            with open(self.get_heatmap_obj_path(), "rb") as openfile:
                self.__heatmap = pickle.load(openfile)

        if self.heatmap is not None:
            plt.figure(figsize=(14, 10))
            sns.set(font_scale=1.4)
            heatmap_fig = sns.heatmap(self.heatmap, xticklabels=5, yticklabels=5)
            self.save(heatmap_fig, ".png")

        print(f"\nEnd of descriptor evaluator {self.dataset_eval_name}!\n")
Example #4
    def compute_label_msd_dict(self, subject_dict: types.subj_dataset, fold):
        descriptor_computer = DescriptorComputer(self.desc_type, subject_dict, self.parameters,
                                                 self.seq_len, extra_name="_fold_" + str(fold))

        descriptors = descriptor_computer.produce_dataset_descriptors(subject_dict)
        all_data, all_labels = self.get_all_subj_data(descriptors)
        labels = utils.convert_categories(self.category_map, all_labels)

        # for each unique label (assumed to be contiguous integers starting at
        # 0), an ndarray of the index locations where that label occurs
        index_sets = [np.argwhere(labels == label) for label in np.unique(labels)]

        # build dictionary: label -> list of descriptor instances with that label
        msd_label_dict = {}
        for label in np.unique(labels):
            all_locations = index_sets[label]
            all_locations_ls = np.squeeze(all_locations, axis=1).tolist()
            label_instances = [all_data[i] for i in all_locations_ls]
            msd_label_dict[label] = label_instances

        return msd_label_dict
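
The index_sets lookup works because convert_categories is assumed to return contiguous integer labels starting at 0, so a label value doubles as an index into the sorted unique-label list. An equivalent, self-contained way to express the same grouping (a sketch, not the author's code):

    import numpy as np

    def group_by_label(all_data, labels):
        # label -> list of the instances carrying that label; equivalent to
        # the argwhere-based construction when labels are 0..K-1
        labels = np.asarray(labels)
        return {int(label): [all_data[i] for i in np.flatnonzero(labels == label)]
                for label in np.unique(labels)}
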
Example #5
    def _get_data_and_labels(self, subj_dataset):
        if self.learning_def.nn_name == "MLP":
            data, cat = self.get_all_subj_data(subj_dataset, seq=False)
        else:
            data, cat = self.get_all_subj_data(subj_dataset)

        # convert numpy ndarray to PyTorch tensor
        np_data = np.asarray(data, dtype=np.float32)
        data = torch.from_numpy(np_data)

        # convert categories from string to integer
        labels = utils.convert_categories(self.category_map, cat)

        # TODO: convert int_cat to yarn_positions by calling a function/property of touchpad
        if not self.parameters.classification:
            labels = self.knitted_component.get_button_centers(labels)

        row_labels, column_labels = self.knitted_component.get_row_column_labels(
            labels)
        row_labels = torch.from_numpy(row_labels)
        column_labels = torch.from_numpy(column_labels)

        # standardized_data = self.standardize(data)
        row_tensor_dataset = TensorDataset(data, row_labels)
        column_tensor_dataset = TensorDataset(data, column_labels)

        # do NOT shuffle either loader: later code relies on the sample order
        # being preserved across the two loaders
        row_data_loader = DataLoader(row_tensor_dataset,
                                     batch_size=self.learning_def.batch_size,
                                     num_workers=self.parameters.num_threads,
                                     shuffle=False,
                                     pin_memory=False)

        column_data_loader = DataLoader(
            column_tensor_dataset,
            batch_size=self.learning_def.batch_size,
            num_workers=self.parameters.num_threads,
            shuffle=False,
            pin_memory=False)

        return row_data_loader, column_data_loader
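
Both loaders are built with shuffle=False because downstream code evidently pairs the i-th row prediction with the i-th column prediction to recover a button. A minimal sketch of that positional pairing, assuming two trained models (the names below are hypothetical):

    import torch

    def predict_buttons(row_model, column_model, row_loader, column_loader):
        # with shuffle=False, batch i of each loader holds the same samples,
        # so the two prediction streams can be zipped back together by position
        rows, cols = [], []
        with torch.no_grad():
            for (row_data, _), (col_data, _) in zip(row_loader, column_loader):
                rows.append(row_model(row_data).argmax(dim=1))
                cols.append(column_model(col_data).argmax(dim=1))
        return torch.cat(rows), torch.cat(cols)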