def get_tsfresh(data):
    dataset = Dataset(data_array=data, data_labels=data, BATCH_SIZE=BATCH_SIZE)
    extraction_settings = ComprehensiveFCParameters()  # EfficientFCParameters() # MinimalFCParameters()
    features_to_return = []
    start_time = time.time()
    eval_not_finished = 1
    while eval_not_finished != 0:
        # time_checked = check_times(times[i])
        data_batch, _ = dataset.get_batch_eval()
        batch_df = get_data_as_df(data_batch)
        X = extract_features(batch_df, column_id='ids', column_sort='time',
                             default_fc_parameters=extraction_settings,
                             impute_function=impute, n_jobs=10)
        impute(X)
        features_batch = X.values
        features_to_return.append(features_batch)
        eval_not_finished = dataset.BATCH_COUNTER_EVAL
        if dataset.BATCH_COUNTER_EVAL % 100 == 0:
            time_usage = str(datetime.timedelta(
                seconds=int(round(time.time() - start_time))))
            print("it %i Time usage: %s" % (dataset.BATCH_COUNTER_EVAL, time_usage),
                  flush=True)
    features_to_return = np.concatenate(features_to_return)
    time_usage = str(datetime.timedelta(seconds=int(round(time.time() - start_time))))
    print("Total Time usage: %s\n" % time_usage, flush=True)
    return features_to_return

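# -----------------------------------------------------------------------------
# Minimal sketch (not part of the pipeline): the input layout tsfresh's
# extract_features expects. get_tsfresh above assumes get_data_as_df returns a
# long-format DataFrame with one row per time step, an 'ids' column naming the
# series and a 'time' column for ordering; the toy frame below only mimics that
# layout, with MinimalFCParameters to keep the run fast.
def _example_tsfresh_toy_run():
    import numpy as np
    import pandas as pd
    from tsfresh import extract_features
    from tsfresh.feature_extraction import MinimalFCParameters
    from tsfresh.utilities.dataframe_functions import impute

    rng = np.random.default_rng(0)
    toy_df = pd.DataFrame({
        'ids': np.repeat([0, 1], 50),       # two toy light curves
        'time': np.tile(np.arange(50), 2),  # 50 time steps each
        'value': rng.normal(size=100),      # the measured signal
    })
    toy_features = extract_features(
        toy_df, column_id='ids', column_sort='time',
        default_fc_parameters=MinimalFCParameters(),
        impute_function=impute, n_jobs=0)
    impute(toy_features)  # replace NaN/inf in the feature matrix in place
    return toy_features.values  # shape: (2, n_extracted_features)
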
def select_channels(self, dataset: Dataset):
    if len(dataset.data_array.shape) == 3:
        dataset.data_array = dataset.data_array[np.newaxis, ...]
    selected_images_channels = dataset.data_array[..., self.channels_to_select]
    if len(selected_images_channels.shape) == 3:
        selected_images_channels = selected_images_channels[..., np.newaxis]
    dataset.data_array = selected_images_channels
    return dataset

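# -----------------------------------------------------------------------------
# Illustrative sketch (assumed shapes): select_channels above operates on an
# (N, H, W, C) stack; a single (H, W, C) image is first promoted to a batch of
# one, and selecting a single channel keeps a trailing channel axis.
def _example_channel_selection():
    import numpy as np
    images = np.zeros((8, 21, 21, 3))      # N=8 toy images with 3 channels
    picked = images[..., [0, 2]]           # list index keeps the channel axis
    assert picked.shape == (8, 21, 21, 2)
    single = images[..., 1]                # int index drops the channel axis
    single = single[..., np.newaxis]       # restore it, as select_channels does
    assert single.shape == (8, 21, 21, 1)
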
def labels_to_real_bogus(self, dataset: Dataset):
    bogus_label_value = self.params[param_keys.BOGUS_LABEL_VALUE]
    if bogus_label_value is None:
        label_values = np.unique(dataset.data_label)
        bogus_label_value = label_values[-1]
    bogus_indexes = np.where(dataset.data_label == bogus_label_value)[0]
    real_indexes = np.where(dataset.data_label != bogus_label_value)[0]
    dataset.data_label[bogus_indexes] = 0
    dataset.data_label[real_indexes] = 1
    if self.verbose:
        print('Labels changed to Real - Bogus\n%s' % (
            self._get_string_label_count(dataset.data_label,
                                         np.array(['bogus', 'real']))),
              flush=True)
    return dataset

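# -----------------------------------------------------------------------------
# Illustrative sketch (toy labels): labels_to_real_bogus maps whatever label
# encodes "bogus" to 0 and everything else to 1; with no explicit
# BOGUS_LABEL_VALUE it falls back to the largest label value present.
def _example_real_bogus_relabel():
    import numpy as np
    labels = np.array([4, 4, 5, 4, 5])            # toy labels; 5 plays the bogus role
    bogus_value = np.unique(labels)[-1]           # fallback: largest label value
    relabeled = np.where(labels == bogus_value, 0, 1)
    assert relabeled.tolist() == [1, 1, 0, 1, 0]  # real -> 1, bogus -> 0
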
def normalize_by_image(self, dataset: Dataset):
    images = dataset.data_array
    images -= np.nanmin(images, axis=(1, 2))[:, np.newaxis, np.newaxis, :]
    images = images / np.nanmax(images, axis=(1, 2))[:, np.newaxis, np.newaxis, :]
    dataset.data_array = images
    return dataset

def normalize_by_sample(self, dataset: Dataset):
    images = dataset.data_array
    images -= np.nanmin(images, axis=(1, 2, 3))[..., np.newaxis, np.newaxis, np.newaxis]
    images = images / np.nanmax(
        images, axis=(1, 2, 3))[..., np.newaxis, np.newaxis, np.newaxis]
    dataset.data_array = images
    return dataset

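# -----------------------------------------------------------------------------
# Illustrative sketch (random toy data): the two normalizations above differ
# only in the reduction axes. normalize_by_image rescales each image and each
# channel independently to [0, 1] (axis=(1, 2)), while normalize_by_sample uses
# a single min/max per sample across all channels (axis=(1, 2, 3)), preserving
# the relative scale between channels.
def _example_normalization_axes():
    import numpy as np
    images = np.random.rand(4, 21, 21, 3) * 100.0
    # per image and per channel: every (image, channel) slice spans [0, 1]
    per_image = images - np.nanmin(images, axis=(1, 2))[:, np.newaxis, np.newaxis, :]
    per_image = per_image / np.nanmax(per_image, axis=(1, 2))[:, np.newaxis, np.newaxis, :]
    assert np.allclose(per_image.max(axis=(1, 2)), 1.0)
    # per sample: one shared min/max across height, width and channels
    per_sample = images - np.nanmin(images, axis=(1, 2, 3))[
        ..., np.newaxis, np.newaxis, np.newaxis]
    per_sample = per_sample / np.nanmax(per_sample, axis=(1, 2, 3))[
        ..., np.newaxis, np.newaxis, np.newaxis]
    assert np.allclose(per_sample.max(axis=(1, 2, 3)), 1.0)
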
def merge_datasets_dict(datasets_dict1, datasets_dict2):
    merged_datasets_dict = {}
    for set_name in datasets_dict1.keys():
        data_array = np.concatenate([datasets_dict1[set_name].data_array,
                                     datasets_dict2[set_name].data_array])
        data_label = np.concatenate([datasets_dict1[set_name].data_label,
                                     datasets_dict2[set_name].data_label])
        merged_datasets_dict[set_name] = Dataset(data_array, data_label,
                                                 batch_size=50)
    return merged_datasets_dict

def nan_to_num(self, dataset: Dataset):
    samples = dataset.data_array
    nans_sample_idx = self._get_nans_samples_idx(samples)
    if self.verbose:
        print('%i samples with NaNs. NaNs replaced with number %s' % (
            len(nans_sample_idx), str(self.number_to_replace_nans)))
    samples[np.isnan(samples)] = self.number_to_replace_nans
    dataset.data_array = samples
    return dataset

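# -----------------------------------------------------------------------------
# Illustrative sketch (toy array, assumed replacement value 0.0): nan_to_num
# above overwrites every NaN pixel in place with self.number_to_replace_nans.
def _example_nan_replacement():
    import numpy as np
    samples = np.ones((2, 3, 3, 1))
    samples[0, 1, 1, 0] = np.nan
    number_to_replace_nans = 0.0  # stands in for self.number_to_replace_nans
    samples[np.isnan(samples)] = number_to_replace_nans
    assert not np.isnan(samples).any()
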
def generated_images_to_dataset(gen_imgs, label=1):
    dataset = Dataset(data_array=gen_imgs,
                      data_label=np.ones(gen_imgs.shape[0]) * label,
                      batch_size=50)
    data_splitter = DatasetDivider(test_size=0.12, validation_size=0.08)
    data_splitter.set_dataset_obj(dataset)
    train_dataset, test_dataset, val_dataset = \
        data_splitter.get_train_test_val_set_objs()
    datasets_dict = {
        general_keys.TRAIN: train_dataset,
        general_keys.VALIDATION: val_dataset,
        general_keys.TEST: test_dataset
    }
    return datasets_dict

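# -----------------------------------------------------------------------------
# Usage sketch (hypothetical shapes): feeding generator output through
# generated_images_to_dataset yields the usual train/validation/test dict keyed
# by general_keys, with every sample labeled `label` (1 by default). The
# 21x21x3 stamp shape below is only an assumption for illustration.
def _example_generated_images_split():
    import numpy as np
    gen_imgs = np.random.rand(1000, 21, 21, 3)  # assumed generator output shape
    datasets_dict = generated_images_to_dataset(gen_imgs, label=1)
    train_set = datasets_dict[general_keys.TRAIN]
    print(train_set.data_array.shape, train_set.data_label[:5])
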
def crop_at_center(self, dataset: Dataset):
    if self.crop_size is None:
        return dataset
    samples = dataset.data_array
    # image side and crop size must share parity so the crop is centered
    assert samples.shape[1] % 2 == self.crop_size % 2
    center = int(samples.shape[1] / 2)
    crop_side = int(self.crop_size / 2)
    crop_begin = center - crop_side
    if samples.shape[1] % 2 == 0:
        crop_end = center + crop_side
    elif samples.shape[1] % 2 == 1:
        crop_end = center + crop_side + 1
    cropped_samples = samples[:, crop_begin:crop_end, crop_begin:crop_end, :]
    dataset.data_array = cropped_samples
    return dataset

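# -----------------------------------------------------------------------------
# Illustrative sketch of the index arithmetic in crop_at_center: the parity
# check (image side and crop_size both even or both odd) is what keeps the crop
# symmetric around the central pixel.
def _example_center_crop_indices():
    import numpy as np
    samples = np.zeros((1, 63, 63, 3))  # odd image side
    crop_size = 21                      # odd crop size: parity matches
    center = samples.shape[1] // 2      # 31
    crop_side = crop_size // 2          # 10
    crop_begin = center - crop_side     # 21
    crop_end = center + crop_side + 1   # 42 (odd case adds 1)
    cropped = samples[:, crop_begin:crop_end, crop_begin:crop_end, :]
    assert cropped.shape == (1, 21, 21, 3)
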
def clean_misshaped(self, dataset: Dataset):
    samples_clone = list(dataset.data_array[:])
    labels_clone = list(dataset.data_label[:])
    metadata_clone = list(dataset.meta_data[:])
    miss_shaped_sample_idx = self._get_misshaped_samples_idx(samples_clone)
    # pop in descending order so earlier removals don't shift pending indices
    for index in sorted(miss_shaped_sample_idx, reverse=True):
        samples_clone.pop(index)
        labels_clone.pop(index)
        metadata_clone.pop(index)
    if self.verbose:
        print('%i misshaped samples removed\n%s' % (
            len(miss_shaped_sample_idx),
            self._get_string_label_count(labels_clone)), flush=True)
    dataset = Dataset(data_array=samples_clone, data_label=labels_clone,
                      meta_data=metadata_clone, batch_size=dataset.batch_size)
    return dataset

def clean_nans(self, dataset: Dataset):
    samples_clone = list(dataset.data_array[:])
    labels_clone = list(dataset.data_label[:])
    metadata_clone = list(dataset.meta_data[:])
    nans_sample_idx = self._get_nans_samples_idx(samples_clone)
    self._check_nan_all_removed(samples_clone, nans_sample_idx)
    for index in sorted(nans_sample_idx, reverse=True):
        samples_clone.pop(index)
        labels_clone.pop(index)
        metadata_clone.pop(index)
    if self.verbose:
        print('%i samples with NaNs removed\n%s' % (
            len(nans_sample_idx),
            self._get_string_label_count(labels_clone)), flush=True)
    dataset = Dataset(data_array=samples_clone, data_label=labels_clone,
                      batch_size=dataset.batch_size, meta_data=metadata_clone)
    return dataset

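# -----------------------------------------------------------------------------
# Illustrative sketch (toy lists): clean_misshaped and clean_nans both drop
# whole samples by popping indices in descending order, so earlier removals do
# not shift the indices that are still pending.
def _example_drop_samples_by_index():
    import numpy as np
    samples = [np.zeros((3, 3)), np.full((3, 3), np.nan), np.zeros((3, 3))]
    labels = [1, 0, 1]
    bad_idx = [i for i, s in enumerate(samples) if np.isnan(s).any()]
    for index in sorted(bad_idx, reverse=True):  # reverse keeps indices valid
        samples.pop(index)
        labels.pop(index)
    assert len(samples) == 2 and labels == [1, 1]
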
def check_single_image(self, dataset: Dataset):
    if len(dataset.data_array.shape) == 3:
        dataset.data_array = dataset.data_array[np.newaxis, ...]
    return dataset

def _dict_to_dataset(self, data_dict):
    dataset = Dataset(data_array=data_dict[general_keys.IMAGES],
                      data_label=data_dict[general_keys.LABELS],
                      batch_size=self.batch_size)
    return dataset