Example #1
import datetime
import time

import numpy as np
from tsfresh import extract_features
from tsfresh.feature_extraction import ComprehensiveFCParameters
from tsfresh.utilities.dataframe_functions import impute


def get_tsfresh(data):
    # Dataset, get_data_as_df and BATCH_SIZE come from the surrounding project.
    # Labels are unused here, so the data array doubles as a placeholder.
    dataset = Dataset(data_array=data, data_label=data, batch_size=BATCH_SIZE)
    # Alternatives: EfficientFCParameters() or MinimalFCParameters()
    extraction_settings = ComprehensiveFCParameters()
    features_to_return = []
    start_time = time.time()
    eval_not_finished = 1
    while eval_not_finished != 0:
        data_batch, _ = dataset.get_batch_eval()
        batch_df = get_data_as_df(data_batch)
        X = extract_features(batch_df,
                             column_id='ids',
                             column_sort='time',
                             default_fc_parameters=extraction_settings,
                             impute_function=impute,
                             n_jobs=10)
        impute(X)
        features_batch = X.values
        features_to_return.append(features_batch)
        # BATCH_COUNTER_EVAL is assumed to reach 0 once the last batch is served.
        eval_not_finished = dataset.BATCH_COUNTER_EVAL
        if dataset.BATCH_COUNTER_EVAL % 100 == 0:
            time_usage = str(
                datetime.timedelta(seconds=int(round(time.time() - start_time))))
            print("it %i Time usage: %s" % (dataset.BATCH_COUNTER_EVAL, time_usage),
                  flush=True)
    features_to_return = np.concatenate(features_to_return)
    time_usage = str(
        datetime.timedelta(seconds=int(round(time.time() - start_time))))
    print("Total Time usage: %s\n" % time_usage, flush=True)
    return features_to_return
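The helper get_data_as_df is not shown above; a minimal sketch of what it presumably does, assuming each eval batch is a (n_series, n_timesteps) array and that the target is tsfresh's long format with 'ids' and 'time' columns:

import numpy as np
import pandas as pd

def get_data_as_df(data_batch):
    # Hypothetical helper: one row per (id, time) observation, as expected by
    # tsfresh.extract_features with column_id='ids' and column_sort='time'.
    n_series, n_timesteps = data_batch.shape
    return pd.DataFrame({
        'ids': np.repeat(np.arange(n_series), n_timesteps),
        'time': np.tile(np.arange(n_timesteps), n_series),
        'value': data_batch.ravel(),
    })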
Example #2
def select_channels(self, dataset: Dataset):
    # Promote a single image (H, W, C) to a batch of one (1, H, W, C).
    if len(dataset.data_array.shape) == 3:
        dataset.data_array = dataset.data_array[np.newaxis, ...]
    selected_images_channels = dataset.data_array[..., self.channels_to_select]
    # Restore the channel axis if selecting a single channel collapsed it.
    if len(selected_images_channels.shape) == 3:
        selected_images_channels = selected_images_channels[..., np.newaxis]
    dataset.data_array = selected_images_channels
    return dataset
Example #3
def labels_to_real_bogus(self, dataset: Dataset):
    # Binarize labels in place: bogus -> 0, real -> 1.
    bogus_label_value = self.params[param_keys.BOGUS_LABEL_VALUE]
    if bogus_label_value is None:
        # Default: treat the largest label value as the bogus class.
        label_values = np.unique(dataset.data_label)
        bogus_label_value = label_values[-1]
    bogus_indexes = np.where(dataset.data_label == bogus_label_value)[0]
    real_indexes = np.where(dataset.data_label != bogus_label_value)[0]
    dataset.data_label[bogus_indexes] = 0
    dataset.data_label[real_indexes] = 1
    if self.verbose:
        print('Labels changed to Real - Bogus\n%s' %
              self._get_string_label_count(dataset.data_label,
                                           np.array(['bogus', 'real'])),
              flush=True)
    return dataset
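For example, with labels [3, 5, 3, 5] and no configured bogus value, np.unique yields [3, 5], so 5 is taken as the bogus class and the labels become [1, 0, 1, 0].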
Example #4
def normalize_by_image(self, dataset: Dataset):
    # Min-max scale every channel of every image to [0, 1], ignoring NaNs.
    images = dataset.data_array
    images -= np.nanmin(images, axis=(1, 2))[:, np.newaxis, np.newaxis, :]
    images = images / np.nanmax(images, axis=(1, 2))[:, np.newaxis, np.newaxis, :]
    dataset.data_array = images
    return dataset
Example #5
def normalize_by_sample(self, dataset: Dataset):
    # Min-max scale each sample as a whole (across all channels) to [0, 1].
    images = dataset.data_array
    images -= np.nanmin(images, axis=(1, 2, 3))[..., np.newaxis, np.newaxis,
                                                np.newaxis]
    images = images / np.nanmax(images, axis=(1, 2, 3))[..., np.newaxis,
                                                        np.newaxis, np.newaxis]
    dataset.data_array = images
    return dataset
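The two normalizers differ only in the reduction axes: normalize_by_image rescales each channel of each image independently, while normalize_by_sample rescales each sample as a whole. A standalone sketch of the per-image math, assuming an (N, H, W, C) array:

import numpy as np

images = np.random.rand(4, 21, 21, 3).astype(np.float32)
# Same computation as Example #4, written without the Dataset wrapper.
shifted = images - np.nanmin(images, axis=(1, 2))[:, np.newaxis, np.newaxis, :]
normalized = shifted / np.nanmax(shifted, axis=(1, 2))[:, np.newaxis, np.newaxis, :]
assert np.allclose(np.nanmin(normalized, axis=(1, 2)), 0.0)
assert np.allclose(np.nanmax(normalized, axis=(1, 2)), 1.0)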
Example #6
def merge_datasets_dict(datasets_dict1, datasets_dict2):
    # Concatenate the matching splits (train/val/test) of two dataset dicts.
    merged_datasets_dict = {}
    for set_name in datasets_dict1.keys():
        data_array = np.concatenate([datasets_dict1[set_name].data_array,
                                     datasets_dict2[set_name].data_array])
        data_label = np.concatenate([datasets_dict1[set_name].data_label,
                                     datasets_dict2[set_name].data_label])
        merged_datasets_dict[set_name] = Dataset(data_array, data_label,
                                                 batch_size=50)
    return merged_datasets_dict
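A hypothetical use, combining two split dictionaries that share the same keys (the dict names here are illustrative, not from the source):

# e.g., merge real observations with simulated ones, split by split
merged = merge_datasets_dict(real_datasets_dict, synthetic_datasets_dict)
print(merged[general_keys.TRAIN].data_array.shape)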
Example #7
def nan_to_num(self, dataset: Dataset):
    # Replace NaNs in place instead of dropping the affected samples.
    samples = dataset.data_array
    nans_sample_idx = self._get_nans_samples_idx(samples)
    if self.verbose:
        print('%i samples with NaNs. NaNs replaced with number %s' %
              (len(nans_sample_idx), str(self.number_to_replace_nans)))
    samples[np.isnan(samples)] = self.number_to_replace_nans
    dataset.data_array = samples
    return dataset
Example #8
def generated_images_to_dataset(gen_imgs, label=1):
    # Wrap generated images in a Dataset, giving every sample the same label,
    # then split into train (80%), validation (8%) and test (12%) sets.
    dataset = Dataset(data_array=gen_imgs,
                      data_label=np.ones(gen_imgs.shape[0]) * label,
                      batch_size=50)
    data_splitter = DatasetDivider(test_size=0.12, validation_size=0.08)
    data_splitter.set_dataset_obj(dataset)
    train_dataset, test_dataset, val_dataset = \
        data_splitter.get_train_test_val_set_objs()
    datasets_dict = {
        general_keys.TRAIN: train_dataset,
        general_keys.VALIDATION: val_dataset,
        general_keys.TEST: test_dataset
    }
    return datasets_dict
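A hypothetical end-to-end use together with Example #6, assuming gen_imgs is an (N, H, W, C) array of generator output and real_datasets_dict is an existing dict of real-data splits:

# label generated samples as bogus (0) and mix them into the real splits
fake_dict = generated_images_to_dataset(gen_imgs, label=0)
merged_dict = merge_datasets_dict(real_datasets_dict, fake_dict)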
Example #9
def crop_at_center(self, dataset: Dataset):
    if self.crop_size is None:
        return dataset
    samples = dataset.data_array
    # The crop is only exact when image side and crop size share parity.
    assert samples.shape[1] % 2 == self.crop_size % 2
    center = int(samples.shape[1] / 2)
    crop_side = int(self.crop_size / 2)
    crop_begin = center - crop_side
    if samples.shape[1] % 2 == 0:
        crop_end = center + crop_side
    elif samples.shape[1] % 2 == 1:
        # Odd sides: add one pixel past center to get exactly crop_size pixels.
        crop_end = center + crop_side + 1
    cropped_samples = samples[:, crop_begin:crop_end, crop_begin:crop_end, :]
    dataset.data_array = cropped_samples
    return dataset
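The parity assert makes the crop exact: for a 63x63 sample and crop_size=21, center=31 and crop_side=10, so the odd branch gives crop_begin=21 and crop_end=42, i.e. exactly 21 pixels per side.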
Example #10
def clean_misshaped(self, dataset: Dataset):
    # Drop samples whose shape does not match the expected one, together with
    # their labels and metadata, then rebuild the dataset.
    samples_clone = list(dataset.data_array[:])
    labels_clone = list(dataset.data_label[:])
    metadata_clone = list(dataset.meta_data[:])
    miss_shaped_sample_idx = self._get_misshaped_samples_idx(samples_clone)
    # Pop from the end so earlier indices stay valid.
    for index in sorted(miss_shaped_sample_idx, reverse=True):
        samples_clone.pop(index)
        labels_clone.pop(index)
        metadata_clone.pop(index)
    if self.verbose:
        print('%i misshaped samples removed\n%s' %
              (len(miss_shaped_sample_idx),
               self._get_string_label_count(labels_clone)),
              flush=True)
    dataset = Dataset(data_array=samples_clone,
                      data_label=labels_clone,
                      meta_data=metadata_clone,
                      batch_size=dataset.batch_size)
    return dataset
Example #11
def clean_nans(self, dataset: Dataset):
    # Same pattern as Example #10, but removing samples that contain NaNs.
    samples_clone = list(dataset.data_array[:])
    labels_clone = list(dataset.data_label[:])
    metadata_clone = list(dataset.meta_data[:])
    nans_sample_idx = self._get_nans_samples_idx(samples_clone)
    self._check_nan_all_removed(samples_clone, nans_sample_idx)
    for index in sorted(nans_sample_idx, reverse=True):
        samples_clone.pop(index)
        labels_clone.pop(index)
        metadata_clone.pop(index)
    if self.verbose:
        print('%i samples with NaNs removed\n%s' %
              (len(nans_sample_idx),
               self._get_string_label_count(labels_clone)),
              flush=True)
    dataset = Dataset(data_array=samples_clone,
                      data_label=labels_clone,
                      batch_size=dataset.batch_size,
                      meta_data=metadata_clone)
    return dataset
Example #12
def check_single_image(self, dataset: Dataset):
    # Promote a single image (H, W, C) to a batch of one (1, H, W, C).
    if len(dataset.data_array.shape) == 3:
        dataset.data_array = dataset.data_array[np.newaxis, ...]
    return dataset
Example #13
def _dict_to_dataset(self, data_dict):
    # Build a Dataset from an {IMAGES: ..., LABELS: ...} dictionary.
    dataset = Dataset(data_array=data_dict[general_keys.IMAGES],
                      data_label=data_dict[general_keys.LABELS],
                      batch_size=self.batch_size)
    return dataset
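The methods above appear to come from a single preprocessing class applied step by step. A hypothetical chaining (preprocessor is illustrative and the order is assumed; method names are taken from the examples above):

dataset = preprocessor.check_single_image(dataset)
dataset = preprocessor.select_channels(dataset)
dataset = preprocessor.clean_misshaped(dataset)
dataset = preprocessor.clean_nans(dataset)
dataset = preprocessor.crop_at_center(dataset)
dataset = preprocessor.normalize_by_image(dataset)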