# Shared imports for the snippets below. Project-specific helpers such as
# cache_data, restore_data, mask_lake_img, get_im and the crop/merge utilities
# are assumed to be importable from the surrounding package.
import math
import os

import numpy as np
import pandas as pd
import rasterio as rio
from tqdm import tqdm


def create_groundtruth_mask_lake(data_dir, used_band, year_range, n_data_per_year,
                                 day_period, groundtruth_mask_lake_dir, resize_input):
    for year in range(year_range[0], year_range[1] + 1):
        for d in range(n_data_per_year):
            day = d * day_period + 1
            prefix = os.path.join(str(year), str(year) + str(day).zfill(3))
            current_data_dir = os.path.join(data_dir, prefix)
            try:
                list_imgs = os.listdir(current_data_dir)
                band_filename = list(
                    filter(lambda x: used_band in x, list_imgs))[0]
                img = rio.open(os.path.join(current_data_dir, band_filename),
                               'r').read(1)
                if resize_input:
                    img = img[:resize_input, :resize_input]
                groundtruth_mask_lake = mask_lake_img(img, offset=1000)
                cur_mask_data_dir = os.path.join(groundtruth_mask_lake_dir, prefix)
                if not os.path.exists(cur_mask_data_dir):
                    os.makedirs(cur_mask_data_dir)
                cache_data(groundtruth_mask_lake,
                           os.path.join(cur_mask_data_dir, 'masked.dat'))
            except Exception:
                print('Band {} not found for {}{:03} in {}.'.format(
                    used_band, year, day, current_data_dir))
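

# Hedged usage sketch for create_groundtruth_mask_lake; every value below is
# illustrative, and only the <data_dir>/<year>/<year><ddd>/ layout is taken from
# the path construction inside the function.
create_groundtruth_mask_lake(
    data_dir='raw_data',                                   # hypothetical input root
    used_band='NDVI',                                      # substring used to pick the band file
    year_range=(2002, 2005),
    n_data_per_year=46,
    day_period=8,
    groundtruth_mask_lake_dir='masked_lake/groundtruth',   # hypothetical output root
    resize_input=None)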


def create_predict_mask_lake(modis_utils_obj, data_type='test'):
    for idx in range(modis_utils_obj.get_n_tests(data_type)):
        pred = modis_utils_obj.get_inference(data_type, idx)
        pred = modis_utils_obj._preprocess_strategy_context.inverse(pred)
        predict_mask_lake_path = os.path.join(
            modis_utils_obj._predict_mask_lake_dir, data_type,
            '{}.dat'.format(idx))
        cache_data(mask_lake_img(pred), predict_mask_lake_path)


def inference_all(sess, eclm, test_filenames, batch_size=1, keep_rate=1.0,
                  inference_dir=None):
    if inference_dir:
        if not os.path.exists(inference_dir):
            os.makedirs(inference_dir)
    n_tests = len(test_filenames)
    steps = math.ceil(n_tests / batch_size)
    test_losses = []
    VERBOSE_STEP = eclm.verbose_step
    for i in range(steps):
        x, y, pw, y_pw = restore_data_batch(
            test_filenames[i * batch_size:(i + 1) * batch_size])
        loss, y_hat = sess.run(
            [eclm.loss, eclm.y_hat],
            feed_dict={eclm.x: x, eclm.y: y, eclm.pw: pw, eclm.y_pw: y_pw,
                       eclm.is_training: False, eclm.keep_rate: keep_rate})
        if inference_dir:
            cache_data(y_hat, os.path.join(inference_dir, '{}.dat'.format(i)))
        if i % VERBOSE_STEP == 0:
            print('    test_{} - test_loss = {:0.7f}'.format(i, loss))
        test_losses.append(loss)
    test_losses = np.asarray(test_losses)
    avg_test_loss = np.mean(test_losses)
    print('Average Test Loss: {:0.7f}'.format(avg_test_loss))
    if inference_dir:
        cache_data(test_losses, os.path.join(inference_dir, 'loss.dat'))
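

# Hedged sketch of reading back what inference_all caches: per-step predictions
# go to '<inference_dir>/<i>.dat' and the loss vector to '<inference_dir>/loss.dat'.
# 'inference_eclm' is a hypothetical directory name.
test_losses = restore_data(os.path.join('inference_eclm', 'loss.dat'))
print('{} steps, mean loss {:0.7f}'.format(len(test_losses), np.mean(test_losses)))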


def inference(sess, eclm, test_inputs, pw_inputs, keep_rate=1.0, inference_path=None):
    y_hat = sess.run([eclm.y_hat],
                     feed_dict={eclm.x: test_inputs, eclm.pw: pw_inputs,
                                eclm.is_training: False, eclm.keep_rate: keep_rate})
    if inference_path:
        cache_data(y_hat, inference_path)
    return y_hat


def gen_data_2(data_dir, list_filenames, permanent_water_area, water_threshold,
               patch_size, output_dir):
    for filename in list_filenames:
        output_path = os.path.join(output_dir, filename)
        input_path = os.path.join(data_dir, filename)
        sequence_data = restore_data(input_path)
        res = gen_data_1(sequence_data, permanent_water_area, water_threshold,
                         patch_size)
        cache_data(res, output_path)


def inference(self, data_type='test', idx=0, model=None):
    if self.inference_model is None:
        if model is None:
            model = self._model
        self.inference_model = model
    assert self.inference_model is not None
    output = self.model_utils.inference(self, data_type, idx)
    cache_data(output, self._get_inference_path(data_type, idx))
    return output


def main():
    m = n // 4
    if gpu_id < 3:
        list_idx = list(range(gpu_id * m, (gpu_id + 1) * m))
    else:
        list_idx = list(range(gpu_id * m, n))
    res = []
    for idx in list_idx:
        res_idx = predict_multisteps_single_point(
            input_test[:, :, idx:idx + 1], idx, steps, batch_size, gpu_id)
        res.append(np.expand_dims(res_idx, axis=-1))
        with open('tmp/log_{}.txt'.format(gpu_id), 'a') as f:
            f.write(str(idx) + '\n')
    cache_data(res, 'tmp/out_{}.dat'.format(gpu_id))
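

# Hedged sketch of merging the per-GPU outputs written by main(); assumes the
# same four workers implied by the gpu_id < 3 branching above.
parts = [restore_data('tmp/out_{}.dat'.format(g)) for g in range(4)]
predictions = np.concatenate(
    [np.concatenate(p, axis=-1) for p in parts], axis=-1)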


def _generate_on_boundaries(data_paths, target_paths, mask_paths, data_augment_dir,
                            crop_size, n_samples, input_time_steps, output_timesteps):
    n_data = len(data_paths)
    cnt = 0
    half_crop_size = crop_size // 2
    for k in range(n_data):
        data_merged = get_data_merged_from_paths(data_paths[k], target_paths[k],
                                                 mask_paths[k])
        target_img = data_merged[0, -2 * output_timesteps, :, :]
        h, w = data_merged.shape[2:4]
        already = set()
        boundaries = find_boundaries(target_img)
        pos = np.where(boundaries)
        n_pos = len(pos[0])
        ii = 1
        while n_pos < n_samples:
            target_img = data_merged[0, -2 * output_timesteps + ii, :, :]
            already = set()
            boundaries = find_boundaries(target_img)
            pos = np.where(boundaries)
            n_pos = len(pos[0])
            ii += 1
        for i in range(n_samples):
            while True:
                offset_x = pos[0][np.random.randint(n_pos)]
                offset_y = pos[1][np.random.randint(n_pos)]
                if offset_x + half_crop_size + 1 < w and \
                        offset_y + half_crop_size + 1 < h and \
                        offset_x - half_crop_size >= 0 and \
                        offset_y - half_crop_size >= 0 and \
                        (offset_x, offset_y) not in already:
                    break
            already.add((offset_x, offset_y))
            batch = _random_crop_func(data_merged, offset_x - half_crop_size,
                                      offset_y - half_crop_size, crop_size=crop_size)
            for j in range(batch.shape[0]):
                cur = batch[j]
                data = np.expand_dims(cur[:input_time_steps], axis=-1)
                target = np.expand_dims(
                    cur[-2 * output_timesteps:-output_timesteps], axis=-1)
                mask = np.expand_dims(cur[-output_timesteps:], axis=-1)
                target_mask = np.concatenate((target, mask), axis=-1)
                if not os.path.isdir(data_augment_dir):
                    os.makedirs(data_augment_dir)
                file_path = os.path.join(data_augment_dir, '{}.dat'.format(cnt))
                cnt += 1
                cache_data((data, target_mask), file_path)


def _merge_last_data_augment(n_data, data_index_shuffle, list_data, merge_data_dir,
                             batch_size, thread_id, n_threads):
    m = n_data // batch_size
    k = m - m % n_threads
    i = k + thread_id
    merge_data = []
    merge_target_mask = []
    for j in data_index_shuffle[i * batch_size:(i + 1) * batch_size]:
        data = restore_data(list_data[j])
        merge_data.append(np.expand_dims(data[0], axis=0))
        if data[1].shape[0] > 1:
            data_1 = np.expand_dims(data[1], axis=0)
        else:
            data_1 = data[1]
        merge_target_mask.append(data_1)
    if len(merge_data) == batch_size:
        merge_data = np.vstack(merge_data)
        merge_target_mask = np.vstack(merge_target_mask)
        merge_data_path = os.path.join(merge_data_dir, '{}.dat'.format(i))
        cache_data((merge_data, merge_target_mask), merge_data_path)


def create_one_only_mask(data_dir, used_band, year_range, n_data_per_year,
                         day_period, mask_data_dir, resize_input):
    for year in range(year_range[0], year_range[1] + 1):
        for d in range(n_data_per_year):
            day = d * day_period + 1
            prefix = os.path.join(str(year), str(year) + str(day).zfill(3))
            current_data_dir = os.path.join(data_dir, prefix)
            try:
                data = restore_data(
                    os.path.join(current_data_dir, 'masked.dat'))
                mask = np.ones_like(data)
                if resize_input:
                    mask = mask[:resize_input, :resize_input]
                cur_mask_data_dir = os.path.join(mask_data_dir, prefix)
                if not os.path.exists(cur_mask_data_dir):
                    os.makedirs(cur_mask_data_dir)
                cache_data(mask, os.path.join(cur_mask_data_dir, 'masked.dat'))
            except Exception:
                print('Band {} not found for {}{:03} in {}.'.format(
                    used_band, year, day, current_data_dir))


def change_fill_value(data_dir, used_band, year_range, n_data_per_year, day_period,
                      change_fill_value_data_dir):
    for year in range(year_range[0], year_range[1] + 1):
        for d in range(n_data_per_year):
            day = d * day_period + 1
            prefix = os.path.join(str(year), str(year) + str(day).zfill(3))
            current_data_dir = os.path.join(data_dir, prefix)
            try:
                list_imgs = os.listdir(current_data_dir)
                band_filename = list(
                    filter(lambda x: used_band in x, list_imgs))[0]
                img = get_im(os.path.join(current_data_dir, band_filename))
                # Map the fill value -3000 to -2001.
                img[img == -3000] = -2001
                cur_dest_dir = os.path.join(change_fill_value_data_dir, prefix)
                if not os.path.exists(cur_dest_dir):
                    os.makedirs(cur_dest_dir)
                cache_data(img, os.path.join(cur_dest_dir, 'change_fill_value.dat'))
            except Exception:
                print('Band {} not found for {}{:03} in {}.'.format(
                    used_band, year, day, current_data_dir))


def gen_data_1(sequence_data, permanent_water_area, water_threshold, patch_size=32,
               output_path=None):
    inputs, targets, inputs_pw, targets_pw = sequence_data
    last_inputs_pw = inputs[-1]
    a = find_boundaries_mask_lake(last_inputs_pw, water_threshold)
    # center_point_xs is assumed to be a module-level constant defined alongside
    # get_pos in the original source.
    list_center_pos = get_pos(a, center_point_xs)
    patches_inputs = []
    patches_targets = []
    patches_inputs_pw = []
    patches_targets_pw = []
    outputs = [
        patches_inputs, patches_targets, patches_inputs_pw, patches_targets_pw
    ]

    def padding(x, sz):
        # Zero-pad the last two (spatial) dimensions up to sz x sz, keeping any
        # leading time dimension intact.
        res = np.zeros(x.shape[:-2] + (sz, sz))
        res[..., :x.shape[-2], :x.shape[-1]] = x
        return res

    for center_pos in list_center_pos:
        if len(center_pos) < 2:
            continue
        x1, x2, y1, y2 = get_patch_coor(center_pos, patch_size)
        for origin, patches in zip(sequence_data, outputs):
            patch = origin[:, x1:x2 + 1, y1:y2 + 1]
            if x2 - x1 < patch_size - 1 or y2 - y1 < patch_size - 1:
                patch = padding(patch, patch_size)
            patches.append(np.expand_dims(patch, axis=0))
    res = []
    for patches in outputs:
        res.append(np.vstack(patches))
    if output_path is not None:
        cache_data(res, output_path)
    return res
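

# Quick standalone check of the padding behaviour assumed in gen_data_1 above:
# a border-clipped patch keeps its time axis and is zero-padded spatially.
_patch = np.ones((7, 20, 28))                      # (time, h, w), illustrative shapes
_padded = np.zeros(_patch.shape[:-2] + (32, 32))
_padded[..., :_patch.shape[-2], :_patch.shape[-1]] = _patch
assert _padded.shape == (7, 32, 32)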


def _generate_whole_image(data_paths, target_paths, mask_paths, data_augment_dir,
                          input_time_steps, output_timesteps):
    n_data = len(data_paths)
    for k in range(n_data):
        data_merged = get_data_merged_from_paths(data_paths[k], target_paths[k],
                                                 mask_paths[k])
        data = data_merged[:, :-2 * output_timesteps, :, :]
        data = np.expand_dims(data, axis=-1)
        target = data_merged[:, -2 * output_timesteps:-output_timesteps, :, :]
        target = np.expand_dims(target, axis=-1)
        mask = data_merged[:, -output_timesteps:, :, :]
        mask = np.expand_dims(mask, axis=-1)
        if target.shape[1] > 1:
            target = target.squeeze(axis=1)
            mask = mask.squeeze(axis=1)
        target_mask = np.concatenate((target, mask), axis=-1)
        if not os.path.isdir(data_augment_dir):
            os.makedirs(data_augment_dir)
        file_path = os.path.join(data_augment_dir, '{}.dat'.format(k))
        cache_data((data, target_mask), file_path)


def _preprocess_data(self, data_dir, used_band, year_range, n_data_per_year,
                     day_period, preprocessed_data_dir, resize_input=None):
    for year in range(year_range[0], year_range[1] + 1):
        for d in range(n_data_per_year):
            day = d * day_period + 1
            prefix = os.path.join(str(year), str(year) + str(day).zfill(3))
            current_data_dir = os.path.join(data_dir, prefix)
            try:
                list_imgs = os.listdir(current_data_dir)
                filename = list(filter(lambda x: used_band in x, list_imgs))[0]
                img = restore_data(os.path.join(current_data_dir, filename))
                normalized_img = self.fn(img)
                if resize_input:
                    normalized_img = normalized_img[:resize_input, :resize_input]
                cur_dest_dir = os.path.join(preprocessed_data_dir, prefix)
                if not os.path.exists(cur_dest_dir):
                    os.makedirs(cur_dest_dir)
                cache_data(normalized_img,
                           os.path.join(cur_dest_dir, 'preprocessed.dat'))
            except Exception:
                print('Data {}{:03} not found in {}.'.format(
                    year, day, current_data_dir))


def _generate(data_paths, target_paths, mask_paths, data_augment_dir, crop_size,
              n_samples, input_time_steps, output_timesteps):
    n_data = len(data_paths)
    cnt = 0
    for k in range(n_data):
        data_merged = get_data_merged_from_paths(data_paths[k], target_paths[k],
                                                 mask_paths[k])
        for i in range(n_samples):
            batch = _random_crop_func_1(data_merged, crop_size)
            for j in range(batch.shape[0]):
                cur = batch[j]
                data = np.expand_dims(cur[:input_time_steps], axis=-1)
                target = np.expand_dims(
                    cur[-2 * output_timesteps:-output_timesteps], axis=-1)
                mask = np.expand_dims(cur[-output_timesteps:], axis=-1)
                target_mask = np.concatenate((target, mask), axis=-1)
                if not os.path.isdir(data_augment_dir):
                    os.makedirs(data_augment_dir)
                file_path = os.path.join(data_augment_dir, '{}.dat'.format(cnt))
                cnt += 1
                cache_data((data, target_mask), file_path)
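

# Hedged sketch of consuming one augmented sample written by _generate; the
# directory name is hypothetical, while the (data, target_mask) tuple layout and
# the channel order of target_mask follow the code above.
data, target_mask = restore_data(os.path.join('data_augment', '0.dat'))
target, mask = target_mask[..., :1], target_mask[..., 1:]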


def create_sequence_data(data_file_paths, n_samples=10000):
    if not os.path.exists(sequence_data_dir):
        os.makedirs(sequence_data_dir)
    for data_type, n_data in zip(('train', 'val', 'test'),
                                 (n_train, n_val, n_test)):
        sequence_data_type_dir = os.path.join(sequence_data_dir, data_type)
        if not os.path.exists(sequence_data_type_dir):
            os.makedirs(sequence_data_type_dir)
        data_type_file_paths = data_file_paths[data_type]
        input_file = data_type_file_paths['input']
        target_file = data_type_file_paths['target']
        pw_target_file = data_type_file_paths['pw_target']
        for i in range(min(n_data, n_samples)):
            inputs = select_data(get_data_from_data_file(input_file, i))
            target = get_data_from_data_file(target_file, i)
            input_pixel_weights = np.array(
                list(map(lambda x: get_pixel_weights(x, water_threshold), inputs)))
            target_pixel_weights = get_data_from_data_file(pw_target_file, i)
            cache_data(
                (inputs, target, input_pixel_weights, target_pixel_weights),
                os.path.join(sequence_data_type_dir, '{}.dat'.format(i)))
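

# Hedged sketch of restoring one cached sequence sample; sequence_data_dir is the
# same module-level directory used by create_sequence_data above.
inputs, target, input_pw, target_pw = restore_data(
    os.path.join(sequence_data_dir, 'train', '0.dat'))
print(inputs.shape, target.shape, input_pw.shape, target_pw.shape)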


            _, results, _, _ = load_data_pickle(results)
            res.append(results.forecast(steps))
        return pd.concat(res, ignore_index=True, axis=1)

    def eval(self, groundtruth, metric=None):
        steps = groundtruth.shape[0]
        yhat = self.inference(steps)
        if metric is None:
            metric = mse
        return yhat, metric(groundtruth, yhat)


# In[71]:

vsarima = VSARIMA(df_train_12, df_test_12)
# vsarima = VSARIMA(df_train, df_test)


# In[72]:

vsarima.train()


# In[85]:

yhat, loss = vsarima.eval(df_test_12)
# yhat, loss = vsarima.eval(df_test)
print('mse =', loss)
cache_data(yhat, 'vsarima_inference.dat')
account.upload_file('vsarima_inference.dat', 'MODIS')
account.upload_file('log.csv', 'MODIS')


# In[ ]:

lstm_2 = LSTM_2(data, data_train_1, list_idx, scaler, mode='inference')
losses, predictions = lstm_2.eval(return_original_range=True)
print(losses)
print(losses.mean())


# In[ ]:

inference_dir = 'inference'
if not os.path.exists(inference_dir):
    os.makedirs(inference_dir)
cache_data(predictions, os.path.join(inference_dir, 'lstm_2.dat'))


# In[ ]:

inputs = lstm_1.data['test_X'][:1]
groundtruths = lstm_1.data['test_y'][:1]
inputs.shape, groundtruths.shape


# In[ ]:

predictions.shape


# In[153]:

vsarima_1 = VSARIMA(data_train_1, data_test_1, list_idx, mode='inference')
losses, predictions = vsarima_1.eval(data_test_1)


# In[155]:

print(losses.mean())


# In[154]:

inference_dir = 'inference'
if not os.path.exists(inference_dir):
    os.makedirs(inference_dir)
cache_data(predictions, os.path.join(inference_dir, 'sarima.dat'))


# # Calculate Polygon area

# In[166]:

predictions = predictions.reshape(predictions.shape[0], -1, 2)


# In[167]:

predictions.shape, data_test.shape


# In[171]:

def find_border(data_points):


# In[14]:

inputs_np_whole_img = restore_data(
    '../../multiscale_predrnn/data/sequence_data/test/0.dat')[0]
inputs_np_whole_img.shape


# In[15]:

inference_convlstm_whole_img = InferenceConvLSTMWholeImg(params)


# In[19]:

inferences_np = {}
np_input_dir = '../../multiscale_predrnn/data/sequence_data'
steps_ahead = 80
for subset in ('test', 'val'):
    inference_dir = 'inferences/{}'.format(subset)
    if not os.path.exists(inference_dir):
        os.makedirs(inference_dir)
    np_input_dir_subset = os.path.join(np_input_dir, subset)
    n = len(os.listdir(np_input_dir_subset))
    res1 = []
    for i in tqdm(range(n)):
        inputs_np = restore_data(
            os.path.join(np_input_dir_subset, '{}.dat'.format(i)))[0]
        inputs_np = inputs_np[-timesteps:]
        res = inference_convlstm_whole_img.get_inference_from_np_array(
            inputs_np, steps_ahead)
        cache_data(res, os.path.join(inference_dir, '{}.dat'.format(i)))