def get_predict_mask_lake(self, data_type='test', idx=0):
    """Load the cached predicted lake mask for sample `idx` of `data_type`.

    Looks for `<predict_mask_lake_dir>/<data_type>/<idx>.dat` and returns
    its contents; returns None when no such file exists.
    """
    path = os.path.join(
        self._predict_mask_lake_dir, data_type, '{}.dat'.format(idx))
    if not os.path.exists(path):
        return None
    return restore_data(path)
def __getitem__(self, idx):
    """Assemble batch `idx` from per-sample cache files.

    Each cached file holds a 4-tuple
    (input_imgs, target_img, input_pixel_weights, target_pixel_weight),
    each a 3-D array (presumably time x H x W -- TODO confirm).

    Returns:
        batch_X: [stacked input images, stacked input pixel weights],
            each of shape (batch_size, T, H, W, 1).
        batch_y: target images and target pixel weights concatenated on
            the channel axis, shape (batch_size, T, H, W, 2).
    """
    # The dead `batch_X = []` initialization from the original was removed;
    # batch_X is built from the stacked lists below.
    batch_input_imgs = []
    batch_target_img = []
    batch_input_pixel_weights = []
    batch_target_pixel_weight = []
    for i in range(idx * self.batch_size, (idx + 1) * self.batch_size):
        data = restore_data(self.data_filenames[i])
        # Add leading batch axis and trailing channel axis: (T,H,W) -> (1,T,H,W,1).
        batch_input_imgs.append(data[0][np.newaxis, :, :, :, np.newaxis])
        batch_target_img.append(data[1][np.newaxis, :, :, :, np.newaxis])
        batch_input_pixel_weights.append(data[2][np.newaxis, :, :, :, np.newaxis])
        batch_target_pixel_weight.append(data[3][np.newaxis, :, :, :, np.newaxis])
    batch_X = [
        np.vstack(batch_input_imgs),
        np.vstack(batch_input_pixel_weights)
    ]
    batch_y = np.concatenate([
        np.vstack(batch_target_img),
        np.vstack(batch_target_pixel_weight)
    ], axis=-1)
    return batch_X, batch_y
def __getitem__(self, idx):
    """Return one (inputs, [targets, inputs]) batch.

    Each cache file stores `self.k` consecutive batches; batch `idx` lives
    at slot `idx % self.k` of file `idx // self.k`.
    """
    file_data = restore_data(self.data_filenames[idx // self.k])
    slot = idx % self.k
    lo = slot * self.batch_size
    hi = lo + self.batch_size
    batch_X = file_data[0][lo:hi].astype(np.float32)
    batch_y = file_data[1][lo:hi].astype(np.float32)
    if self.pretrained:
        # np.tile with a scalar repeats along the last axis -- presumably to
        # mimic a 3-channel input for a pretrained backbone; confirm upstream.
        batch_X = np.tile(batch_X, 3)
    return batch_X, [batch_y, batch_X]
def gen_data_2(data_dir, list_filenames, permanent_water_area, water_threshold, patch_size, output_dir):
    """Run gen_data_1 over each sequence file and cache the result.

    NOTE: `permanent_water_area` is accepted but not used by this function.
    """
    for name in list_filenames:
        src = os.path.join(data_dir, name)
        dst = os.path.join(output_dir, name)
        sequence_data = restore_data(src)
        cache_data(gen_data_1(sequence_data, water_threshold, patch_size), dst)
def restore_data_batch(list_path):
    """Load cached 4-tuples and stack each component along a new batch axis.

    Every array in every loaded tuple gets a leading batch axis and a
    trailing channel axis; corresponding components are then vstacked.
    Returns a list of four stacked arrays.
    """
    components = [[], [], [], []]
    for path in list_path:
        loaded = restore_data(path)
        for bucket, arr in zip(components, loaded):
            bucket.append(np.expand_dims(np.expand_dims(arr, axis=0), axis=-1))
    return [np.vstack(bucket) for bucket in components]
def gen_data(input_dir, list_filenames, data_type, permanent_water_area, water_threshold, patch_size, output_dir):
    """Generate patch data for every file of one split (train/val/test)."""
    src_dir = os.path.join(input_dir, data_type)
    dst_dir = os.path.join(output_dir, data_type)
    for name in list_filenames:
        sequence_data = restore_data(os.path.join(src_dir, name))
        gen_data_1(sequence_data, permanent_water_area, water_threshold,
                   patch_size, os.path.join(dst_dir, name))
def get_groundtruth_mask_lake(self, data_type='test', idx=0):
    """Load the ground-truth lake mask for sample `idx` of `data_type`.

    'Zhang' preprocessing stores its mask as 'preprocessed.dat'; every other
    preprocessing type uses 'masked.dat'. Returns None when the file is absent.
    """
    yearday = self._get_yearday(data_type, idx)
    year = yearday[:-3]
    filename = ('preprocessed.dat'
                if self._preprocessed_type == 'Zhang' else 'masked.dat')
    path = os.path.join(
        self._groundtruth_mask_lake_dir, year, yearday, filename)
    if os.path.exists(path):
        return restore_data(path)
    return None
def __getitem__(self, idx):
    """Load one cached sequence and split it into normalized (X, Y).

    The file 'output/<name>' holds a 3-D array: all frames but the last form
    the input X, the last frame is the target Y. Both get a leading batch
    axis and a trailing channel axis, then are min-max scaled into [0, 1].

    Changes vs. original: removed the unused local `i = idx`, and renamed
    the reserved-style dunder locals (__max__/__min__/__range__).
    """
    data = restore_data('output/{0}'.format(self.data_filenames[idx]))
    batch_X = np.expand_dims(np.expand_dims(data[:-1, :, :], axis=0), axis=-1)
    batch_Y = np.expand_dims(np.expand_dims(data[-1, :, :], axis=0), axis=-1)
    # Fixed data range used for normalization -- assumed dataset-wide
    # constants; TODO confirm against the data-generation code.
    data_max = 17.0
    data_min = -34.0
    data_range = data_max - data_min
    X = (batch_X - data_min) / data_range
    Y = (batch_Y - data_min) / data_range
    return (X, Y)
def get_predict_mask_lake(data_dir, used_band, crop_size, time_steps, filters, kernel_size, n_hidden_layers, mask_cloud_loss, reservoir_index, test_index):
    """Load a model prediction, undo its normalization, and mask the lake.

    For 'div'-normalized data the prediction is rescaled straight into the
    raw value range (-2001, 10000); otherwise it is rescaled into the
    reservoir's min/max and de-standardized with the reservoir mean/std.
    """
    predict_dir = get_predict_dir(
        data_dir, reservoir_index, used_band, crop_size, time_steps,
        filters, kernel_size, n_hidden_layers, mask_cloud_loss)
    predict = restore_data(
        os.path.join(predict_dir, '{}.dat'.format(test_index)))
    lo, hi = np.min(predict), np.max(predict)
    if 'div' in data_dir:
        predict = np.interp(predict, (lo, hi), (-2001, 10000))
    else:
        reservoir_min, reservoir_max = get_reservoir_min_max(
            data_dir, reservoir_index)
        mean, std = get_reservoir_mean_std(data_dir, reservoir_index)
        predict = np.interp(predict, (lo, hi), (reservoir_min, reservoir_max))
        predict = predict * std + mean
    return mask_lake_img(predict, band=used_band)
def _merge_last_data_augment(n_data, data_index_shuffle, list_data, merge_data_dir, batch_size, thread_id, n_threads):
    """Merge one leftover batch of augmented samples and cache it.

    With m = n_data // batch_size total batches, the first m - (m % n_threads)
    batches are handled elsewhere; this thread merges leftover batch index
    (m - m % n_threads) + thread_id. The merged file is written only when a
    full batch of samples was collected.
    """
    total_batches = n_data // batch_size
    first_leftover = total_batches - total_batches % n_threads
    batch_idx = first_leftover + thread_id
    inputs = []
    target_masks = []
    selected = data_index_shuffle[
        batch_idx * batch_size:(batch_idx + 1) * batch_size]
    for j in selected:
        sample = restore_data(list_data[j])
        inputs.append(np.expand_dims(sample[0], axis=0))
        if sample[1].shape[0] > 1:
            # Multi-row target: add an explicit batch axis before stacking.
            target_masks.append(np.expand_dims(sample[1], axis=0))
        else:
            target_masks.append(sample[1])
    if len(inputs) == batch_size:
        out_path = os.path.join(merge_data_dir, '{}.dat'.format(batch_idx))
        cache_data((np.vstack(inputs), np.vstack(target_masks)), out_path)
def create_one_only_mask(data_dir, used_band, year_range, n_data_per_year, day_period, mask_data_dir, resize_input):
    """Write an all-ones mask next to every existing 'masked.dat' image.

    Iterates years in `year_range` (inclusive) and `n_data_per_year` days per
    year spaced `day_period` apart; for each existing image a ones-mask of the
    same shape (optionally cropped to resize_input x resize_input) is cached
    under mask_data_dir/<year>/<yearday>/masked.dat. Missing or unreadable
    source files are reported and skipped.
    """
    for year in range(year_range[0], year_range[1] + 1):
        for d in range(n_data_per_year):
            day = d * day_period + 1
            prefix = os.path.join(str(year), str(year) + str(day).zfill(3))
            current_data_dir = os.path.join(data_dir, prefix)
            try:
                data = restore_data(
                    os.path.join(current_data_dir, 'masked.dat'))
                mask = np.ones_like(data)
                if resize_input:
                    mask = mask[:resize_input, :resize_input]
                cur_mask_data_dir = os.path.join(mask_data_dir, prefix)
                # exist_ok avoids the race between the old exists() check
                # and makedirs().
                os.makedirs(cur_mask_data_dir, exist_ok=True)
                cache_data(mask, os.path.join(cur_mask_data_dir, 'masked.dat'))
            except Exception:
                # Was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit.
                print('Not found band {} in {}{:03} in {}.'.format(
                    used_band, year, day, current_data_dir))
def _preprocess_data(self, data_dir, used_band, year_range, n_data_per_year, day_period, preprocessed_data_dir, resize_input=None):
    """Normalize each image of `used_band` with self.fn and cache it.

    For every (year, day) in the schedule, picks the first file in
    data_dir/<year>/<yearday>/ whose name contains `used_band`, normalizes
    it, optionally crops to resize_input x resize_input, and caches it as
    preprocessed_data_dir/<year>/<yearday>/preprocessed.dat. Missing or
    unreadable days are reported and skipped.
    """
    for year in range(year_range[0], year_range[1] + 1):
        for d in range(n_data_per_year):
            day = d*day_period + 1
            prefix = os.path.join(str(year), str(year) + str(day).zfill(3))
            current_data_dir = os.path.join(data_dir, prefix)
            try:
                list_imgs = os.listdir(current_data_dir)
                # First filename containing the band token; IndexError (no
                # match) is handled like any other missing-data failure.
                filename = list(filter(lambda x: used_band in x, list_imgs))[0]
                img = restore_data(os.path.join(current_data_dir, filename))
                normalized_img = self.fn(img)
                if resize_input:
                    normalized_img = normalized_img[:resize_input, :resize_input]
                cur_dest_dir = os.path.join(preprocessed_data_dir, prefix)
                # exist_ok avoids the race between the old exists() check
                # and makedirs().
                os.makedirs(cur_dest_dir, exist_ok=True)
                cache_data(normalized_img,
                           os.path.join(cur_dest_dir, 'preprocessed.dat'))
            except Exception:
                # Was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit.
                print('Not found data {}{:03} in {}.'.format(
                    year, day, current_data_dir))
def main():
    """Entry point: build a distributed ECLM graph, then train or infer.

    Parses CLI args for mode/data/hyper-parameters, builds a single-machine
    tf.train ClusterSpec/Server over `gpus` worker ports, then either trains
    two ECLM replicas on two device halves, or restores a checkpoint and
    runs inference over the test set.
    """
    parser = arg_parse()
    in_steps = gen_data.real_timesteps
    out_steps = gen_data.n_out
    mode = parser.mode
    sequence_data_dir = parser.data_dir
    #gpus = max(min(parser.gpus, len(get_available_gpus())), 1)
    gpus = 4  # NOTE(review): hard-coded; the computed version above is disabled
    num_blocks = parser.num_blocks
    batch_size = parser.batch_size
    train_epochs = parser.train_epochs
    learning_rate = parser.lr
    verbose_step = parser.verbose_step
    n_ports = gpus
    devices = get_devices(n_ports)
    train_filenames = get_list_filenames(sequence_data_dir, 'train')
    val_filenames = get_list_filenames(sequence_data_dir, 'val')
    test_filenames = get_list_filenames(sequence_data_dir, 'test')
    # Fail fast if any split is empty or the paths are wrong.
    assert os.path.isfile(train_filenames[0])
    assert os.path.isfile(val_filenames[0])
    assert os.path.isfile(test_filenames[0])
    sample_data = restore_data(train_filenames[0])[0]
    img_shape = sample_data.shape[1:]
    ###################
    filters = [32, 16, 8, 1]
    ###################
    PORTS = [str(START_PORT + i) for i in range(n_ports)]
    task_idx = 0
    workers = [IP_ADDRESS + ":" + PORT for PORT in PORTS]
    cluster_spec = tf.train.ClusterSpec({'worker': workers})
    server = tf.train.Server(cluster_spec, job_name='worker',
                             task_index=task_idx)
    print(server.server_def)
    if mode == 'train':
        # Two replicas, each pinned to half of the available devices.
        eclm_1 = ECLM(in_steps, out_steps, img_shape, devices[:2], filters,
                      num_blocks=num_blocks,
                      starter_learning_rate=learning_rate,
                      verbose_step=verbose_step)
        eclm_1.build_graph()
        eclm_2 = ECLM(in_steps, out_steps, img_shape, devices[2:], filters,
                      num_blocks=num_blocks,
                      starter_learning_rate=learning_rate,
                      verbose_step=verbose_step)
        eclm_2.build_graph()
        with tf.device(devices[0]):
            init = tf.global_variables_initializer()
        # NOTE(review): original indentation is lost in this chunk; train()
        # is assumed to sit outside the tf.device block -- confirm.
        train([eclm_1, eclm_2], train_filenames, val_filenames, server,
              init, train_epochs, batch_size)
    else:
        eclm = ECLM(in_steps, out_steps, img_shape, devices, filters,
                    mode="inference", num_blocks=num_blocks,
                    starter_learning_rate=learning_rate,
                    verbose_step=verbose_step)
        eclm.build_graph()
        saver = tf.train.Saver()
        with tf.Session(server.target) as sess:
            ckpt = tf.train.get_checkpoint_state(
                os.path.dirname('checkpoint/checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            inference_dir = os.path.join('inference', str(out_steps))
            inference_all(sess, eclm, test_filenames,
                          inference_dir=inference_dir)
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.utils import plot_model
from tensorflow.python.keras.callbacks import ModelCheckpoint
from tensorflow.python.keras.callbacks import LearningRateScheduler, CSVLogger

from modis_utils.misc import restore_data
from modis_utils.modis_utils import ModisUtils
from modis_utils.model.loss_function import PSNRLoss, lossSSIM, SSIM, step_decay
from modis_utils.model.loss_function import mse_with_mask_tf, mse_with_mask_tf_1, mse_with_mask

# Parameters
# Run settings are loaded from a cached dict (see restore_data).
config_path = 'config.dat'
config_params = restore_data(config_path)
lr = config_params['lr']
training = True  # NOTE(review): hard-coded flag, not read from the config dict
crop_size = config_params['crop_size']
input_timesteps = config_params['input_timesteps']
output_timesteps = config_params['output_timesteps']
batch_size = config_params['batch_size']
compile_params = config_params['compile_params']
model_name = config_params['model_name']
preprocessed_type = config_params['preprocessed_type']
modis_product = config_params['modis_product']
monitor = config_params['monitor']
monitor_mode = config_params['monitor_mode']
resize_input = config_params['resize_input']
sys.path.append('../..') # In[2]: from modis_utils.misc import cache_data, restore_data from modis_utils.image_processing import mask_lake_img # In[3]: n_cores = 24 patch_size = 32 # In[4]: water_threshold = (0.1 + 0.2001) / 1.2001 percentile = restore_data('percentile/0.dat') center_point_xs = np.arange(16, 513, 32) center_point_xs # In[5]: permanent_water_area = np.where(percentile > 0.8, 1, 0) # # Utils functions # In[6]: def find_boundaries_mask_lake(x, water_threshold): x1 = mask_lake_img(x, offset=water_threshold) return find_boundaries(x1)
from keras.models import Model, load_model

sys.path.append('..')
from modis_utils.misc import cache_data, restore_data

# CLI: gpu_id input_path batch_size steps model_paths_path n
gpu_id = int(sys.argv[1])
input_path = sys.argv[2]
batch_size = int(sys.argv[3])
steps = int(sys.argv[4])
model_paths_path = sys.argv[5]
n = int(sys.argv[6])
os.environ["CUDA_VISIBLE_DEVICES"] = sys.argv[1]

input_test = restore_data(input_path)
model_paths = restore_data(model_paths_path)


def predict_multisteps_single_point(input_single_point, point_id, steps,
                                    batch_size, gpu_id):
    """Autoregressively predict `steps` values ahead for one spatial point.

    Loads the per-point model from model_paths[point_id] and feeds each
    prediction back as the newest timestep of the input window.
    NOTE(review): the `gpu_id` parameter is not used inside this function.
    Returns an array of shape (len(input_single_point), steps).
    """
    K.clear_session()
    model = load_model(model_paths[point_id])
    n_test = len(input_single_point)
    res = np.zeros((n_test, steps))
    inputs = input_single_point.copy()
    for i in range(steps):
        predict = model.predict(inputs, batch_size=batch_size)
        # Slide the window: drop the oldest timestep, append the prediction.
        inputs = np.concatenate([inputs[:, 1:, :], predict.reshape(-1, 1, 1)],
                                axis=1)
        res[:, i:i+1] = predict
    return res


# NOTE(review): the body of main() is truncated in this chunk.
def main():
def get_inference(self, data_type='test', idx=0, model=None):
    """Return the cached inference for (data_type, idx), computing on demand.

    NOTE: `model` is accepted for interface compatibility but not used here.
    """
    path = self._get_inference_path(data_type, idx)
    if not os.path.exists(path):
        # Lazily compute and cache the inference before loading it.
        self.inference(data_type, idx)
    return restore_data(path)
import os
import sys
import numpy as np
from skimage.segmentation import find_boundaries

sys.path.append('../../../')
from modis_utils.misc import cache_data, restore_data
from modis_utils.image_processing import mask_lake_img

# Water threshold in the normalized value range -- TODO confirm derivation.
water_threshold = (0.1 + 0.2001) / 1.2001
percentile = restore_data('../percentile/0.dat')
center_point_xs = np.arange(16, 513, 32)
# Pixels whose percentile exceeds 0.8 are treated as permanent water.
permanent_water_area = np.where(percentile > 0.8, 1, 0)


def select_img(list_imgs):
    """Return the pixelwise mean of a list of images."""
    n = len(list_imgs)
    res = list_imgs[0].copy()
    for img in list_imgs[1:]:
        res += img
    return res / n


def select_data(sequence_data):
    """Average the sequence in blocks of 4 frames, keeping the last 3 as-is."""
    res = []
    for i in range(0, len(sequence_data) - 3, 4):
        selected_img = select_img(sequence_data[i:i + 4])
        res.append(np.expand_dims(selected_img, axis=0))
    res = np.vstack(res)
    res = np.vstack([res, sequence_data[-3:]])
    # NOTE(review): function appears truncated in this chunk -- presumably
    # `return res` follows.
from modis_utils.misc import cache_data, restore_data

# CLI: gpu_id data_path epochs batch_size timesteps units n model_dir training_fig_dir
gpu_id = int(sys.argv[1])
data_path = sys.argv[2]
epochs = int(sys.argv[3])
batch_size = int(sys.argv[4])
timesteps = int(sys.argv[5])
units = int(sys.argv[6])
n = int(sys.argv[7])
model_dir = sys.argv[8]
training_fig_dir = sys.argv[9]
os.environ["CUDA_VISIBLE_DEVICES"] = sys.argv[1]

data = restore_data(data_path)


def create_model(timesteps, units):
    """Build and compile a stacked-LSTM regressor over (timesteps, units) input."""
    input_shape = (timesteps, units)
    inputs = Input(input_shape)
    x = LSTM(units*4, return_sequences=True)(inputs)
    # NOTE(review): this second layer is fed `inputs` again, so the previous
    # LSTM's output is discarded (disconnected layer) -- likely meant to be
    # `(x)`; confirm before changing, as it alters the trained architecture.
    x = LSTM(units*4, return_sequences=True)(inputs)
    x = LSTM(units)(x)
    model = Model(inputs=inputs, outputs=x)
    model.compile(loss='mse', optimizer='adam')
    return model


def train(data, i, epochs, batch_size, timesteps, units, model_dir,
          training_fig_dir):
    """Train one per-point model. NOTE(review): body truncated in this chunk."""
    K.clear_session()
    model_path = os.path.join(model_dir, '{}.dat'.format(i))
    model = create_model(timesteps, units)
def restore_data_1(path):
    """Load a cached tuple of arrays, appending a trailing channel axis to each."""
    return tuple(np.expand_dims(arr, axis=-1) for arr in restore_data(path))
# In[3]: #!pip install livelossplot # In[4]: #!git clone https://github.com/lamductan/modis_utils # In[5]: from livelossplot import PlotLosses from modis_utils.misc import cache_data, restore_data # In[6]: data = restore_data(os.path.join('cache', 'boundary_vectors_ALL.h5')) # In[7]: train_boundary_vectors = data[0] val_boundary_vectors = data[1] test_boundary_vectors = data[2] # In[8]: n_points = train_boundary_vectors.shape[1] n_points # In[9]: train_boundary_vectors.shape, val_boundary_vectors.shape, test_boundary_vectors.shape
"test_interval": test_interval, "snapshot_interval": snapshot_interval, "whole_img_width": whole_img_width, "batch_norm_decay": batch_norm_decay, "batch_norm_epsilon": batch_norm_epsilon } # In[13]: timesteps = 47 # In[ ]: # In[14]: inputs_np_whole_img = restore_data( '../../multiscale_predrnn/data/sequence_data/test/0.dat')[0] inputs_np_whole_img.shape # In[15]: inference_convlstm_whole_img = InferenceConvLSTMWholeImg(params) # In[19]: inferences_np = {} np_input_dir = '../../multiscale_predrnn/data/sequence_data' steps_ahead = 80 for subset in ('test', 'val'): inference_dir = 'inferences/{}'.format(subset) if not os.path.exists(inference_dir):
import sys
import numpy as np
import matplotlib.pyplot as plt
import shutil
import datetime
import tensorflow as tf
from shutil import unpack_archive, make_archive

from modis_utils.misc import restore_data
from modis_utils.modis_utils import ModisUtils
from modis_utils.model.loss_function import PSNRLoss, lossSSIM, SSIM, step_decay
from modis_utils.model.loss_function import mse_with_mask_tf, mse_with_mask_tf_1, mse_with_mask

# Parameters
config_path = 'config.dat'
# NOTE(review): `gdrive_dir` is defined outside this chunk -- confirm it is
# set before this point when the file runs.
config_params = restore_data(os.path.join(gdrive_dir, config_path))
# sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9)
#lrate = LearningRateScheduler(step_decay)
lr = config_params['lr']
training = False  # NOTE(review): hard-coded, unlike the settings read from config
crop_size = config_params['crop_size']
input_timesteps = config_params['input_timesteps']
output_timesteps = config_params['output_timesteps']
batch_size = config_params['batch_size']
compile_params = config_params['compile_params']
model_name = config_params['model_name']
preprocessed_type = config_params['preprocessed_type']
modis_product = config_params['modis_product']