def eval(self, data_in, **kwargs):
    batch_size = kwargs.get('batch_size', 1)
    print_str = kwargs.get('print_str', True)
    need_imgs = kwargs.get('need_imgs', False)
    print_all_results = kwargs.get('print_all_results', False)  # print each result with its index
    if type(data_in) is dict:
        time_start = time.time()
        eval_dict = self.model.eval(data_in, **kwargs)
        time_cost = time.time() - time_start
        if print_str:
            print('Evaluation time cost is {:.1f}'.format(time_cost))
        return eval_dict
    data_provider = data_in
    ndata = data_provider.size
    m = ndata // batch_size
    n = ndata % batch_size
    eval_dict_str = {}
    eval_dict_img = {}
    time_start = time.time()
    for _ in range(m):
        data_dict = data_provider(batch_size)
        sub_eval_dict_str, sub_eval_dict_img = self.model.eval(data_dict, **kwargs)
        eval_dict_str = U.dict_concat(eval_dict_str, sub_eval_dict_str)
        if need_imgs:
            eval_dict_img = U.dict_append(eval_dict_img, sub_eval_dict_img)
    if n > 0:
        sub_eval_dict_str, sub_eval_dict_img = self.model.eval(data_provider(n), **kwargs)
        eval_dict_str = U.dict_concat(eval_dict_str, sub_eval_dict_str)
        if need_imgs:
            eval_dict_img = U.dict_append(eval_dict_img, sub_eval_dict_img)
    if print_str:
        time_cost = time.time() - time_start
        print('Evaluate {} data, time cost is {:.1f}'.format(ndata, time_cost))
        print(' {}'.format(U.dict_to_str(eval_dict_str)))
    if print_all_results:
        # keep only the entries that have one value per data item
        results_dict = {}
        for key in eval_dict_str:
            value = eval_dict_str.get(key)
            if value.shape[0] == ndata:
                results_dict[key] = value
        for i in range(ndata):
            line_str = "Picture Index: {}\t".format(i)
            for key in results_dict:
                line_str += "{}: ".format(key)
                for k in results_dict[key][i]:
                    line_str += "{:.5f} ".format(k)
                line_str += "\t"
            print(line_str)
    if need_imgs:
        return eval_dict_str, eval_dict_img
    else:
        return eval_dict_str
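# The loop above accumulates per-batch results with two project utilities,
# U.dict_concat and U.dict_append, which are defined outside this file. The
# function below is only an illustrative sketch of the assumed dict_concat
# behaviour (per-key concatenation along the batch axis); the real U module
# may differ.
import numpy as np


def _example_dict_concat(acc, new):
    """Hypothetical sketch: concatenate each value in `new` onto `acc[key]` along axis 0."""
    for key, value in new.items():
        value = np.asarray(value)
        if value.ndim == 0:
            value = value[None]  # promote scalars so they can be concatenated
        acc[key] = value if key not in acc else np.concatenate([acc[key], value], axis=0)
    return acc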
def eval(self, data_in, **kwargs):
    batch_size = kwargs.get('batch_size', 1)
    print_str = kwargs.get('print_str', True)
    need_imgs = kwargs.get('need_imgs', False)
    max_save_batch = kwargs.get('max_save_batch', float('inf'))
    if type(data_in) is dict:
        time_start = time.time()
        eval_dict = self.model.eval(data_in, **kwargs)
        time_cost = time.time() - time_start
        if print_str:
            print('Evaluation time cost is {:.1f}'.format(time_cost))
        return eval_dict
    data_provider = data_in
    ndata = data_provider.size
    m = ndata // batch_size
    n = ndata % batch_size
    results = {}
    imgs = {}
    saved_batch_count = 0
    if print_str:
        print('Evaluating {} data...'.format(ndata), end='')
    time_start = time.time()
    for i in range(m):
        sub_results = self.model.eval(data_provider(batch_size), **kwargs)
        sub_imgs = sub_results.pop('imgs') if 'imgs' in sub_results else None
        results = U.dict_concat(results, sub_results)
        if need_imgs and sub_imgs is not None and saved_batch_count < max_save_batch:
            imgs = U.dict_append(imgs, sub_imgs)
            saved_batch_count += 1
        if print_str:
            print('\r\tEvaluated {}/{} data'.format(batch_size * (i + 1), ndata), end='')
    if n > 0:
        sub_results = self.model.eval(data_provider(n), **kwargs)
        sub_imgs = sub_results.pop('imgs') if 'imgs' in sub_results else None
        results = U.dict_concat(results, sub_results)
        if need_imgs and sub_imgs is not None and saved_batch_count < max_save_batch:
            imgs = U.dict_append(imgs, sub_imgs)
            saved_batch_count += 1
    if print_str:
        print('\r\tEvaluated {}/{} data'.format(ndata, ndata))
        time_cost = time.time() - time_start
        print('Time cost is {:.1f}'.format(time_cost))
        print(' {}'.format(U.dict_to_str(results)))
    if need_imgs:
        return results, imgs
    else:
        return results
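# A minimal usage sketch of the eval API above. `trainer` and `valid_provider` are
# hypothetical names for a trainer instance and a callable data provider with a
# `size` attribute; only the keyword arguments come from the signature above.
#
#     results = trainer.eval(valid_provider, batch_size=8)
#     results, imgs = trainer.eval(valid_provider, batch_size=8,
#                                  need_imgs=True, max_save_batch=2)
#
# `results` maps metric names to values concatenated over the whole data set;
# `imgs` keeps at most `max_save_batch` batches of the 'imgs' entries popped
# from each sub-result.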
def _load_data(self, n):
    """Load and process data one by one.

    Parameters
    ----------
    n : int
        The number of images to load.

    Returns
    -------
    dict
        A dictionary of ndarray data:
        {
            'data_suffix': ndarray,
            'other_suffix_1': ndarray,
            'other_suffix_2': ndarray,
            ...
        }
        The shape of each ndarray is (n, x, y, ..., c), where n is the number of
        data items requested by the caller, x, y, ... are the data dimensions,
        and c is the number of channels (for labels, the number of classes).
    """
    data_dict = {}
    for _ in range(n):
        sub_data_dict = {}
        x_name = self._file_list[self._cur_i]
        sub_data_dict.update({self._org_suffix: L.load_file(x_name)})
        for o_suffix in self._other_suffix:
            o_name = x_name.replace(self._org_suffix, o_suffix)
            sub_data_dict.update({o_suffix: L.load_file(o_name)})
        # process
        if self._processor is not None:
            sub_data_dict = self._processor.pre_process(sub_data_dict)
        data_dict = U.dict_append(data_dict, sub_data_dict)
        self._next_idx()
    U.dict_list2arr(data_dict)
    return data_dict
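# _load_data accumulates per-sample dicts with U.dict_append and then converts the
# accumulated lists into ndarrays with U.dict_list2arr. Neither helper is defined
# in this file; the functions below are only illustrative sketches of the assumed
# behaviour (hypothetical names). dict_list2arr is assumed to work in place, which
# is why its return value is not used above.
import numpy as np


def _example_dict_append(acc, new):
    """Hypothetical sketch: append each value of `new` to a per-key list in `acc`."""
    for key, value in new.items():
        acc.setdefault(key, []).append(value)
    return acc


def _example_dict_list2arr(data_dict):
    """Hypothetical sketch: stack each per-key list into an (n, x, y, ..., c) ndarray."""
    for key, value in data_dict.items():
        if isinstance(value, list):
            data_dict[key] = np.asarray(value)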
def train(self,
          train_provider,
          validation_provider,
          epochs,
          batch_size,
          output_path,
          optimizer=None,
          learning_rate=None,
          mini_batch_size=None,
          eval_frequency=1,
          is_save_train_imgs=False,
          is_save_valid_imgs=True,
          max_save_batch=float('inf')):
    if learning_rate is None:
        learning_rate = optimizer.learning_rate.numpy()
    if type(learning_rate) is not list:
        learning_rate = [learning_rate]
    iters = train_provider.size / batch_size
    assert iters > 0 and iters % 1 == 0, \
        'batch size {} does not match the data size {}.'.format(
            batch_size, train_provider.size)
    mini_batch_size = batch_size if mini_batch_size is None else mini_batch_size
    mini_iters = batch_size / mini_batch_size
    assert mini_iters > 0 and mini_iters % 1 == 0, \
        'mini batch size {} does not match the batch size {}.'.format(
            mini_batch_size, batch_size)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    nets = self.model.net if type(self.model.net) is list else [self.model.net]
    net_args = {}
    for i in range(len(nets)):
        net_args.update({'net%d' % i: nets[i]})
    ckpt = tf.train.Checkpoint(**net_args)
    start_message = (
        'Start training: epochs {}, learning rate {}, batch size {}, '
        'mini-batch size {}, training data {}, validation data {}.'.format(
            epochs, [str(lr) for lr in learning_rate], batch_size,
            mini_batch_size, train_provider.size, validation_provider.size))
    print(start_message)
    with open(output_path + '/train_log.txt', 'a+') as f:
        f.write(start_message + '\n')
    train_eval_str = {}
    valid_eval_str = {}
    best_loss = float('inf')
    time_start = time.time()
    for ep in range(epochs):  # TODO: change it back
        ep_time_start = time.time()
        for _ in range(int(iters)):
            grads = None
            for _ in range(int(mini_iters)):
                feed_dict = train_provider(mini_batch_size)
                mini_grads = self.model.get_grads(feed_dict)
                grads = self._grads_add(grads, mini_grads)
            grads = self._grads_div(grads, mini_iters)
            if type(grads) is tuple:
                assert len(optimizer) == len(grads), \
                    'Number of optimizers should equal the number of networks.'
                for gi in range(len(grads)):
                    optimizer[gi].apply_gradients(
                        zip(grads[gi], self.model.net[gi].trainable_variables))
            else:
                optimizer.apply_gradients(
                    zip(grads, self.model.net.trainable_variables))
        ep_train_time = time.time() - ep_time_start
        ep_eval_time = 0
        if ep % eval_frequency == 0 or ep == epochs - 1:
            ep_train_eval = self.eval(train_provider,
                                      batch_size=mini_batch_size,
                                      print_str=False,
                                      need_imgs=is_save_train_imgs,
                                      max_save_batch=max_save_batch)
            ep_valid_eval = self.eval(validation_provider,
                                      batch_size=mini_batch_size,
                                      print_str=False,
                                      need_imgs=is_save_valid_imgs,
                                      max_save_batch=max_save_batch)
            ep_eval_time = time.time() - ep_train_time - ep_time_start
            if is_save_train_imgs:
                save_img(ep_train_eval[1], '{}/train_imgs/'.format(output_path), ep)
                ep_train_eval = ep_train_eval[0]
            if is_save_valid_imgs:
                save_img(ep_valid_eval[1], '{}/valid_imgs/'.format(output_path), ep)
                ep_valid_eval = ep_valid_eval[0]
            save_str(ep_train_eval, '{}/train_eval.txt'.format(output_path), ep)
            save_str(ep_valid_eval, '{}/valid_eval.txt'.format(output_path), ep)
            # save best ckpt
            if np.mean(ep_valid_eval['loss']) < best_loss:
                ckpt.write(output_path + '/ckpt/best')
                best_loss = np.mean(ep_valid_eval['loss'])
        # time_ep_save_imgs_end = time.time()
        train_log = (
            'epoch {} ------ time cost: overall {:.1f} ------ step training {:.1f} '
            '------ step evaluation {:.1f} ------ learning rate: {} ------'.format(
                ep, time.time() - time_start, ep_train_time, ep_eval_time,
                [str(lr) for lr in learning_rate]))
        if ep % eval_frequency == 0 or ep == epochs - 1:
            train_log += ('\n train      : {}'.format(U.dict_to_str(ep_train_eval)) +
                          '\n validation : {}'.format(U.dict_to_str(ep_valid_eval)))
        print(train_log)
        with open(output_path + '/train_log.txt', 'a+') as f:
            f.write(train_log + '\n')
        train_eval_str = U.dict_append(train_eval_str, ep_train_eval)
        valid_eval_str = U.dict_append(valid_eval_str, ep_valid_eval)
        # TODO: add early stopping and best ckpt save
        # TODO: add tensorboard summary
    ckpt.write(output_path + '/ckpt/final')
    return train_eval_str, valid_eval_str
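# The training loop above averages gradients over several mini-batches before a
# single optimizer step (gradient accumulation). The private helpers _grads_add
# and _grads_div are defined elsewhere in the class; the functions below are only
# illustrative sketches of the assumed behaviour for the single-network case
# (one flat list of gradient tensors). The multi-network branch (grads as a tuple
# of per-network lists) would apply the same element-wise operations per network.


def _example_grads_add(grads, mini_grads):
    """Hypothetical sketch: element-wise sum of two gradient lists (or the first mini-batch as-is)."""
    if grads is None:
        return mini_grads
    return [g + mg for g, mg in zip(grads, mini_grads)]


def _example_grads_div(grads, mini_iters):
    """Hypothetical sketch: average the accumulated gradients over the mini-batch count."""
    return [g / mini_iters for g in grads]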
def train(self,
          train_provider,
          validation_provider,
          epochs,
          batch_size,
          output_path,
          optimizer=tf.keras.optimizers.Adam(),
          learning_rate=None,
          mini_batch_size=None,
          eval_frequency=1,
          is_save_train_imgs=False,
          is_save_valid_imgs=True,
          is_rebuilt_path=True):
    if learning_rate is None:
        learning_rate = optimizer.learning_rate.numpy()
    if is_rebuilt_path and os.path.exists(output_path):
        shutil.rmtree(output_path, ignore_errors=True)
    iters = train_provider.size / batch_size
    assert iters > 0 and iters % 1 == 0, \
        'batch size {} does not match the data size {}.'.format(
            batch_size, train_provider.size)
    mini_batch_size = batch_size if mini_batch_size is None else mini_batch_size
    mini_iters = batch_size / mini_batch_size
    assert mini_iters > 0 and mini_iters % 1 == 0, \
        'mini batch size {} does not match the batch size {}.'.format(
            mini_batch_size, batch_size)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    ckpt = tf.train.Checkpoint(net=self.model.net)
    print('Start training: epochs {}, learning rate {:g}, batch size {}, '
          'mini-batch size {}, training data {}, validation data {}.'.format(
              epochs, float(learning_rate), batch_size, mini_batch_size,
              train_provider.size, validation_provider.size))
    train_eval_str = {}
    valid_eval_str = {}
    best_loss = float('inf')
    time_start = time.time()
    for ep in range(epochs):
        ep_time_start = time.time()
        for _ in range(int(iters)):
            grads = None
            for _ in range(int(mini_iters)):
                feed_dict = train_provider(mini_batch_size)
                mini_grads = self.model.get_grads(feed_dict)
                grads = self._grads_add(grads, mini_grads)
            grads = self._grads_div(grads, mini_iters)
            optimizer.apply_gradients(
                zip(grads, self.model.net.trainable_variables))
        ep_train_time = time.time() - ep_time_start
        ep_eval_time = 0
        if ep % eval_frequency == 0 or ep == epochs - 1:
            ep_train_eval = self.eval(train_provider,
                                      batch_size=mini_batch_size,
                                      print_str=False,
                                      need_imgs=is_save_train_imgs)
            ep_valid_eval = self.eval(validation_provider,
                                      batch_size=mini_batch_size,
                                      print_str=False,
                                      need_imgs=is_save_valid_imgs)
            ep_eval_time = time.time() - ep_train_time - ep_time_start
            if is_save_train_imgs:
                save_img(ep_train_eval[1], '{}/train_imgs/'.format(output_path), ep)
                ep_train_eval = ep_train_eval[0]
            if is_save_valid_imgs:
                save_img(ep_valid_eval[1], '{}/valid_imgs/'.format(output_path), ep)
                ep_valid_eval = ep_valid_eval[0]
            save_str(ep_train_eval, '{}/train_eval.txt'.format(output_path), ep)
            save_str(ep_valid_eval, '{}/valid_eval.txt'.format(output_path), ep)
            # save best ckpt
            if np.mean(ep_valid_eval['loss']) < best_loss:
                ckpt.write(output_path + '/ckpt/best')
                best_loss = np.mean(ep_valid_eval['loss'])
        # time_ep_save_imgs_end = time.time()
        train_log = (
            'epoch {} ------ time cost: overall {:.1f} ------ step training {:.1f} '
            '------ step evaluation {:.1f} ------ learning rate: {:g} ------'.format(
                ep, time.time() - time_start, ep_train_time, ep_eval_time,
                float(learning_rate)))
        if ep % eval_frequency == 0 or ep == epochs - 1:
            train_log += ('\n train      : {}'.format(U.dict_to_str(ep_train_eval)) +
                          '\n validation : {}'.format(U.dict_to_str(ep_valid_eval)))
        print(train_log)
        with open(output_path + '/train_log.txt', 'a+') as f:
            f.write(train_log + '\n')
        train_eval_str = U.dict_append(train_eval_str, ep_train_eval)
        valid_eval_str = U.dict_append(valid_eval_str, ep_valid_eval)
        # TODO: add early stopping and best ckpt save
        # TODO: add tensorboard summary
    ckpt.write(output_path + '/ckpt/final')
    return train_eval_str, valid_eval_str
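# A minimal usage sketch of the training API above. `Trainer`, `model`,
# `train_provider` and `validation_provider` are hypothetical names for whatever
# this project actually constructs; only the keyword arguments shown are taken
# from the signature above.
#
#     trainer = Trainer(model)
#     train_eval, valid_eval = trainer.train(
#         train_provider, validation_provider,
#         epochs=100, batch_size=32, output_path='./output',
#         mini_batch_size=8,      # accumulate gradients over 4 mini-batches
#         eval_frequency=5,       # evaluate every 5 epochs
#         is_save_valid_imgs=True)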