def test_training_save(self):
    """Illustrate saving to the grid file system during training time.

    Trains several models at once (one per entry in model_params) while
    routing the 'first_image' training target into GridFS, then verifies
    per model that the saved payload round-trips through pickle and has
    the expected shape (100 flattened 28x28 MNIST images per record).
    """
    base_exp_id = 'training_save'
    params = self.setup_params(base_exp_id)
    num_models = len(params['model_params'])
    # Route the 'first_image' target into GridFS instead of the plain
    # document store, and tighten the save frequencies for the test.
    params['save_params']['save_to_gfs'] = ['first_image']
    params['save_params']['save_valid_freq'] = 3000
    params['save_params']['save_filters_freq'] = 30000
    params['save_params']['cache_filters_freq'] = 3000
    params['train_params']['targets'] = {
        'func': self.get_first_image_target}
    # Actually run the training.
    base.train_from_params(**params)
    # Check that the first image has been saved, once per trained model.
    for i in range(num_models):
        exp_id = base_exp_id + '_model_{}'.format(i)
        coll = self.collection['files']
        q = {'exp_id': exp_id, 'train_results': {'$exists': True}}
        train_steps = coll.find(q)
        self.assertEqual(train_steps.count(), 5)
        idx = train_steps[0]['_id']
        # The GridFS payload document points back at its training record
        # through the 'item_for' field.
        fn = coll.find({'item_for': idx})[0]['filename']
        fs = gridfs.GridFS(coll.database, self.collection_name)
        fh = fs.get_last_version(fn)
        saved_data = pickle.loads(fh.read())
        fh.close()
        self.assertIn('train_results', saved_data)
        self.assertIn('first_image', saved_data['train_results'])
        self.assertEqual(len(saved_data['train_results']['first_image']), 100)
        self.assertEqual(saved_data['train_results']['first_image'][0].shape,
                         (28 * 28,))
def train_nipscnn_ns():
    """Train the NIPS CNN on the current stimulus type, logging to 'deepretina'."""
    params = copy.deepcopy(default_params)
    save_params = params['save_params']
    save_params.update({
        'dbname': 'deepretina',
        'collname': stim_type,
        'exp_id': 'trainval0',
    })
    base.get_params()
    base.train_from_params(**params)
def train_cnn():
    """Train the CNN model on the current stimulus type with lr 1e-3.

    Results are saved to the 'cnn' database under the stimulus-type
    collection, experiment id 'trainval0'.
    """
    params = copy.deepcopy(default_params)
    params['save_params']['dbname'] = 'cnn'
    params['save_params']['collname'] = stim_type
    params['save_params']['exp_id'] = 'trainval0'
    # BUG FIX: the original read `params['model_params'] = # FILL IN HERE`,
    # an assignment with no right-hand side (a syntax error).  Point the
    # model at the `cnn` builder, matching the sibling train_cnn variants
    # in this file.
    params['model_params']['func'] = cnn
    params['learning_rate_params']['learning_rate'] = 1e-3
    base.train_from_params(**params)
def main():
    """Entry point: pin the visible GPU(s) from CLI args and launch training."""
    # Parse arguments
    args = get_config().parse_args()
    # Restrict TensorFlow to the requested device(s) before any graph work.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    # Get params needed, start training
    training_params = get_params_from_arg(args)
    base.train_from_params(**training_params)
def train_ln():
    """Train the LN model on the current stimulus type with lr 1e-3."""
    params = copy.deepcopy(default_params)
    for save_key, save_value in (('dbname', 'ln_model'),
                                 ('collname', stim_type),
                                 ('exp_id', 'trainval0')):
        params['save_params'][save_key] = save_value
    params['model_params']['func'] = ln
    params['learning_rate_params']['learning_rate'] = 1e-3
    base.train_from_params(**params)
def main():
    """Entry point: configure GPU, loss aggregation and cache dir, then train."""
    # Parse arguments
    args = get_config().parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    train_params = get_params_from_args(args)
    # Aggregate the loss with the regularized "faster" variant.
    train_params['loss_params']['agg_func'] = reg_loss_in_faster
    # Cache checkpoints under <cache_dir>/models_tfutils/<save_exp>.
    train_params['save_params']['cache_dir'] = os.path.join(
        args.cache_dir, 'models_tfutils', args.save_exp)
    base.train_from_params(**train_params)
def train_cnn():
    """Train the CNN; learning rate depends on the stimulus type."""
    params = copy.deepcopy(default_params)
    save_cfg = params['save_params']
    save_cfg['dbname'] = 'cnn'
    save_cfg['collname'] = stim_type
    save_cfg['exp_id'] = 'trainval0'
    params['model_params']['func'] = cnn
    # White-noise stimuli tolerate a larger step size than other stimuli.
    lr = 1e-3 if stim_type == 'whitenoise' else 1e-5
    params['learning_rate_params']['learning_rate'] = lr
    base.train_from_params(**params)
def start_training(params, args):
    """Dispatch training through tfutils or the local framework.

    args.tfutils selects the backend; args.save_exp is 'db/coll/exp_id'
    and determines where tfutils caches filters.
    """
    if not args.tfutils:
        from framework import TrainFramework
        TrainFramework(params).train()
        return
    # tfutils path: plug in the regularized loss aggregator and build a
    # cache directory keyed by database/collection/experiment.
    params['loss_params']['agg_func'] = reg_loss_in_tfutils
    db_name, col_name, exp_id = args.save_exp.split('/')
    params['save_params']['cache_dir'] = os.path.join(
        args.cache_dir, 'models_tfutils', db_name, col_name, exp_id)
    from tfutils import base
    base.train_from_params(**params)
def test_custom_training(self):
    """Illustrate training with a custom training loop.

    This test illustrates how basic training is performed with a custom
    training loop using the tfutils.base.train_from_params function.
    """
    params = self.setup_params('training0')
    # Hand tfutils a user-supplied train loop via train_params.
    params['train_params']['train_loop'] = {'func': self.custom_train_loop}
    base.train_from_params(**params)
def main():
    """Entry point: assemble tfutils params from grouped helpers and train."""
    # Parse arguments
    args = get_config().parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    params = {
        'skip_check': True,
        'log_device_placement': False,
    }
    # Each helper mutates `params` in place with its own parameter group.
    for add_group in (add_training_params,
                      add_save_and_load_params,
                      add_optimization_params,
                      add_validation_params):
        add_group(params, args)
    base.train_from_params(**params)
def train_ln():
    """Train the multi-GPU LN model with lr 1e-3."""
    params = copy.deepcopy(default_params)
    params['save_params'].update(
        dbname='ln_model', collname=stim_type, exp_id='trainval0')
    # Replace (not patch) model_params so device placement is explicit.
    params['model_params'] = dict(
        func=ln,
        num_gpus=NUM_GPUS,
        devices=DEVICES,
        prefix=MODEL_PREFIX,
    )
    params['learning_rate_params']['learning_rate'] = 1e-3
    base.train_from_params(**params)
def main():
    """Entry point: either train, or run a pure test pass without optimizer state."""
    # Parse arguments
    args = get_config().parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    # Get params needed, start training
    params = get_params_from_arg(args)
    if args.pure_test:
        # Testing uses no optimization machinery; strip those groups
        # before handing the params to test_from_params.
        for unused_key in ('learning_rate_params', 'optimizer_params',
                           'loss_params', 'train_params'):
            params.pop(unused_key)
        base.test_from_params(**params)
    else:
        base.train_from_params(**params)
def main():
    """Parse CLI args and launch training.

    With --innerargs empty, a single parameter set is built and trained.
    Otherwise each entry of args.innerargs is parsed as its own argument
    string and the per-model parameter groups are stacked into lists so
    tfutils trains the models jointly; train_params is shared (the last
    inner spec wins).
    """
    parser = get_parser()
    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    # FIX: the original called get_params_from_arg(args) unconditionally
    # here (and again inside the branch), discarding the first result —
    # wasted work and duplicated side effects.  Build params only where
    # they are used.  The dead commented-out train_from_params call was
    # removed as well.
    if len(args.innerargs) == 0:
        params = get_params_from_arg(args)
        if params is not None:
            base.train_from_params(**params)
    else:
        list_names = [
            "save_params",
            "load_params",
            "model_params",
            "validation_params",
            "loss_params",
            "learning_rate_params",
            "optimizer_params",
        ]
        params = {name: [] for name in list_names}
        params['train_params'] = None
        # if variable placement has to be logged
        params['log_device_placement'] = False
        for curr_arg in args.innerargs:
            inner_args = parser.parse_args(curr_arg.split())
            curr_params = get_params_from_arg(inner_args)
            for tmp_key in list_names:
                params[tmp_key].append(curr_params[tmp_key])
            # train_params is not a per-model list; the last spec wins.
            params['train_params'] = curr_params['train_params']
        base.train_from_params(**params)
def train_cnn():
    """Train the multi-GPU CNN model with lr 1e-4."""
    params = copy.deepcopy(default_params)
    params['save_params'].update(
        dbname='cnn', collname=stim_type, exp_id='trainval0')
    params['model_params'] = {
        'func': cnn,
        'num_gpus': NUM_GPUS,
        'devices': DEVICES,
        'prefix': MODEL_PREFIX,
    }
    # 1e-4 for natural scenes; 1e-3 was used previously.
    params['learning_rate_params']['learning_rate'] = 1e-4
    base.train_from_params(**params)
def train_cnn_fc_lstm():
    """Train the CNN + fully-connected LSTM model with lr 1e-5."""
    params = copy.deepcopy(default_params)
    params['save_params'].update(
        dbname='cnn_fc_lstm', collname=stim_type, exp_id='trainval0')
    # Set to True if starting training again from a saved checkpoint.
    params['load_params']['do_restore'] = False
    params['model_params'] = {
        'func': rnn_fc,
        'num_gpus': NUM_GPUS,
        'devices': DEVICES,
        'prefix': MODEL_PREFIX,
    }
    params['learning_rate_params']['learning_rate'] = 1e-5
    base.train_from_params(**params)
def convrnn_imagenet_test():
    """Train/restore the ConvRNN ImageNet categorization control experiment.

    Reuses the device placement from train_median_wfb's model params but
    swaps in a 1000-way categorization head, then saves/restores through
    the local 'convrnn'/'control' MongoDB collection.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '1,2,3,4,5,7,8,9'
    input_args = ['--gpu', '1,2,3,4,5,7,8,9']
    all_params = train_median_wfb(
        edges_arr=edges_5,
        input_args=input_args)
    old_model_params = all_params.pop('model_params')

    def _temp_model_func(inputs, *args, **kwargs):
        # Adapt convrnn_model to tfutils' model-function contract:
        # return (outputs, params_dict); no extra params here.
        output = convrnn_model(
            inputs['images'],
            input_args=input_args,
            units=1000,
            *args, **kwargs)
        return output, {}

    # Keep only the device placement from the original model params.
    new_model_params = {
        'func': _temp_model_func,
        'devices': old_model_params['devices'],
        'num_gpus': old_model_params['num_gpus'],
    }
    all_params['model_params'] = new_model_params
    all_params['save_params'] = {
        'host': 'localhost',
        'port': 27009,
        'dbname': 'convrnn',
        'collname': 'control',
        'exp_id': 'cate',
        'do_save': True,
        'save_initial_filters': True,
        'save_metrics_freq': 1000,
        # NOTE(review): 10009 / 100090 look like typos for 10000 / 100000
        # — confirm before relying on these frequencies.
        'save_valid_freq': 10009,
        'save_filters_freq': 100090,
        'cache_filters_freq': 100090,
    }
    # Load from the same experiment so a restart resumes training.
    all_params['load_params'] = {
        'host': 'localhost',
        'port': 27009,
        'dbname': 'convrnn',
        'collname': 'control',
        'exp_id': 'cate',
        'do_restore': True,
    }
    all_params['validation_params'] = {}
    print(all_params.keys())
    base.train_from_params(**all_params)
def main():
    """Entry point: train via tfutils or via the local TrainFramework."""
    # Parse arguments
    args = get_config().parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    # Get params needed, start training
    params = get_params_from_arg(args)
    if args.tfutils:
        params['loss_params']['agg_func'] = reg_loss_in_tfutils
        # Cache filters under <cache_dir>/models_tfutils/<db>/<coll>/<exp>.
        params['save_params']['cache_dir'] = os.path.join(
            args.cache_dir, 'models_tfutils',
            args.db_name, args.col_name, args.exp_id)
        from tfutils import base
        base.train_from_params(**params)
    else:
        from framework import TrainFramework
        TrainFramework(params).train()
def train_cnn_lstm_dropout_fb():
    """Train the conv-LSTM-with-dropout model with a conv2->conv1 feedback edge."""
    params = copy.deepcopy(default_params)
    params['save_params'].update(
        dbname='cnn_lstm_dropout_fb', collname=stim_type, exp_id='trainval0')
    # Resume from the previously saved checkpoint rather than starting fresh.
    params['load_params']['do_restore'] = True
    params['model_params'] = {
        'func': convLstmDropout,
        'edges_arr': [('conv2', 'conv1')],
        'num_gpus': NUM_GPUS,
        'devices': DEVICES,
        'prefix': MODEL_PREFIX,
    }
    params['learning_rate_params']['learning_rate'] = 1e-5
    base.train_from_params(**params)
def main(argv):
    """Entry point: build Params from a pickle file or from flags, then train.

    Optionally snapshots the resolved params back to a .pkl file before
    handing them to tfutils.
    """
    del argv  # Unused
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    params = Params()
    load_file = FLAGS.load_params_file
    if load_file is None:
        print("Parsing params from flags...")
        params.customize(flags=FLAGS)
    else:
        # A saved params file overrides every config flag.
        assert load_file.endswith('.pkl')
        print("Loading params from file: {}".format(load_file))
        print("Ignoring all config flags")
        params.load(load_file, FLAGS)
    params_copy = params.get_params_copy()
    print("All params: ")
    print(params_copy)
    save_file = FLAGS.save_params_file
    if save_file is not None:
        assert save_file.endswith('.pkl')
        params.save(save_file)
    base.train_from_params(**params_copy)
def test_training_save(self):
    """Illustrate saving to the grid file system during training time.

    Trains a single model while routing the 'first_image' training target
    into GridFS, then verifies the saved payload round-trips through
    cPickle and contains 100 flattened 28x28 images.
    """
    exp_id = 'training_save'
    params = self.setup_params(exp_id)

    # Modify a few of the save parameters.
    params['save_params']['save_valid_freq'] = 3000
    params['save_params']['save_filters_freq'] = 30000
    params['save_params']['cache_filters_freq'] = 3000

    # Specify additional save_params for saving to gfs.
    params['save_params']['save_to_gfs'] = ['first_image']
    params['train_params']['targets'] = {
        'func': self.get_first_image_target
    }

    # Actually run the training.
    base.train_from_params(**params)

    # Check that the first image has been saved.
    coll = self.collection['files']
    q = {'exp_id': exp_id, 'train_results': {'$exists': True}}
    train_steps = coll.find(q)
    self.assertEqual(train_steps.count(), 5)
    idx = train_steps[0]['_id']
    # The GridFS payload document points back at its training record
    # through the 'item_for' field.
    fn = coll.find({'item_for': idx})[0]['filename']
    fs = gridfs.GridFS(coll.database, self.collection_name)
    fh = fs.get_last_version(fn)
    saved_data = cPickle.loads(fh.read())
    fh.close()

    # Assert as expected.
    self.assertIn('train_results', saved_data)
    self.assertIn('first_image', saved_data['train_results'])
    self.assertEqual(len(saved_data['train_results']['first_image']), 100)
    self.assertEqual(saved_data['train_results']['first_image'][0].shape,
                     (28 * 28, ))
def test_training(self):
    """Illustrate multi-model training and resuming from saved state.

    Trains several models at once (one per entry in model_params), checks
    the MongoDB records per model, continues training under the same
    experiment ids, and finally forks training into a new experiment id
    that loads from the old one.
    """
    base_exp_id = 'training0'
    params = self.setup_params(base_exp_id)
    num_models = len(params['model_params'])

    # Actually run the training.
    base.train_from_params(**params)

    # Test if results are as expected, once per trained model.
    for i in range(num_models):
        exp_id = base_exp_id + '_model_{}'.format(i)
        self.assert_as_expected(exp_id, count=26, step=[0, 200, 400])
        r = self.collection['files'].find({'exp_id': exp_id, 'step': 0})[0]
        self.asserts_for_record(r, params, train=True)
        r = self.collection['files'].find({
            'exp_id': exp_id,
            'step': 20
        })[0]
        self.asserts_for_record(r, params, train=True)

    # Run another 500 steps of training on the same experiment id.
    params['train_params']['num_steps'] = 1000
    base.train_from_params(**params)

    # Test if results are as expected.
    for i in range(num_models):
        exp_id = base_exp_id + '_model_{}'.format(i)
        self.assert_as_expected(exp_id, 51, [0, 200, 400, 600, 800, 1000])
        try:
            # py2/py3 compatibility: assertItemsEqual was renamed
            # assertCountEqual in Python 3's unittest.
            eqmeth = self.assertItemsEqual
        except AttributeError:
            eqmeth = self.assertCountEqual
        eqmeth(self.collection['files'].distinct('exp_id'), [
            base_exp_id + '_model_{}'.format(i) for i in range(num_models)
        ])
        r = self.collection['files'].find({
            'exp_id': exp_id,
            'step': 1000
        })[0]
        self.asserts_for_record(r, params, train=True)

    # Run 500 more steps but save to a new experiment id,
    # loading from the original one.
    new_exp_id = 'training1'
    params['train_params']['num_steps'] = 1500
    params['load_params'] = {'exp_id': base_exp_id}
    params['save_params']['exp_id'] = new_exp_id
    base.train_from_params(**params)
    for i in range(num_models):
        exp_id = new_exp_id + '_model_{}'.format(i)
        self.assert_step(exp_id, [1200, 1400])
def test_training(self):
    """Illustrate training.

    This test illustrates how basic training is performed using the
    tfutils.base.train_from_params function.  This is the first in a
    sequence of interconnected tests. It creates a pretrained model that
    is used by the next few tests (test_validation and
    test_feature_extraction).

    As can be seen by looking at how the test checks for correctness,
    after the training is run, results of training, including
    (intermittently) the full variables needed to re-initialize the
    tensorflow model, are stored in a MongoDB.

    Also see docstring of the tfutils.base.train_from_params function for
    more detailed information about usage.
    """
    exp_id = 'training0'
    params = self.setup_params(exp_id)

    # Run training.
    base.train_from_params(**params)

    # Test if results are as expected.
    self.assert_as_expected(exp_id, count=26, step=[0, 200, 400])
    r = self.collection['files'].find({'exp_id': exp_id, 'step': 0})[0]
    self.asserts_for_record(r, params, train=True)
    r = self.collection['files'].find({'exp_id': exp_id, 'step': 20})[0]
    self.asserts_for_record(r, params, train=True)

    # Run another 500 steps of training on the same experiment id.
    params['train_params']['num_steps'] = 1000
    base.train_from_params(**params)

    # Test if results are as expected; only one experiment id should exist.
    self.assert_as_expected(exp_id, 51, [0, 200, 400, 600, 800, 1000])
    self.assertEqual(self.collection['files'].distinct('exp_id'), [exp_id])
    r = self.collection['files'].find({'exp_id': exp_id, 'step': 1000})[0]
    self.asserts_for_record(r, params, train=True)

    # Run 500 more steps but save to a new experiment id, loading the
    # model state from the original experiment.
    new_exp_id = 'training1'
    params['train_params']['num_steps'] = 1500
    params['load_params'] = {'exp_id': exp_id}
    params['save_params']['exp_id'] = new_exp_id
    base.train_from_params(**params)
    self.assert_step(new_exp_id, [1200, 1400])
def train(config, dbname, collname, exp_id, port,
          gpus=None, use_default=True, load=True):
    """Assemble tfutils params from `config` and launch PSGNet training.

    Args:
        config: experiment config dict; config['default_params'] is used
            as the base when `use_default` is True.
        dbname, collname, exp_id, port: MongoDB save/load coordinates.
        gpus: list of GPU indices; defaults to [0].
        load: if True, also configure restore-from-checkpoint params
            (from FLAGS.load_exp_id, or `exp_id` itself).
    """
    # FIX: the original signature used a mutable default (gpus=[0]) — the
    # classic Python pitfall.  Use None as the sentinel; behavior for all
    # existing callers is unchanged.
    if gpus is None:
        gpus = [0]
    tfutils_params = config['default_params'] if use_default else {}

    ### MODEL ###
    model_params = initialize_psgnet_model(config)
    loss_names = model_params['func'].Losses.keys()
    model_params.update({
        'devices': ['/gpu:' + str(i) for i in range(len(gpus))],
        'num_gpus': len(gpus),
        'seed': FLAGS.seed,
        'prefix': 'model_0'
    })
    tfutils_params['model_params'] = model_params

    ### INPUT DATA ###
    train_params, val_params = build_trainval_params(
        config, loss_names=loss_names)
    update_tfutils_params('train', tfutils_params, train_params, config={})
    update_tfutils_params('validation', tfutils_params, val_params, config={})

    ### OPTIMIZATION ###
    trainable = FLAGS.trainable
    if trainable is not None:
        trainable = trainable.split(',')
    opt_params = {'trainable_scope': trainable}
    update_tfutils_params('optimizer', tfutils_params, opt_params, config)
    update_tfutils_params('loss', tfutils_params, {}, config)
    update_tfutils_params('learning_rate', tfutils_params, {}, config)

    ### SAVE AND LOAD ###
    save_params = {
        'dbname': dbname,
        'collname': collname,
        'exp_id': exp_id,
        'port': port
    }
    update_tfutils_params('save', tfutils_params, save_params, config)
    load_params = copy.deepcopy(save_params)
    load_exp_id = FLAGS.load_exp_id or exp_id
    load_params.update({
        'do_restore': True,
        'exp_id': load_exp_id,
        'query': {'step': FLAGS.step},
        # Only resume the global step when continuing the same experiment.
        'restore_global_step': True if (exp_id == load_exp_id) else False
    })
    update_tfutils_params('load', tfutils_params,
                          load_params if load else None, config)

    ### TODO save out config ###
    save_config(tfutils_params, save_dir=FLAGS.save_dir)
    logging.info(pprint.pformat(tfutils_params))
    base.train_from_params(**tfutils_params)
def validate_tpu(test_params):
    """Run a validation-only pass via the standard training entry point."""
    for banner in ("Validating only", "All params: "):
        print(banner)
    print(test_params)
    base.train_from_params(**test_params)
def main():
    """Train Mask R-CNN on COCO through tfutils, checkpointing to MongoDB."""
    parser = argparse.ArgumentParser(description='The script to train the mask R-CNN')
    # System setting
    parser.add_argument('--gpu', default = '0', type = str, action = 'store', help = 'Index of gpu, currently only one gpu is allowed')
    # General setting
    parser.add_argument('--nport', default = 27017, type = int, action = 'store', help = 'Port number of mongodb')
    parser.add_argument('--expId', default = "maskrcnn", type = str, action = 'store', help = 'Name of experiment id')
    parser.add_argument('--cacheDirPrefix', default = "/mnt/fs0/chengxuz/", type = str, action = 'store', help = 'Prefix of cache directory')
    parser.add_argument('--batchsize', default = 1, type = int, action = 'store', help = 'Batch size, only 1 is supported now')
    parser.add_argument('--initlr', default = 0.002, type = float, action = 'store', help = 'Initial learning rate')
    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    exp_id = args.expId
    dbname = 'normalnet-test'
    colname = 'maskrcnn'
    # Filter cache lives under <prefix>/.tfutils/localhost:<port>/<db>/<coll>/<exp>.
    cache_dir = os.path.join(args.cacheDirPrefix, '.tfutils', 'localhost:'+ str(args.nport), dbname, colname, exp_id)
    BATCH_SIZE = args.batchsize
    n_threads = 4

    # Define all params
    train_data_param = {
        'func': COCO,
        'data_path': DATA_PATH,
        'group': 'train',
        'n_threads': n_threads,
        'batch_size': 1,
        'key_list': KEY_LIST,
    }
    train_queue_params = {
        'queue_type': 'random',
        'batch_size': BATCH_SIZE,
        'seed': 0,
        'capacity': 10
    }
    # 82783 is presumably the size of the COCO train split — TODO confirm.
    NUM_BATCHES_PER_EPOCH = 82783//BATCH_SIZE
    learning_rate_params = {
        'func': tf.train.exponential_decay,
        'learning_rate': args.initlr,
        'decay_rate': 0.94,
        'decay_steps': NUM_BATCHES_PER_EPOCH*2,  # exponential decay each epoch
        'staircase': True
    }
    model_params = {
        'func': pack_model
    }
    optimizer_class = tf.train.MomentumOptimizer
    optimizer_params = {
        'func': optimizer.ClipOptimizer,
        'optimizer_class': optimizer_class,
        'clip': True,
        'momentum': .99
    }
    save_params = {
        'host': 'localhost',
        'port': args.nport,
        'dbname': dbname,
        'collname': colname,
        'exp_id': exp_id,
        'do_save': True,
        'save_initial_filters': True,
        'save_metrics_freq': 2500,  # keeps loss from every SAVE_LOSS_FREQ steps.
        'save_valid_freq': 5000,
        'save_filters_freq': 5000,
        'cache_filters_freq': 5000,
        'cache_dir': cache_dir,
    }
    train_params = {
        'validate_first': False,
        'data_params': train_data_param,
        'queue_params': train_queue_params,
        # Max float32 effectively disables the loss-threshold abort.
        'thres_loss': np.finfo(np.float32).max,
        'num_steps': 20 * NUM_BATCHES_PER_EPOCH  # number of steps to train
    }
    load_query = None
    # Load from the same db/coll/exp so a restart resumes training.
    load_params = {
        'host': 'localhost',
        'port': args.nport,
        'dbname': dbname,
        'collname': colname,
        'exp_id': exp_id,
        'do_restore': True,
        'query': load_query
    }
    loss_func = pack_loss
    loss_params = {
        'targets': ['height', 'width', 'num_objects', 'labels', 'segmentation_masks', 'bboxes'],
        'agg_func': tf.reduce_mean,
        'loss_per_case_func': loss_func,
    }
    postsess_params = {
        'func': restore,
    }
    params = {
        'save_params': save_params,
        'load_params': load_params,
        'model_params': model_params,
        'train_params': train_params,
        'loss_params': loss_params,
        'learning_rate_params': learning_rate_params,
        'optimizer_params': optimizer_params,
        'postsess_params': postsess_params,
        'log_device_placement': False,  # if variable placement has to be logged
        'validation_params': {},
    }

    # Run the training
    base.train_from_params(**params)
# RDM correlation retval['spearman_corrcoef_%s' % layer] = \ spearmanr( np.reshape(retval['rdm_%s' % layer], [-1]), np.reshape(retval['rdm_it'], [-1]) )[0] # categorization test retval['categorization_%s' % layer] = \ self.categorization_test(features[layer], meta, ['V6']) # within-categorization test retval['within_categorization_%s' % layer] = \ self.within_categorization_test(features[layer], meta, ['V6']) # IT regression test retval['it_regression_%s' % layer] = \ self.regression_test(features[layer], IT_feats, meta, ['V6']) # meta regression test retval['meta_regression_%s' % layer] = \ self.meta_regression_test(features[layer], meta, ['V6']) return retval if __name__ == '__main__': """ Illustrates how to run the configured model using tfutils """ base.get_params() m = ImageNetExperiment() params = m.setup_params() base.train_from_params(**params)
def main(args):
    """Train a surface-normal prediction network (normalnet) via tfutils.

    The dataset backend (hdf5 vs raw, Threedworld vs SceneNet) and the
    loss (averaged L2 vs inverse-dot) are selected from `args`.
    """
    #cfg_initial = postprocess_config(json.load(open(cfgfile)))
    if args.gpu>-1:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

    #cfg_initial = preprocess_config(json.load(open(args.pathconfig)))
    cfg_initial = postprocess_config(json.load(open(args.pathconfig)))
    exp_id = args.expId
    cache_dir = os.path.join(args.cacheDirPrefix, '.tfutils', 'localhost:'+ str(args.nport), 'normalnet-test', 'normalnet', exp_id)
    #queue_capa = BATCH_SIZE*120
    #queue_capa = BATCH_SIZE*500
    # Batch size / queue capacity come from the network config unless overridden.
    BATCH_SIZE = normal_encoder_asymmetric_with_bypass.getBatchSize(cfg_initial)
    if args.batchsize:
        BATCH_SIZE = args.batchsize
    queue_capa = normal_encoder_asymmetric_with_bypass.getQueueCap(cfg_initial)
    n_threads = 4

    # The model-building function is looked up by name on the encoder module.
    func_net = getattr(normal_encoder_asymmetric_with_bypass, args.namefunc)

    train_data_param = {
        'func': Threedworld_hdf5,
        #'func': train_normalnet_hdf5.Threedworld,
        'data_path': DATA_PATH_hdf5,
        'group': 'train',
        'crop_size': IMAGE_SIZE_CROP,
        'n_threads': n_threads,
        'batch_size': 2,
    }
    val_data_param = {
        'func': Threedworld_hdf5,
        #'func': train_normalnet_hdf5.Threedworld,
        'data_path': DATA_PATH_hdf5,
        'group': 'val',
        'crop_size': IMAGE_SIZE_CROP,
        'n_threads': n_threads,
        'batch_size': 2,
    }
    train_queue_params = {
        'queue_type': 'fifo',
        'batch_size': BATCH_SIZE,
        'seed': 0,
        'capacity': BATCH_SIZE*10,
    }
    val_queue_params = train_queue_params
    val_target = 'normals'

    # Raw (non-hdf5) data source: swap providers and use a random train queue.
    if args.usehdf5==0:
        train_data_param['func'] = Threedworld
        val_data_param['func'] = Threedworld
        train_data_param['data_path'] = DATA_PATH
        val_data_param['data_path'] = DATA_PATH
        #train_data_param['n_threads'] = n_threads
        #val_data_param['n_threads'] = n_threads
        train_queue_params = {
            'queue_type': 'random',
            'batch_size': BATCH_SIZE,
            'seed': 0,
            'capacity': queue_capa,
            # 'n_threads' : 4
        }
        val_queue_params = {
            'queue_type': 'fifo',
            'batch_size': BATCH_SIZE,
            'seed': 0,
            'capacity': BATCH_SIZE*10,
        }
        val_target = 'normals'

    if args.whichdataset==1:
        train_data_param['func'] = SceneNet
        val_data_param['func'] = SceneNet
        train_data_param['data_path'] = DATA_PATH_SCENE
        val_data_param['data_path'] = DATA_PATH_SCENE

    # Step counts are derived from the selected dataset's class constants.
    val_step_num = val_data_param['func'].N_VAL // BATCH_SIZE + 1
    NUM_BATCHES_PER_EPOCH = train_data_param['func'].N_TRAIN // BATCH_SIZE
    if args.valinum>-1:
        val_step_num = args.valinum

    loss_func = loss_ave_l2
    learning_rate_params = {
        'func': tf.train.exponential_decay,
        'learning_rate': .01,
        'decay_rate': .95,
        'decay_steps': NUM_BATCHES_PER_EPOCH,  # exponential decay each epoch
        'staircase': True
    }
    model_params = {
        'func': func_net,
        'seed': args.seed,
        'cfg_initial': cfg_initial
    }
    optim_params = {
        'func': optimizer.ClipOptimizer,
        'optimizer_class': tf.train.MomentumOptimizer,
        'clip': True,
        'momentum': .9
    }

    # Alternate loss: inverse dot product on centered images with RMSProp.
    if args.whichloss==1:
        loss_func = loss_ave_invdot
        learning_rate_params = {
            'func': tf.train.exponential_decay,
            'learning_rate': .001,
            'decay_rate': .5,
            'decay_steps': NUM_BATCHES_PER_EPOCH,  # exponential decay each epoch
            'staircase': True
        }
        #optimizer_class = tf.train.RMSPropOptimizer
        #train_data_param['center_im'] = True
        #val_data_param['center_im'] = True
        model_params['center_im'] = True
        optim_params = {
            'func': optimizer.ClipOptimizer,
            'optimizer_class': tf.train.RMSPropOptimizer,
            'clip': True,
        }

    params = {
        'save_params': {
            'host': 'localhost',
            #'port': 31001,
            'port': args.nport,
            'dbname': 'normalnet-test',
            'collname': 'normalnet',
            #'exp_id': 'trainval0',
            'exp_id': exp_id,
            #'exp_id': 'trainval2', # using screen?
            'do_save': True,
            #'do_save': False,
            'save_initial_filters': True,
            'save_metrics_freq': 2000,  # keeps loss from every SAVE_LOSS_FREQ steps.
            'save_valid_freq': 5000,
            #'save_metrics_freq': 100,  # keeps loss from every SAVE_LOSS_FREQ steps.
            #'save_valid_freq': 100,
            'save_filters_freq': 5000,
            'cache_filters_freq': 5000,
            'cache_dir': cache_dir,  # defaults to '~/.tfutils'
            'save_to_gfs': ['images_fea', 'normals_fea', 'outputs_fea'],
            #'save_intermediate_freq': 1,
        },
        # Load from the same db/coll/exp so a restart resumes training.
        'load_params': {
            'host': 'localhost',
            # 'port': 31001,
            # 'dbname': 'alexnet-test',
            # 'collname': 'alexnet',
            # 'exp_id': 'trainval0',
            'port': args.nport,
            'dbname': 'normalnet-test',
            'collname': 'normalnet',
            #'exp_id': 'trainval0',
            'exp_id': exp_id,
            #'exp_id': 'trainval2', # using screen?
            'do_restore': True,
            'load_query': None
        },
        'model_params': model_params,
        'train_params': {
            #'validate_first': False,
            'validate_first': True,
            'data_params': train_data_param,
            'queue_params': train_queue_params,
            'thres_loss': 1000,
            'num_steps': 90 * NUM_BATCHES_PER_EPOCH  # number of steps to train
        },
        'loss_params': {
            'targets': val_target,
            'agg_func': tf.reduce_mean,
            'loss_per_case_func': loss_func,
            'loss_per_case_func_params': {}
        },
        'learning_rate_params': learning_rate_params,
        'optimizer_params': optim_params,
        'log_device_placement': False,  # if variable placement has to be logged
        'validation_params': {
            # 'topn': aggregate the representation loss over the val split.
            'topn': {
                'data_params': val_data_param,
                'queue_params': val_queue_params,
                'targets': {
                    'func': rep_loss,
                    'target': val_target,
                },
                #'num_steps': Threedworld.N_VAL // BATCH_SIZE + 1,
                'num_steps': val_step_num,
                'agg_func': lambda x: {k:np.mean(v) for k,v in x.items()},
                'online_agg_func': online_agg
            },
            # 'feats': dump a few example images/normals/outputs for inspection.
            'feats': {
                'data_params': val_data_param,
                'queue_params': val_queue_params,
                'targets': {
                    'func': save_features,
                    'num_to_save': 5,
                    'targets' : [],
                },
                #'num_steps': Threedworld.N_VAL // BATCH_SIZE + 1,
                'num_steps': 10,
                'agg_func': mean_losses_keep_rest,
                #'online_agg_func': online_agg
            },
        },
    }
    #base.get_params()
    base.train_from_params(**params)