Example #1
    def test_training_save(self):
        """Illustrate saving to the grid file system during training time."""
        base_exp_id = 'training_save'
        params = self.setup_params(base_exp_id)
        num_models = len(params['model_params'])

        params['save_params']['save_to_gfs'] = ['first_image']
        params['save_params']['save_valid_freq'] = 3000
        params['save_params']['save_filters_freq'] = 30000
        params['save_params']['cache_filters_freq'] = 3000
        params['train_params']['targets'] = {
                'func': self.get_first_image_target}

        # Actually run the training.
        base.train_from_params(**params)

        # Check that the first image has been saved.
        for i in range(num_models):
            exp_id = base_exp_id + '_model_{}'.format(i)
            coll = self.collection['files']
            q = {'exp_id': exp_id, 'train_results': {'$exists': True}}
            train_steps = coll.find(q)
            self.assertEqual(train_steps.count(), 5)
            idx = train_steps[0]['_id']
            fn = coll.find({'item_for': idx})[0]['filename']
            fs = gridfs.GridFS(coll.database, self.collection_name)
            fh = fs.get_last_version(fn)
            saved_data = pickle.loads(fh.read())
            fh.close()

            self.assertIn('train_results', saved_data)
            self.assertIn('first_image', saved_data['train_results'])
            self.assertEqual(len(saved_data['train_results']['first_image']), 100)
            self.assertEqual(saved_data['train_results']['first_image'][0].shape, (28 * 28,))
Example #2
def train_nipscnn_ns():
    params = copy.deepcopy(default_params)
    params['save_params']['dbname'] = 'deepretina'
    params['save_params']['collname'] = stim_type
    params['save_params']['exp_id'] = 'trainval0'

    base.get_params()
    base.train_from_params(**params)
Example #3
def train_cnn():
    params = copy.deepcopy(default_params)
    params['save_params']['dbname'] = 'cnn'
    params['save_params']['collname'] = stim_type
    params['save_params']['exp_id'] = 'trainval0'

    params['model_params'] = {}  # FILL IN HERE (a plausible completion is sketched below)
    params['learning_rate_params']['learning_rate'] = 1e-3
    base.train_from_params(**params)
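
The placeholder above leaves model_params unspecified. Mirroring the neighboring examples on this page (see Examples #7, #11, and #14), a plausible completion would be the following, where cnn, NUM_GPUS, DEVICES, and MODEL_PREFIX are assumed to be defined in the surrounding module:

    params['model_params'] = {
        'func': cnn,
        'num_gpus': NUM_GPUS,
        'devices': DEVICES,
        'prefix': MODEL_PREFIX
    }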
Example #4
def main():
    # Parse arguments
    cfg = get_config()
    args = cfg.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # Get params needed, start training
    params = get_params_from_arg(args)
    base.train_from_params(**params)
Example #5
def train_ln():
    params = copy.deepcopy(default_params)
    params['save_params']['dbname'] = 'ln_model'
    params['save_params']['collname'] = stim_type
    params['save_params']['exp_id'] = 'trainval0'

    params['model_params']['func'] = ln
    params['learning_rate_params']['learning_rate'] = 1e-3
    base.train_from_params(**params)
Example #6
def main():
    # Parse arguments
    cfg = get_config()
    args = cfg.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    params = get_params_from_args(args)
    params['loss_params']['agg_func'] = reg_loss_in_faster
    cache_dir = os.path.join(args.cache_dir, 'models_tfutils', args.save_exp)
    params['save_params']['cache_dir'] = cache_dir
    base.train_from_params(**params)
Example #7
def train_cnn():
    params = copy.deepcopy(default_params)
    params['save_params']['dbname'] = 'cnn'
    params['save_params']['collname'] = stim_type
    params['save_params']['exp_id'] = 'trainval0'

    params['model_params']['func'] = cnn
    if stim_type == 'whitenoise':
        params['learning_rate_params']['learning_rate'] = 1e-3
    else:
        params['learning_rate_params']['learning_rate'] = 1e-5
    base.train_from_params(**params)
Example #8
def start_training(params, args):
    if args.tfutils:
        params['loss_params']['agg_func'] = reg_loss_in_tfutils
        db_name, col_name, exp_id = args.save_exp.split('/')
        cache_dir = os.path.join(args.cache_dir, 'models_tfutils', db_name,
                                 col_name, exp_id)
        params['save_params']['cache_dir'] = cache_dir
        from tfutils import base
        base.train_from_params(**params)
    else:
        from framework import TrainFramework
        train_framework = TrainFramework(params)
        train_framework.train()
Example #9
    def test_custom_training(self):
        """Illustrate training with custom training loop.

        This test illustrates how basic training is performed with a custom
        training loop using the tfutils.base.train_from_params function.

        """
        exp_id = 'training0'
        params = self.setup_params(exp_id)

        # Add a custom train_loop to use during training.
        params['train_params']['train_loop'] = {'func': self.custom_train_loop}

        base.train_from_params(**params)
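
A minimal sketch of what the custom_train_loop hooked in above might look like. The signature is an assumption: tfutils is presumed to hand the loop the live session and the per-model list of fetch dictionaries, and to use whatever the loop returns as that step's training results.

    def custom_train_loop(self, sess, train_targets, **loop_params):
        """Hedged sketch of a custom loop: run one step and log each loss."""
        train_results = sess.run(train_targets)
        for i, result in enumerate(train_results):
            print('Model {} has loss {}'.format(i, result['loss']))
        return train_results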
Example #10
def main():
    # Parse arguments
    cfg = get_config()
    args = cfg.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    params = {'skip_check': True, 'log_device_placement': False}

    add_training_params(params, args)
    add_save_and_load_params(params, args)
    add_optimization_params(params, args)
    add_validation_params(params, args)

    base.train_from_params(**params)
Example #11
def train_ln():
    params = copy.deepcopy(default_params)
    params['save_params']['dbname'] = 'ln_model'
    params['save_params']['collname'] = stim_type
    params['save_params']['exp_id'] = 'trainval0'

    params['model_params'] = {
        'func': ln,
        'num_gpus': NUM_GPUS,
        'devices': DEVICES,
        'prefix': MODEL_PREFIX
    }

    params['learning_rate_params']['learning_rate'] = 1e-3
    base.train_from_params(**params)
Example #12
def main():
    # Parse arguments
    cfg = get_config()
    args = cfg.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # Get params needed, start training
    params = get_params_from_arg(args)
    if not args.pure_test:
        base.train_from_params(**params)
    else:
        params.pop('learning_rate_params')
        params.pop('optimizer_params')
        params.pop('loss_params')
        params.pop('train_params')
        base.test_from_params(**params)
Example #13
def main():

    parser = get_parser()
    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if len(args.innerargs) == 0:
        params = get_params_from_arg(args)

        if params is not None:
            base.train_from_params(**params)
    else:
        params = {
            'save_params': [],
            'load_params': [],
            'model_params': [],
            'train_params': None,
            'loss_params': [],
            'learning_rate_params': [],
            'optimizer_params': [],
            'log_device_placement': False,  # whether to log variable placement
            'validation_params': [],
        }

        list_names = [
            "save_params", "load_params", "model_params", "validation_params",
            "loss_params", "learning_rate_params", "optimizer_params"
        ]

        for curr_arg in args.innerargs:
            curr_args = parser.parse_args(curr_arg.split())
            curr_params = get_params_from_arg(curr_args)

            for tmp_key in list_names:
                params[tmp_key].append(curr_params[tmp_key])

            params['train_params'] = curr_params['train_params']

        base.train_from_params(**params)
Example #14
def train_cnn():
    params = copy.deepcopy(default_params)
    params['save_params']['dbname'] = 'cnn'
    params['save_params']['collname'] = stim_type
    params['save_params']['exp_id'] = 'trainval0'

    params['model_params'] = {
        'func': cnn,
        'num_gpus': NUM_GPUS,
        'devices': DEVICES,
        'prefix': MODEL_PREFIX
    }

    # 1e-4 for natural scenes
    #params['learning_rate_params']['learning_rate'] = 1e-3
    params['learning_rate_params']['learning_rate'] = 1e-4
    base.train_from_params(**params)
Example #15
def train_cnn_fc_lstm():
    params = copy.deepcopy(default_params)
    params['save_params']['dbname'] = 'cnn_fc_lstm'
    params['save_params']['collname'] = stim_type
    params['save_params']['exp_id'] = 'trainval0'

    # Set to True if starting training again
    params['load_params']['do_restore'] = False

    params['model_params'] = {
        'func': rnn_fc,
        'num_gpus': NUM_GPUS,
        'devices': DEVICES,
        'prefix': MODEL_PREFIX
    }

    params['learning_rate_params']['learning_rate'] = 1e-5
    base.train_from_params(**params)
Example #16
def convrnn_imagenet_test():
    os.environ['CUDA_VISIBLE_DEVICES'] = '1,2,3,4,5,7,8,9'
    input_args = ['--gpu', '1,2,3,4,5,7,8,9']
    all_params = train_median_wfb(
            edges_arr=edges_5, 
            input_args=input_args)

    old_model_params = all_params.pop('model_params')
    def _temp_model_func(inputs, *args, **kwargs):
        output = convrnn_model(
                inputs['images'], input_args=input_args, 
                units=1000,
                *args, **kwargs)
        return output, {}
    new_model_params = {
            'func': _temp_model_func,
            'devices': old_model_params['devices'],
            'num_gpus': old_model_params['num_gpus'],
            }
    all_params['model_params'] = new_model_params
    all_params['save_params'] = {
            'host': 'localhost',
            'port': 27009,
            'dbname': 'convrnn',
            'collname': 'control',
            'exp_id': 'cate',
            'do_save': True,
            'save_initial_filters': True,
            'save_metrics_freq': 1000,
            'save_valid_freq': 10009,
            'save_filters_freq': 100090,
            'cache_filters_freq': 100090,
            }
    all_params['load_params'] = {
            'host': 'localhost',
            'port': 27009,
            'dbname': 'convrnn',
            'collname': 'control',
            'exp_id': 'cate',
            'do_restore': True,
            }
    all_params['validation_params'] = {}
    print(all_params.keys())
    base.train_from_params(**all_params)
Example #17
def main():
    # Parse arguments
    cfg = get_config()
    args = cfg.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # Get params needed, start training
    params = get_params_from_arg(args)
    if args.tfutils:
        params['loss_params']['agg_func'] = reg_loss_in_tfutils
        cache_dir = os.path.join(args.cache_dir, 'models_tfutils',
                                 args.db_name, args.col_name, args.exp_id)
        params['save_params']['cache_dir'] = cache_dir
        from tfutils import base
        base.train_from_params(**params)
    else:
        from framework import TrainFramework
        train_framework = TrainFramework(params)
        train_framework.train()
Example #18
def train_cnn_lstm_dropout_fb():
    params = copy.deepcopy(default_params)
    params['save_params']['dbname'] = 'cnn_lstm_dropout_fb'
    params['save_params']['collname'] = stim_type
    params['save_params']['exp_id'] = 'trainval0'

    # Set to True if starting training again
    params['load_params']['do_restore'] = True

    params['model_params'] = {
        'func': convLstmDropout,
        'edges_arr': [('conv2', 'conv1')],
        'num_gpus': NUM_GPUS,
        'devices': DEVICES,
        'prefix': MODEL_PREFIX
    }

    params['learning_rate_params']['learning_rate'] = 1e-5
    base.train_from_params(**params)
Example #19
def main(argv):
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    del argv  # Unused
    params = Params()
    if FLAGS.load_params_file is not None:
        assert (FLAGS.load_params_file[-4:] == '.pkl')
        print("Loading params from file: {}".format(FLAGS.load_params_file))
        print("Ignoring all config flags")
        params.load(FLAGS.load_params_file, FLAGS)
    else:
        print("Parsing params from flags...")
        params.customize(flags=FLAGS)
    params_copy = params.get_params_copy()
    print("All params: ")
    print(params_copy)
    if FLAGS.save_params_file is not None:
        assert (FLAGS.save_params_file[-4:] == '.pkl')
        params.save(FLAGS.save_params_file)
    base.train_from_params(**params_copy)
Example #20
    def test_training_save(self):
        """Illustrate saving to the grid file system during training time."""
        exp_id = 'training_save'
        params = self.setup_params(exp_id)

        # Modify a few of the save parameters.
        params['save_params']['save_valid_freq'] = 3000
        params['save_params']['save_filters_freq'] = 30000
        params['save_params']['cache_filters_freq'] = 3000

        # Specify additional save_params for saving to gfs.
        params['save_params']['save_to_gfs'] = ['first_image']
        params['train_params']['targets'] = {
            'func': self.get_first_image_target
        }

        # Actually run the training.
        base.train_from_params(**params)

        # Check that the first image has been saved.
        coll = self.collection['files']
        q = {'exp_id': exp_id, 'train_results': {'$exists': True}}
        train_steps = coll.find(q)
        self.assertEqual(train_steps.count(), 5)
        idx = train_steps[0]['_id']
        fn = coll.find({'item_for': idx})[0]['filename']
        fs = gridfs.GridFS(coll.database, self.collection_name)
        fh = fs.get_last_version(fn)
        saved_data = cPickle.loads(fh.read())
        fh.close()

        # Assert as expected.
        self.assertIn('train_results', saved_data)
        self.assertIn('first_image', saved_data['train_results'])
        self.assertEqual(len(saved_data['train_results']['first_image']), 100)
        self.assertEqual(saved_data['train_results']['first_image'][0].shape,
                         (28 * 28, ))
Example #21
    def test_training(self):
        base_exp_id = 'training0'
        params = self.setup_params(base_exp_id)
        num_models = len(params['model_params'])

        # Actually run the training.
        base.train_from_params(**params)

        # Test if results are as expected.
        for i in range(num_models):
            exp_id = base_exp_id + '_model_{}'.format(i)
            self.assert_as_expected(exp_id, count=26, step=[0, 200, 400])
            r = self.collection['files'].find({'exp_id': exp_id, 'step': 0})[0]
            self.asserts_for_record(r, params, train=True)
            r = self.collection['files'].find({
                'exp_id': exp_id,
                'step': 20
            })[0]
            self.asserts_for_record(r, params, train=True)

        # Run another 500 steps of training on the same experiment id.
        params['train_params']['num_steps'] = 1000
        base.train_from_params(**params)

        # Test if results are as expected.
        for i in range(num_models):
            exp_id = base_exp_id + '_model_{}'.format(i)
            self.assert_as_expected(exp_id, 51, [0, 200, 400, 600, 800, 1000])
            try:  # shim for py2/py3 compatibility
                eqmeth = self.assertItemsEqual
            except AttributeError:
                eqmeth = self.assertCountEqual
            eqmeth(self.collection['files'].distinct('exp_id'), [
                base_exp_id + '_model_{}'.format(i) for i in range(num_models)
            ])

            r = self.collection['files'].find({
                'exp_id': exp_id,
                'step': 1000
            })[0]
            self.asserts_for_record(r, params, train=True)

        # Run 500 more steps but save to a new experiment id.
        new_exp_id = 'training1'
        params['train_params']['num_steps'] = 1500
        params['load_params'] = {'exp_id': base_exp_id}
        params['save_params']['exp_id'] = new_exp_id

        base.train_from_params(**params)

        for i in range(num_models):
            exp_id = new_exp_id + '_model_{}'.format(i)
            self.assert_step(exp_id, [1200, 1400])
Example #22
    def test_training(self):
        """Illustrate training.

        This test illustrates how basic training is performed using the
        tfutils.base.train_from_params function.  This is the first in a sequence of
        interconnected tests. It creates a pretrained model that is used by
        the next few tests (test_validation and test_feature_extraction).

        As the correctness checks below show, once training has run, the
        training results, including (intermittently) the full set of
        variables needed to re-initialize the TensorFlow model, are stored
        in a MongoDB.

        See the docstring of tfutils.base.train_from_params for more
        detailed usage information.

        """
        exp_id = 'training0'
        params = self.setup_params(exp_id)

        # Run training.
        base.train_from_params(**params)

        # Test if results are as expected.
        self.assert_as_expected(exp_id, count=26, step=[0, 200, 400])
        r = self.collection['files'].find({'exp_id': exp_id, 'step': 0})[0]
        self.asserts_for_record(r, params, train=True)
        r = self.collection['files'].find({'exp_id': exp_id, 'step': 20})[0]
        self.asserts_for_record(r, params, train=True)

        # Run another 500 steps of training on the same experiment id.
        params['train_params']['num_steps'] = 1000
        base.train_from_params(**params)

        # Test if results are as expected.
        self.assert_as_expected(exp_id, 51, [0, 200, 400, 600, 800, 1000])
        self.assertEqual(self.collection['files'].distinct('exp_id'), [exp_id])

        r = self.collection['files'].find({'exp_id': exp_id, 'step': 1000})[0]
        self.asserts_for_record(r, params, train=True)

        # Run 500 more steps but save to a new experiment id.
        new_exp_id = 'training1'
        params['train_params']['num_steps'] = 1500
        params['load_params'] = {'exp_id': exp_id}
        params['save_params']['exp_id'] = new_exp_id

        base.train_from_params(**params)

        self.assert_step(new_exp_id, [1200, 1400])
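
Outside the test harness, the stored records can be inspected directly with pymongo. A minimal sketch, assuming the '<collname>.files' layout that the assertions above query (host, port, and the database/collection names are placeholders):

import pymongo

client = pymongo.MongoClient('localhost', 27017)
coll = client['my_dbname']['my_collname.files']
# Fetch the record saved at step 1000, as the test does via self.collection.
record = coll.find({'exp_id': 'training0', 'step': 1000})[0]
print(sorted(record.keys()))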
Example #23
def train(config,
          dbname,
          collname,
          exp_id,
          port,
          gpus=[0],
          use_default=True,
          load=True):

    tfutils_params = config['default_params'] if use_default else {}

    ### MODEL ###
    model_params = initialize_psgnet_model(config)
    loss_names = model_params['func'].Losses.keys()
    model_params.update({
        'devices': ['/gpu:' + str(i) for i in range(len(gpus))],
        'num_gpus': len(gpus),
        'seed': FLAGS.seed,
        'prefix': 'model_0'
    })
    tfutils_params['model_params'] = model_params

    ### INPUT DATA ###
    train_params, val_params = build_trainval_params(config,
                                                     loss_names=loss_names)
    update_tfutils_params('train', tfutils_params, train_params, config={})
    update_tfutils_params('validation', tfutils_params, val_params, config={})

    ### OPTIMIZATION ###
    trainable = FLAGS.trainable
    if trainable is not None:
        trainable = trainable.split(',')
    opt_params = {'trainable_scope': trainable}
    update_tfutils_params('optimizer', tfutils_params, opt_params, config)
    update_tfutils_params('loss', tfutils_params, {}, config)
    update_tfutils_params('learning_rate', tfutils_params, {}, config)

    ### SAVE AND LOAD ###
    save_params = {
        'dbname': dbname,
        'collname': collname,
        'exp_id': exp_id,
        'port': port
    }
    update_tfutils_params('save', tfutils_params, save_params, config)

    load_params = copy.deepcopy(save_params)
    load_exp_id = FLAGS.load_exp_id or exp_id
    load_params.update({
        'do_restore': True,
        'exp_id': load_exp_id,
        'query': {'step': FLAGS.step},
        'restore_global_step': (exp_id == load_exp_id)
    })
    update_tfutils_params('load', tfutils_params,
                          load_params if load else None, config)

    ### TODO save out config ###
    save_config(tfutils_params, save_dir=FLAGS.save_dir)

    logging.info(pprint.pformat(tfutils_params))
    base.train_from_params(**tfutils_params)
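
update_tfutils_params is a project-local helper that is not shown here. A hypothetical sketch consistent with how it is called above (merging per-section overrides into '<section>_params', pulling any matching defaults from config, and skipping the section entirely when passed None, e.g. load_params with load=False):

def update_tfutils_params(section, tfutils_params, new_params, config=None):
    """Hypothetical helper: merge overrides into tfutils_params['<section>_params']."""
    if new_params is None:
        return
    key = section + '_params'
    merged = dict(tfutils_params.get(key, {}))
    merged.update((config or {}).get(key, {}))
    merged.update(new_params)
    tfutils_params[key] = merged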
Example #24
def validate_tpu(test_params):
    print("Validating only")
    print("All params: ")
    print(test_params)
    base.train_from_params(**test_params)
Example #25
def main():
    parser = argparse.ArgumentParser(description='Script to train the Mask R-CNN')
    # System settings
    parser.add_argument('--gpu', default='0', type=str,
                        help='Index of the GPU; currently only one GPU is allowed')

    # General settings
    parser.add_argument('--nport', default=27017, type=int,
                        help='Port number of MongoDB')
    parser.add_argument('--expId', default='maskrcnn', type=str,
                        help='Name of the experiment id')
    parser.add_argument('--cacheDirPrefix', default='/mnt/fs0/chengxuz/', type=str,
                        help='Prefix of the cache directory')
    parser.add_argument('--batchsize', default=1, type=int,
                        help='Batch size; only 1 is supported now')
    parser.add_argument('--initlr', default=0.002, type=float,
                        help='Initial learning rate')

    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    exp_id = args.expId
    dbname = 'normalnet-test'
    colname = 'maskrcnn'
    cache_dir = os.path.join(args.cacheDirPrefix, '.tfutils',
                             'localhost:' + str(args.nport),
                             dbname, colname, exp_id)
    BATCH_SIZE = args.batchsize
    n_threads = 4

    # Define all params
    train_data_param = {
                'func': COCO,
                'data_path': DATA_PATH,
                'group': 'train',
                'n_threads': n_threads,
                'batch_size': 1,
                'key_list': KEY_LIST,
            }
    train_queue_params = {
            'queue_type': 'random',
            'batch_size': BATCH_SIZE,
            'seed': 0,
            'capacity': 10
        }
    NUM_BATCHES_PER_EPOCH = 82783//BATCH_SIZE
    learning_rate_params = {
            'func': tf.train.exponential_decay,
            'learning_rate': args.initlr,
            'decay_rate': 0.94,
            'decay_steps': NUM_BATCHES_PER_EPOCH * 2,  # exponential decay every two epochs
            'staircase': True
        }
    model_params = {
            'func': pack_model
        }
    optimizer_class = tf.train.MomentumOptimizer
    optimizer_params = {
            'func': optimizer.ClipOptimizer,
            'optimizer_class': optimizer_class,
            'clip': True,
            'momentum': .99
        }
    save_params = {
            'host': 'localhost',
            'port': args.nport,
            'dbname': dbname,
            'collname': colname,
            'exp_id': exp_id,

            'do_save': True,
            'save_initial_filters': True,
            'save_metrics_freq': 2500,  # store the training loss every 2500 steps
            'save_valid_freq': 5000,
            'save_filters_freq': 5000,
            'cache_filters_freq': 5000,
            'cache_dir': cache_dir,
        }

    train_params = {
            'validate_first': False,
            'data_params': train_data_param,
            'queue_params': train_queue_params,
            'thres_loss': np.finfo(np.float32).max,
            'num_steps': 20 * NUM_BATCHES_PER_EPOCH  # number of steps to train
        }
    load_query = None
    load_params = {
            'host': 'localhost',
            'port': args.nport,
            'dbname': dbname,
            'collname': colname,
            'exp_id': exp_id,
            'do_restore': True,
            'query': load_query 
    }
    loss_func = pack_loss
    loss_params = {
            'targets': ['height', 'width', 'num_objects', 'labels', 'segmentation_masks', 'bboxes'],
            'agg_func': tf.reduce_mean,
            'loss_per_case_func': loss_func,
        }
    postsess_params = {
            'func': restore,
            }
    params = {
        'save_params': save_params,
        'load_params': load_params,
        'model_params': model_params,
        'train_params': train_params,
        'loss_params': loss_params,
        'learning_rate_params': learning_rate_params,
        'optimizer_params': optimizer_params,
        'postsess_params': postsess_params,
        'log_device_placement': False,  # whether to log variable placement
        'validation_params': {},
    }

    # Run the training
    base.train_from_params(**params)
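
For reference, with staircase=True the schedule configured above multiplies the initial rate by decay_rate raised to floor(step / decay_steps); a quick sanity check of the values it produces (defaults follow the flags above with BATCH_SIZE = 1):

def decayed_lr(step, initlr=0.002, decay_rate=0.94, decay_steps=2 * 82783):
    """Staircase exponential decay, matching tf.train.exponential_decay."""
    return initlr * decay_rate ** (step // decay_steps)

# decayed_lr(0) == 0.002; from step 165566 (two epochs) it is 0.002 * 0.94.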
Example #26
            # RDM correlation
            retval['spearman_corrcoef_%s' % layer] = \
                    spearmanr(
                            np.reshape(retval['rdm_%s' % layer], [-1]),
                            np.reshape(retval['rdm_it'], [-1])
                            )[0]
            # categorization test
            retval['categorization_%s' % layer] = \
                    self.categorization_test(features[layer], meta, ['V6'])
            # within-categorization test
            retval['within_categorization_%s' % layer] = \
                    self.within_categorization_test(features[layer], meta, ['V6'])
            # IT regression test
            retval['it_regression_%s' % layer] = \
                    self.regression_test(features[layer], IT_feats, meta, ['V6'])
            # meta regression test
            retval['meta_regression_%s' % layer] = \
                    self.meta_regression_test(features[layer], meta, ['V6'])

        return retval


if __name__ == '__main__':
    """
    Illustrates how to run the configured model using tfutils
    """
    base.get_params()
    m = ImageNetExperiment()
    params = m.setup_params()
    base.train_from_params(**params)
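
The rdm_<layer> entries compared in the snippet above are representational dissimilarity matrices. Their construction is not included in the excerpt; a common recipe, shown here as a hedged sketch, is one minus the pairwise Pearson correlation over a (stimuli x features) matrix:

import numpy as np

def compute_rdm(features):
    """RDM as 1 - pairwise Pearson correlation across stimuli (rows)."""
    return 1.0 - np.corrcoef(features)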
Example #27
def main(args):
    if args.gpu > -1:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    cfg_initial = postprocess_config(json.load(open(args.pathconfig)))
    exp_id = args.expId
    cache_dir = os.path.join(args.cacheDirPrefix, '.tfutils',
                             'localhost:' + str(args.nport),
                             'normalnet-test', 'normalnet', exp_id)

    BATCH_SIZE = normal_encoder_asymmetric_with_bypass.getBatchSize(cfg_initial)
    if args.batchsize:
        BATCH_SIZE = args.batchsize
    queue_capa = normal_encoder_asymmetric_with_bypass.getQueueCap(cfg_initial)
    n_threads = 4

    func_net = getattr(normal_encoder_asymmetric_with_bypass, args.namefunc)

    train_data_param = {
                'func': Threedworld_hdf5,
                'data_path': DATA_PATH_hdf5,
                'group': 'train',
                'crop_size': IMAGE_SIZE_CROP,
                'n_threads': n_threads,
                'batch_size': 2,
            }
    val_data_param = {
                    'func': Threedworld_hdf5,
                    'data_path': DATA_PATH_hdf5,
                    'group': 'val',
                    'crop_size': IMAGE_SIZE_CROP,
                    'n_threads': n_threads,
                    'batch_size': 2,
                }
    train_queue_params = {
                'queue_type': 'fifo',
                'batch_size': BATCH_SIZE,
                'seed': 0,
                'capacity': BATCH_SIZE*10,
            }
    val_queue_params    = train_queue_params
    val_target          = 'normals'

    if args.usehdf5==0:
        train_data_param['func']   = Threedworld
        val_data_param['func']     = Threedworld
        train_data_param['data_path']   = DATA_PATH
        val_data_param['data_path']   = DATA_PATH

        train_queue_params = {
                'queue_type': 'random',
                'batch_size': BATCH_SIZE,
                'seed': 0,
                'capacity': queue_capa,
            }
        val_queue_params = {
                    'queue_type': 'fifo',
                    'batch_size': BATCH_SIZE,
                    'seed': 0,
                    'capacity': BATCH_SIZE*10,
                }
        val_target          = 'normals'

    if args.whichdataset==1:
        train_data_param['func']   = SceneNet
        val_data_param['func']     = SceneNet
        train_data_param['data_path']   = DATA_PATH_SCENE
        val_data_param['data_path']   = DATA_PATH_SCENE


    val_step_num = val_data_param['func'].N_VAL // BATCH_SIZE + 1
    NUM_BATCHES_PER_EPOCH = train_data_param['func'].N_TRAIN // BATCH_SIZE

    if args.valinum>-1:
        val_step_num = args.valinum

    loss_func = loss_ave_l2
    learning_rate_params = {
            'func': tf.train.exponential_decay,
            'learning_rate': .01,
            'decay_rate': .95,
            'decay_steps': NUM_BATCHES_PER_EPOCH,  # exponential decay each epoch
            'staircase': True
        }

    model_params = {
            'func': func_net,
            'seed': args.seed,
            'cfg_initial': cfg_initial
        }

    optim_params = {
            'func': optimizer.ClipOptimizer,
            'optimizer_class': tf.train.MomentumOptimizer,
            'clip': True,
            'momentum': .9
        }

    if args.whichloss==1:
        loss_func = loss_ave_invdot
        learning_rate_params = {
                'func': tf.train.exponential_decay,
                'learning_rate': .001,
                'decay_rate': .5,
                'decay_steps': NUM_BATCHES_PER_EPOCH,  # exponential decay each epoch
                'staircase': True
            }
        model_params['center_im'] = True
        optim_params = {
                'func': optimizer.ClipOptimizer,
                'optimizer_class': tf.train.RMSPropOptimizer,
                'clip': True,
            }

    params = {
        'save_params': {
            'host': 'localhost',
            'port': args.nport,
            'dbname': 'normalnet-test',
            'collname': 'normalnet',
            'exp_id': exp_id,
            'do_save': True,
            'save_initial_filters': True,
            'save_metrics_freq': 2000,  # store the training loss every 2000 steps
            'save_valid_freq': 5000,
            'save_filters_freq': 5000,
            'cache_filters_freq': 5000,
            'cache_dir': cache_dir,  # defaults to '~/.tfutils'
            'save_to_gfs': ['images_fea', 'normals_fea', 'outputs_fea'],
        },

        'load_params': {
            'host': 'localhost',
            'port': args.nport,
            'dbname': 'normalnet-test',
            'collname': 'normalnet',
            'exp_id': exp_id,
            'do_restore': True,
            'load_query': None
        },

        'model_params': model_params,

        'train_params': {
            'validate_first': True,
            'data_params': train_data_param,
            'queue_params': train_queue_params,
            'thres_loss': 1000,
            'num_steps': 90 * NUM_BATCHES_PER_EPOCH  # number of steps to train
        },

        'loss_params': {
            'targets': val_target,
            'agg_func': tf.reduce_mean,
            'loss_per_case_func': loss_func,
            'loss_per_case_func_params': {}
        },

        'learning_rate_params': learning_rate_params,

        'optimizer_params': optim_params,
        'log_device_placement': False,  # whether to log variable placement
        'validation_params': {
            'topn': {
                'data_params': val_data_param,
                'queue_params': val_queue_params,
                'targets': {
                    'func': rep_loss,
                    'target': val_target,
                },
                'num_steps': val_step_num,
                'agg_func': lambda x: {k:np.mean(v) for k,v in x.items()},
                'online_agg_func': online_agg
            },
            'feats':{
                'data_params': val_data_param,
                'queue_params': val_queue_params,
                'targets': {
                    'func': save_features,
                    'num_to_save': 5,
                    'targets' : [],
                },
                'num_steps': 10,
                'agg_func': mean_losses_keep_rest,
            },
        },
    }
    base.train_from_params(**params)