def _get_optimizer(self):
    lr = symbolic_functions.get_scalar_var('learning_rate', 5e-3, summary=True)
    opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
    return optimizer.apply_grad_processors(
        opt, [GlobalNormClip(5), SummaryGradient()])

def optimizer(self):
    lr = tf.get_variable("learning_rate", initializer=0.001, trainable=False)
    opt = tf.train.AdamOptimizer(lr)
    return optimizer.apply_grad_processors(
        opt, [GlobalNormClip(5), SummaryGradient()])

def optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=5e-3, trainable=False)
    opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
    return optimizer.apply_grad_processors(
        opt, [GlobalNormClip(5), SummaryGradient()])
def get_optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=2e-3, trainable=False)
    opt = tf.train.AdamOptimizer(lr)
    return optimizer.apply_grad_processors(opt, [GlobalNormClip(5)])
def optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=2e-2, trainable=False)
    opt = tf.train.AdamOptimizer(lr)
    return optimizer.apply_grad_processors(opt, [GlobalNormClip(GRADIENT_CLIP)])

def optimizer(self):
    lr = tf.get_variable("learning_rate", initializer=self.params.lr, trainable=False)
    opt = tf.train.AdamOptimizer(lr)
    return optimizer.apply_grad_processors(opt, [GlobalNormClip(5)])
def get_training_params(model_cls, args, is_training=True):
    """ Data-set specific params. Modify args for the specific data-set. """
    model = None
    ds_train, ds_val, insrc_train, insrc_val = None, None, None, None
    args.steps_per_epoch = None
    lr_schedule = None
    has_cbs_init = False
    train_cbs = []
    val_cbs = []
    output_names = None
    output_funcs = None
    args.batch_size = scale_int_val_with_gpu(args.batch_size_per_gpu, args.nr_gpu)
    args.init_lr = args.init_lr_per_sample * args.batch_size

    if args.ds_name == 'cifar10' or args.ds_name == 'cifar100':
        if args.ds_name == 'cifar10':
            args.num_classes = 10
        else:
            args.num_classes = 100
        args.regularize_coef = 'decay'
        args.input_size = 32
        fs.set_dataset_path(path=args.data_dir, auto_download=False)
        get_data = cifar.get_cifar_augmented_data
        if is_training:
            ds_train = get_data('train', args, do_multiprocess=True,
                                do_validation=args.do_validation, shuffle=True)
            if args.training_type == 'darts_cifar':
                args.init_lr = 0.025
                args.regularize_coef = 'const'
                args.regularize_const = 3e-4
            lr = float(args.init_lr)
            if args.child_train_from_scratch and args.job_type == 'remote_child':
                max_epoch = args.init_model_epoch
            else:
                max_epoch = args.max_train_model_epoch
            max_epoch = (max_epoch + args.nr_gpu - 1) // args.nr_gpu
            args.lr_decay_method = 'cosine'
            args.gradprocs = [GlobalNormClip(5)]
            args.max_epoch = max_epoch
            args.steps_per_epoch = ds_train.size()
        if args.do_remote_child_inf_runner or not is_training:
            ds_val = get_data('test', args, do_multiprocess=False,
                              do_validation=args.do_validation, shuffle=False)

    elif args.ds_name == 'ilsvrc' or args.ds_name == 'imagenet':
        args.num_classes = 1000
        args.input_size = 224
        args.do_mean_std_gpu_process = True
        args.input_type = 'uint8'
        args.mean = imagenet.ilsvrc_mean
        args.std = imagenet.ilsvrc_std
        #args.s_type = 'imagenet'  # make sure to check this...
        get_data = imagenet.get_ilsvrc_augmented_data
        if is_training:
            ds_train = get_data('train', args, do_multiprocess=True,
                                is_train=True, shuffle=True)
            if args.child_train_from_scratch and args.job_type == 'remote_child':
                max_epoch = 100
            else:
                max_epoch = args.max_train_model_epoch
            args.max_epoch = (max_epoch + args.nr_gpu - 1) // args.nr_gpu
            args.steps_per_epoch = ds_train.size()
            args = imagenet.training_params_update(args)
            args.gradprocs = [GlobalNormClip(5)]
        if args.do_remote_child_inf_runner or not is_training:
            ds_val = get_data('val', args, do_multiprocess=True,
                              is_train=False, shuffle=True)

    elif args.ds_name == 'tiny_imagenet':
        # fix data-set specific params
        args.num_classes = 200
        args.input_size = 64
        # transfer uint8 data and cast to float on the gpu
        args.do_mean_std_gpu_process = True
        args.input_type = 'uint8'
        args.mean = get_augmented_data.ilsvrc_mean
        args.std = get_augmented_data.ilsvrc_std
        args.s_type = 'conv7'
        args.b_type = 'bottleneck'
        # training params
        args.regularize_coef = 'const'
        fs.set_dataset_path(path=args.data_dir, auto_download=False)
        get_data = tiny_imagenet.get_tiny_imagenet_augmented_data
        if is_training:
            ds_train = get_data('train', args, do_multiprocess=True,
                                shuffle=True, is_train=True)
            if args.child_train_from_scratch and args.job_type == 'remote_child':
                max_epoch = 100
            else:
                max_epoch = args.max_train_model_epoch
            args.max_epoch = (max_epoch + args.nr_gpu - 1) // args.nr_gpu
            args.steps_per_epoch = ds_train.size()
            args = imagenet.training_params_update(args)
            args.gradprocs = [GlobalNormClip(10)]
        if args.do_remote_child_inf_runner or not is_training:
            ds_val = get_data('val', args, do_multiprocess=True, is_train=False)

    elif downsampled_imagenet.is_ds_name_downsampled_imagenet(args.ds_name):
        args.num_classes = 1000
        args.input_size = downsampled_imagenet.ds_name_to_input_size(args.ds_name)
        args.regularize_coef = 'decay'
        args.b_type = 'bottleneck'
        get_data = downsampled_imagenet.get_downsampled_imagenet_augmented_data
        if is_training:
            ds_train = get_data('train', args, do_multiprocess=True,
                                shuffle=True, do_validation=args.do_validation)
            if args.child_train_from_scratch and args.job_type == 'remote_child':
                max_epoch = 100
            else:
                max_epoch = args.max_train_model_epoch
            args.max_epoch = (max_epoch + args.nr_gpu - 1) // args.nr_gpu
            args.steps_per_epoch = ds_train.size()
            args = imagenet.training_params_update(args)
            args.gradprocs = [GlobalNormClip(10)]
        if args.do_remote_child_inf_runner or not is_training:
            ds_val = get_data('val', args, do_multiprocess=True,
                              do_validation=args.do_validation)

    elif args.ds_name == 'speech_commands':
        args.regularize_coef = 'const'
        args.num_classes = len(speech_commands.DEFAULT_TRAIN_WORDS) + 2
        get_data = speech_commands.get_augmented_speech_commands_data
        if is_training:
            ds_train = get_data('train', args, do_multiprocess=True, shuffle=True)
            if args.child_train_from_scratch and args.job_type == 'remote_child':
                max_epoch = 90
            else:
                max_epoch = args.max_train_model_epoch
            args.max_epoch = (max_epoch + args.nr_gpu - 1) // args.nr_gpu
            args.steps_per_epoch = ds_train.size()
            args = imagenet.training_params_update(args)
            args.gradprocs = [GlobalNormClip(10)]
        if args.do_remote_child_inf_runner or not is_training:
            val_split = 'val' if args.do_validation else 'test'
            ds_val = get_data(val_split, args, do_multiprocess=False, shuffle=False)

    elif args.ds_name == 'svhn':
        args.num_classes = 10
        args.regularize_coef = 'decay'
        args.input_size = 32
        fs.set_dataset_path(path=args.data_dir, auto_download=False)
        get_data = get_augmented_data.get_svhn_augmented_data
        # Training model
        if is_training:
            ds_train = get_data('train', args, do_multiprocess=True, shuffle=True)
            if args.child_train_from_scratch and args.job_type == 'remote_child':
                max_epoch = 60
            else:
                max_epoch = 12
            args.max_epoch = (max_epoch + args.nr_gpu - 1) // args.nr_gpu
            args.steps_per_epoch = ds_train.size()
            args.lr_decay_method = 'cosine'
            args.gradprocs = [GlobalNormClip(5)]
        if args.do_remote_child_inf_runner or not is_training:
            ds_val = get_data('test', args, do_multiprocess=False, shuffle=False)

    elif args.ds_name.startswith('openml_'):
        int_start = args.ds_name.find('_') + 1
        dataset_idx = int(args.ds_name[int_start:])
        # Some arg protection in case these are used in the future
        #assert not hasattr(args, 'mlp_input_types') and not hasattr(args, 'mlp_input_dims')
        (l_ds, args.mlp_input_types, args.mlp_input_dims, n_data,
         args.num_classes, args.mlp_feat_means,
         args.mlp_feat_stds) = openml.get_openml_dataflow(
            dataset_idx, args.data_dir, splits=['train', 'val'],
            do_validation=args.do_validation)
        ds_train = preprocess_data_flow(l_ds['train'], args, True)
        ds_val = preprocess_data_flow(l_ds['val'], args, False)
        logger.info("Dataset {} has {} samples and {} dims".format(
            args.ds_name, n_data, len(args.mlp_input_types)))
        if is_training:
            lr = float(args.init_lr)
            if args.child_train_from_scratch and args.job_type == 'remote_child':
                max_epoch = args.init_model_epoch
            else:
                max_epoch = args.max_train_model_epoch
            args.max_epoch = (max_epoch + args.nr_gpu - 1) // args.nr_gpu
            args.steps_per_epoch = ds_train.size()
            max_epoch = args.max_epoch
            lr_schedule = [(1, lr), (max_epoch // 2, lr * 1e-1),
                           (max_epoch * 3 // 4, lr * 1e-2),
                           (max_epoch * 7 // 8, lr * 1e-3)]
        if args.do_remote_child_inf_runner or not is_training:
            ds_val = preprocess_data_flow(l_ds['val'], args, True)

    elif (args.ds_name == 'inat' or args.ds_name == 'inat100'
          or args.ds_name == 'inat1000' or args.ds_name == 'inat2017_1000'):
        inat_lmdb_dir = None
        inat_year = '2018'
        if args.ds_name == 'inat':
            args.num_classes = 8142
            n_allow = None
        elif args.ds_name == 'inat100':
            args.num_classes = 100
            n_allow = 100
        elif args.ds_name == 'inat1000':
            args.num_classes = 1000
            n_allow = 1000
        elif args.ds_name == 'inat2017_1000':
            args.num_classes = 1000
            n_allow = 1000
            inat_year = '2017'
            inat_lmdb_dir = 'inat2017_data/lmdb'
        args.input_size = 224
        args.do_mean_std_gpu_process = True
        args.input_type = 'uint8'
        args.mean = inat.image_mean
        args.std = inat.image_std
        get_data = inat.get_inat_augmented_data
        if is_training:
            ds_train = get_data('train', args, lmdb_dir=inat_lmdb_dir,
                                year=inat_year, do_multiprocess=True,
                                do_validation=args.do_validation,
                                is_train=True, shuffle=True, n_allow=n_allow)
            if args.child_train_from_scratch and args.job_type == 'remote_child':
                max_epoch = 100
            else:
                max_epoch = args.max_train_model_epoch
            args.max_epoch = (max_epoch + args.nr_gpu - 1) // args.nr_gpu
            args.steps_per_epoch = ds_train.size()
            args = imagenet.training_params_update(args)
            args.gradprocs = [GlobalNormClip(5)]
        if args.do_remote_child_inf_runner or not is_training:
            ds_val = get_data('val', args, lmdb_dir=inat_lmdb_dir,
                              year=inat_year, do_multiprocess=True,
                              do_validation=args.do_validation,
                              is_train=False, shuffle=True, n_allow=n_allow)

    elif args.ds_name == 'ptb':
        ptb_data_dir = os.path.join(args.data_dir, 'ptb_data')
        args.input_type = 'int32'
        args.search_cell_based = False
        # force single gpu for now.
        args.nr_gpu = 1
        args, local_args = ptb.training_params_update(args)
        # evaluation/testing batch size change.
        if not is_training:
            if args.do_validation:
                args.batch_size_per_gpu = 64
            else:
                args.batch_size_per_gpu = 64
            # update global batch size.
            args.batch_size = args.batch_size_per_gpu * args.nr_gpu
            args.init_lr = args.init_lr_per_sample * args.batch_size
        if is_training:
            var_size = not args.model_rnn_has_static_len
            ds_train = ptb.PennTreeBankDataFlow(
                'train', ptb_data_dir, args.batch_size,
                args.model_rnn_max_len, var_size=var_size)
            args.steps_per_epoch = ds_train.size()
            args.model_rnn_vocab_size = ds_train.vocab_size
            if args.child_train_from_scratch and args.job_type == 'remote_child':
                max_epoch = 100
            else:
                max_epoch = args.max_train_model_epoch
            args.max_epoch = (max_epoch + args.nr_gpu - 1) // args.nr_gpu
            args.gradprocs = [GlobalNormClip(local_args.grad_clip)]
            model = model_cls(args)
            # Compute some callbacks for training. We need to construct the
            # model now, as some ops require the graph to be up
            # (shifting states and resetting states).
            ptb.ptb_training_cbs(model, args, ptb_data_dir, train_cbs)
            has_cbs_init = True
        if args.do_remote_child_inf_runner or not is_training:
            ds_val = ptb.PennTreeBankDataFlow(
                'valid' if args.do_validation else 'test',
                ptb_data_dir, args.batch_size, args.model_rnn_max_len,
                var_size=False)
            args.model_rnn_vocab_size = ds_val.vocab_size
            model = model_cls(args)
            # testing set-up: log loss of each sample
            output_names = [
                #model.inference_update_tensor(name_only=True) + ':0',
                'avg_batch_cost:0',
                'seq_len:0',
                'per_seq_sum_logloss:0',
            ]
            # The averaging is done automatically over the batches.
            # We need to average over time, and we exponentiate the
            # per-prediction logloss to get the perplexity score.
            output_funcs = [
                #None,
                lambda x: x * args.batch_size,
                lambda x: x * args.batch_size,
                lambda x: np.exp(x / args.model_rnn_max_len),
            ]

    else:
        raise Exception("Unknown dataset {}".format(args.ds_name))

    # Compute epochs / steps and read the initial learning rate.
    # Last section that may affect the args.
    args.max_train_steps = None
    if is_training:
        args.candidate_gate_eps = 1.0 / args.steps_per_epoch / args.batch_size
        starting_epoch = 1
        if args.model_dir is not None:
            ckpt = tf.train.latest_checkpoint(args.model_dir)
            if ckpt:
                starting_epoch = ann_app_utils.grep_starting_epoch(
                    ckpt, args.steps_per_epoch)
        if lr_schedule:
            args.init_lr = ann_app_utils.grep_init_lr(starting_epoch, lr_schedule)
        if args.debug_child_max_epoch:
            args.max_epoch = args.debug_child_max_epoch
        if args.debug_steps_per_epoch:
            args.steps_per_epoch = args.debug_steps_per_epoch
            starting_epoch = 1
        logger.info("Start at epoch {} with learning rate {}".format(
            starting_epoch, args.init_lr))
        args.max_train_steps = args.steps_per_epoch * args.max_epoch
        if model is not None:
            model.options.max_train_steps = args.max_train_steps

    if model is None:
        # if the dataset-specific branch did not init the model, init it here
        model = model_cls(args)

    # From now on args should be const.
    if is_training:
        if not has_cbs_init:
            if ds_val and args.debug_steps_per_epoch:
                ds_val = FixedSizeData(ds_val, args.debug_steps_per_epoch)
            train_cbs.extend(
                _inference_runner_train_cbs(args, ds_val, insrc_val, val_cbs))
        return (model, args, starting_epoch, lr_schedule, ds_train,
                insrc_train, train_cbs)
    else:
        if output_names is None:
            output_names = _inference_output_names(args)
            output_funcs = [None] * len(output_names)
        return (model, args, ds_val, insrc_val, output_names, output_funcs)
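# A rough sketch of how the two return shapes of get_training_params above
# might be consumed by a caller. This is an illustration under assumed usage,
# not code from this repository; `model_cls` and `args` are whatever the rest
# of the pipeline normally passes in.
def _example_consume_training_params(model_cls, args, is_training=True):
    if is_training:
        (model, args, starting_epoch, lr_schedule, ds_train,
         insrc_train, train_cbs) = get_training_params(model_cls, args, True)
        # ds_train feeds the trainer, train_cbs carries the extra callbacks,
        # and lr_schedule (when not None) lists (epoch, lr) breakpoints.
        return model, ds_train, train_cbs
    (model, args, ds_val, insrc_val, output_names,
     output_funcs) = get_training_params(model_cls, args, False)
    # output_names are the tensors to fetch at inference time; each entry of
    # output_funcs post-processes the matching fetched value (e.g. the PTB
    # branch exponentiates the per-prediction logloss into a perplexity).
    return model, ds_val, output_names, output_funcs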
def get_gradient_processor(self):
    return [GlobalNormClip(400)]
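# get_gradient_processor above appears to be the older tensorpack ModelDesc
# hook, where gradient processors were returned separately from the optimizer.
# A minimal sketch of the same clipping expressed through the optimizer() hook
# used by the snippets at the top of this section (the class name and learning
# rate here are illustrative assumptions):
import tensorflow as tf
from tensorpack import ModelDesc
from tensorpack.tfutils import optimizer as tp_optimizer
from tensorpack.tfutils.gradproc import GlobalNormClip


class ExampleModel(ModelDesc):
    def optimizer(self):
        lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False)
        opt = tf.train.AdamOptimizer(lr)
        # Same effect as returning [GlobalNormClip(400)] from get_gradient_processor().
        return tp_optimizer.apply_grad_processors(opt, [GlobalNormClip(400)])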