def create_optimizer(self):
  lr = self.learning_rate_var
  epsilon = 1e-16
  momentum = self.config.float("momentum", 0.0)
  optim_config = self.config.typed_value("optimizer")
  if optim_config:
    if isinstance(optim_config, str):
      optim_config = {"class": optim_config}
    assert isinstance(optim_config, dict)
    optim_config = optim_config.copy()
    optim_class_name = optim_config.pop("class")
    optim_class = get_optimizer_class(optim_class_name)
    from Util import collect_class_init_kwargs
    optim_class_kwargs = collect_class_init_kwargs(optim_class)
    if "epsilon" in optim_class_kwargs:
      optim_config.setdefault("epsilon", epsilon)
    if "momentum" in optim_class_kwargs and momentum:
      optim_config.setdefault("momentum", momentum)
    assert "learning_rate" not in optim_config, "learning_rate will be set implicitly"
    optim_config["learning_rate"] = lr
    print("Create optimizer %s with options %r." % (optim_class, optim_config), file=log.v2)
    optimizer = optim_class(**optim_config)
    assert isinstance(optimizer, tf.train.Optimizer)
  elif self.config.bool("adam", False):
    assert not momentum
    print("Create Adam optimizer.", file=log.v2)
    optimizer = tf.train.AdamOptimizer(learning_rate=lr, epsilon=epsilon)
  elif self.config.bool("nadam", False):
    assert not momentum
    print("Create NAdam optimizer.", file=log.v2)
    from tensorflow.contrib.opt import NadamOptimizer
    optimizer = NadamOptimizer(learning_rate=lr, epsilon=epsilon)
  elif self.config.bool("adadelta", False):
    assert not momentum
    print("Create Adadelta optimizer.", file=log.v2)
    optimizer = tf.train.AdadeltaOptimizer(learning_rate=lr, epsilon=epsilon)
  elif self.config.bool("adagrad", False):
    assert not momentum
    print("Create Adagrad optimizer.", file=log.v2)
    optimizer = tf.train.AdagradOptimizer(learning_rate=lr)
  elif self.config.is_of_type("rmsprop", float):
    print("Create RMSProp optimizer with decay %f." % (self.config.float("rmsprop", 0.9),), file=log.v2)
    optimizer = tf.train.RMSPropOptimizer(
      decay=self.config.float("rmsprop", 0.9), learning_rate=lr, momentum=momentum, epsilon=epsilon)
  elif self.config.bool("rmsprop", False):
    print("Create RMSProp optimizer.", file=log.v2)
    optimizer = tf.train.RMSPropOptimizer(learning_rate=lr, momentum=momentum, epsilon=epsilon)
  elif momentum:
    print("Create Momentum optimizer.", file=log.v2)
    optimizer = tf.train.MomentumOptimizer(learning_rate=lr, momentum=momentum)
  else:
    print("Create SGD optimizer.", file=log.v2)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
  self.optimizer = optimizer
  self.reset_optim_op()
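# The helper get_optimizer_class() used above is not shown in this section.
# A minimal sketch of what it plausibly does, assuming it just maps a config
# string such as "adam" to the matching tf.train.Optimizer subclass (the real
# implementation may differ; the name _get_optimizer_class_sketch is ours):
def _get_optimizer_class_sketch(class_name):
  """
  :param str class_name: e.g. "adam", "Adam" or "AdamOptimizer"
  :rtype: type
  """
  classes = {
    "adam": tf.train.AdamOptimizer,
    "adadelta": tf.train.AdadeltaOptimizer,
    "adagrad": tf.train.AdagradOptimizer,
    "momentum": tf.train.MomentumOptimizer,
    "rmsprop": tf.train.RMSPropOptimizer,
    "sgd": tf.train.GradientDescentOptimizer,
  }
  key = class_name.lower()
  if key.endswith("optimizer"):  # accept "AdamOptimizer" as well as "adam"
    key = key[:-len("optimizer")]
  assert key in classes, "unknown optimizer class %r" % (class_name,)
  return classes[key]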
def _create_optimizer(self, optimizer_opts):
  """
  :param dict[str]|str|None optimizer_opts: if dict, contains "class": opt_name. if str, then opt_name.
  :rtype: tf.train.Optimizer
  """
  if optimizer_opts is None:
    return self._create_default_optimizer()
  lr = self.learning_rate
  epsilon = self.config.float("optimizer_epsilon", 1e-16)
  use_locking = self.use_locking
  momentum = self.config.float("momentum", 0.0)
  if isinstance(optimizer_opts, str):
    optimizer_opts = {"class": optimizer_opts}
  assert isinstance(optimizer_opts, dict)
  optimizer_opts = optimizer_opts.copy()
  if "class" in optimizer_opts:
    optim_class_name = optimizer_opts.pop("class")
    optim_class = get_optimizer_class(optim_class_name)
  else:
    _, default_opt = self._get_optimizer_item_for_opts(None, auto_create_new=True)
    optim_class = default_opt.__class__
  from Util import collect_class_init_kwargs
  optim_class_kwargs = collect_class_init_kwargs(optim_class)
  if "epsilon" in optim_class_kwargs:
    optimizer_opts.setdefault("epsilon", epsilon)
  if "momentum" in optim_class_kwargs and momentum:
    optimizer_opts.setdefault("momentum", momentum)
  if "use_locking" in optim_class_kwargs and use_locking:
    optimizer_opts.setdefault("use_locking", use_locking)
  assert "learning_rate" not in optimizer_opts, "learning_rate will be set implicitly"
  if "learning_rate_multiplier" in optimizer_opts:
    lr *= optimizer_opts.pop("learning_rate_multiplier")
  optimizer_opts["learning_rate"] = lr
  print("Create optimizer %s with options %r." % (optim_class, optimizer_opts), file=log.v2)
  optimizer = optim_class(**optimizer_opts)
  assert isinstance(optimizer, tf.train.Optimizer)
  return optimizer
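# collect_class_init_kwargs() from Util is what lets the code above set only
# those options ("epsilon", "momentum", "use_locking") that the chosen
# optimizer class actually accepts. A rough sketch, assuming it simply
# inspects the __init__ signature (the real helper may also walk base
# classes; the name _collect_class_init_kwargs_sketch is ours):
import inspect

def _collect_class_init_kwargs_sketch(cls):
  """
  :param type cls: e.g. tf.train.AdamOptimizer
  :rtype: list[str], accepted __init__ keyword argument names
  """
  sig = inspect.signature(cls.__init__)
  return [
    name for name, p in sig.parameters.items()
    if name != "self" and p.kind in (p.POSITIONAL_OR_KEYWORD, p.KEYWORD_ONLY)]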
def traverse(model, layer_name, output_index):
  index = output_index
  mask = network.default_mask
  if not mask and 'mask' in model[layer_name].attrs:
    mask = model[layer_name].attrs['mask']
  if 'from' in model[layer_name].attrs:
    x_in = []
    for s in model[layer_name].attrs['from'].split(','):
      if s == 'data':
        x_in.append(SourceLayer(network.n_in, network.x, sparse=sparse_input, name='data', index=network.i))
        index = network.i
      elif s != "null" and s != "":  # this is allowed, recurrent states can be passed as input
        if s not in network.hidden:
          index = traverse(model, s, index)
        else:
          index = network.hidden[s].index
        x_in.append(network.hidden[s])
      elif s == "":
        assert not s
        # Fix for old models via NetworkDescription.
        s = Layer.guess_source_layer_name(layer_name)
        if not s:
          # Fix for data input. Just like in NetworkDescription, so that param names are correct.
          x_in.append(SourceLayer(n_out=network.n_in, x_out=network.x, name="", index=network.i))
        else:
          if s not in network.hidden:
            index = traverse(model, s, index)
          else:
            index = network.hidden[s].index
          # Add just like in NetworkDescription, so that param names are correct.
          x_in.append(SourceLayer(
            n_out=network.hidden[s].attrs['n_out'], x_out=network.hidden[s].output, name="", index=network.i))
  else:
    x_in = [SourceLayer(network.n_in, network.x, sparse=sparse_input, name='data', index=network.i)]
  if 'encoder' in model[layer_name].attrs:
    encoder = []
    for s in model[layer_name].attrs['encoder'].split(','):
      if s != "":
        if s not in network.hidden:
          traverse(model, s, index)
        encoder.append(network.hidden[s])
  if 'base' in model[layer_name].attrs:  # TODO see json
    base = []
    for s in model[layer_name].attrs['base'].split(','):
      if s != "":
        if s not in network.hidden:
          traverse(model, s, index)
        base.append(network.hidden[s])
  for key in ['copy_input', 'copy_output']:
    if key in model[layer_name].attrs:
      index = traverse(model, model[layer_name].attrs[key], index)
      if key == 'copy_input':
        copy_input = network.hidden[model[layer_name].attrs[key]]
      if key == 'copy_output':
        copy_output = network.hidden[model[layer_name].attrs[key]]
  if 'encoder' in model[layer_name].attrs and not x_in:
    index = output_index
  if 'target' in model[layer_name].attrs:
    target = model[layer_name].attrs['target']
    if target != "null" and target not in network.y:
      network.use_target(target, dtype=dtype)
      index = network.j[target]
  cl = model[layer_name].attrs['class']
  if cl == 'softmax':
    params = {'dropout': 0.0, 'name': 'output', 'mask': mask, 'train_flag': train_flag}
    params.update(model[layer_name].attrs)
    if 'encoder' in model[layer_name].attrs:
      params['encoder'] = encoder
    if 'base' in model[layer_name].attrs:
      params['base'] = base
    if 'copy_input' in model[layer_name].attrs:
      params['copy_input'] = copy_input
    if 'copy_output' in model[layer_name].attrs:
      params['copy_output'] = copy_output
    params['index'] = index
    params['sources'] = x_in
    params['y_in'] = network.y
    params.pop('from', None)
    params.pop('class', None)
    network.make_classifier(**params)
  else:
    params = {
      'sources': x_in,
      'n_out': model[layer_name].attrs['n_out'],
      'dropout': model[layer_name].attrs['dropout'] if train_flag else 0.0,
      'name': layer_name,
      'mask': mask,
      'train_flag': train_flag,
      'eval_flag': eval_flag,
      'network': network,
      'index': index}
    try:
      params['activation'] = model[layer_name].attrs['activation']
    except KeyError:
      pass
    params['y_in'] = network.y
    layer_class = get_layer_class(cl)
    for p in collect_class_init_kwargs(layer_class):
      if p in params:
        continue  # don't overwrite existing
      if p in model[layer_name].attrs.keys():
        params[p] = model[layer_name].attrs[p]
    if 'encoder' in model[layer_name].attrs:
      params['encoder'] = encoder
    if 'base' in model[layer_name].attrs:
      params['base'] = base
    if 'target' in model[layer_name].attrs:
      params['target'] = model[layer_name].attrs['target']
    if layer_class.recurrent:
      network.recurrent = True
    return network.add_layer(layer_class(**params)).index
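# For orientation, a hypothetical topology (hand-written, not from a real HDF
# model file) that traverse() could walk: it resolves the "from" chain of
# "output" recursively down to the "data" input, builds "hidden1" first, and
# finally dispatches "output" to network.make_classifier() because its class
# is "softmax". All attribute values here are illustrative only.
#
#   model = {
#     "hidden1": {"attrs": {"class": "hidden", "from": "data", "n_out": 512, "dropout": 0.1}},
#     "output": {"attrs": {"class": "softmax", "from": "hidden1", "target": "classes"}},
#   }
#   traverse(model, "output", network.i)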
def create_optimizer(self):
  lr = self.get_current_step_learning_rate()
  epsilon = self.config.float("optimizer_epsilon", 1e-16)
  use_locking = self.use_locking
  momentum = self.config.float("momentum", 0.0)
  optim_config = self.config.typed_value("optimizer")
  if optim_config:
    if isinstance(optim_config, str):
      optim_config = {"class": optim_config}
    assert isinstance(optim_config, dict)
    optim_config = optim_config.copy()
    optim_class_name = optim_config.pop("class")
    optim_class = get_optimizer_class(optim_class_name)
    from Util import collect_class_init_kwargs
    optim_class_kwargs = collect_class_init_kwargs(optim_class)
    if "epsilon" in optim_class_kwargs:
      optim_config.setdefault("epsilon", epsilon)
    if "momentum" in optim_class_kwargs and momentum:
      optim_config.setdefault("momentum", momentum)
    if "use_locking" in optim_class_kwargs and use_locking:
      optim_config.setdefault("use_locking", use_locking)
    assert "learning_rate" not in optim_config, "learning_rate will be set implicitly"
    optim_config["learning_rate"] = lr
    print("Create optimizer %s with options %r." % (optim_class, optim_config), file=log.v2)
    optimizer = optim_class(**optim_config)
    assert isinstance(optimizer, tf.train.Optimizer)
  elif self.config.bool("adam", False):
    assert not momentum
    print("Create Adam optimizer.", file=log.v2)
    # Default TF values: learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8.
    # Default Keras values: lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8.
    # Our Theano default values: beta1=0.9, beta2=0.999, epsilon=1e-16.
    # https://github.com/openai/improved-gan/blob/master/imagenet/train_imagenet.py: beta1=0.5
    optimizer = tf.train.AdamOptimizer(learning_rate=lr, epsilon=epsilon, use_locking=use_locking)
  elif self.config.bool("nadam", False):
    assert_min_tf_version((1, 2, 0), "NadamOptimizer introduced in TF 1.2.0")
    assert not momentum
    print("Create NAdam optimizer.", file=log.v2)
    # TF default values: like Adam: beta1=0.9, beta2=0.999, epsilon=1e-8.
    # Our Theano default values: decay=0.004, beta1=0.9, beta2=0.999, epsilon=1e-8.
    from tensorflow.contrib.opt import NadamOptimizer
    optimizer = NadamOptimizer(learning_rate=lr, epsilon=epsilon, use_locking=use_locking)
  elif self.config.bool("adadelta", False):
    assert not momentum
    print("Create Adadelta optimizer.", file=log.v2)
    optimizer = tf.train.AdadeltaOptimizer(learning_rate=lr, epsilon=epsilon, use_locking=use_locking)
  elif self.config.bool("adagrad", False):
    assert not momentum
    print("Create Adagrad optimizer.", file=log.v2)
    optimizer = tf.train.AdagradOptimizer(learning_rate=lr, use_locking=use_locking)
  elif self.config.is_of_type("rmsprop", float):
    print("Create RMSProp optimizer with decay %f." % (self.config.float("rmsprop", 0.9),), file=log.v2)
    optimizer = tf.train.RMSPropOptimizer(
      decay=self.config.float("rmsprop", 0.9), learning_rate=lr, momentum=momentum, epsilon=epsilon,
      use_locking=use_locking)
  elif self.config.bool("rmsprop", False):
    print("Create RMSProp optimizer.", file=log.v2)
    optimizer = tf.train.RMSPropOptimizer(
      learning_rate=lr, momentum=momentum, epsilon=epsilon, use_locking=use_locking)
  elif momentum:
    print("Create Momentum optimizer.", file=log.v2)
    optimizer = tf.train.MomentumOptimizer(learning_rate=lr, momentum=momentum, use_locking=use_locking)
  else:
    print("Create SGD optimizer.", file=log.v2)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr, use_locking=use_locking)
  self.optimizer = optimizer
  self.reset_optim_op()
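# Illustrative config values (hypothetical, set one at a time) that select the
# branches in create_optimizer() above:
#
#   optimizer = {"class": "adam", "epsilon": 1e-8}  # generic path via get_optimizer_class()
#   adam = True       # tf.train.AdamOptimizer
#   nadam = True      # tensorflow.contrib.opt.NadamOptimizer (needs TF >= 1.2.0)
#   adadelta = True   # tf.train.AdadeltaOptimizer
#   rmsprop = 0.9     # RMSProp with decay 0.9; rmsprop = True uses the TF default decay
#   momentum = 0.99   # MomentumOptimizer on its own; also forwarded to RMSProp if combined
#   # With none of these set: plain tf.train.GradientDescentOptimizer (SGD).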