Example 1
 def create_optimizer(self):
   lr = self.learning_rate_var
   epsilon = 1e-16
   momentum = self.config.float("momentum", 0.0)
   optim_config = self.config.typed_value("optimizer")
   if optim_config:
     if isinstance(optim_config, str):
       optim_config = {"class": optim_config}
     assert isinstance(optim_config, dict)
     optim_config = optim_config.copy()
     optim_class_name = optim_config.pop("class")
     optim_class = get_optimizer_class(optim_class_name)
     from Util import collect_class_init_kwargs
     optim_class_kwargs = collect_class_init_kwargs(optim_class)
     if "epsilon" in optim_class_kwargs:
       optim_config.setdefault("epsilon", epsilon)
     if "momentum" in optim_class_kwargs and momentum:
       optim_config.setdefault("momentum", momentum)
     assert "learning_rate" not in optim_config, "learning_rate will be set implicitely"
     optim_config["learning_rate"] = lr
     print("Create optimizer %s with options %r." % (optim_class, optim_config), file=log.v2)
     optimizer = optim_class(**optim_config)
     assert isinstance(optimizer, tf.train.Optimizer)
   elif self.config.bool("adam", False):
     assert not momentum
     print("Create Adam optimizer.", file=log.v2)
     optimizer = tf.train.AdamOptimizer(learning_rate=lr, epsilon=epsilon)
   elif self.config.bool("nadam", False):
     assert not momentum
     print("Create NAdam optimizer.", file=log.v2)
     optimizer = NadamOptimizer(learning_rate=lr, epsilon=epsilon)
   elif self.config.bool("adadelta", False):
     assert not momentum
     print("Create Adadelta optimizer.", file=log.v2)
     optimizer = tf.train.AdadeltaOptimizer(learning_rate=lr, epsilon=epsilon)
   elif self.config.bool("adagrad", False):
     assert not momentum
     print("Create Adagrad optimizer.", file=log.v2)
     optimizer = tf.train.AdagradOptimizer(learning_rate=lr)
   elif self.config.is_of_type("rmsprop", float):
     print("Create RMSProp optimizer. With Decay %f" % (self.config.float("rmsprop", 0.9)), file=log.v2)
     optimizer = tf.train.RMSPropOptimizer(decay=self.config.float("rmsprop", 0.9), learning_rate=lr, momentum=momentum, epsilon=epsilon)
   elif self.config.bool("rmsprop", False):
     print("Create RMSProp optimizer.", file=log.v2)
     optimizer = tf.train.RMSPropOptimizer(learning_rate=lr, momentum=momentum, epsilon=epsilon)
   elif momentum:
     print("Create Momentum optimizer.", file=log.v2)
     optimizer = tf.train.MomentumOptimizer(learning_rate=lr, momentum=momentum)
   else:
     print("Create SGD optimizer.", file=log.v2)
     optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
   self.optimizer = optimizer
   self.reset_optim_op()
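
The config-driven branch at the top accepts either a string or a dict with a "class" key, keeps only the keyword arguments that the chosen optimizer class actually accepts, and then injects the learning rate. Below is a minimal standalone sketch of that normalization logic; the helper name and the stand-in optimizer class are hypothetical and not part of RETURNN.

import inspect


class DummyAdam:
  """Stand-in optimizer class so the sketch runs without TensorFlow (hypothetical)."""
  def __init__(self, learning_rate, epsilon=1e-8):
    self.learning_rate = learning_rate
    self.epsilon = epsilon


def normalize_optimizer_config(optim_config, learning_rate, epsilon=1e-16, momentum=0.0):
  """Sketch of the normalization above: str -> {"class": name}, then set defaults
  only for kwargs the optimizer constructor accepts, and inject the learning rate."""
  if isinstance(optim_config, str):
    optim_config = {"class": optim_config}
  assert isinstance(optim_config, dict)
  optim_config = optim_config.copy()
  optim_class = {"adam": DummyAdam}[optim_config.pop("class").lower()]  # stand-in for get_optimizer_class
  accepted = set(inspect.signature(optim_class.__init__).parameters)  # rough analogue of collect_class_init_kwargs
  if "epsilon" in accepted:
    optim_config.setdefault("epsilon", epsilon)
  if "momentum" in accepted and momentum:
    optim_config.setdefault("momentum", momentum)
  assert "learning_rate" not in optim_config, "learning_rate will be set implicitly"
  optim_config["learning_rate"] = learning_rate
  return optim_class(**optim_config)


opt = normalize_optimizer_config("adam", learning_rate=1e-3)
print(type(opt).__name__, opt.learning_rate, opt.epsilon)  # DummyAdam 0.001 1e-16
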
Example 2
 def _create_optimizer(self, optimizer_opts):
   """
   :param dict[str]|str|None optimizer_opts: if dict, may contain "class": opt_name (otherwise the default
     optimizer class is reused); if str, it is the opt_name; None selects the default optimizer.
   :rtype: tf.train.Optimizer
   """
   if optimizer_opts is None:
     return self._create_default_optimizer()
   lr = self.learning_rate
   epsilon = self.config.float("optimizer_epsilon", 1e-16)
   use_locking = self.use_locking
   momentum = self.config.float("momentum", 0.0)
   if isinstance(optimizer_opts, str):
     optimizer_opts = {"class": optimizer_opts}
   assert isinstance(optimizer_opts, dict)
   optimizer_opts = optimizer_opts.copy()
   if "class" in optimizer_opts:
     optim_class_name = optimizer_opts.pop("class")
     optim_class = get_optimizer_class(optim_class_name)
   else:
     _, default_opt = self._get_optimizer_item_for_opts(None, auto_create_new=True)
     optim_class = default_opt.__class__
   from Util import collect_class_init_kwargs
   optim_class_kwargs = collect_class_init_kwargs(optim_class)
   if "epsilon" in optim_class_kwargs:
     optimizer_opts.setdefault("epsilon", epsilon)
   if "momentum" in optim_class_kwargs and momentum:
     optimizer_opts.setdefault("momentum", momentum)
   if "use_locking" in optim_class_kwargs and use_locking:
     optimizer_opts.setdefault("use_locking", use_locking)
   assert "learning_rate" not in optimizer_opts, "learning_rate will be set implicitly"
   if "learning_rate_multiplier" in optimizer_opts:
     lr *= optimizer_opts.pop("learning_rate_multiplier")
   optimizer_opts["learning_rate"] = lr
   print("Create optimizer %s with options %r." % (optim_class, optimizer_opts), file=log.v2)
   optimizer = optim_class(**optimizer_opts)
   assert isinstance(optimizer, tf.train.Optimizer)
   return optimizer
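
Relative to Example 1, this variant also honours a per-optimizer "learning_rate_multiplier" entry: it is popped from the options and used to scale the base learning rate before the constructor is called. A minimal sketch of just that scaling step, with hypothetical names:

def apply_lr_multiplier(optimizer_opts, base_lr):
  """Sketch: pop the per-optimizer multiplier (if any) and scale the base learning rate,
  mirroring the "learning_rate_multiplier" handling above (hypothetical helper)."""
  opts = dict(optimizer_opts)
  lr = base_lr * opts.pop("learning_rate_multiplier", 1.0)
  assert "learning_rate" not in opts, "learning_rate will be set implicitly"
  opts["learning_rate"] = lr
  return opts


print(apply_lr_multiplier({"class": "adam", "learning_rate_multiplier": 0.5}, base_lr=1e-3))
# -> {'class': 'adam', 'learning_rate': 0.0005}
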
Example 3
    def traverse(model, layer_name, output_index):
      index = output_index
      mask = network.default_mask
      if not mask and 'mask' in model[layer_name].attrs:
        mask = model[layer_name].attrs['mask']
      if 'from' in model[layer_name].attrs:
        x_in = []
        for s in model[layer_name].attrs['from'].split(','):
          if s == 'data':
            x_in.append(SourceLayer(network.n_in, network.x, sparse=sparse_input, name='data', index=network.i))
            index = network.i
          elif s != "null" and s != "": # this is allowed, recurrent states can be passed as input
            if s not in network.hidden:
              index = traverse(model, s, index)
            else:
              index = network.hidden[s].index
            x_in.append(network.hidden[s])
          elif s == "":
            assert not s
            # Fix for old models via NetworkDescription.
            s = Layer.guess_source_layer_name(layer_name)
            if not s:
              # Fix for data input. Just like in NetworkDescription, so that param names are correct.
              x_in.append(SourceLayer(n_out=network.n_in, x_out=network.x, name="", index=network.i))
            else:
              if s not in network.hidden:
                index = traverse(model, s, index)
              else:
                index = network.hidden[s].index
              # Add just like in NetworkDescription, so that param names are correct.
              x_in.append(SourceLayer(n_out=network.hidden[s].attrs['n_out'], x_out=network.hidden[s].output, name="", index=network.i))
      else:
        x_in = [ SourceLayer(network.n_in, network.x, sparse=sparse_input, name='data', index=network.i) ]
      if 'encoder' in model[layer_name].attrs:
        encoder = []
        for s in model[layer_name].attrs['encoder'].split(','):
          if s != "":
            if s not in network.hidden:
              traverse(model, s, index)
            encoder.append(network.hidden[s])
      if 'base' in model[layer_name].attrs: # TODO see json
        base = []
        for s in model[layer_name].attrs['base'].split(','):
          if s != "":
            if s not in network.hidden:
              traverse(model, s, index)
            base.append(network.hidden[s])
      for key in ['copy_input', 'copy_output']:
        if key in model[layer_name].attrs:
          index = traverse(model, model[layer_name].attrs[key], index)
          if key == 'copy_input':
            copy_input = network.hidden[model[layer_name].attrs[key]]
          if key == 'copy_output':
            copy_output = network.hidden[model[layer_name].attrs[key]]
      if 'encoder' in model[layer_name].attrs and not x_in:
        index = output_index
      if 'target' in model[layer_name].attrs:
        target = model[layer_name].attrs['target']
        if target != "null" and target not in network.y:
          network.use_target(target, dtype=dtype)
          index = network.j[target]
      cl = model[layer_name].attrs['class']
      if cl == 'softmax':
        params = { 'dropout' : 0.0,
                   'name' : 'output',
                   'mask' : mask,
                   'train_flag' : train_flag }
        params.update(model[layer_name].attrs)
        if 'encoder' in model[layer_name].attrs:
          params['encoder'] = encoder #network.hidden[model[layer_name].attrs['encoder']] if model[layer_name].attrs['encoder'] in network.hidden else network.output[model[layer_name].attrs['encoder']]
        if 'base' in model[layer_name].attrs:
          params['base'] = base
        if 'copy_input' in model[layer_name].attrs:
          params['copy_input'] = copy_input
        if 'copy_output' in model[layer_name].attrs:
          params['copy_output'] = copy_output
        #if not 'target' in params:
        #  params['target'] = target
        params['index'] = index #output_index
        params['sources'] = x_in
        params['y_in'] = network.y
        params.pop('from', None)
        params.pop('class', None)
        network.make_classifier(**params)
      else:
        params = { 'sources': x_in,
                   'n_out': model[layer_name].attrs['n_out'],
                   'dropout': model[layer_name].attrs['dropout'] if train_flag else 0.0,
                   'name': layer_name,
                   'mask': mask,
                   'train_flag' : train_flag,
                   "eval_flag": eval_flag,
                   'network': network,
                   'index' : index }
        try:
          act = model[layer_name].attrs['activation']
          params["activation"] = act
        except Exception:
          pass
        params['y_in'] = network.y
        layer_class = get_layer_class(cl)
        for p in collect_class_init_kwargs(layer_class):
          if p in params: continue  # don't overwrite existing
          if p in model[layer_name].attrs.keys():
            params[p] = model[layer_name].attrs[p]
        if 'encoder' in model[layer_name].attrs:
          params['encoder'] = encoder #network.hidden[model[layer_name].attrs['encoder']] if model[layer_name].attrs['encoder'] in network.hidden else network.output[model[layer_name].attrs['encoder']]
        if 'base' in model[layer_name].attrs:
          params['base'] = base

        if 'target' in model[layer_name].attrs:
          params['target'] = model[layer_name].attrs['target']
        if layer_class.recurrent:
          network.recurrent = True
        return network.add_layer(layer_class(**params)).index
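
traverse builds layers depth-first: every name listed in a layer's 'from' attribute is constructed (and cached in network.hidden) before the layer itself, so shared sources are built only once. The toy sketch below illustrates that dependency-resolution pattern on a plain dict; the names are illustrative and this is not the RETURNN loader.

def build_in_dependency_order(layers, name, built=None, order=None):
  """Toy sketch of the recursion above: build every 'from' source before the layer itself,
  caching results so shared sources are built only once (illustrative, not RETURNN code)."""
  built = {} if built is None else built
  order = [] if order is None else order
  if name in built or name == "data":
    return built, order
  for src in layers[name].get("from", "data").split(","):
    if src and src != "null":
      build_in_dependency_order(layers, src, built, order)
  built[name] = object()  # stand-in for the constructed layer
  order.append(name)
  return built, order


layers = {"lstm1": {"from": "data"}, "lstm2": {"from": "lstm1"}, "output": {"from": "lstm2,lstm1"}}
_, order = build_in_dependency_order(layers, "output")
print(order)  # ['lstm1', 'lstm2', 'output']
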
Example 4
 def create_optimizer(self):
     lr = self.get_current_step_learning_rate()
     epsilon = self.config.float("optimizer_epsilon", 1e-16)
     use_locking = self.use_locking
     momentum = self.config.float("momentum", 0.0)
     optim_config = self.config.typed_value("optimizer")
     if optim_config:
         if isinstance(optim_config, str):
             optim_config = {"class": optim_config}
         assert isinstance(optim_config, dict)
         optim_config = optim_config.copy()
         optim_class_name = optim_config.pop("class")
         optim_class = get_optimizer_class(optim_class_name)
         from Util import collect_class_init_kwargs
         optim_class_kwargs = collect_class_init_kwargs(optim_class)
         if "epsilon" in optim_class_kwargs:
             optim_config.setdefault("epsilon", epsilon)
         if "momentum" in optim_class_kwargs and momentum:
             optim_config.setdefault("momentum", momentum)
         if "use_locking" in optim_class_kwargs and use_locking:
             optim_config.setdefault("use_locking", use_locking)
         assert "learning_rate" not in optim_config, "learning_rate will be set implicitly"
         optim_config["learning_rate"] = lr
         print("Create optimizer %s with options %r." %
               (optim_class, optim_config),
               file=log.v2)
         optimizer = optim_class(**optim_config)
         assert isinstance(optimizer, tf.train.Optimizer)
     elif self.config.bool("adam", False):
         assert not momentum
         print("Create Adam optimizer.", file=log.v2)
         # Default TF values: learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8.
         # Default Keras values: lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8.
         # Our Theano default values: beta1=0.9, beta2=0.999, epsilon=1e-16
         # https://github.com/openai/improved-gan/blob/master/imagenet/train_imagenet.py: beta1=0.5
         optimizer = tf.train.AdamOptimizer(learning_rate=lr,
                                            epsilon=epsilon,
                                            use_locking=use_locking)
     elif self.config.bool("nadam", False):
         assert_min_tf_version((1, 2, 0),
                               "NadamOptimizer introduced in TF 1.2.0")
         assert not momentum
         print("Create NAdam optimizer.", file=log.v2)
         # TF default values: like Adam: beta1=0.9, beta2=0.999, epsilon=1e-8
         # Our Theano default values: decay=0.004, beta1=0.9, beta2=0.999, epsilon=1e-8
         from tensorflow.contrib.opt import NadamOptimizer
         optimizer = NadamOptimizer(learning_rate=lr,
                                    epsilon=epsilon,
                                    use_locking=use_locking)
     elif self.config.bool("adadelta", False):
         assert not momentum
         print("Create Adadelta optimizer.", file=log.v2)
         optimizer = tf.train.AdadeltaOptimizer(learning_rate=lr,
                                                epsilon=epsilon,
                                                use_locking=use_locking)
     elif self.config.bool("adagrad", False):
         assert not momentum
         print("Create Adagrad optimizer.", file=log.v2)
         optimizer = tf.train.AdagradOptimizer(learning_rate=lr,
                                               use_locking=use_locking)
     elif self.config.is_of_type("rmsprop", float):
         print("Create RMSProp optimizer. With Decay %f" %
               (self.config.float("rmsprop", 0.9)),
               file=log.v2)
         optimizer = tf.train.RMSPropOptimizer(
             decay=self.config.float("rmsprop", 0.9),
             learning_rate=lr,
             momentum=momentum,
             epsilon=epsilon,
             use_locking=use_locking)
     elif self.config.bool("rmsprop", False):
         print("Create RMSProp optimizer.", file=log.v2)
         optimizer = tf.train.RMSPropOptimizer(learning_rate=lr,
                                               momentum=momentum,
                                               epsilon=epsilon,
                                               use_locking=use_locking)
     elif momentum:
         print("Create Momentum optimizer.", file=log.v2)
         optimizer = tf.train.MomentumOptimizer(learning_rate=lr,
                                                momentum=momentum,
                                                use_locking=use_locking)
     else:
         print("Create SGD optimizer.", file=log.v2)
         optimizer = tf.train.GradientDescentOptimizer(
             learning_rate=lr, use_locking=use_locking)
     self.optimizer = optimizer
     self.reset_optim_op()
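
One subtlety in the fallback chain above: a float-valued "rmsprop" config entry is interpreted as the RMSProp decay, while a plain boolean flag leaves the TensorFlow default decay in place. A minimal sketch of that dispatch (the helper name is hypothetical):

def pick_rmsprop_kwargs(config_value, lr, momentum=0.0, epsilon=1e-16):
  """Sketch of the dispatch above: a float config value supplies the decay,
  any other truthy value falls back to the framework default decay (hypothetical helper)."""
  kwargs = dict(learning_rate=lr, momentum=momentum, epsilon=epsilon)
  if isinstance(config_value, float):
    kwargs["decay"] = config_value
  return kwargs


print(pick_rmsprop_kwargs(0.95, lr=1e-3))  # explicit decay, as with a config entry rmsprop = 0.95
print(pick_rmsprop_kwargs(True, lr=1e-3))  # boolean flag, default decay is kept
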