Example No. 1
 def get(self, name=None):
     params = {} if self.params is None else self.params
     if self.opt_name == "Adam":
         if name is None:
             return AdamOptimizer(**params)
         else:
             return AdamOptimizer(name=name, **params)
     elif self.opt_name == "Adadelta":
         if name is None:
             return AdadeltaOptimizer(**params)
         else:
             return AdadeltaOptimizer(name=name, **params)
     else:
         raise NotImplementedError()
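For reference, these are the TF 1.x constructors the two branches above dispatch to; a minimal sketch assuming the tf.compat.v1 API (the learning rates are arbitrary placeholders):

import tensorflow.compat.v1 as tf

# Direct equivalents of what get() returns for each branch above.
adam = tf.train.AdamOptimizer(learning_rate=1e-3, name="adam_opt")
adadelta = tf.train.AdadeltaOptimizer(learning_rate=1.0, name="adadelta_opt")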
Example No. 2
  def test_tfoptimizer_iterations(self):
    with self.test_session():
      optimizer = keras.optimizers.TFOptimizer(AdamOptimizer(0.01))
      model = keras.models.Sequential()
      model.add(keras.layers.Dense(
          2, input_shape=(3,), kernel_constraint=keras.constraints.MaxNorm(1)))
      model.compile(loss='mean_squared_error', optimizer=optimizer)
      keras.backend.track_tf_optimizer(optimizer)
      self.assertEqual(keras.backend.get_value(model.optimizer.iterations), 0)

      model.fit(np.random.random((55, 3)),
                np.random.random((55, 2)),
                epochs=1,
                batch_size=5,
                verbose=0)
      self.assertEqual(keras.backend.get_value(model.optimizer.iterations), 11)

      if not context.executing_eagerly():
        # TODO(kathywu): investigate why training with an array input and
        # setting the argument steps_per_epoch does not work in eager mode.
        model.fit(np.random.random((20, 3)),
                  np.random.random((20, 2)),
                  steps_per_epoch=8,
                  verbose=0)
        self.assertEqual(
            keras.backend.get_value(model.optimizer.iterations), 19)
Example No. 3
        def _ModelFn(features, labels, mode):
            if is_training:
                logits_out = self._BuildGraph(features)
            else:
                graph_def = self._GetGraphDef(use_trt, batch_size, model_dir)
                logits_out = importer.import_graph_def(
                    graph_def,
                    input_map={INPUT_NODE_NAME: features},
                    return_elements=[OUTPUT_NODE_NAME + ':0'],
                    name='')[0]

            loss = losses.sparse_softmax_cross_entropy(labels=labels,
                                                       logits=logits_out)
            summary.scalar('loss', loss)

            classes_out = math_ops.argmax(logits_out,
                                          axis=1,
                                          name='classes_out')
            accuracy = metrics.accuracy(labels=labels,
                                        predictions=classes_out,
                                        name='acc_op')
            summary.scalar('accuracy', accuracy[1])

            if mode == ModeKeys.EVAL:
                return EstimatorSpec(mode,
                                     loss=loss,
                                     eval_metric_ops={'accuracy': accuracy})
            elif mode == ModeKeys.TRAIN:
                optimizer = AdamOptimizer(learning_rate=1e-2)
                train_op = optimizer.minimize(loss,
                                              global_step=get_global_step())
                return EstimatorSpec(mode, loss=loss, train_op=train_op)
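A minimal sketch of how an EstimatorSpec-returning model function like _ModelFn above is typically wired into tf.estimator.Estimator (TF 1.x). The original is a closure over its test class, so my_model_fn below is a simplified stand-in and the input pipeline is an assumption:

import numpy as np
import tensorflow.compat.v1 as tf

def my_model_fn(features, labels, mode):
    # Simplified stand-in for _ModelFn above: one dense layer trained with Adam.
    logits = tf.layers.dense(features, 10)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = tf.train.AdamOptimizer(learning_rate=1e-2).minimize(
            loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
    return tf.estimator.EstimatorSpec(mode, loss=loss)

def input_fn():
    # Toy data; shapes and sizes are placeholders.
    x = np.random.rand(64, 20).astype(np.float32)
    y = np.random.randint(0, 10, size=64).astype(np.int32)
    return tf.data.Dataset.from_tensor_slices((x, y)).batch(8)

estimator = tf.estimator.Estimator(model_fn=my_model_fn)
estimator.train(input_fn, steps=5)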
Example No. 4
 def test_TF_LearningRateScheduler_Adam(self):
   with self.test_session():
     with context.eager_mode():
       np.random.seed(1337)
       (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
           train_samples=TRAIN_SAMPLES,
           test_samples=TEST_SAMPLES,
           input_shape=(INPUT_DIM,),
           num_classes=NUM_CLASSES)
       y_test = keras.utils.to_categorical(y_test)
       y_train = keras.utils.to_categorical(y_train)
       model = keras.models.Sequential()
       model.add(
           keras.layers.Dense(
               NUM_HIDDEN, input_dim=INPUT_DIM, activation='relu'))
       model.add(keras.layers.Dense(NUM_CLASSES, activation='softmax'))
       model.compile(
           loss='categorical_crossentropy',
           optimizer=AdamOptimizer(),
           metrics=['accuracy'])
       cbks = [keras.callbacks.LearningRateScheduler(lambda x: 1. / (1. + x))]
       model.fit(
           x_train,
           y_train,
           batch_size=BATCH_SIZE,
           validation_data=(x_test, y_test),
           callbacks=cbks,
           epochs=5,
           verbose=0)
       opt_lr = model.optimizer.optimizer._lr
       self.assertLess(
           float(keras.backend.get_value(
               Variable(opt_lr))) - 0.2, keras.backend.epsilon())
Example No. 5
    def __init__(self):
        self.graph = None
        self.x = None
        self.y = None
        self.train_step = None
        self.global_step = None
        self.loss = None
        self.accuracy = None
        self.predict = None
        self.saver = None
        self.summary = None
        self.train_summary_writer = None
        self.valid_summary_writer = None
        self.test_summary_writer = None
        self.regularizer = None
        self.hidden_activation = tf.nn.elu
        self.optimizer = AdamOptimizer(learning_rate=init_learning_rate)

        self.run_dir = None
        self.checkpoint_path = None
        self.checkpoint_epoch_path = None
        self.log_train_dir = None
        self.log_valid_dir = None
        self.log_test_dir = None

        self.n_epochs = 40
        self.batch_size = 50
        self.n_batches = n_samples // self.batch_size
        self.continue_training = True
Example No. 6
def build_model(params):
    model = Sequential()
    if params['pretrained']:
        model.add(Embedding(params['vocab_size'], params['embedding_dim'], weights=[glove.custom_embedding_matrix],
                            input_length=params['max_answer_len'], trainable=False))
    else:
        model.add(Embedding(params['vocab_size'], params['embedding_dim'], input_length=params['max_answer_len']))

    model.add(Dropout(params['dropout']))
    if params['flatten']:
        model.add(Flatten())
        model.add(Reshape((1, params['embedding_dim'] * params['max_answer_len'])))
    if params['lstm_dim_2']:
        model.add(LSTM(params['lstm_dim_1'], return_sequences=True))
        model.add(LSTM(params['lstm_dim_2'], return_sequences=False))
    else:
        model.add(LSTM(params['lstm_dim_1'], return_sequences=False))
    model.add(Dropout(params['dropout']))
    model.add(Dense(1, activation="linear"))

    # compile the model
    optimizer = AdamOptimizer()
    model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['acc'])
    print(model.summary())
    return model
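A hypothetical params dictionary covering every key that build_model above reads; all values are placeholders, and 'pretrained': True would additionally require glove.custom_embedding_matrix to exist:

params = {
    'pretrained': False,      # True uses the frozen GloVe embedding branch
    'vocab_size': 10000,
    'embedding_dim': 100,
    'max_answer_len': 50,
    'dropout': 0.3,
    'flatten': False,         # True enables the Flatten + Reshape branch
    'lstm_dim_1': 64,
    'lstm_dim_2': None,       # falsy -> single LSTM layer
}
model = build_model(params)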
Example No. 7
    def test_tfoptimizer_iterations(self):
        with self.test_session():
            optimizer = keras.optimizers.TFOptimizer(AdamOptimizer(0.01))
            model = keras.models.Sequential()
            model.add(
                keras.layers.Dense(
                    2,
                    input_shape=(3, ),
                    kernel_constraint=keras.constraints.MaxNorm(1)))
            model.compile(loss='mean_squared_error', optimizer=optimizer)
            self.assertEqual(
                keras.backend.get_value(model.optimizer.iterations), 0)

            model.fit(np.random.random((55, 3)),
                      np.random.random((55, 2)),
                      epochs=1,
                      batch_size=5,
                      verbose=0)
            self.assertEqual(
                keras.backend.get_value(model.optimizer.iterations), 11)

            model.fit(np.random.random((20, 3)),
                      np.random.random((20, 2)),
                      steps_per_epoch=8,
                      verbose=0)
            self.assertEqual(
                keras.backend.get_value(model.optimizer.iterations), 19)
Example No. 8
 def test_tf_optimizer(self):
     if context.executing_eagerly():
         self.skipTest('v1 optimizer does not run in eager mode')
     optimizer = optimizer_v1.TFOptimizer(AdamOptimizer(0.01))
     model = keras.models.Sequential()
     model.add(
         keras.layers.Dense(2,
                            input_shape=(3, ),
                            kernel_constraint=keras.constraints.MaxNorm(1)))
     # This is possible
     model.compile(loss='mean_squared_error',
                   optimizer=optimizer,
                   run_eagerly=testing_utils.should_run_eagerly())
     keras.backend.track_tf_optimizer(optimizer)
     model.fit(np.random.random((5, 3)),
               np.random.random((5, 2)),
               epochs=1,
               batch_size=5,
               verbose=0)
     # not supported
     with self.assertRaises(NotImplementedError):
         _ = optimizer.weights
     with self.assertRaises(NotImplementedError):
         optimizer.get_config()
     with self.assertRaises(NotImplementedError):
         optimizer.from_config(None)
Example No. 9
    def test_tf_optimizer_iterations(self):
        if context.executing_eagerly():
            self.skipTest('v1 optimizer does not run in eager mode')
        with self.cached_session():
            optimizer = optimizer_v1.TFOptimizer(AdamOptimizer(0.01))
            model = keras.models.Sequential()
            model.add(
                keras.layers.Dense(
                    2,
                    input_shape=(3, ),
                    kernel_constraint=keras.constraints.MaxNorm(1)))
            model.compile(loss='mean_squared_error',
                          optimizer=optimizer,
                          run_eagerly=testing_utils.should_run_eagerly())
            keras.backend.track_tf_optimizer(optimizer)
            self.assertEqual(
                keras.backend.get_value(model.optimizer.iterations), 0)

            model.fit(np.random.random((55, 3)),
                      np.random.random((55, 2)),
                      epochs=1,
                      batch_size=5,
                      verbose=0)
            self.assertEqual(
                keras.backend.get_value(model.optimizer.iterations), 11)
Example No. 10
 def test_optimizer_garbage_collection(self):
   graph = ops.Graph()
   with graph.as_default():
     optimizer = keras.optimizers.TFOptimizer(AdamOptimizer(0.01))
     keras.backend.track_tf_optimizer(optimizer)
     optimizer_weak = weakref.ref(optimizer)
   graph_weak = weakref.ref(graph)
   del graph, optimizer
   gc.collect()
   # Check that the weak references are dead now.
   self.assertIs(graph_weak(), None)
   self.assertIs(optimizer_weak(), None)
Example No. 11
    def get(self, name=None, lr_decay=None, global_step=None):
        params = {} if self.params is None else self.params.copy()
        with tf.variable_scope('opt'):
            lr_tensor = tf.get_variable('lr',
                                        dtype=tf.float32,
                                        initializer=tf.constant(
                                            params['learning_rate']),
                                        trainable=False)
            if lr_decay is not None:
                params['learning_rate'] = lr_decay(
                    learning_rate=params['learning_rate'],
                    global_step=global_step,
                    name='lr_decay')

            self.lr_op = lr_tensor if lr_decay is None else lr_tensor.assign(
                params['learning_rate'])
            params['learning_rate'] = self.lr_op
        if self.opt_name == "Adam":
            if name is None:
                return AdamOptimizer(**params)
            else:
                return AdamOptimizer(name=name, **params)
        elif self.opt_name == "Adadelta":
            if name is None:
                return AdadeltaOptimizer(**params)
            else:
                return AdadeltaOptimizer(name=name, **params)
        elif self.opt_name == "RMSprop":
            if name is None:
                return RMSPropOptimizer(**params)
            else:
                return RMSPropOptimizer(name=name, **params)
        elif self.opt_name == "Momentum":
            if name is None:
                return MomentumOptimizer(**params)
            else:
                return MomentumOptimizer(name=name, **params)
        else:
            raise NotImplementedError()
Example No. 12
 def test_optimizer_garbage_collection(self):
     if context.executing_eagerly():
         self.skipTest('v1 optimizer does not run in eager mode')
     graph = ops.Graph()
     with graph.as_default():
         optimizer = optimizer_v1.TFOptimizer(AdamOptimizer(0.01))
         keras.backend.track_tf_optimizer(optimizer)
         optimizer_weak = weakref.ref(optimizer)
     graph_weak = weakref.ref(graph)
     del graph, optimizer
     gc.collect()
     # Check that the weak references are dead now.
     self.assertIs(graph_weak(), None)
     self.assertIs(optimizer_weak(), None)
Example No. 13
    def add_optimizer(self):
        global_step = tf.Variable(0, trainable=False)

        learning_rate = tf.train.exponential_decay(0.00001,
                                                   global_step,
                                                   10,
                                                   0.1,
                                                   staircase=True)

        optimizer = AdamOptimizer(learning_rate)
        gradients = optimizer.compute_gradients(self.total_loss)

        for grad, var in gradients:
            if grad is not None:
                tf.histogram_summary(var.op.name + '/gradients', grad)
        apply_gradient_op = optimizer.apply_gradients(gradients, global_step)
        return apply_gradient_op
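With staircase=True, the schedule above evaluates to 1e-5 * 0.1 ** (global_step // 10); a small plain-Python sketch of the same computation (the step values are arbitrary):

# decayed_lr = base_lr * decay_rate ** floor(step / decay_steps), per tf.train.exponential_decay
base_lr, decay_steps, decay_rate = 1e-5, 10, 0.1
for step in (0, 9, 10, 25):
    print(step, base_lr * decay_rate ** (step // decay_steps))
# 0 -> 1e-05, 9 -> 1e-05, 10 -> 1e-06, 25 -> 1e-07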
Example No. 14
 def test_mixed_precision_loss_scale_optimizer(self):
     if context.executing_eagerly():
         self.skipTest('v1 optimizer does not run in eager mode')
     optimizer = MixedPrecisionLossScaleOptimizer(AdamOptimizer(),
                                                  'dynamic')
     model = keras.models.Sequential()
     model.add(
         keras.layers.Dense(2,
                            input_shape=(3, ),
                            kernel_constraint=keras.constraints.MaxNorm(1)))
     model.compile(loss='mean_squared_error',
                   optimizer=optimizer,
                   run_eagerly=testing_utils.should_run_eagerly())
     model.fit(np.random.random((5, 3)),
               np.random.random((5, 2)),
               epochs=1,
               batch_size=5,
               verbose=0)
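The test above constructs the loss-scale wrapper by its bare name; a minimal standalone sketch of the same pattern outside Keras, assuming the wrapper is available as tf.train.experimental.MixedPrecisionLossScaleOptimizer in TF 1.x:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# Wrap a regular optimizer with dynamic loss scaling and minimize a toy loss.
x = tf.Variable([1.0, 2.0])
loss = tf.reduce_sum(tf.square(x))
opt = tf.train.experimental.MixedPrecisionLossScaleOptimizer(
    tf.train.AdamOptimizer(), 'dynamic')
train_op = opt.minimize(loss)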
Example No. 15
 def test_tfoptimizer(self):
   optimizer = keras.optimizers.TFOptimizer(AdamOptimizer(0.01))
   model = keras.models.Sequential()
   model.add(keras.layers.Dense(
       2, input_shape=(3,), kernel_constraint=keras.constraints.MaxNorm(1)))
   # This is possible
   model.compile(loss='mean_squared_error', optimizer=optimizer)
   keras.backend.track_tf_optimizer(optimizer)
   model.fit(np.random.random((5, 3)),
             np.random.random((5, 2)),
             epochs=1,
             batch_size=5,
             verbose=0)
   # not supported
   with self.assertRaises(NotImplementedError):
     _ = optimizer.weights
   with self.assertRaises(NotImplementedError):
     optimizer.get_config()
   with self.assertRaises(NotImplementedError):
     optimizer.from_config(None)
Example No. 16
def get_conv_classifier():
    n_classes = 5
    feature_columns = [layers.real_valued_column("", dimension=3)]

    # learning_rate = 1.0
    # optimizer = AdagradOptimizer(learning_rate)
    #
    # learning_rate = 1.0
    # optimizer = AdadeltaOptimizer(learning_rate=learning_rate)

    # ~ 62.55%
    learning_rate = 0.01
    optimizer = AdamOptimizer(learning_rate, epsilon=0.1)

    # learning_rate = 0.05
    # optimizer = GradientDescentOptimizer(learning_rate)

    # learning_rate = 0.1
    # optimizer = RMSPropOptimizer(learning_rate, momentum=0.1)

    # learning_rate = 0.1
    # optimizer = FtrlOptimizer(learning_rate)

    return SKCompat(
        Estimator(
            model_fn=get_conv_model,
            params={
                'head':
                head_lib._multi_class_head(  # pylint: disable=protected-access
                    n_classes,
                    enable_centered_bias=False),
                'feature_columns':
                feature_columns,
                'activation_fn':
                tf.nn.relu,
                'learning_rate':
                learning_rate,
                'optimizer':
                optimizer
            },
            model_dir='saved_model'))
Example No. 17
    def __init__(self, inputs, network, check_point="dqn.ckpt"):
        self.saver = tf.train.Saver()
        self.summary_writer = tf.summary.FileWriter("/tmp/dqn")
        self.inputs = inputs
        self.network = network
        self.targets = tf.placeholder(tf.float32, shape=(None, self.output_shape[1]))
        summary_names = ["actions", "loss", "exploration_rate", "fruits_eaten", "timesteps_survived"]

        self.summary_placeholders = {name: tf.placeholder(dtype=tf.float32) for name in summary_names}

        # self.summary_placeholders = [tf.placeholder(dtype=summary_variables[i].dtype)
        #                              for i in range(len(summary_names))]

        # summary_ops = [tf.assign(summary_variables[i],self.summary_placeholders[i])
        #                for i in range(len(summary_names))

        summary = [tf.summary.histogram(summary_names[i], self.summary_placeholders[summary_names[i]]) for i in
                   range(1)]
        summary += [tf.summary.scalar(summary_names[i], self.summary_placeholders[summary_names[i]]) for i in
                    range(1, len(summary_names))]

        self.summary_ops = tf.summary.merge_all()

        self.loss = tf.losses.mean_squared_error(self.network, self.targets)
        optimizer = AdamOptimizer()
        self.train_step = optimizer.minimize(loss=self.loss)
        #
        # with tf.colocate_with(global_step):
        #     self.update_op = tf.assign_add(global_step, 1)

        self.sess = tf.Session()

        self.summary_writer.add_graph(tf.get_default_graph())

        with self.sess.as_default():
            tf.global_variables_initializer().run()

        if os.path.exists(check_point):
            self.saver.restore(self.sess, check_point)
Example No. 18
    def __init__(self, options, data_train, session=None):
        self.statistics = DBQAStatistics.from_data(data_train)
        self.options = options

        self.optimizer = AdamOptimizer()
        self.global_step = tf.train.get_or_create_global_step()

        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

        self.question_2d_pl = tf.placeholder(tf.int32, (None, None))
        self.question_bigram_2d_pl = tf.placeholder(tf.int32, (None, None))
        self.answer_2d_pl = tf.placeholder(tf.int32, (None, None))
        self.answer_bigram_2d_pl = tf.placeholder(tf.int32, (None, None))
        self.wrong_answer_2d_pl = tf.placeholder(tf.int32, (None, None))
        self.wrong_answer_bigram_2d_pl = tf.placeholder(tf.int32, (None, None))

        self.network = PairwiseSimilarity(options, self.statistics)
        self.loss, self.accuracy = self.network.get_loss(
            self.question_2d_pl,
            self.question_bigram_2d_pl,
            self.answer_2d_pl,
            self.answer_bigram_2d_pl,
            self.wrong_answer_2d_pl,
            self.wrong_answer_bigram_2d_pl,
        )

        self.similarity = self.network.get_similarity(
            self.question_2d_pl, self.question_bigram_2d_pl, self.answer_2d_pl,
            self.answer_bigram_2d_pl)

        self.optimize_op = self.optimizer.minimize(
            self.loss, global_step=self.global_step)

        if session is None:
            self.session = self.create_session()
            self.session.run(tf.global_variables_initializer())
        else:
            self.session = session
        self.random = Random(42)
Example No. 19
    def _add_optimizer(self):
        self.optimizer = AdamOptimizer()

        self.final_train_loss = self.main_train_loss

        with tf.variable_scope('l2_regularization'):
            # Select the variables to regularize by iterating over all trainable
            # variables and checking membership in the set below; there is no
            # obvious way to look a variable up directly by its absolute name.
            l2_regularized_names = {
                'encoder/bidirectional_rnn/fw/gru_cell/gates/weights:0'
                # If used, add additional complete variables names
            }
            l2_regularized = [
                variable for variable in tf.trainable_variables()
                if variable.name in l2_regularized_names
            ]

            l2_loss = 0.001 * tf.add_n(
                [tf.nn.l2_loss(variable) for variable in l2_regularized])

        gradients = self.optimizer.compute_gradients(self.final_train_loss)

        with tf.variable_scope('gradient_clipping'):

            def clip_gradient(gradient, variable):
                # Only clip normal tensors, IndexedSlices gives warning otherwise
                if isinstance(gradient, tf.Tensor):
                    gradient = tf.clip_by_norm(gradient, 10)
                return gradient, variable

            gradients = [
                clip_gradient(gradient, variable)
                for gradient, variable in gradients
            ]
        self.minimize_operation = self.optimizer.apply_gradients(
            gradients, global_step=self.global_step)
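The method above clips each gradient tensor separately with tf.clip_by_norm. A common alternative, sketched here as an assumption rather than as part of the original class, is to clip the whole gradient list by its global norm:

# Alternative sketch (TF 1.x): global-norm clipping instead of per-tensor clipping.
# optimizer, final_train_loss and global_step stand for the corresponding
# attributes used in the method above.
grads_and_vars = optimizer.compute_gradients(final_train_loss)
grads, variables = zip(*grads_and_vars)
clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm=10.0)
minimize_operation = optimizer.apply_gradients(zip(clipped_grads, variables),
                                               global_step=global_step)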
Example No. 20
    def __init__(self, args):
        self.inputs = tf.placeholder(
            tf.int32, shape=[args.batch_size, args.sequence_length])
        self.targets = tf.placeholder(
            tf.int32, shape=[args.batch_size, args.sequence_length])
        with tf.name_scope("embedding"):
            embedding_size = int(sqrt(args.vocab_source_size) + 1)
            embedding = tf.get_variable(
                'embedding',
                shape=[args.vocab_source_size,
                       embedding_size],  #embed them in a small space
                initializer=tf.contrib.layers.xavier_initializer())
            embedded = tf.nn.embedding_lookup(embedding, self.inputs)
            #tensor of shape [batch_size*sequence_length*embedding_size]
            embedded_inputs = tf.unpack(embedded, axis=0)
            #assert embedded_inputs[0].get_shape() == (args.batch_size,args.sequence_length,embedding_size)

            #reshape it to a list of timesteps
            embedded_inputs_by_timestamp = [
                tf.reshape(i, (args.batch_size, embedding_size))
                for i in tf.split(1, args.sequence_length, embedded)
            ]
            assert len(embedded_inputs_by_timestamp) == args.sequence_length
            for timestep in embedded_inputs_by_timestamp:
                assert timestep.get_shape() == (args.batch_size,
                                                embedding_size)

        with tf.variable_scope("bidi_rnn") as bidi_scope:
            cell = LSTM_factory(args.hidden_size,
                                args.num_layers,
                                dropout=args.dropout)
            outputs, fwd_state, bwd_state = tf.nn.bidirectional_rnn(
                cell_fw=cell,
                cell_bw=cell,
                inputs=embedded_inputs_by_timestamp,
                dtype=tf.float32)

        with tf.variable_scope("decoder_rnn"):
            decoder_cell = LSTM_factory(args.hidden_size,
                                        args.num_layers * 2,
                                        dropout=args.dropout)
            decoder_cell = AttentionCellWrapper(cell=decoder_cell,
                                                attn_length=args.hidden_size,
                                                state_is_tuple=True)
            final_outputs, state = tf.nn.rnn(cell=decoder_cell,
                                             inputs=outputs,
                                             dtype=tf.float32)

        with tf.variable_scope("logits") as logits_scope:
            # Reshaping to apply the same weights over the timesteps
            outputs = tf.pack(final_outputs)
            outputs = tf.transpose(outputs, [1, 0, 2])

            logits = tf.contrib.layers.fully_connected(
                inputs=outputs,
                num_outputs=args.vocab_target_size,
                activation_fn=None,
                weights_initializer=tf.contrib.layers.xavier_initializer(),
                scope=logits_scope)

            self.logits = logits

        with tf.variable_scope("loss"):
            #flat_targets = tf.reshape(self.targets, [-1])
            #flat_logits = tf.reshape(logits, [-1, args.vocab_target_size])
            assert logits.get_shape()[:-1] == self.targets.get_shape(
            ), 'l = {0} t = {1}'.format(logits.get_shape(),
                                        self.targets.get_shape())
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits, self.targets)

            batch_loss = tf.reduce_sum(losses, name="batch_loss")
            tf.contrib.losses.add_loss(batch_loss)
            total_loss = tf.contrib.losses.get_total_loss()

            # Add summaries.
            tf.scalar_summary("batch_loss", batch_loss)
            tf.scalar_summary("total_loss", total_loss)

            self.total_loss = total_loss
            self.batch_loss = batch_loss
            self.target_cross_entropy_losses = losses  # Used in evaluation.

        with tf.name_scope("optimization"):
            opt = AdamOptimizer(learning_rate=args.learning_rate)
            gvs = opt.compute_gradients(self.batch_loss)
            capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var)
                          for grad, var in gvs]
            train_op = opt.apply_gradients(capped_gvs)

        for var in tf.trainable_variables():
            tf.histogram_summary(var.op.name, var)

        for grad, var in gvs:

            if grad is not None:
                print(capped_gvs)
                tf.histogram_summary(
                    var.op.name + '/gradients',
                    grad,
                )

        with tf.name_scope("tensors"):
            self.train_op = train_op
            self.logits = logits
            self.total_loss = total_loss
            self.summaries = tf.merge_all_summaries()
Example No. 21
d.load_embeddings(args.emb_type, args.word2vec_file, args.glove_file,
                           args.fasttext_file, args.custom_file, logger)
d.batch = d.batch_generator(args.mb)
m = bayesian_emb_model(d, d.K, sess, dir_name)
sigmas_list = list()


# TRAINING
n_iters, n_batches = get_n_iters(args.n_epochs, args.mb, len(d.word_target))
logger.debug('init training number of iters '+str(n_iters)+' and batches '+str(n_batches))
#kl_scaling_weights = get_kl_weights(n_batches)
learning_rates = get_learning_rates(args.clr_type, n_iters, args.clr_cycles, args.base_lr, args.max_lr, args.lr)
m.inference.initialize(n_samples=1, n_iter=n_iters, logdir=m.logdir,
                       scale={m.y_pos: n_batches, m.y_neg: n_batches / args.ns},
                       kl_scaling={m.y_pos: n_batches, m.y_neg: n_batches / args.ns},
                       optimizer=AdamOptimizer(learning_rate=m.learning_rate_placeholder)
                       )
early_stopping = EarlyStopping(patience=args.patience)
init = tf.global_variables_initializer()
sess.run(init)
logger.debug('....starting training')
iteration = 0
for epoch in range(args.n_epochs):
    for batch in range(n_batches):
        info_dict = m.inference.update(feed_dict=d.feed(m.target_placeholder,
                                                        m.context_placeholder,
                                                        m.labels_placeholder,
                                                        m.ones_placeholder,
                                                        m.zeros_placeholder,
                                                        m.learning_rate_placeholder,
                                                        args.mb,
Example No. 22
    def __init__(self, **optimizer_kwargs):
        self._model = optimizer_kwargs["model"]

        self._individual_learning_rate = optimizer_kwargs[
            "individual_learning_rate"]

        self._learning_rate = optimizer_kwargs["learning_rate"]
        self._rescale_learning_rate = optimizer_kwargs["rescale_learning_rate"]
        self._d_p = None
        self._n_reg = None

        post_optimizer = optimizer_kwargs[
            "post_optimizer"] if "post_optimizer" in optimizer_kwargs else None
        if post_optimizer is None:
            self._post_optimizer = super()

        elif post_optimizer == "Momentum":
            self._post_optimizer = MomentumOptimizer(
                learning_rate=optimizer_kwargs["learning_rate"],
                momentum=0.95,
                use_locking=False,
                name="MomentumOptimizer")

        elif post_optimizer == "RMSProp":
            self._post_optimizer = RMSPropOptimizer(
                learning_rate=optimizer_kwargs["learning_rate"],
                decay=0.9,
                epsilon=1e-5,
                use_locking=False,
                name="RMSPropOptimizer")

        elif post_optimizer == "Adam":
            self._post_optimizer = AdamOptimizer(
                learning_rate=optimizer_kwargs["learning_rate"],
                beta1=0.9,
                beta2=0.999,
                epsilon=1e-8,
                use_locking=False,
                name="AdamOptimizer")
        elif post_optimizer == "Nadam":
            self._post_optimizer = NadamOptimizer(
                learning_rate=optimizer_kwargs["learning_rate"],
                beta1=0.9,
                beta2=0.999,
                epsilon=1e-8,
                use_locking=False,
                name="NadamOptimizer")

        elif post_optimizer == "Nesterov":
            self._post_optimizer = MomentumOptimizer(
                learning_rate=optimizer_kwargs["learning_rate"],
                momentum=0.95,
                use_locking=False,
                use_nesterov=True,
                name="NesterovMomentumOptimizer")
        elif post_optimizer == "NesterovConst":
            self._post_optimizer = NesterovConst(
                model=self._model,
                learning_rate=optimizer_kwargs["learning_rate"],
                use_locking=False,
                name="NesterovConstOptimizer")

        else:
            raise Exception(
                "There is no such post optimizer defined. Must be one of: None, "
                "Adam, Nadam, Momentum, Nesterov, NesterovConst, RMSProp")

        super().__init__(self._learning_rate)
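A hypothetical optimizer_kwargs dictionary covering the keys the constructor above reads; the class name CustomOptimizer and all values are placeholders, not part of the original snippet:

optimizer_kwargs = {
    'model': None,                   # the real constructor expects a model object here
    'individual_learning_rate': 1.0,
    'learning_rate': 1e-3,
    'rescale_learning_rate': 1.0,
    'post_optimizer': 'Adam',        # or None, Momentum, RMSProp, Nadam, Nesterov, NesterovConst
}
opt = CustomOptimizer(**optimizer_kwargs)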
Example No. 23
    def fit(self, dataset):
        self.w = self.w_hat

        if self.train_type == 'Center':
            self.torque = self.iteration
            self.w = 0

        x = tf.placeholder(tf.float32, [None, 784])
        # dynamically reshape the input
        x_shaped = tf.reshape(x, [-1, 28, 28, 1])
        # now declare the output data placeholder - 10 digits
        y = tf.placeholder(tf.float32, [None, 10])
        # create some convolutional layers
        layer1 = create_new_conv_layer(x_shaped,
                                       self.w[:2],
                                       self.layer1_size[0],
                                       self.layer1_size[1],
                                       self.layer1_size[2],
                                       self.layer1_size[3],
                                       name='layer1')

        layer2 = create_new_conv_layer(layer1,
                                       self.w[2:4],
                                       self.layer2_size[0],
                                       self.layer2_size[1],
                                       self.layer2_size[2],
                                       self.layer2_size[3],
                                       name='layer2')

        flattened_parameter_size = self.flattend_size(
        )**2 * self.layer2_size[1]
        flattened = tf.reshape(layer2, [-1, flattened_parameter_size])

        # setup some weights and bias values for this layer, then activate with ReLU

        wd1 = tf.Variable(self.w[4].reshape(flattened_parameter_size,
                                            self.flatten1_size),
                          name='wd1')
        bd1 = tf.Variable(self.w[5], name='bd1')
        dense_layer1 = tf.matmul(flattened, wd1) + bd1
        dense_layer1 = tf.nn.relu(dense_layer1)

        # another layer with softmax activations

        wd2 = tf.Variable(self.w[6].reshape(self.flatten1_size,
                                            self.flatten2_size),
                          name='wd2')
        bd2 = tf.Variable(self.w[7], name='bd2')
        dense_layer2 = tf.matmul(dense_layer1, wd2) + bd2

        y_ = tf.nn.softmax(dense_layer2)
        #loss is cross_entropy loss
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=dense_layer2,
                                                    labels=y))

        #metrics
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        #Optimizer initialization
        optimizer_gradient = AdamOptimizer_Bing(
            learning_rate=self.learning_rate).minimize(cross_entropy)
        optimizer = AdamOptimizer(
            learning_rate=self.learning_rate).minimize(cross_entropy)

        # setup the initialisation operator
        init_op = tf.global_variables_initializer()
        grad = []
        with tf.Session() as sess:
            # initialise the variables
            sess.run(init_op)
            total_batch = int(len(dataset.train.labels) / self.batch_size)
            count = 0
            for epoch in range(self.torque):
                avg_cost = 0
                self.t += 1
                count += 1

                if count < self.torque:
                    for i in range(total_batch):
                        batch_x, batch_y = dataset.train.next_batch(
                            batch_size=self.batch_size)
                        _, c = sess.run([optimizer, cross_entropy],
                                        feed_dict={
                                            x: batch_x,
                                            y: batch_y
                                        })
                        avg_cost += c / total_batch

                elif count == self.torque:
                    '''
                    self.grad is saved for the beta computation: at update time t it
                    holds the gradient of the local loss w.r.t. the local parameters.
                    '''

                    for i in range(total_batch):
                        batch_x, batch_y = dataset.train.next_batch(
                            batch_size=self.batch_size)
                        g, c = sess.run([optimizer_gradient, cross_entropy],
                                        feed_dict={
                                            x: batch_x,
                                            y: batch_y
                                        })
                        #g[1] is grad_var list
                        gradient_temp = batch_gradient_collector(g[1])
                        grad.append(gradient_temp)
                        avg_cost += c / total_batch

                    self.w = batch_parameter_collector(g[1])
                    #Sum up gradients from each batch
                    self.grad = np.array(grad).sum(axis=0)

                test_acc = sess.run(accuracy,
                                    feed_dict={
                                        x: dataset.test.images,
                                        y: dataset.test.labels
                                    })
                self.history.append([avg_cost, test_acc, str(self.t)])

            return self
Example No. 24
                                          dtype=tf.int32),
                         batch_sz,
                         name='accuracy')
    tf.summary.scalar('accuracy', accuracy)

    from tflearn.objectives import categorical_crossentropy

    loss = categorical_crossentropy(softmax_class_op, selected_gesture)
    tf.summary.scalar('classification_loss', loss)

with tf.variable_scope('optimize'):
    lr_op = tf.Variable(5e-4, False, dtype=tf.float32)
    decay_lr_op = tf.assign(lr_op, lr_op * (1 - 1e-4))
    tf.summary.scalar('learning_rate', lr_op)
    with tf.control_dependencies([decay_lr_op]):
        train_step = AdamOptimizer(learning_rate=lr_op).minimize(loss)

display_q = queue.Queue(10)


def display():
    while True:
        softmax_class, display_states = display_q.get()
        print("Prediction: ", np.max(softmax_class, axis=1))
        for states in np.transpose(display_states, axes=[1, 0, 2]):
            env.step(states)
            env.render()
            sleep(.2 / (display_q.qsize() + 1))
        env.reset()

Example No. 25
    # Build Model
    model = Sequential()
    model.add(Embedding(len(vocab), args.embedding_size, input_length=max_answer_len))
    model.add(Dropout(args.dropout))
    if args.flatten:
        model.add(Flatten())
        model.add(Reshape((1, args.embedding_size * max_answer_len)))
    if args.lstm_dim_2:
        model.add(LSTM(args.lstm_dim_1, return_sequences=True))
        model.add(LSTM(args.lstm_dim_2, return_sequences=False))
    else:
        model.add(LSTM(args.lstm_dim_1, return_sequences=False))
    model.add(Dropout(args.dropout))
    model.add(Dense(1, activation="linear"))
    optimizer = AdamOptimizer()
    model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['acc'])


    # Train the model
    model.fit(train_x, train_y, epochs=args.epochs, verbose=0)
    
    # Validate
    test_y = test_data.iloc[:, 0]
    test_x = test_data.iloc[:, 1:]
    score = model.evaluate(test_x, test_y, verbose=0)
    print(f"Validation_loss:{score[0]};Validation_accuracy:{score[1]};")

    ## --- End of your code  --- ##

    # Save the trained model
Example No. 26
            def __init__(self, word_vector_size):
                tf.reset_default_graph()
                self.vector_size = word_vector_size

                self.vectors = tf.placeholder(tf.float32,
                                              shape=(None, None,
                                                     word_vector_size))
                self.user_terms = tf.placeholder(tf.float32,
                                                 shape=(None, None))
                self.padding = tf.placeholder(tf.float32, shape=(None, None))
                self.output = tf.placeholder(tf.float32, shape=(None, 1))
                self.dropout_rate = tf.placeholder(tf.float32)

                xavier = tf.contrib.layers.xavier_initializer()

                # 50 tri-gram, 50 4-gram and 50 5-gram
                filter_tri = tf.Variable(xavier((1, 3, word_vector_size, 50)),
                                         name="weight")  #
                bias_tri = tf.Variable(tf.zeros((1, 50)), name="bias")  #
                self.f3 = filter_tri
                self.b3 = bias_tri

                filter_4 = tf.Variable(xavier((1, 4, word_vector_size, 50)),
                                       name="weight")  #
                bias_4 = tf.Variable(tf.zeros((1, 50)), name="bias")
                self.f4 = filter_4
                self.b4 = bias_4

                filter_5 = tf.Variable(xavier((1, 5, word_vector_size, 50)),
                                       name="weight")  #
                bias_5 = tf.Variable(tf.zeros((1, 50)), name="bias")
                self.f5 = filter_5
                self.b5 = bias_5

                with tf.name_scope("relevance"):
                    hidden = 150
                    self.relevance_weight = tf.Variable(0.01 * xavier(
                        (hidden, num_classes)))
                    self.relevance_bias = tf.Variable(0.0 * xavier(
                        (1, num_classes)))

                rel, pre_max_true_dropped, pre_max_sum = self.forward(
                    self.vectors)
                self.relevance = rel

                ut = tf.expand_dims(self.user_terms, 2)  # NWC
                rel_masked, pre_max_true_masked_dropped, _ = self.forward(
                    self.vectors * ut)
                self.rel_masked = rel_masked

                self.pre_max_sum = pre_max_sum
                self.get_attribution()

                prediction_error = -tf.reduce_sum(
                    tf.one_hot(tf.cast(self.output, tf.int32), num_classes) *
                    tf.log(rel + 10**-5, name="log2rel"))

                heads = []
                for att in self.attributions:
                    heads.append(
                        tf.reduce_sum(tf.multiply(att, self.user_terms),
                                      axis=1))
                heads_all = tf.stack(heads)
                self.h = heads_all
                self.a = tf.stack(self.attributions)
                # pos_heads =
                # neg_heads = tf.reduce_sum(tf.multiply(self.neg_attribution, self.user_terms), axis=1)

                misattribution_error = 0.0
                corrective_error = 0.0
                att_reg = 0.0

                if use_attribution:
                    misattribution_error += (
                        self.h[tf.cast(self.output[0][0], tf.int32)][0] -
                        0.9)**2
                    att_reg = 0
                    for att in self.attributions:
                        att_reg += tf.reduce_sum(
                            tf.nn.relu(att - att_max_value))

                    corrective_error = -tf.reduce_sum(
                        tf.one_hot(tf.cast(self.output, tf.int32), num_classes)
                        * tf.log(rel_masked + 10**-5, name="log2rel"))

                self.error = (
                    prediction_error +
                    tf.sign(tf.reduce_sum(self.user_terms)) *
                    (misattribution_error + corrective_error + att_reg))

                self.opt = AdamOptimizer()
                self.optimizer = self.opt.minimize(self.error)

                self.sess = tf.Session()
                self.sess.run(tf.global_variables_initializer())
                self.training = False
Example No. 27
    )

    n_features = 1001
    n_classes = 101
    batch_size = 32
    val_batch_size = 256

    tree = SoftDecisionTree(max_depth=6,
                            n_features=n_features,
                            n_classes=n_classes,
                            max_leafs=None)
    tree.build_tree()

    # optimizer
    optimizer = AdamOptimizer(learning_rate=0.001,
                              beta1=0.9,
                              beta2=0.999,
                              epsilon=1e-08).minimize(tree.loss)

    # Saving the model
    # saver = tf.train.Saver()

    # Initialize the variables (i.e. assign their default value)
    init = global_variables_initializer()

    EPOCHS = 1000
    TOTAL_BATCH = 16
    display_step = 100
    with tf.compat.v1.Session() as sess:
        sess.run(init)
        t0 = time.time()
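        # NOTE: the original listing is cut off here. What follows is a minimal,
        # hypothetical continuation of the training loop; the batch source
        # next_train_batch() and the placeholder names tree.X / tree.y are
        # assumptions, not part of the original code.
        for epoch in range(EPOCHS):
            for _ in range(TOTAL_BATCH):
                batch_x, batch_y = next_train_batch(batch_size)
                _, loss_val = sess.run([optimizer, tree.loss],
                                       feed_dict={tree.X: batch_x, tree.y: batch_y})
            if epoch % display_step == 0:
                print('epoch %d  loss %.4f  (%.1fs)' % (epoch, loss_val, time.time() - t0))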
Example No. 28
            def __init__(self, word_vector_size):
                tf.reset_default_graph()
                self.vector_size = word_vector_size

                self.vectors = tf.placeholder(tf.float32,
                                              shape=(None, None,
                                                     word_vector_size))
                self.user_terms = tf.placeholder(tf.float32,
                                                 shape=(None, None))
                self.padding = tf.placeholder(tf.float32, shape=(None, None))
                self.output = tf.placeholder(tf.float32, shape=(None, 1))
                self.dropout_rate = tf.placeholder(tf.float32)

                xavier = tf.contrib.layers.xavier_initializer()

                # 50 tri-gram, 50 4-gram and 50 5-gram
                filter_tri = tf.Variable(xavier((1, 3, word_vector_size, 50)),
                                         name="weight")  #
                bias_tri = tf.Variable(tf.zeros((1, 50)), name="bias")  #
                self.f3 = filter_tri
                self.b3 = bias_tri

                filter_4 = tf.Variable(xavier((1, 4, word_vector_size, 50)),
                                       name="weight")  #
                bias_4 = tf.Variable(tf.zeros((1, 50)), name="bias")
                self.f4 = filter_4
                self.b4 = bias_4

                filter_5 = tf.Variable(xavier((1, 5, word_vector_size, 50)),
                                       name="weight")  #
                bias_5 = tf.Variable(tf.zeros((1, 50)), name="bias")
                self.f5 = filter_5
                self.b5 = bias_5

                with tf.name_scope("relevance"):
                    hidden = 150
                    self.relevance_weight = tf.Variable(0.01 * xavier(
                        (hidden, 2)))
                    self.relevance_bias = tf.Variable(0.0 * xavier((1, 2)))
                    self.relevance_attention_weight = tf.Variable(
                        0.01 * xavier((100, 2)))
                    self.relevance_attention_bias = tf.Variable(0.0 * xavier(
                        (1, 2)))

                rel, pre_max_true_dropped, pre_max_sum = self.forward(
                    self.vectors)
                self.relevance = rel[:, 1]

                ut = tf.expand_dims(self.user_terms, 2)  # NWC
                rel_masked, pre_max_true_masked_dropped, _ = self.forward(
                    self.vectors * ut)
                self.rel_masked = rel_masked

                self.pre_max_sum = pre_max_sum
                self.get_attribution()

                prediction_error = -tf.reduce_sum(
                    (self.output * tf.log(rel[:, 1] + 10**-5, name="log2rel") +
                     (1 - self.output) *
                     tf.log(rel[:, 0] + 10**-5, name="log3rel")))

                pos_heads = tf.reduce_sum(tf.multiply(self.pos_attribution,
                                                      self.user_terms),
                                          axis=1)
                neg_heads = tf.reduce_sum(tf.multiply(self.neg_attribution,
                                                      self.user_terms),
                                          axis=1)

                misattribution_error = 0.0
                corrective_error = 0.0
                att_reg = 0.0

                if use_attribution:
                    misattribution_error += tf.reduce_sum(
                        self.output * (pos_heads - 0.9)**2 +
                        (1 - self.output) * (neg_heads - 0.9)**2)
                    att_reg = tf.reduce_sum(
                        self.output *
                        tf.nn.relu(self.pos_attribution - att_max_value) +
                        (1 - self.output) *
                        tf.nn.relu(self.neg_attribution - att_max_value))

                    corrective_error = -tf.reduce_sum(
                        (self.output *
                         tf.log(rel_masked[:, 1] + 10**-5, name="log2rel2") +
                         (1 - self.output) *
                         tf.log(rel_masked[:, 0] + 10**-5, name="log3rel2")))

                self.error = (
                    prediction_error +
                    tf.sign(tf.reduce_sum(self.user_terms)) *
                    (misattribution_error + corrective_error + att_reg))

                self.opt = AdamOptimizer()
                self.optimizer = self.opt.minimize(self.error)

                self.sess = tf.Session()
                self.sess.run(tf.global_variables_initializer())
                self.training = False
Example No. 29
            def __init__(self, word_vector_size):
                tf.reset_default_graph()
                self.vector_size = word_vector_size

                self.vectors = tf.placeholder(tf.float32, shape=(None, None, word_vector_size))
                self.user_terms = tf.placeholder(tf.float32, shape=(None, None))
                self.ut2 = tf.placeholder(tf.float32, shape=(None, None))
                self.group_by = tf.placeholder(tf.float32, shape=(None, None, None))
                self.padding = tf.placeholder(tf.float32, shape=(None, None))
                self.output = tf.placeholder(tf.float32, shape=(None, 1))
                self.dropout_rate = tf.placeholder(tf.float32)

                xavier = tf.contrib.layers.xavier_initializer()

                # 50 tri-gram, 50 4-gram and 50 5-gram
                filter_tri = tf.Variable(xavier((1, 2, word_vector_size, 50)), name="weight")  #
                bias_tri = tf.Variable(tf.zeros((1, 50)), name="bias")  #
                self.f3 = filter_tri
                self.b3 = bias_tri

                filter_4 = tf.Variable(xavier((1, 3, word_vector_size, 50)), name="weight")  #
                bias_4 = tf.Variable(tf.zeros((1, 50)), name="bias")
                self.f4 = filter_4
                self.b4 = bias_4

                filter_5 = tf.Variable(xavier((1, 5, word_vector_size, 50)), name="weight")  #
                bias_5 = tf.Variable(tf.zeros((1, 50)), name="bias")
                self.f5 = filter_5
                self.b5 = bias_5

                with tf.name_scope("relevance"):
                    hidden = 150
                    self.relevance_weight = tf.Variable(0.01 * xavier((hidden, 2)))
                    self.relevance_bias = tf.Variable(0.0 * xavier((1, 2)))
                    self.relevance_attention_weight = tf.Variable(0.01 * xavier((100, 2)))
                    self.relevance_attention_bias = tf.Variable(0.0 * xavier((1, 2)))

                rel, pre_max_true_dropped, pre_max_sum = self.forward(self.vectors)
                self.relevance = rel[:, 1]

                ut = tf.expand_dims(self.ut2, 2)  # NWC
                rel_masked, pre_max_true_masked_dropped, _ = self.forward(self.vectors * ut)
                self.rel_masked = rel_masked

                self.pre_max = pre_max_sum
                self.get_attention()

                # true_attention_error = 0.0
                att_reg = 0.0

                prediction_error = -tf.reduce_sum((self.output * tf.log(rel[:, 1] + 10 ** -5, name="log2rel") + (
                        1 - self.output) * tf.log(rel[:, 0] + 10 ** -5, name="log3rel")))

                # N, num_unique, text_length ; N,text_length
                pos_attention = tf.squeeze(tf.matmul(self.group_by, tf.expand_dims(self.pos_attention, -1)),
                                       squeeze_dims=-1)
                neg_attention = tf.squeeze(tf.matmul(self.group_by, tf.expand_dims(self.neg_attention, -1)),
                                       squeeze_dims=-1)
                self.pos_att_grouped = pos_attention
                self.neg_att_grouped = neg_attention

                pos_heads = tf.reduce_sum(tf.multiply(pos_attention, self.user_terms), axis=1)
                neg_heads = tf.reduce_sum(tf.multiply(neg_attention, self.user_terms), axis=1)
                self.pos_heads = pos_heads

                attention_error = 0.0
                occlusion_error = 0.0
                if use_attention:
                    attention_error += tf.reduce_sum(self.output*(pos_heads - 0.5) ** 2)
                    att_reg = tf.reduce_sum(self.output * tf.nn.relu(self.pos_attention - att_max_value)
                                                     + (1-self.output) * tf.nn.relu(self.neg_attention-att_max_value))
                    occlusion_error =  -tf.reduce_sum((self.output * tf.log(rel_masked[:, 1] + 10 ** -5, name="log2rel2") + (
                        1 - self.output) * tf.log(rel_masked[:, 0] + 10 ** -5, name="log3rel2")))


                self.att = attention_error

                self.error = (prediction_error
                              + tf.sign(tf.reduce_sum(self.user_terms)) * attention_error
                              + tf.sign(tf.reduce_sum(self.user_terms)) * occlusion_error
                              + tf.sign(tf.reduce_sum(self.user_terms)) * att_reg)

                self.a = (tf.check_numerics(attention_error, message="att")
                          + tf.check_numerics(pos_heads, message="pos-heads")
                          + tf.check_numerics(neg_heads, message="neg-heads"))
                self.opt = AdamOptimizer()
                self.optimizer = self.opt.minimize(self.error)
                self.uncertainty = 1

                self.sess = tf.Session()
                self.sess.run(tf.global_variables_initializer())
                self.n_trained = 0
                self.training = False
Example No. 30
                      user_num,
                      item_num,
                      cum_table,
                      batch_size=batch_size,
                      max_len=max_len,
                      n_workers=3)

model, emb = build_model(max_len=max_len,
                         input_dim=item_num + 1,
                         embedding_dim=50,
                         feed_forward_units=50,
                         head_num=1,
                         block_num=2,
                         dropout_rate=0.2)

optimizer = AdamOptimizer(0.001)
tbcb = TensorBoard(log_dir='/logs',
                   histogram_freq=1,
                   write_graph=True,
                   write_grads=True,
                   write_images=True,
                   embeddings_freq=1)

loss_history = []
cos_loss_history = []

T = 0.0
t0 = time.time()

tbcb.set_model(model)
tbcb.on_train_begin()