Code Example #1
File: naf.py Project: scturtle/rl
    def _build(self):
        action_size = self.cfg.action_dim[0]
        self.x = tf.placeholder(shape=(None, ) + tuple(self.cfg.status_dim),
                                dtype=tf.float32,
                                name='x')
        self.u = tf.placeholder(shape=(None, ) + tuple(self.cfg.action_dim),
                                dtype=tf.float32,
                                name='u')
        self.y = tf.placeholder(shape=(None, ), dtype=tf.float32, name='y')

        h = self.fc(self.x, self.cfg.hidden_dim, activation_fn=tf.nn.relu)
        h = self.fc(h, self.cfg.hidden_dim, activation_fn=tf.nn.relu)
        self.V = tf.squeeze(self.fc(h, 1))
        self.mu = self.fc(h, action_size)
        self.l = self.fc(h, action_size * (action_size + 1) // 2)
        self.L = self.to_lower_triangle(self.l, action_size)
        self.P = tf.matmul(self.L, tf.transpose(self.L, (0, 2, 1)), name="P")

        diff_u = tf.expand_dims(self.u - self.mu, 1)
        self.A = -tf.matmul(diff_u,
                            tf.matmul(self.P, tf.transpose(diff_u, (0, 2, 1))))
        self.A = tf.squeeze(tf.reshape(self.A, (-1, 1)), name="A")
        self.Q = self.A + self.V
        self.loss = tf.reduce_mean(tf.squared_difference(self.y, self.Q))
        self.train_op = tf.train.AdamOptimizer(
            learning_rate=self.cfg.learning_rate).minimize(
                self.loss, global_step=get_or_create_global_step())

        self.summaries = tf.summary.merge([
            tf.summary.scalar("loss", self.loss),
            tf.summary.histogram("mu", self.mu),
            tf.summary.histogram("Q", self.Q)
        ])
Code Example #2
    def __init__(self, hidden_size, batch_size, learning_rate):
        self.input_tensor = tf.placeholder(tf.float32, [None, 28 * 28])
        with arg_scope([layers.conv2d, layers.conv2d_transpose],
                       activation_fn=tf.nn.elu,
                       normalizer_fn=layers.batch_norm,
                       normalizer_params={'scale': True}):
            with tf.variable_scope('model') as scope:
                encoded = encoder(self.input_tensor, hidden_size * 2)

                mean = encoded[:, :hidden_size]
                stddev = tf.sqrt(tf.exp(encoded[:, hidden_size:]))

                epsilon = tf.random_normal([tf.shape(mean)[0], hidden_size])
                input_sample = mean + epsilon * stddev

                output_tensor = decoder(input_sample)
            with tf.variable_scope('model', reuse=True) as scope:
                self.sampled_tensor = decoder(
                    tf.random_normal([batch_size, hidden_size]))
        vae_loss = self.__get_vae_cost(mean, stddev)
        rec_loss = self.__get_reconstruction_cost(output_tensor,
                                                  self.input_tensor)

        loss = vae_loss + rec_loss
        self.train = layers.optimize_loss(loss,
                                          get_or_create_global_step(),
                                          learning_rate=learning_rate,
                                          optimizer='Adam',
                                          update_ops=[])

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
Code Example #3
File: dqn.py Project: scturtle/rl
    def _build_model(self):
        self.X = tf.placeholder(shape=[None, 84, 84, 4],
                                dtype=tf.uint8,
                                name="X")
        self.y = tf.placeholder(shape=[None], dtype=tf.float32, name="y")
        self.a = tf.placeholder(shape=[None], dtype=tf.int32, name="a")

        X = tf.to_float(self.X) / 255.0
        conv1 = tfl.conv2d(X, 32, 8, 4)
        conv2 = tfl.conv2d(conv1, 64, 4, 2)
        conv3 = tfl.conv2d(conv2, 64, 3, 1)
        flattened = tfl.flatten(conv3)
        fc1 = tfl.fully_connected(flattened, 512)
        self.predictions = tfl.fully_connected(fc1,
                                               self.nA,
                                               activation_fn=None)

        batch_size = tf.shape(self.a)[0]
        ind = tf.stack([tf.range(batch_size), self.a], axis=1)  # tf.pack was renamed to tf.stack in TF 1.0
        self.action_predictions = tf.gather_nd(self.predictions, ind)

        self.network_params = get_variables(self.scope)

        self.loss = tf.reduce_mean(
            tf.squared_difference(self.y, self.action_predictions))
        self.train_op = tf.train.RMSPropOptimizer(
            0.0025, 0.99, 0.0,
            1e-6).minimize(self.loss, global_step=get_or_create_global_step())

        self.summaries = tf.summary.merge([
            tf.summary.scalar("loss", self.loss),
            tf.summary.histogram("a", tf.argmax(self.predictions, axis=1)),
            tf.summary.histogram("max_q", tf.reduce_max(self.predictions)),
        ])
Code Example #4
    def _build(self):
        self.x = tf.placeholder(
            shape=(None, self.cfg.status_size), dtype=tf.float32, name='x')
        self.a = tf.placeholder(
            shape=(None, self.cfg.action_size), dtype=tf.float32, name='a')
        self.y = tf.placeholder(shape=(None, ), dtype=tf.float32, name='y')

        x = self.x

        h = tfl.fully_connected(x, 400)
        h = tfl.fully_connected(tf.concat([h, self.a], 1), 300)  # tf.concat_v2 was renamed to tf.concat in TF 1.0
        q = tfl.fully_connected(
            h, 1,
            activation_fn=None,
            weights_regularizer=tfl.l2_regularizer(1e-2))
        self.q = tf.squeeze(q)

        self.network_params = get_variables(self.scope)

        self.loss = tf.reduce_mean(tf.squared_difference(self.y, self.q))
        self.train_op = tf.train.AdamOptimizer(self.cfg.learning_rate).minimize(
            self.loss, global_step=get_or_create_global_step())

        batch_size = tf.cast(tf.shape(self.a)[0], tf.float32)
        self.action_gradient = tf.div(tf.gradients(self.q, self.a), batch_size)

        self.summaries = tf.summary.merge([
            tf.summary.scalar("loss", self.loss),
            tf.summary.histogram("q", self.q),
        ])
Code Example #5
    def __init__(self,
                 metric=None,
                 log_dir='/tmp/tflearn_logs/',
                 global_step=None,
                 session=None,
                 graph=None,
                 name=None):

        self.name = name

        # Estimator Graph and Session
        self.graph = tf.Graph() if graph is None else graph
        self.session = tf.Session() if session is None else session
        if global_step is None:
            with self.graph.as_default():
                self.global_step = framework.get_or_create_global_step()
        else:
            self.global_step = global_step

        self.metric = validate_func(metric)

        # Estimator Graph Branches
        self._train = GraphBranch()
        self._pred = GraphBranch()
        self._eval = GraphBranch()

        # Tensor Utils
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        self.log_dir = log_dir
        self._is_initialized = False
        self._to_be_restored = False

        # Ops
        self.train_op = None
        self.loss_op = None
Code Example #6
File: resnet_cifar.py Project: wangsanpeng/DLAPP
    def build_graph(self):
        from tensorflow.contrib.framework import get_or_create_global_step
        self.global_step = get_or_create_global_step()

        self._build_model()
        if self.mode == 'train':
            self._build_train_op()
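
All of the examples in this listing share the same basic pattern: fetch the per-graph global step with get_or_create_global_step() and hand it to an optimizer (or fetch it inside sess.run) so that every training step increments and reports it. Below is a minimal, self-contained sketch of that pattern, assuming TF 1.x graph mode; the toy model and constants are illustrative only and are not taken from any of the projects above.

import tensorflow as tf
from tensorflow.contrib.framework import get_or_create_global_step

x = tf.placeholder(tf.float32, shape=(None, 1), name='x')
y = tf.placeholder(tf.float32, shape=(None, ), name='y')
pred = tf.squeeze(tf.layers.dense(x, 1), axis=1)
loss = tf.reduce_mean(tf.squared_difference(y, pred))

# Created once per graph; repeated calls return the same variable.
global_step = get_or_create_global_step()
# minimize() increments the global step by one on every training run.
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss, global_step=global_step)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op, feed_dict={x: [[1.0]], y: [2.0]})
    print(sess.run(global_step))  # -> 1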
Code Example #7
    def update(self, x, a, y):
        _, loss, summaries, global_step = self.sess.run(
            [self.train_op, self.loss, self.summaries, get_or_create_global_step()],
            feed_dict={self.x: x, self.a: a, self.y: y})
        if self.summary_writer:
            self.summary_writer.add_summary(summaries, global_step)
        return loss
Code Example #8
def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = contrib_framework.get_or_create_global_step()

        # Get images and labels for CIFAR-10.
        images, labels = cifar10.distorted_inputs()

        # Create a compression object using the compression hyperparameters
        compression_obj = cifar10.create_compressor(FLAGS.compression_hparams,
                                                    global_step=global_step)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference(images, compression_obj)

        # Calculate loss.
        loss = cifar10.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = cifar10.train(loss, global_step, compression_obj)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""
            def begin(self):
                self._step = -1

            def before_run(self, run_context):
                self._step += 1
                self._start_time = time.time()
                return tf.train.SessionRunArgs(loss)  # Asks for loss value.

            def after_run(self, run_context, run_values):
                duration = time.time() - self._start_time
                loss_value = run_values.results
                if self._step % 10 == 0:
                    num_examples_per_step = 128
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)

                    format_str = (
                        '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
                    print(format_str %
                          (datetime.datetime.now(), self._step, loss_value,
                           examples_per_sec, sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(log_device_placement=FLAGS.
                                      log_device_placement)) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
Code Example #9
File: ops.py Project: ml-lab/glas
def exponential_decay(batch_size, num_epochs, initial_rate, decay_rate, dataset,
                      staircase=True, name=None):
    """ Get the exponential decay for the following parameters """
    global_step = framework.get_or_create_global_step()
    decay_steps = int(num_epochs * dataset.num_samples / batch_size)

    return tf.train.exponential_decay(
        initial_rate, global_step,
        decay_steps, decay_rate,
        staircase=staircase, name=name)
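
A hedged usage sketch for the helper above; the dataset object (anything exposing num_samples), the loss tensor, and the hyperparameter values are hypothetical:

learning_rate = exponential_decay(batch_size=128, num_epochs=10,
                                  initial_rate=0.1, decay_rate=0.96,
                                  dataset=dataset)
# get_or_create_global_step() returns the same variable the helper used internally.
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(
    loss, global_step=framework.get_or_create_global_step())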
Code Example #10
def optimizer_exp_decay():
    """Construct the optimizer with learning rate decay every experience.

    Returns:
        The optimizer.
    """
    global_step = framework.get_or_create_global_step()
    learning_rate = tf.train.exponential_decay(learning_rate=0.1,
                                               global_step=global_step,
                                               decay_steps=100,
                                               decay_rate=0.001)
    return tf.train.AdagradOptimizer(learning_rate=learning_rate)
Code Example #11
File: dense_net.py Project: wangsanpeng/DLAPP
    def __init__(self, model_conf, is_training, images, labels):
        self._depth = model_conf.DEPTH
        self._growth_rate = model_conf.GROWTH_RATE
        self.model_conf = model_conf
        self._num_layer_per_block = int((self._depth-4)/3)

        self._images = tf.reshape(images, shape=[-1, self.model_conf.HEIGHT, self.model_conf.WIDTH, 1])
        self._labels = labels
        self._is_training = is_training
        self._layer_func = self.bottleneck_layer if self.model_conf.BOTTLENECK else self.add_layer
        self.global_step = get_or_create_global_step()

        logger.info("In %s phase, using %s as layer function" % (self.model_conf.MODE, self._layer_func.__name__))
Code Example #12
File: dqn.py Project: scturtle/rl
    def update(self, s, a, y):
        """ ([?, 84, 84, 4], [?], [?]) -> loss """
        _, loss, summaries, global_step = self.sess.run(
            [self.train_op, self.loss, self.summaries,
             get_or_create_global_step()],
            feed_dict={self.X: s, self.a: a, self.y: y})
        if self.summary_writer:
            self.summary_writer.add_summary(summaries, global_step)
        return loss
Code Example #13
def main():

    env = CliffWalkingEnv()
    sess = tf.Session()
    ac = ActorCritic(env.nA, env.nS)
    sess.run(tf.global_variables_initializer())

    date_str = datetime.now().strftime("%m%d_%H%M%S")
    summaries_dir = os.path.abspath("./summary/ac/" + date_str)
    if not os.path.exists(summaries_dir):
        os.makedirs(summaries_dir)
    summary_writer = tf.summary.FileWriter(summaries_dir,
                                           graph=tf.get_default_graph())

    state = env.reset()
    episode_cnt = 0
    episode_step = 0
    episode_reward = 0.
    while 1:

        probs, value = sess.run([ac.probs, ac.value],
                                feed_dict={ac.state: state})
        action = np.random.choice(env.nA, p=probs)
        next_state, reward, done, _ = env.step(action)

        episode_step += 1
        episode_reward += reward

        value_next = sess.run(ac.value, feed_dict={ac.state: next_state})
        td_target = reward + 0.99 * value_next
        td_adv = td_target - value

        summary, global_step, _, _ = \
            sess.run([ac.summary, get_or_create_global_step(), ac.train_p, ac.train_v],
                     feed_dict={ac.state: state, ac.action: action,
                                ac.adv: td_adv, ac.target: td_target})

        summary_writer.add_summary(summary, global_step)

        if done or episode_step > 1000:
            print('episode cnt:', episode_cnt, 'episode step:', episode_step,
                  'reward:', episode_reward)
            episode_step = 0
            episode_reward = 0.
            episode_cnt += 1
            state = env.reset()
        else:
            state = next_state
Code Example #14
def main(args):
    cam = cv2.VideoCapture(0)

    # Create global step
    global_step = framework.get_or_create_global_step()

    # Tensor that holds raw camera frame
    image_input = tf.placeholder(tf.float32, shape=[None, None, 3], name='input_placeholder')
    input_size = tf.shape(image_input)[:2]
    batched_input = tf.expand_dims(image_input, 0)
    resized_im = tf.image.resize_images(batched_input, IMAGE_SIZE)

    # Placeholder for target
    label_placeholder = tf.placeholder(tf.float32, shape=[None, None, 3], name='label_placeholder')
    batched_label = tf.expand_dims(label_placeholder, 0)
    resized_label = tf.image.resize_images(batched_label, IMAGE_SIZE)

    # Create model
    output = build_model(resized_im, is_training=True)
    resized_output = tf.squeeze(tf.image.resize_images(output, input_size))

    # Create optimizer
    train_op, loss_op = build_train_op(resized_label, output, args.learning_rate, global_step=global_step)

    with tf.Session() as sess:
        # Initialize
        init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init)

        step=0
        while True:
            ret, im = cam.read()
            im = im.astype('float') / 255.0

            import math
            l = math.sin(math.pi * step / 40)
            target = l**2 * im

            step, output, loss, _ = sess.run([global_step, resized_output, loss_op, train_op], feed_dict={image_input: im, label_placeholder: target})

            print("%i: %0.5f" % (step, loss))
            cv2.imshow('input', im)
            cv2.imshow('output', output)
            cv2.imshow('target', target)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
Code Example #15
    def build_model(self):
        config = self.config
        data_generator = self.data_generator
        logging.info('Building the model...')
        # Placeholders
        self.inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name='inputs')
        self.inputs_length = tf.placeholder(dtype=tf.int32, shape=[None], name='inputs_length')
        self.targets = tf.placeholder(dtype=tf.int32, shape=[None, None], name='targets')
        self.targets_length = tf.placeholder(dtype=tf.int32, shape=[None], name='targets_length')

        vocab_size = len(data_generator.vocab)
        embeddings = tf.get_variable(name='embeddings', shape=[vocab_size, config.word_dim], dtype=tf.float32)

        with tf.variable_scope('decoder'):
            with tf.variable_scope('output') as output_scope:
                # This variable-scope-trick is used to ensure that
                # output_fn has a proper scope regardless of a caller's
                # scope.
                def output_fn(cell_outputs):
                    return layers.fully_connected(inputs=cell_outputs, num_outputs=vocab_size, activation_fn=None,
                        scope=output_scope)

        self.rnn_cell = rnn.GRUBlockCell(config.sentence_dim)
        self.encoder_state = self.encode(cell=self.rnn_cell, embeddings=embeddings,
            inputs=self.inputs, inputs_length=self.inputs_length, scope='encoder')
        self.decoder_outputs = self.decode_train(cell=self.rnn_cell, embeddings=embeddings, encoder_state=self.encoder_state,
            targets=self.targets[:, :-1], targets_length=self.targets_length - 1, scope='decoder')
        self.generated = self.decode_inference(cell=self.rnn_cell, embeddings=embeddings, encoder_state=self.encoder_state,
            output_fn=output_fn, vocab_size=vocab_size, bos_id=data_generator.vocab['<EOS>'],
            eos_id=data_generator.vocab['<EOS>'], max_length=config.max_length, scope='decoder', reuse=True)
        self.loss = self.loss(decoder_outputs=self.decoder_outputs, output_fn=output_fn,
                        targets=self.targets[:, 1:], targets_length=self.targets_length - 1)

        self.global_step = get_or_create_global_step()
        self.train_op = slim.optimize_loss(loss=self.loss, global_step=self.global_step, learning_rate=None,
            optimizer=tf.train.AdamOptimizer(), clip_gradients=5.0)

        self.summary_writer = tf.summary.FileWriter(logdir=os.path.join(config.save_dir, 'log'))
        self.summary = tf.summary.merge_all()

        tf.get_variable_scope().set_initializer(tf.random_normal_initializer(mean=0.0, stddev=0.01))
        tf.global_variables_initializer().run()

        self.saver = tf.train.Saver(max_to_keep=20)
Code Example #16
    def __init__(self, images, labels, model_conf, is_training):
        self._images = images
        self._labels = labels
        self.model_conf = model_conf
        self._is_training = is_training
        self.global_step = get_or_create_global_step()

        self._filters = [64, 64, 128, 256, 512]
        self._kernels = [7, 3, 3, 3, 3]
        self._stride = [2, 1, 2, 2, 2]

        if self._is_training:
            self._mode = "TRAIN"
            self._reuse = False
        else:
            self._mode = "Not TRAIN"
            self._reuse = True

        logger.info("In %s phase" % (self._mode))
Code Example #17
    def __init__(self, nA, nS):
        self.state = tf.placeholder(shape=(), dtype=tf.uint8, name='state')
        self.action = tf.placeholder(dtype=tf.int32, name='action')
        self.target = tf.placeholder(dtype=tf.float32, name='target')
        self.adv = tf.placeholder(dtype=tf.float32, name='advantage')

        state_onehot = tf.one_hot(self.state, nS, dtype=tf.float32)
        hidden = tf.expand_dims(state_onehot, 0)

        self.probs = tf.squeeze(
            tfl.fully_connected(hidden,
                                nA,
                                activation_fn=tf.nn.softmax,
                                biases_initializer=None))
        self.value = tf.squeeze(
            tfl.fully_connected(hidden,
                                1,
                                activation_fn=None,
                                biases_initializer=None))

        action_prob = tf.gather(self.probs, self.action)

        self.policy_loss = -tf.log(action_prob) * self.adv
        self.value_loss = tf.squared_difference(self.value, self.target)
        self.loss = self.policy_loss + self.value_loss

        learning_rate = 0.01
        global_step = get_or_create_global_step()
        self.train_p = tf.train.AdamOptimizer(learning_rate=learning_rate)\
                               .minimize(self.policy_loss, global_step=global_step)
        self.train_v = tf.train.AdamOptimizer(learning_rate=learning_rate)\
                               .minimize(self.value_loss, global_step=global_step)

        self.summary = tf.summary.merge([
            tf.summary.scalar("target", self.target),
            tf.summary.scalar("adv", self.adv),
            tf.summary.histogram("probs", self.probs),
            tf.summary.scalar("value", self.value),
            tf.summary.scalar("policy_loss", self.policy_loss),
            tf.summary.scalar("value_loss", self.value_loss)
        ])
Code Example #18
File: dense_net.py Project: wangsanpeng/DLAPP
    def __init__(self, model_conf, is_training, images, labels):
        self._depth = model_conf.DEPTH
        self._growth_rate = model_conf.GROWTH_RATE
        self._compression_rate = model_conf.COMPRESSION_TARE
        self.model_conf = model_conf
        self._num_layer_per_block = int((self._depth-5)/4)

#        self._images = tf.reshape(images, shape=[-1, self.model_conf.HEIGHT, self.model_conf.WIDTH, 3])
        self._images = images
        self._labels = labels
        self._is_training = is_training
        self._layer_func = self.bottleneck_layer if self.model_conf.BOTTLENECK else self.add_layer
        self.global_step = get_or_create_global_step()

        if self._is_training:
            self._mode = "TRAIN"
            self._reuse = False
        else:
            self._mode = "Not TRAIN"
            self._reuse = True

        logger.info("In %s phase, using %s as layer function" % (self._mode, self._layer_func.__name__))
Code Example #19
File: gan.py Project: wangruichens/gan_zoo
    def __init__(self, hidden_size, batch_size, learning_rate):
        self.input_tensor = tf.placeholder(tf.float32, [None, 28 * 28])

        with arg_scope([layers.conv2d, layers.conv2d_transpose],
                       activation_fn=concat_elu,
                       normalizer_fn=layers.batch_norm,
                       normalizer_params={'scale': True}):
            with tf.variable_scope('model'):
                D1 = discriminator(self.input_tensor)  # positive example
                D_params_num = len(tf.trainable_variables())
                G = decoder(tf.random_normal([batch_size, hidden_size]))
                self.sampled_tensor = G

            with tf.variable_scope('model', reuse=True):
                D2 = discriminator(G)
        D_loss = self.__get_discrinator_loss(D1, D2)
        G_loss = self.__get_generator_loss(D2)

        params = tf.trainable_variables()
        D_params = params[:D_params_num]
        G_params = params[D_params_num:]
        global_step = get_or_create_global_step()
        self.train_discrimator = layers.optimize_loss(D_loss,
                                                      global_step,
                                                      learning_rate / 10,
                                                      'Adam',
                                                      variables=D_params,
                                                      update_ops=[])
        self.train_generator = layers.optimize_loss(G_loss,
                                                    global_step,
                                                    learning_rate,
                                                    'Adam',
                                                    variables=G_params,
                                                    update_ops=[])

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
Code Example #20
    def __init__(self, input_producer, embed_mat, config, is_train):

        with tf.variable_scope("VAE") as var_scope:
            x_enc = input_producer.x_enc
            x_dec = input_producer.x_dec
            y_dec = input_producer.y_dec
            len_enc = input_producer.len_enc
            len_dec = input_producer.len_dec

            max_len = input_producer.seq_max_length
            vocab_num = input_producer.vocab_num
            batch_size = config.batch_size
            hidden_size = config.hidden_size
            embed_dim = config.embed_dim

            is_GRU = config.is_GRU
            is_argmax_sampling = config.is_argmax_sampling
            word_keep_prob = config.word_dropout_keep_prob
            max_grad_norm = config.max_grad_norm
            learning_rate = config.learning_rate

            self.KL_weight = tf.Variable(0.0, name="KL_weight")
            self.input_ids = y_dec

            def _lstm_cell():
                return BasicLSTMCell(num_units=hidden_size,
                                     forget_bias=1.0,
                                     state_is_tuple=True,
                                     reuse=tf.get_variable_scope().reuse)
            def _gru_cell():
                return GRUCell(num_units=hidden_size,
                               reuse=tf.get_variable_scope().reuse)

            cell = _gru_cell if is_GRU else _lstm_cell
            self.initial_state = cell().zero_state(batch_size, tf.float32)


            # encoder
            with tf.device("/cpu:0"):
                embed_init = tf.constant_initializer(embed_mat)\
                                if (embed_mat is not None) else None
                embedding = tf.get_variable("embedding", [vocab_num, embed_dim],
                                             initializer=embed_init,
                                             trainable=True)
                in_enc = embedding_lookup(embedding, x_enc)



            with tf.variable_scope("encoder"):
                out_tuple = dynamic_rnn(cell=cell(),
                                        inputs=in_enc,
                                        sequence_length=len_enc,
                                        initial_state=self.initial_state)
                (_, encoder_hidden) = out_tuple

                # linear layers for mu and log(var)
                latent_dim = hidden_size # may have to change this later
                W_mu = tf.get_variable("W_mu", [hidden_size,latent_dim])
                b_mu = tf.get_variable("b_mu", [latent_dim])
                W_logvar = tf.get_variable("W_logvar", [hidden_size,latent_dim])
                b_logvar = tf.get_variable("b_logvar", [latent_dim])
                #l2_loss = tf.nn.l2_loss(W_mu) + tf.nn.l2_loss(W_logvar)

                mu = tf.matmul(encoder_hidden, W_mu) + b_mu
                logvar = tf.matmul(encoder_hidden, W_logvar) + b_logvar

                # sample epsilon
                epsilon = tf.random_normal(tf.shape(logvar), name='epsilon')

                # sample latent variable
                stddev = tf.exp(0.5 * logvar) # standard deviation
                self.z = mu + tf.multiply(stddev, epsilon)

            # decoder
            with tf.device("/cpu:0"):
                in_dec = embedding_lookup(embedding, x_dec)

            with tf.variable_scope("decoder"):

                helper = WordDropoutTrainingHelper(
                                      inputs=in_dec,
                                      sequence_length=len_dec,
                                      embedding=embedding,
                                      dropout_keep_prob=word_keep_prob,
                                      drop_token_id=UNK_ID,
                                      is_argmax_sampling=is_argmax_sampling)

                # projection layer
                output_layer = Dense(units=vocab_num,
                                     activation=None,
                                     use_bias=True,
                                     trainable=True)

                # decoder
                decoder = BasicDecoder(cell=cell(),
                                       helper=helper,
                                       initial_state=self.z,
                                       output_layer=output_layer)

                # dynamic_decode
                out_tuple = dynamic_decode(decoder=decoder,
                                           output_time_major=False, #  speed
                                           impute_finished=True)

            # get all the variables in this scope
            self.vars = tf.contrib.framework.get_variables(var_scope)

        # (outputs, state, sequence_length)
        (self.outputs, _, self.cell_outputs_len) = out_tuple # final

        # (cell_outputs, sample_ids)
        (self.cell_outputs, self.sampled_ids) = self.outputs

        # compute softmax loss (reconstruction)
        len_out = tf.reduce_max(len_dec)
        targets = y_dec[:,:len_out]
        weights = tf.sequence_mask(self.cell_outputs_len, dtype=tf.float32)

        softmax_loss = sequence_loss(logits=self.cell_outputs,
                                     targets=targets,
                                     weights=weights,
                                     average_across_timesteps=True,
                                     average_across_batch=True)

        self.AE_loss = self.AE_loss_mean = softmax_loss

        # compute KL loss (regularization)
        KL_term = 1 + logvar - tf.pow(mu, 2) - tf.exp(logvar)
        self.KL_loss = -0.5 * tf.reduce_sum(KL_term, reduction_indices=1)
        self.KL_loss_mean = tf.reduce_mean(self.KL_loss)

        # total loss
        self.loss = self.AE_loss + self.KL_weight * self.KL_loss_mean

        # optimization
        self.lr = tf.Variable(learning_rate, trainable=False, name="lr")

        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, self.vars),
                                          max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self.lr)

        self.global_step = get_or_create_global_step()
        self.train_op = optimizer.apply_gradients(zip(grads, self.vars),
                                                  global_step=self.global_step)

        # learning_rate update
        self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_lr")
        self.lr_update = tf.assign(self.lr, self.new_lr)

        # KL weight update
        self.new_KL_weight = tf.placeholder(tf.float32, shape=[], name="new_kl")
        self.KL_weight_update = tf.assign(self.KL_weight, self.new_KL_weight)

        # summaries
        tf.summary.scalar("Loss/AE_mean", self.AE_loss_mean)
        tf.summary.scalar("Loss/KL_mean", self.KL_loss_mean)
        tf.summary.scalar("Loss/Total", self.AE_loss_mean + self.KL_loss_mean)
        tf.summary.scalar("Misc/KL_weight", self.KL_weight)
        tf.summary.scalar("Misc/mu_mean", tf.reduce_mean(mu))
        tf.summary.scalar("Misc/sigma_mean", tf.reduce_mean(stddev))
        tf.summary.scalar("Misc/learning_rate", self.lr)
        self.summary_op = tf.summary.merge_all()
Code Example #21
    def __init__(self, input_producer, embed_mat, config, is_train):
        x_enc = input_producer.x_enc
        x_dec = input_producer.x_dec
        y_dec = input_producer.y_dec
        len_enc = input_producer.len_enc
        len_dec = input_producer.len_dec
        self.answer = input_producer.answ_disc

        max_len = input_producer.seq_max_length
        vocab_num = input_producer.vocab_num
        config.update(**dict(max_len=max_len, vocab_num=vocab_num))
        # import ipdb; ipdb.set_trace()
        self.kl_weight = tf.Variable(0.0, name="KL_weight")
        self.input_ids = y_dec

        modeler = CtrlVAEModelingHelper(config, embed_mat)

        with tf.variable_scope("CtrlVAE"):

            ### VAE ############################################################

            # encoder
            x_enc_onehot = tf.one_hot(x_enc, vocab_num)
            out_tuple = modeler.encoder(x_enc_onehot=x_enc_onehot,
                                        len_enc=len_enc)
            (vae_z, vae_mu, vae_logvar) = out_tuple

            # holistic representation
            with tf.device("/cpu:0"):
                vae_c = embedding_lookup(modeler.embed, self.answer)
            vae_c = tf.reshape(vae_c, [config.batch_size, -1])
            vae_represent = tf.concat([vae_z, vae_c], axis=1)

            # decoder
            x_dec_onehot = tf.one_hot(x_dec, config.vocab_num)
            out_tuple = modeler.decoder(initial_state=vae_represent,
                                        x_dec_onehot=x_dec_onehot,
                                        len_dec=len_dec,
                                        is_teacher_forcing=True)

            (vae_outputs, vae_state, vae_outputs_len) = out_tuple  # final
            (self.vae_output, self.vae_sample) = vae_outputs

            ### Generator ######################################################

            # random z and c from the prior
            self.gen_z = tf.random_normal(
                [config.batch_size, config.hidden_size])
            self.gen_c = vae_c
            gen_represent = tf.concat([self.gen_z, self.gen_c], axis=1)

            # generator (decoder)
            x_dec_onehot = tf.one_hot(x_dec, config.vocab_num)
            out_tuple = modeler.decoder(initial_state=gen_represent,
                                        x_dec_onehot=x_dec_onehot,
                                        len_dec=len_dec,
                                        is_teacher_forcing=True,
                                        reuse=True)

            (gen_outputs, gen_state, gen_outputs_len) = out_tuple  # final
            (self.gen_output, self.gen_sample) = gen_outputs
            gen_outputs_onehot = softmax(self.gen_output / ALMOST_ZERO)

            # discriminator (for c code)
            out_tuple = modeler.discriminator(inputs=gen_outputs_onehot,
                                              inputs_length=gen_outputs_len)
            (self.gen_c_output, self.gen_c_sample) = out_tuple

            # encoder again (for z code ; additional discriminator)
            out_tuple = modeler.encoder(x_enc_onehot=gen_outputs_onehot,
                                        len_enc=gen_outputs_len,
                                        reuse=True)
            (gen_z, dis_mu, dis_logvar) = out_tuple

            ### Discriminator ##################################################

            # discriminator (for training)
            x_dis_onehot = tf.one_hot(x_enc, config.vocab_num)
            out_tuple = modeler.discriminator(inputs=x_dis_onehot,
                                              inputs_length=gen_outputs_len,
                                              reuse=True)
            (self.dis_outputs, self.dis_sample) = out_tuple

        ########################################################################
        # get all the variables in this scope
        self.vars = get_variables("CtrlVAE")
        self.enc_vars = get_variables("CtrlVAE/encoder")
        self.gen_vars = get_variables("CtrlVAE/decoder")
        self.dis_vars = get_variables("CtrlVAE/discriminator")
        self.vae_vars = self.enc_vars + self.gen_vars
        ########################################################################
        # compute AE loss (reconstruction)
        len_out = tf.reduce_max(vae_outputs_len)
        targets = y_dec[:, :len_out]
        weights = tf.sequence_mask(vae_outputs_len, dtype=tf.float32)

        softmax_loss = sequence_loss(logits=self.vae_output,
                                     targets=targets,
                                     weights=weights,
                                     average_across_timesteps=False,
                                     average_across_batch=False)

        # NOTE: fix later!
        loss_sum = tf.reduce_sum(softmax_loss, axis=1)
        self.ae_loss = self.ae_loss_mean = tf.reduce_mean(loss_sum, axis=0)
        #self.ae_loss_mean = tf.reduce_mean(softmax_loss)

        # compute KL loss (regularization)
        KL_term = 1 + vae_logvar - tf.pow(vae_mu, 2) - tf.exp(vae_logvar)
        self.kl_loss = -0.5 * tf.reduce_sum(KL_term, reduction_indices=1)
        self.kl_loss_mean = tf.reduce_mean(self.kl_loss)

        # VAE total loss
        self.vae_loss = self.ae_loss + self.kl_weight * self.kl_loss_mean
        ########################################################################
        # c code loss
        answer_labels = tf.one_hot(self.answer, config.vocab_num)
        c_loss = softmax_cross_entropy_with_logits(labels=answer_labels,
                                                   logits=self.gen_c_output)
        self.c_loss = tf.reduce_mean(c_loss)

        # z code loss
        mu_loss = mean_pairwise_squared_error(vae_mu, dis_mu)
        logvar_loss = mean_pairwise_squared_error(vae_logvar, dis_logvar)
        self.z_loss = (mu_loss + logvar_loss) / 2

        # generator total loss
        self.gen_loss = self.c_loss + self.z_loss
        ########################################################################
        # discriminator training loss
        dis_loss = softmax_cross_entropy_with_logits(labels=answer_labels,
                                                     logits=self.dis_outputs)
        self.dis_loss = tf.reduce_mean(dis_loss)
        ########################################################################

        # optimization
        lr = config.learning_rate
        self.vae_lr = tf.Variable(lr, trainable=False, name="vae_lr")
        self.gen_lr = tf.Variable(0.0, trainable=False, name="gen_lr")
        self.dis_lr = tf.Variable(lr, trainable=False, name="dis_lr")

        vae_optim = tf.train.AdamOptimizer(self.vae_lr)
        gen_optim = tf.train.AdamOptimizer(self.gen_lr)
        dis_optim = tf.train.AdamOptimizer(self.dis_lr)

        vae_grads = tf.gradients(self.vae_loss, self.vae_vars)
        gen_grads = tf.gradients(self.gen_loss, self.gen_vars)
        dis_grads = tf.gradients(self.dis_loss, self.dis_vars)

        vae_grads, _ = tf.clip_by_global_norm(vae_grads, config.max_grad_norm)
        gen_grads, _ = tf.clip_by_global_norm(gen_grads, config.max_grad_norm)
        dis_grads, _ = tf.clip_by_global_norm(dis_grads, config.max_grad_norm)

        self.global_step = get_or_create_global_step()
        self.vae_train = vae_optim.apply_gradients(
            zip(vae_grads, self.vae_vars))
        self.gen_train = gen_optim.apply_gradients(
            zip(gen_grads, self.gen_vars))
        self.dis_train = dis_optim.apply_gradients(
            zip(dis_grads, self.dis_vars), self.global_step)

        # learning_rate update
        self.new_gen_lr = tf.placeholder(tf.float32,
                                         shape=[],
                                         name="new_gen_lr")
        self.gen_lr_update = tf.assign(self.gen_lr, self.new_gen_lr)

        # KL weight update
        self.new_kl_weight = tf.placeholder(tf.float32,
                                            shape=[],
                                            name="new_kl")
        self.kl_weight_update = tf.assign(self.kl_weight, self.new_kl_weight)

        # summaries
        tf.summary.scalar("Loss/ae_mean", self.ae_loss_mean)
        tf.summary.scalar("Loss/kl_mean", self.kl_loss_mean)
        tf.summary.scalar("Loss/Total", self.ae_loss_mean + self.kl_loss_mean)
        tf.summary.scalar("Misc/kl_weight", self.kl_weight)
        tf.summary.scalar("Misc/mu_mean", tf.reduce_mean(vae_mu))
        tf.summary.scalar("Misc/logvar_mean", tf.reduce_mean(vae_logvar))
        tf.summary.scalar("Misc/gen_lr", self.gen_lr)
        self.summary_op = tf.summary.merge_all()
Code Example #22
def run():
    #Create log_dir for evaluation information
    if not os.path.exists(log_eval):
        os.mkdir(log_eval)

    #Just construct the graph from scratch again
    with tf.Graph().as_default() as graph:
        tf.logging.set_verbosity(tf.logging.INFO)
        print("tf.logging")
        #Get the dataset first and load one batch of validation images and labels tensors. Set is_training as False so as to use the evaluation preprocessing
        dataset = get_split('validation', dataset_dir)
        print("dataset")
        images, raw_images, labels = load_batch(dataset,
                                                batch_size=batch_size,
                                                is_training=False)
        print(labels)
        #Create some information about the training steps
        num_batches_per_epoch = dataset.num_samples / batch_size
        num_steps_per_epoch = num_batches_per_epoch
        print("num_batches_per_epoch,num_steps_per_epoch",
              num_batches_per_epoch, num_steps_per_epoch)

        #Now create the inference model but set is_training=False
        with slim.arg_scope(inception_resnet_v2_arg_scope()):
            logits, end_points = inception_resnet_v2(
                images, num_classes=dataset.num_classes, is_training=False)
            print("logists")

        # #get all the variables to restore from the checkpoint file and create the saver function to restore
        variables_to_restore = slim.get_variables_to_restore()
        print("finished variables_to_restore")
        saver = tf.train.Saver(variables_to_restore)
        print("finished tf.train.Saver(variables_to_restore)")

        def restore_fn(sess):
            print(checkpoint_file)
            print("saver.restore(sess, checkpoint_file)")
            return saver.restore(sess, checkpoint_file)

        #Just define the metrics to track without the loss or whatsoever
        predictions = tf.argmax(end_points['Predictions'], 1)
        print("predictions", predictions)
        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(
            predictions, labels)
        print("accuracy, accuracy_update\n")
        metrics_op = tf.group(accuracy_update)
        print(" metrics_op\n")

        #Create the global step and an increment op for monitoring
        global_step = get_or_create_global_step()
        print("global_step\n")
        global_step_op = tf.assign(
            global_step, global_step + 1
        )  #no apply_gradient method so manually increasing the global_step
        print("global_step_op\n")

        #Create an evaluation step function
        def eval_step(sess, metrics_op, global_step):
            '''
            Simply takes in a session, runs the metrics op and some logging information.
            '''
            start_time = time.time()
            _, global_step_count, accuracy_value = sess.run(
                [metrics_op, global_step_op, accuracy])
            time_elapsed = time.time() - start_time

            #Log some information
            logging.info(
                'Global Step %s: Streaming Accuracy: %.4f (%.2f sec/step)',
                global_step_count, accuracy_value, time_elapsed)

            return accuracy_value

        #Define some scalar quantities to monitor
        tf.summary.scalar('Validation_Accuracy', accuracy)
        my_summary_op = tf.summary.merge_all()
        print("finished Define some scalar quantities to monitor")
        #Get your supervisor
        sv = tf.train.Supervisor(logdir=log_eval,
                                 summary_op=None,
                                 saver=None,
                                 init_fn=restore_fn)
        print("finished tf.train.Supervisor")

        #Now we are ready to run in one session
        with sv.managed_session() as sess:
            #with sv.managed_session() as sess:
            print("begin sv.managed_session()")
            print(int(num_steps_per_epoch * num_epochs))
            for step in range(int(num_steps_per_epoch * num_epochs)):
                print(step)
                sess.run(sv.global_step)
                print(
                    "print vital information every start of the epoch as always"
                )
                #print vital information every start of the epoch as always
                if step % num_batches_per_epoch == 0:
                    logging.info('Epoch: %s/%s',
                                 step / num_batches_per_epoch + 1, num_epochs)
                    logging.info('Current Streaming Accuracy: %.4f',
                                 sess.run(accuracy))

                #Compute summaries every 10 steps and continue evaluating
                if step % 10 == 0:
                    eval_step(sess,
                              metrics_op=metrics_op,
                              global_step=sv.global_step)
                    summaries = sess.run(my_summary_op)
                    sv.summary_computed(sess, summaries)

                #Otherwise just run as per normal
                else:
                    eval_step(sess,
                              metrics_op=metrics_op,
                              global_step=sv.global_step)

            #At the end of all the evaluation, show the final accuracy
            logging.info('Final Streaming Accuracy: %.4f', sess.run(accuracy))
            print(
                "Now we want to visualize the last batch's images just to see what our model has predicted"
            )
            #Now we want to visualize the last batch's images just to see what our model has predicted
            raw_images, labels, predictions = sess.run(
                [raw_images, labels, predictions])
            for i in range(10):
                image, label, prediction = raw_images[i], labels[
                    i], predictions[i]
                prediction_name, label_name = dataset.labels_to_name[
                    prediction], dataset.labels_to_name[label]
                text = 'Prediction: %s \n Ground Truth: %s' % (prediction_name,
                                                               label_name)
                print(text)

            logging.info(
                'Model evaluation has completed! Visit TensorBoard for more information regarding your evaluation.'
            )
Code Example #23
File: dcgan_anim.py Project: kascia/generative
def main():

    global_step = get_or_create_global_step()
    z = tf.placeholder(tf.float32, [batch_size, noise_size])
    x = tf.placeholder(tf.float32, [batch_size, height, width, channel])
    x_reshaped = tf.reshape(x, [batch_size, height, width, 3])
    x_resized = tf.image.resize_images(x_reshaped, [32, 32])

    G = generator(z)
    D_real = discriminator(x_resized)
    D_fake = discriminator(G)
    var_generator = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                      scope='generator')
    var_discriminator = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                          scope='discriminator')

    loss_discriminator = tf.reduce_mean(
        -tf.log(D_real + epsilon) - tf.log(1 - D_fake + epsilon),
        axis=0) + get_reg_loss()
    #loss_generator = tf.reduce_mean(tf.log(1-D_fake), axis=0)
    loss_generator = tf.reduce_mean(-tf.log(D_fake), axis=0)

    optimize_discriminator = tf.train.AdamOptimizer(
        learning_rate * 0.1, beta1=0.5).minimize(loss=loss_discriminator,
                                                 var_list=var_discriminator,
                                                 global_step=global_step)
    optimize_generator = tf.train.AdamOptimizer(
        learning_rate, beta1=0.5).minimize(loss=loss_generator,
                                           var_list=var_generator,
                                           global_step=global_step)
    """
    optimizer_discriminator = tf.train.AdamOptimizer(learning_rate*0.2, beta1=0.9)
    grads_and_vars_d = optimizer_discriminator.compute_gradients(
        loss=loss_discriminator, var_list=var_discriminator)
    clipped_grads_and_vars_d = [(tf.clip_by_norm(grad, 5.0),var) for grad, var
                              in grads_and_vars_d]
    optimize_discriminator = optimizer_discriminator.apply_gradients(
                                clipped_grads_and_vars_d,
                                global_step=global_step)

    optimizer_generator= tf.train.AdamOptimizer(learning_rate, beta1=0.9)
    grads_and_vars_g = optimizer_generator.compute_gradients(
        loss=loss_generator, var_list=var_generator)
    clipped_grads_and_vars_g = [(tf.clip_by_norm(grad, 5.0),var) for grad, var
                                in grads_and_vars_g]
    optimize_generator = optimizer_generator.apply_gradients(
                            clipped_grads_and_vars_g,
                            global_step=global_step)
    """

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    print('g', [item.name for item in var_generator])
    print('d', [item.name for item in var_discriminator])

    saver = tf.train.Saver()

    sess = tf.Session()

    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt:
        print('load_model', ckpt.model_checkpoint_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        print('initialize_model')
        sess.run(init_op)

    writer = tf.summary.FileWriter(tb_dir, sess.graph)
    for i in range(num_epoch):
        for k in range(num_k):
            real = get_next_batch(batch_size)
            noise = generate_random_normal_vector()
            """
            _D_real, _D_fake, _loss_d, _loss_g, _, _, m = sess.run(
                                            [D_real,
                                            D_fake,
                                            loss_discriminator,
                                            loss_generator,
                                            optimize_discriminator,
                                            optimize_generator,
                                            merged],
                                            feed_dict={x:real, z:noise})
            """
            _D_real, _loss_d, _ = sess.run(
                [D_real, loss_discriminator, optimize_discriminator],
                feed_dict={
                    x: real,
                    z: noise
                })
        noise = generate_random_normal_vector()
        _D_fake, _loss_g, _ = sess.run(
            [D_fake, loss_generator, optimize_generator], feed_dict={z: noise})
        if i % 100 == 0:
            print('%g th step' % i)
            print('D_real : %g' % np.mean(_D_real))
            print('D_fake : %g' % np.mean(_D_fake))
            print('loss_d', _loss_d)
            print('loss_g', _loss_g)
            print('G', np.mean(sess.run(G, feed_dict={z: noise})))

        if i % 1000 == 1:
            _global_step = sess.run(global_step)
            saver.save(sess,
                       checkpoint_dir + 'model.ckpt',
                       global_step=_global_step)

    #samples = sess.run([G], feed_dict={z:noise})
    #save_generated_samples(samples)
    real = get_next_batch(batch_size)
    noise = generate_random_normal_vector()
    #noise = generate_linspaced_vector()

    save_image = tf.summary.image('generated',
                                  tf.multiply(tf.add(G, 1), 127.5),
                                  max_outputs=batch_size)
    image_summary = sess.run(save_image, feed_dict={z: noise})
    writer.add_summary(image_summary)
    print('write generated samples')

    save_gt = tf.summary.image('ground_truth',
                               tf.multiply(tf.add(x_resized, 1), 127.5),
                               max_outputs=30)
    image_summary = sess.run(save_gt, feed_dict={x: real})
    writer.add_summary(image_summary)
    print('write ground truth')
Code Example #24
def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = contrib_framework.get_or_create_global_step()

        # Get images and labels for CIFAR-10.
        images, labels = cifar10.distorted_inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference(images)

        # Calculate loss.
        loss = cifar10.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = cifar10.train(loss, global_step)

        # Parse pruning hyperparameters
        pruning_hparams = pruning.get_pruning_hparams().parse(
            FLAGS.pruning_hparams)

        # Create a pruning object using the pruning hyperparameters
        pruning_obj = pruning.Pruning(pruning_hparams, global_step=global_step)

        # Use the pruning_obj to add ops to the training graph to update the masks
        # The conditional_mask_update_op will update the masks only when the
        # training step is in [begin_pruning_step, end_pruning_step] specified in
        # the pruning spec proto
        mask_update_op = pruning_obj.conditional_mask_update_op()

        # Use the pruning_obj to add summaries to the graph to track the sparsity
        # of each of the layers
        pruning_obj.add_pruning_summaries()

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""
            def begin(self):
                self._step = -1

            def before_run(self, run_context):
                self._step += 1
                self._start_time = time.time()
                return tf.train.SessionRunArgs(loss)  # Asks for loss value.

            def after_run(self, run_context, run_values):
                duration = time.time() - self._start_time
                loss_value = run_values.results
                if self._step % 10 == 0:
                    num_examples_per_step = 128
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)

                    format_str = (
                        '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
                    print(format_str %
                          (datetime.datetime.now(), self._step, loss_value,
                           examples_per_sec, sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(log_device_placement=FLAGS.
                                      log_device_placement)) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
                # Update the masks
                mon_sess.run(mask_update_op)
Code Example #25
        inputs = tf.nn.embedding_lookup(embedding, input_.input_data)
    if is_training and config.keep_prob < 1:
        inputs = tf.nn.dropout(inputs, keep_prob=config.keep_prob)

    outputs = []
    state = _initial_state  # the initial state is reset after every batch
    with tf.variable_scope("RNN"):
        for time_step in range(num_steps):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (cell_output, state) = cell(inputs[:, time_step, :], state)  # feed one time step at a time
            outputs.append(cell_output)
    output = tf.reshape(tf.concat(outputs, axis=1), [-1, hidden_size])

# Controlling the learning rate and clipping the gradients
_lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                  config.max_grad_norm)
optimizer = tf.train.GradientDescentOptimizer(_lr)
train_op = optimizer.apply_gradients(
    zip(grads, tvars), global_step=framework.get_or_create_global_step())

# Assign a new value to the learning rate
_new_lr = tf.placeholder(tf.float32, shape=[], name="new_lr")
_lr_update = tf.assign(_lr, _new_lr)


def assign_lr(sess, lr_value):
    sess.run(_lr_update, feed_dict={_new_lr: lr_value})
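
A hedged usage sketch for assign_lr above; sess, max_epochs, and the decay schedule constants are hypothetical:

for epoch in range(max_epochs):
    # Halve the learning rate after the first few epochs (illustrative schedule).
    lr_decay = 0.5 ** max(epoch + 1 - 4, 0.0)
    assign_lr(sess, 1.0 * lr_decay)
    # ... run one epoch of training here ...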
Code Example #26
def run():
    #Create the log directory here. It must be done here, otherwise importing this module would create it needlessly.
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    #======================= TRAINING PROCESS =========================
    #Now we start to construct the graph and build our model
    with tf.Graph().as_default() as graph:

        #"First create the dataset and load one batch"
        dataset = get_split('train', dataset_dir, file_pattern=file_pattern)
        images, _, labels = load_batch(dataset, batch_size=batch_size)
        #"Know the number of steps to take before decaying the learning rate and batches per epoch"
        print("dataset.num_samples", dataset.num_samples)
        num_batches_per_epoch = int(dataset.num_samples / batch_size)
        print("num_batches_per_epoch", num_batches_per_epoch)
        num_steps_per_epoch = num_batches_per_epoch  #Because one step is one batch processed
        decay_steps = int(num_epochs_before_decay * num_steps_per_epoch)
        print("decay_steps", decay_steps)

        #"Create the model inference"
        print("dataset.num_classes", dataset.num_classes)
        with slim.arg_scope(inception_resnet_v2_arg_scope()):
            logits, end_points = inception_resnet_v2(
                images, num_classes=dataset.num_classes, is_training=True)

        #"Define the scopes that you want to exclude for restoration"
        exclude = ['InceptionResnetV2/Logits', 'InceptionResnetV2/AuxLogits']
        variables_to_restore = slim.get_variables_to_restore(exclude=exclude)

        #"Perform one-hot-encoding of the labels (Try one-hot-encoding within the load_batch function!)"
        one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)

        #"Performs the equivalent to tf.nn.sparse_softmax_cross_entropy_with_logits but enhanced with checks"
        loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                               logits=logits)
        total_loss = tf.losses.get_total_loss(
        )  #obtain the regularization losses as well

        #"Create the global step for monitoring the learning_rate and training."
        global_step = get_or_create_global_step()

        #"Define your exponentially decaying learning rate"
        lr = tf.train.exponential_decay(learning_rate=initial_learning_rate,
                                        global_step=global_step,
                                        decay_steps=decay_steps,
                                        decay_rate=learning_rate_decay_factor,
                                        staircase=True)

        #Now we can define the optimizer that takes on the learning rate
        optimizer = tf.train.AdamOptimizer(learning_rate=lr)

        #"Create the train_op."
        train_op = slim.learning.create_train_op(total_loss, optimizer)

        #"Accuracy"
        predictions = tf.argmax(end_points['Predictions'], 1)
        probabilities = end_points['Predictions']
        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(
            predictions, labels)
        metrics_op = tf.group(accuracy_update, probabilities)

        #"Summaries"
        print("total_loss", total_loss)
        tf.summary.scalar('losses/Total_Loss', total_loss)
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('learning_rate', lr)
        my_summary_op = tf.summary.merge_all()

        def train_step(sess, train_op, global_step):
            #"training step function that runs both the train_op, metrics_op and updates the global_step concurrently."
            #"Simply runs a session for the three arguments provided and gives a logging on the time elapsed for each global step"
            #Check the time for each sess run
            start_time = time.time()
            total_loss, global_step_count, _ = sess.run(
                [train_op, global_step, metrics_op])
            time_elapsed = time.time() - start_time

            #Run the logging to print some results
            print('global step %s: loss: %.4f (%.2f sec/step)' %
                  (global_step_count, total_loss, time_elapsed))
            return total_loss, global_step_count

        #Saver function that restores the variables from a checkpoint file in a sess
        saver = tf.train.Saver(variables_to_restore)

        def restore_fn(sess):
            return saver.restore(sess, checkpoint_file)

        #"Define supervisor for running a managed session. Do not run the summary_op automatically or else it will consume too much memory"
        sv = tf.train.Supervisor(logdir=log_dir,
                                 summary_op=None,
                                 init_fn=restore_fn)
        print("restore checkpoint success")

        #Run the managed session
        with sv.managed_session() as sess:
            print("num_steps_per_epoch * num_epochs:",
                  num_steps_per_epoch * num_epochs)
            for step in range(num_steps_per_epoch * num_epochs):
                print("step:%s/%s" % (step, num_steps_per_epoch * num_epochs))
                #At the start of every epoch, show the vital information:
                if step % num_batches_per_epoch == 0:
                    print('Epoch %s/%s' %
                          (step // num_batches_per_epoch + 1, num_epochs))
                    learning_rate_value, accuracy_value = sess.run(
                        [lr, accuracy])

                    print('Current Learning Rate: %s' % learning_rate_value)

                    print('Current Streaming Accuracy: %s' % accuracy_value)

                    # optionally, print your logits and predictions for a sanity check that things are going fine.
                    logits_value, probabilities_value, predictions_value, labels_value = sess.run(
                        [logits, probabilities, predictions, labels])
                    print('predictions: \n', predictions_value)
                    print('Labels:\n', labels_value)

                #Log the summaries every 100 steps.
                if step % 100 == 0:
                    loss, _ = train_step(sess, train_op, sv.global_step)
                    summaries = sess.run(my_summary_op)
                    sv.summary_computed(sess, summaries)

                #If not, simply run the training step
                else:
                    loss, _ = train_step(sess, train_op, sv.global_step)

            #We log the final training loss and accuracy
            print('Final Loss: %s' % loss)
            sess_accuracy = sess.run(accuracy)
            print('Final Accuracy: %s' % sess_accuracy)

            #Once all the training has been done, save the log files and checkpoint model
            print('Finished training! Saving model to disk now.')
            # saver.save(sess, "./flowers_model.ckpt")
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
            print('Model saved to disk.')
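
The example calls a load_batch helper that is not shown. A minimal sketch of such a helper built on slim's DatasetDataProvider, returning the (images, raw_images, labels) triple that the call site unpacks; the image size and the resize-only preprocessing are assumptions:

import tensorflow as tf
slim = tf.contrib.slim

def load_batch(dataset, batch_size, height=299, width=299):
    # Read single examples from the slim dataset.
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset, common_queue_capacity=24 + 3 * batch_size, common_queue_min=24)
    raw_image, label = provider.get(['image', 'label'])

    # Resize-only preprocessing; a real pipeline would add augmentation here.
    image = tf.image.convert_image_dtype(raw_image, tf.float32)
    image = tf.image.resize_images(image, [height, width])
    raw_image = tf.image.resize_images(tf.to_float(raw_image), [height, width])

    # Batch the preprocessed image, the raw image, and the label together.
    images, raw_images, labels = tf.train.batch(
        [image, raw_image, label], batch_size=batch_size,
        num_threads=4, capacity=4 * batch_size, allow_smaller_final_batch=True)
    return images, raw_images, labels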
Code example #27
File: train.py Project: ravimeda/glas
def train_model(config):
    """ Train the model using the passed in config """
    training_devices = [
        graph_utils.device_fn(device)
        for device in graph_utils.collect_devices({'GPU': FLAGS.num_gpus})]
    assert training_devices, 'Found no training devices!'

    ###########################################################
    # Create the input pipeline
    ###########################################################
    with tf.device('/cpu:0'), tf.name_scope('input_pipeline'):
        dataset = input_utils.get_dataset(
            config.datadir, config.dataset, 'train',
            num_folds=config.fold_count, fold=config.fold, holdout=False)

        init_op, init_feed_dict, image = input_utils.get_data(
            config.dataset, dataset, config.batch_size,
            num_epochs=config.num_epochs,
            num_readers=config.num_readers)

        inputs_queue = input_utils.batch_images(
            image, config.batch_size,
            num_threads=config.num_preprocessing_threads,
            num_devices=len(training_devices))

    ###########################################################
    # Generate the model
    ###########################################################
    towers = graph_utils.create_towers(
        create_training_model, training_devices, config, inputs_queue, dataset)
    assert towers, 'No training towers were created!'

    ###########################################################
    # Setup the training objectives
    ###########################################################
    with tf.name_scope('training'):
        with tf.device('/cpu:0'):
            learning_rate_decay_step = config.learning_rate_decay_step / len(towers)
            learning_rate = tf.maximum(
                exponential_decay(
                    config.batch_size, learning_rate_decay_step,
                    config.learning_rate, config.learning_rate_decay, dataset),
                config.learning_rate_min, name='learning_rate')
            tf.add_to_collection(graph_utils.GraphKeys.TRAINING_PARAMETERS, learning_rate)

            optimizer = tf.train.AdamOptimizer(learning_rate)

        # Calculate gradients and total loss
        tower_klds, tower_losses, grads_and_vars = graph_utils.optimize_towers(
            optimizer, towers, clip_norm=config.clip)
        total_kld = tf.add_n(tower_klds, name='total_kld') if tower_klds else None
        total_loss = tf.add_n(tower_losses, name='total_loss')

        # Gather update ops from the first tower (for updating batch_norm for example)
        global_step = framework.get_or_create_global_step()
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, towers[0].scope)
        update_ops.append(optimizer.apply_gradients(grads_and_vars, global_step=global_step))

        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_op = tf.identity(total_loss, name='train_op')

    ###########################################################
    # Collect summaries
    ###########################################################
    with tf.device('/cpu:0'):
        summaries = []
        summaries.extend(learning.add_gradients_summaries(grads_and_vars))
        summaries.extend(layers.summarize_collection(tf.GraphKeys.MODEL_VARIABLES))
        summaries.extend(layers.summarize_collection(graph_utils.GraphKeys.METRICS))
        summaries.extend(layers.summarize_collection(graph_utils.GraphKeys.RNN_OUTPUTS))
        summaries.extend(layers.summarize_collection(graph_utils.GraphKeys.TRAINING_PARAMETERS))

        with tf.name_scope('losses'):
            if total_kld is not None:
                summaries.append(tf.summary.scalar('total_kld', total_kld))
            summaries.append(tf.summary.scalar('total_loss', total_loss))

            for loss in tower_losses:
                summaries.append(tf.summary.scalar(loss.op.name, loss))

            for loss in tf.losses.get_losses():
                summaries.append(tf.summary.scalar(loss.op.name, loss))

        summary_op = tf.summary.merge(summaries, name='summaries')

    ###########################################################
    # Begin training
    ###########################################################
    global_init_op = tf.global_variables_initializer()
    init_op = global_init_op if init_op is None else tf.group(global_init_op, init_op)
    session_config = tf.ConfigProto(
        allow_soft_placement=False,
        log_device_placement=FLAGS.log_device_placement)

    prefetch_queue_buffer = 2 * len(training_devices)
    number_of_steps = int(int(dataset.num_samples / config.batch_size) / len(training_devices))
    number_of_steps = number_of_steps * config.num_epochs - prefetch_queue_buffer

    tf.logging.info('Running %s steps', number_of_steps)
    learning.train(
        train_op, FLAGS.log_dir, session_config=session_config,
        global_step=global_step, number_of_steps=number_of_steps,
        init_op=init_op, init_feed_dict=init_feed_dict,
        save_interval_secs=config.checkpoint_frequency,
        summary_op=summary_op, save_summaries_secs=config.summary_frequency,
        trace_every_n_steps=config.trace_frequency if config.trace_frequency > 0 else None)
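
The exponential_decay helper used in the learning-rate definition is project-specific and not shown; a plausible sketch, assuming its second argument is an epoch-based decay interval that gets converted into optimizer steps via the dataset size:

import tensorflow as tf
from tensorflow.contrib import framework

def exponential_decay(batch_size, decay_step, learning_rate, decay_rate, dataset):
    # Sketch only: convert the (assumed) epoch-based decay interval into a
    # step count and delegate to tf.train.exponential_decay.
    steps_per_epoch = max(dataset.num_samples // batch_size, 1)
    decay_steps = max(int(decay_step * steps_per_epoch), 1)
    return tf.train.exponential_decay(
        learning_rate, framework.get_or_create_global_step(),
        decay_steps, decay_rate, staircase=True)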
Code example #28
def main():
    data_path = args.data
    vocab_path = args.vocab
    save_dir = args.save_dir
    word_dim = args.word_dim
    sentence_dim = args.sentence_dim
    omit_prob = args.omit_prob
    swap_prob = args.swap_prob
    config_path = args.config
    batch_size = args.batch_size
    max_epoch = args.max_epoch
    max_length = args.max_length

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Check whether all needed options are given
    if config_path is not None:
        assert (word_dim is None and sentence_dim is None and omit_prob is None
                and swap_prob is None), (
                    'Model hyperparameter options must not be provided when '
                    'the "config" option is given.')
        config = ModelConfig.load(config_path)
    else:
        assert not (
            word_dim is None or sentence_dim is None or omit_prob is None
            or swap_prob is None), (
                'All model hyperparameter options must be provided when '
                'the "config" option is not given.')
        config = ModelConfig(word_dim=word_dim,
                             sentence_dim=sentence_dim,
                             omit_prob=omit_prob,
                             swap_prob=swap_prob)
        config_path = os.path.join(save_dir, 'config.ini')
        config.save(config_path)

    logging.info('Initializing the data generator...')
    data_generator = DataGenerator(data_path=data_path,
                                   vocab_path=vocab_path,
                                   eos_symbol='<EOS>',
                                   unk_symbol='<UNK>',
                                   omit_prob=config.omit_prob,
                                   swap_prob=config.swap_prob,
                                   batch_size=batch_size,
                                   max_length=max_length,
                                   max_epoch=max_epoch)
    with tf.Graph().as_default() as graph:
        with tf.Session() as sess:
            logging.info('Building the model...')
            # Placeholders
            inputs = tf.placeholder(dtype=tf.int32,
                                    shape=[None, None],
                                    name='inputs')
            inputs_length = tf.placeholder(dtype=tf.int32,
                                           shape=[None],
                                           name='inputs_length')
            targets = tf.placeholder(dtype=tf.int32,
                                     shape=[None, None],
                                     name='targets')
            targets_length = tf.placeholder(dtype=tf.int32,
                                            shape=[None],
                                            name='targets_length')

            vocab_size = len(data_generator.vocab)
            embeddings = tf.get_variable(name='embeddings',
                                         shape=[vocab_size, config.word_dim],
                                         dtype=tf.float32)

            with tf.variable_scope('decoder'):
                with tf.variable_scope('output') as output_scope:
                    # This variable-scope trick ensures that output_fn has a
                    # proper scope regardless of the caller's scope.
                    def output_fn(cell_outputs):
                        return layers.fully_connected(inputs=cell_outputs,
                                                      num_outputs=vocab_size,
                                                      activation_fn=None,
                                                      scope=output_scope)

            rnn_cell = rnn.GRUBlockCell(config.sentence_dim)
            encoder_state = sae.encode(cell=rnn_cell,
                                       embeddings=embeddings,
                                       inputs=inputs,
                                       inputs_length=inputs_length,
                                       scope='encoder')
            decoder_outputs = sae.decode_train(cell=rnn_cell,
                                               embeddings=embeddings,
                                               encoder_state=encoder_state,
                                               targets=targets[:, :-1],
                                               targets_length=targets_length - 1,
                                               scope='decoder')
            generated = sae.decode_inference(
                cell=rnn_cell,
                embeddings=embeddings,
                encoder_state=encoder_state,
                output_fn=output_fn,
                vocab_size=vocab_size,
                bos_id=data_generator.vocab['<EOS>'],
                eos_id=data_generator.vocab['<EOS>'],
                max_length=max_length,
                scope='decoder',
                reuse=True)
            loss = sae.loss(decoder_outputs=decoder_outputs,
                            output_fn=output_fn,
                            targets=targets[:, 1:],
                            targets_length=targets_length - 1)

            global_step = get_or_create_global_step()
            train_op = slim.optimize_loss(loss=loss,
                                          global_step=global_step,
                                          learning_rate=None,
                                          optimizer=tf.train.AdamOptimizer(),
                                          clip_gradients=5.0)

            summary_writer = tf.summary.FileWriter(
                logdir=os.path.join(save_dir, 'log'), graph=graph)
            summary = tf.summary.merge_all()

            tf.get_variable_scope().set_initializer(
                tf.random_normal_initializer(mean=0.0, stddev=0.01))
            tf.global_variables_initializer().run()

            saver = tf.train.Saver(max_to_keep=20)

            logging.info('Training starts!')
            for data_batch in data_generator:
                (inputs_v, inputs_length_v, targets_v,
                 targets_length_v) = data_batch
                summary_v, global_step_v, _ = sess.run(
                    fetches=[summary, global_step, train_op],
                    feed_dict={
                        inputs: inputs_v,
                        inputs_length: inputs_length_v,
                        targets: targets_v,
                        targets_length: targets_length_v
                    })
                summary_writer.add_summary(summary=summary_v,
                                           global_step=global_step_v)
                if global_step_v % 100 == 0:
                    logging.info('{} Iter #{}, Epoch {:.2f}'.format(
                        datetime.now(), global_step_v,
                        data_generator.progress))
                    num_samples = 2
                    (inputs_sample_v, inputs_length_sample_v, targets_sample_v,
                     targets_length_sample_v) = (
                         data_generator.sample(num_samples))
                    generated_v = sess.run(fetches=generated,
                                           feed_dict={
                                               inputs:
                                               inputs_sample_v,
                                               inputs_length:
                                               inputs_length_sample_v
                                           })
                    for i in range(num_samples):
                        logging.info('-' * 60)
                        logging.info('Sample #{}'.format(i))
                        inputs_sample_words = data_generator.ids_to_words(
                            inputs_sample_v[i][:inputs_length_sample_v[i]])
                        targets_sample_words = data_generator.ids_to_words(
                            targets_sample_v[i][1:targets_length_sample_v[i]])
                        generated_words = data_generator.ids_to_words(
                            generated_v[i])
                        if '<EOS>' in generated_words:
                            eos_index = generated_words.index('<EOS>')
                            generated_words = generated_words[:eos_index + 1]
                        logging.info('Input: {}'.format(
                            ' '.join(inputs_sample_words)))
                        logging.info('Target: {}'.format(
                            ' '.join(targets_sample_words)))
                        logging.info('Generated: {}'.format(
                            ' '.join(generated_words)))
                    logging.info('-' * 60)

                if global_step_v % 500 == 0:
                    save_path = os.path.join(save_dir, 'model.ckpt')
                    real_save_path = saver.save(sess=sess,
                                                save_path=save_path,
                                                global_step=global_step_v)
                    logging.info(
                        'Saved the checkpoint to: {}'.format(real_save_path))
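
The DataGenerator feeds padded id matrices together with their lengths; a minimal numpy sketch of how such a (batch, lengths) pair could be assembled from variable-length id sequences (the helper name and padding id are illustrative):

import numpy as np

def pad_batch(sequences, pad_id=0):
    # Pad every sequence to the length of the longest one and record lengths,
    # mirroring the (inputs, inputs_length) / (targets, targets_length) pairs fed above.
    lengths = np.array([len(seq) for seq in sequences], dtype=np.int32)
    batch = np.full((len(sequences), int(lengths.max())), pad_id, dtype=np.int32)
    for row, seq in enumerate(sequences):
        batch[row, :len(seq)] = seq
    return batch, lengths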
Code example #29
File: a3c.py Project: scturtle/rl
    def get_train_global_op(self, global_network):
        assert len(self.gradients) == len(global_network.vars)
        return global_network.optimizer.apply_gradients(
            zip(self.gradients, global_network.vars),
            global_step=get_or_create_global_step())
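
The method assumes that self.gradients was computed beforehand from the worker's local loss; a minimal sketch of that step (the function name, argument names, and clip value are illustrative assumptions):

import tensorflow as tf

def compute_worker_gradients(loss, local_vars, clip_norm=40.0):
    # Differentiate the worker's loss w.r.t. its local variables and clip by
    # global norm before the gradients are applied to the global network.
    grads = tf.gradients(loss, local_vars)
    clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm)
    return clipped_grads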
Code example #30
    def __init__(self, is_training, config, input_):
        self._input = input_
        batch_size = input_.batch_size
        num_steps = input_.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size

        def lstm_cell():
            return rnn.BasicLSTMCell(size,
                                     forget_bias=0.0,
                                     state_is_tuple=True)

        attn_cell = lstm_cell
        if is_training and config.keep_prob < 1:

            def attn_cell():
                return rnn.DropoutWrapper(lstm_cell(),
                                          output_keep_prob=config.keep_prob)

        cell = rnn.MultiRNNCell(
            [attn_cell() for _ in range(config.num_layers)],
            state_is_tuple=True)
        self._initial_state = cell.zero_state(batch_size, tf.float32)
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, size],
                                        dtype=tf.float32)
            inputs = tf.nn.embedding_lookup(embedding, input_.input_data)
        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)
        outputs = []
        state = self._initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                if time_step > 0: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(inputs[:, time_step, :], state)
                outputs.append(cell_output)
        output = tf.reshape(tf.concat(outputs, 1), [-1, size])
        softmax_w = tf.get_variable("softmax_w", [size, vocab_size],
                                    dtype=tf.float32)
        softmax_b = tf.get_variable("softmax_b", [vocab_size],
                                    dtype=tf.float32)
        logits = tf.matmul(output, softmax_w) + softmax_b
        loss = seq2seq.sequence_loss_by_example(
            [logits], [tf.reshape(input_.targets, [-1])],
            [tf.ones([batch_size * num_steps], dtype=tf.float32)])
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = state
        if not is_training:
            return
        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self._lr)
        self._train_op = optimizer.apply_gradients(
            zip(grads, tvars),
            global_step=framework.get_or_create_global_step())
        self._new_lr = tf.placeholder(tf.float32,
                                      shape=[],
                                      name="new_learning_rate")
        self._lr_update = tf.assign(self._lr, self._new_lr)