Example #1
File: model.py Project: zxgineng/deepnlp
 def _build_train_op(self):
     global_step = tf.train.get_global_step()
     self.train_op = slim.optimize_loss(
         self.loss,
         global_step,
         optimizer=tf.train.MomentumOptimizer(Config.train.initial_lr, 0.9),
         learning_rate=Config.train.initial_lr)
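All of the examples on this page use the same pattern: optimize_loss (defined in tf.contrib.layers and reachable through the slim alias these projects use) wires a loss tensor, the global step, a learning rate, and an optimizer into a single train_op. A minimal, self-contained TF 1.x sketch of that pattern; the toy model and values are illustrative and not taken from any project above:

import tensorflow as tf
import tensorflow.contrib.slim as slim

x = tf.placeholder(tf.float32, [None, 1])
y = tf.placeholder(tf.float32, [None, 1])
w = tf.get_variable('w', [1, 1])
loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - y))

global_step = tf.train.get_or_create_global_step()
train_op = slim.optimize_loss(
    loss,
    global_step,
    learning_rate=0.01,   # illustrative value
    optimizer='SGD')      # accepts an optimizer name, an Optimizer instance, or a class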
Example #2
File: model.py Project: zxgineng/deepcv
 def _build_optimizer(self):
     global_step = tf.train.get_global_step()
     self.train_op = slim.optimize_loss(
         self.loss, global_step,
         optimizer='Adam',
         learning_rate=Config.train.learning_rate,
         summaries=['loss'],
         variables=tf.trainable_variables('generation'),
         name="train_op")
Example #3
    def _build_train_op(self):
        global_step = tf.train.get_global_step()
        learning_rate = Config.train.initial_lr / (Config.train.decay_rate * Config.train.epoch + 1)

        self.train_op = slim.optimize_loss(
            self.loss, global_step,
            optimizer=tf.train.AdagradOptimizer(learning_rate),
            learning_rate=learning_rate,
            clip_gradients=Config.train.max_gradient_norm)
Example #4
File: model.py Project: zxgineng/deepnlp
 def _build_train_op(self):
     global_step = tf.train.get_global_step()
     learning_rate = Config.train.learning_rate * (
         Config.train.learning_decay_rate**Config.train.epoch)
     self.train_op = slim.optimize_loss(
         self.loss,
         global_step,
         optimizer='Adam',
         learning_rate=learning_rate,
         clip_gradients=Config.train.max_gradient_norm)
Example #5
File: model.py Project: zxgineng/deepnlp
    def _build_train_op(self):
        global_step = tf.train.get_global_step()
        learning_rate = Config.train.initial_lr / (
            1 + Config.train.lr_decay * Config.train.epoch)

        self.train_op = slim.optimize_loss(
            self.loss,
            global_step,
            optimizer=tf.train.MomentumOptimizer(learning_rate, 0.9),
            learning_rate=learning_rate,
            clip_gradients=Config.train.max_gradient_norm)
Example #6
File: model.py Project: zxgineng/deepnlp
    def _build_train_op(self):

        def clip_gradient(grads_and_vars):
            clipped = [(tf.clip_by_norm(grad, Config.train.clip_gradients), var) for grad, var in grads_and_vars]
            return clipped

        global_step = tf.train.get_global_step()
        self.train_op = slim.optimize_loss(
            self.loss, global_step,
            optimizer=tf.train.AdadeltaOptimizer(1.0, epsilon=1e-6),
            learning_rate=1.0,
            clip_gradients=clip_gradient)
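A note on the example above: optimize_loss accepts either a float or a callable for clip_gradients. A callable receives the list of (gradient, variable) pairs and must return a list of the same form, which is what the per-tensor clip_by_norm wrapper here provides; a plain float instead clips all gradients jointly by global norm. The same call with the float form, where 5.0 is only an illustrative threshold:

        self.train_op = slim.optimize_loss(
            self.loss, global_step,
            optimizer=tf.train.AdadeltaOptimizer(1.0, epsilon=1e-6),
            learning_rate=1.0,
            clip_gradients=5.0)  # float -> global-norm clipping of all gradients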
Example #7
    def _build_train_op(self):
        global_step = tf.train.get_global_step()
        learning_rate = Config.train.initial_lr * tf.pow(
            0.75, tf.cast(global_step // 5000, tf.float32))

        self.train_op = slim.optimize_loss(
            self.loss,
            global_step,
            optimizer=tf.train.AdamOptimizer(learning_rate,
                                             beta1=0.9,
                                             beta2=0.9),
            learning_rate=learning_rate,
            clip_gradients=Config.train.max_gradient_norm)
Example #8
    def _build_train_op(self):
        global_step = tf.train.get_global_step()

        if Config.train.epoch <= 10:
            learning_rate = Config.train.initial_lr
        else:
            learning_rate = Config.train.initial_lr * 0.1

        self.train_op = slim.optimize_loss(
            self.loss, global_step,
            optimizer=tf.train.MomentumOptimizer(learning_rate, 0.9),
            learning_rate=learning_rate,
            clip_gradients=Config.train.max_gradient_norm)
Example #9
File: model.py Project: zxgineng/deepnlp
    def _build_train_op(self):
        global_step = tf.train.get_global_step()

        if Config.train.epoch < Config.train.max_epoch * 0.8:
            learning_rate = Config.train.learning_rate
        else:
            learning_rate = Config.train.learning_rate * 0.1

        self.train_op = slim.optimize_loss(
            self.loss,
            global_step,
            optimizer=tf.train.MomentumOptimizer(learning_rate, 0.9),
            learning_rate=learning_rate)
Example #10
File: model.py Project: zxgineng/deepcv
 def _build_optimizer(self):
     global_step = tf.train.get_global_step()
     learning_rate = tf.train.exponential_decay(
         Config.train.learning_rate, global_step,
         Config.train.learning_decay_steps,
         Config.train.learning_decay_rate)
     self.train_op = slim.optimize_loss(self.loss,
                                        global_step,
                                        optimizer=Config.train.get(
                                            'optimizer', 'Adam'),
                                        learning_rate=learning_rate,
                                        summaries=['loss'],
                                        name="train_op")
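For reference, tf.train.exponential_decay used above computes learning_rate * decay_rate ** (global_step / decay_steps); passing staircase=True switches to integer division so the rate drops in discrete steps. An illustrative call with placeholder numbers rather than this project's configuration:

global_step = tf.train.get_or_create_global_step()
learning_rate = tf.train.exponential_decay(
    0.001, global_step,
    decay_steps=1000, decay_rate=0.96,
    staircase=True)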
Example #11
File: model.py Project: zxgineng/deepnlp
    def _build_train_op(self):
        global_step = tf.train.get_global_step()

        learning_rate = Config.train.learning_rate * tf.pow(
            Config.train.learning_decay_rate,
            tf.cast(Config.train.epoch, tf.float32))

        self.train_op = slim.optimize_loss(
            self.loss,
            global_step,
            optimizer=tf.train.MomentumOptimizer(learning_rate, 0.9),
            learning_rate=learning_rate,
            clip_gradients=Config.train.max_gradient_norm)
Example #12
File: model.py Project: zxgineng/deepnlp
    def _build_train_op(self):
        global_step = tf.train.get_global_step()
        if Config.train.switch_optimizer == 0:
            print('using Adadelta')
            optimizer = tf.train.AdadeltaOptimizer(Config.train.initial_lr)
            lr = Config.train.initial_lr
        else:
            print('using SGD')
            optimizer = tf.train.GradientDescentOptimizer(Config.train.sgd_lr)
            lr = Config.train.sgd_lr

        self.train_op = slim.optimize_loss(self.loss,
                                           global_step,
                                           optimizer=optimizer,
                                           learning_rate=lr)
Example #13
def cnn_model(learning_rate):
    with tf.Graph().as_default() as graph:
        mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
        x = tf.placeholder(tf.float32, shape=[None, 784])
        y = tf.placeholder(tf.float32, shape=[None, 10])
        x_train = tf.reshape(x, [-1, 28, 28, 1])
        tf.summary.image('input', x_train, 10)

        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            normalizer_fn=slim.batch_norm,
                            activation_fn=tf.nn.relu):
            with slim.arg_scope([slim.max_pool2d], padding='SAME'):
                conv1 = slim.conv2d(x_train, 32, [5, 5])
                conv_vars = tf.get_collection(tf.GraphKeys.MODEL_VARIABLES,
                                              'Conv')
                tf.summary.histogram('conv_weights', conv_vars[0])
                pool1 = slim.max_pool2d(conv1, [2, 2])
                conv2 = slim.conv2d(pool1, 64, [5, 5])
                pool2 = slim.max_pool2d(conv2, [2, 2])
                flatten = slim.flatten(pool2)
                fc = slim.fully_connected(flatten, 1024)

        logits = slim.fully_connected(fc, 10, activation_fn=None)
        softmax = tf.nn.softmax(logits, name='output')

        with tf.name_scope('loss'):
            loss = slim.losses.softmax_cross_entropy(logits, y)
            tf.summary.scalar('loss', loss)

        train_op = slim.optimize_loss(loss,
                                      slim.get_or_create_global_step(),
                                      learning_rate=learning_rate,
                                      optimizer='Adam')

        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(logits, 1),
                                          tf.argmax(y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            tf.summary.scalar('accuracy', accuracy)
        summary = tf.summary.merge_all()

    return {
        'x': x,
        'y': y,
        'accuracy': accuracy,
        'summary': summary,
        'mnist': mnist
    }, train_op, graph
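A minimal training-loop sketch for the tensors returned by cnn_model above; this usage is an assumption for illustration, not part of the original snippet:

tensors, train_op, graph = cnn_model(learning_rate=1e-3)
with graph.as_default(), tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(1000):
        batch_x, batch_y = tensors['mnist'].train.next_batch(64)
        sess.run(train_op, feed_dict={tensors['x']: batch_x,
                                      tensors['y']: batch_y})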
Example #14
    def build_model(self):
        config = self.config
        data_generator = self.data_generator
        logging.info('Building the model...')
        # Placeholders
        self.inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name='inputs')
        self.inputs_length = tf.placeholder(dtype=tf.int32, shape=[None], name='inputs_length')
        self.targets = tf.placeholder(dtype=tf.int32, shape=[None, None], name='targets')
        self.targets_length = tf.placeholder(dtype=tf.int32, shape=[None], name='targets_length')

        vocab_size = len(data_generator.vocab)
        embeddings = tf.get_variable(name='embeddings', shape=[vocab_size, config.word_dim], dtype=tf.float32)

        with tf.variable_scope('decoder'):
            with tf.variable_scope('output') as output_scope:
                # This variable-scope trick ensures that output_fn gets a
                # proper scope regardless of the caller's scope.
                def output_fn(cell_outputs):
                    return layers.fully_connected(inputs=cell_outputs, num_outputs=vocab_size, activation_fn=None,
                        scope=output_scope)

        self.rnn_cell = rnn.GRUBlockCell(config.sentence_dim)
        self.encoder_state = self.encode(cell=self.rnn_cell, embeddings=embeddings, inputs=self.inputs, inputs_length=self.inputs_length,
            scope='encoder')
        self.decoder_outputs = self.decode_train(cell=self.rnn_cell, embeddings=embeddings, encoder_state=self.encoder_state,
            targets=self.targets[:, :-1], targets_length=self.targets_length - 1, scope='decoder')
        self.generated = self.decode_inference(cell=self.rnn_cell, embeddings=embeddings, encoder_state=self.encoder_state,
            output_fn=output_fn, vocab_size=vocab_size, bos_id=data_generator.vocab['<EOS>'],
            eos_id=data_generator.vocab['<EOS>'], max_length=config.max_length, scope='decoder', reuse=True)
        self.loss = self.loss(decoder_outputs=self.decoder_outputs, output_fn=output_fn, targets=self.targets[:, 1:],
                        targets_length=self.targets_length - 1)

        self.global_step = get_or_create_global_step()
        self.train_op = slim.optimize_loss(loss=self.loss, global_step=self.global_step, learning_rate=None,
            optimizer=tf.train.AdamOptimizer(), clip_gradients=5.0)

        self.summary_writer = tf.summary.FileWriter(logdir=os.path.join(config.save_dir, 'log'))
        self.summary = tf.summary.merge_all()

        tf.get_variable_scope().set_initializer(tf.random_normal_initializer(mean=0.0, stddev=0.01))
        tf.global_variables_initializer().run()

        self.saver = tf.train.Saver(max_to_keep=20)
Example #15
File: TFRecod.py Project: chsl/test
def model(image, label):
    net = slim.conv2d(image, 48, [5, 5], scope='conv1')
    net = slim.max_pool2d(net, [2, 2], scope='pool1')
    net = slim.conv2d(net, 96, [5, 5], scope='conv2')
    net = slim.max_pool2d(net, [2, 2], scope='pool2')
    net = slim.flatten(net, scope='flatten')
    net = slim.fully_connected(net, 512, scope='fully_connected1')
    logits = slim.fully_connected(net,
                                  10,
                                  activation_fn=None,
                                  scope='fully_connected2')

    prob = slim.softmax(logits)
    loss = slim.losses.softmax_cross_entropy(logits, label)

    train_op = slim.optimize_loss(loss,
                                  slim.get_global_step(),
                                  learning_rate=0.001,
                                  optimizer='Adam')

    return train_op
Example #16
        def model_fn(features, labels, mode, params=None, config=None):
            train_op = None
            loss = None
            eval_metrics = None
            predictions = None
            if mode == ModeKeys.TRAIN:
                transformer_model = TransformerModule(params=self.model_params)
                step = slim.get_or_create_global_step()
                loss = transformer_model(features)
                train_op = slim.optimize_loss(loss=loss,
                                              global_step=step,
                                              learning_rate=self.training_params["learning_rate"],
                                              clip_gradients=self.training_params["clip_gradients"],
                                              optimizer=params["optimizer"],
                                              summaries=slim.OPTIMIZER_SUMMARIES
                                              )
            elif mode == ModeKeys.PREDICT:
                raise NotImplementedError
            elif mode == ModeKeys.EVAL:
                transformer_model = TransformerModule(params=self.model_params)
                loss = transformer_model(features)

            return EstimatorSpec(train_op=train_op, loss=loss, eval_metric_ops=eval_metrics, predictions=predictions,
                                 mode=mode)
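A model_fn like the one above is normally handed to a tf.estimator.Estimator; a hedged wiring sketch in which model_dir, the params dict, and train_input_fn are illustrative assumptions rather than part of the original code:

estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir='/tmp/transformer_model',   # hypothetical directory
    params={'optimizer': 'Adam'})         # read by model_fn above as params["optimizer"]
estimator.train(input_fn=train_input_fn, steps=10000)  # train_input_fn assumed to exist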
Example #17
def main():
    data_path = args.data
    vocab_path = args.vocab
    save_dir = args.save_dir
    word_dim = args.word_dim
    sentence_dim = args.sentence_dim
    omit_prob = args.omit_prob
    swap_prob = args.swap_prob
    config_path = args.config
    batch_size = args.batch_size
    max_epoch = args.max_epoch
    max_length = args.max_length

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Check whether all needed options are given
    if config_path is not None:
        assert (word_dim is None and sentence_dim is None and omit_prob is None
                and swap_prob is None), (
                    'Model hyperparameter options must not be provided when '
                    'the "config" option is given.')
        config = ModelConfig.load(config_path)
    else:
        assert not (
            word_dim is None or sentence_dim is None or omit_prob is None
            or swap_prob is None), (
                'All model hyperparameter options must be provided when '
                'the "config" option is not given.')
        config = ModelConfig(word_dim=word_dim,
                             sentence_dim=sentence_dim,
                             omit_prob=omit_prob,
                             swap_prob=swap_prob)
        config_path = os.path.join(save_dir, 'config.ini')
        config.save(config_path)

    logging.info('Initializing the data generator...')
    data_generator = DataGenerator(data_path=data_path,
                                   vocab_path=vocab_path,
                                   eos_symbol='<EOS>',
                                   unk_symbol='<UNK>',
                                   omit_prob=config.omit_prob,
                                   swap_prob=config.swap_prob,
                                   batch_size=batch_size,
                                   max_length=max_length,
                                   max_epoch=max_epoch)
    with tf.Graph().as_default() as graph:
        with tf.Session() as sess:
            logging.info('Building the model...')
            # Placeholders
            inputs = tf.placeholder(dtype=tf.int32,
                                    shape=[None, None],
                                    name='inputs')
            inputs_length = tf.placeholder(dtype=tf.int32,
                                           shape=[None],
                                           name='inputs_length')
            targets = tf.placeholder(dtype=tf.int32,
                                     shape=[None, None],
                                     name='targets')
            targets_length = tf.placeholder(dtype=tf.int32,
                                            shape=[None],
                                            name='targets_length')

            vocab_size = len(data_generator.vocab)
            embeddings = tf.get_variable(name='embeddings',
                                         shape=[vocab_size, config.word_dim],
                                         dtype=tf.float32)

            with tf.variable_scope('decoder'):
                with tf.variable_scope('output') as output_scope:
                    # This variable-scope trick ensures that output_fn gets a
                    # proper scope regardless of the caller's scope.
                    def output_fn(cell_outputs):
                        return layers.fully_connected(inputs=cell_outputs,
                                                      num_outputs=vocab_size,
                                                      activation_fn=None,
                                                      scope=output_scope)

            rnn_cell = rnn.GRUBlockCell(config.sentence_dim)
            encoder_state = sae.encode(cell=rnn_cell,
                                       embeddings=embeddings,
                                       inputs=inputs,
                                       inputs_length=inputs_length,
                                       scope='encoder')
            decoder_outputs = sae.decode_train(cell=rnn_cell,
                                               embeddings=embeddings,
                                               encoder_state=encoder_state,
                                               targets=targets[:, :-1],
                                               targets_length=targets_length - 1,
                                               scope='decoder')
            generated = sae.decode_inference(
                cell=rnn_cell,
                embeddings=embeddings,
                encoder_state=encoder_state,
                output_fn=output_fn,
                vocab_size=vocab_size,
                bos_id=data_generator.vocab['<EOS>'],
                eos_id=data_generator.vocab['<EOS>'],
                max_length=max_length,
                scope='decoder',
                reuse=True)
            loss = sae.loss(decoder_outputs=decoder_outputs,
                            output_fn=output_fn,
                            targets=targets[:, 1:],
                            targets_length=targets_length - 1)

            global_step = get_or_create_global_step()
            train_op = slim.optimize_loss(loss=loss,
                                          global_step=global_step,
                                          learning_rate=None,
                                          optimizer=tf.train.AdamOptimizer(),
                                          clip_gradients=5.0)

            summary_writer = tf.summary.FileWriter(logdir=os.path.join(
                save_dir, 'log'),
                                                   graph=graph)
            summary = tf.summary.merge_all()

            tf.get_variable_scope().set_initializer(
                tf.random_normal_initializer(mean=0.0, stddev=0.01))
            tf.global_variables_initializer().run()

            saver = tf.train.Saver(max_to_keep=20)

            logging.info('Training starts!')
            for data_batch in data_generator:
                (inputs_v, inputs_length_v, targets_v,
                 targets_length_v) = data_batch
                summary_v, global_step_v, _ = sess.run(
                    fetches=[summary, global_step, train_op],
                    feed_dict={
                        inputs: inputs_v,
                        inputs_length: inputs_length_v,
                        targets: targets_v,
                        targets_length: targets_length_v
                    })
                summary_writer.add_summary(summary=summary_v,
                                           global_step=global_step_v)
                if global_step_v % 100 == 0:
                    logging.info('{} Iter #{}, Epoch {:.2f}'.format(
                        datetime.now(), global_step_v,
                        data_generator.progress))
                    num_samples = 2
                    (inputs_sample_v, inputs_length_sample_v, targets_sample_v,
                     targets_length_sample_v) = (
                         data_generator.sample(num_samples))
                    generated_v = sess.run(fetches=generated,
                                           feed_dict={
                                               inputs:
                                               inputs_sample_v,
                                               inputs_length:
                                               inputs_length_sample_v
                                           })
                    for i in range(num_samples):
                        logging.info('-' * 60)
                        logging.info('Sample #{}'.format(i))
                        inputs_sample_words = data_generator.ids_to_words(
                            inputs_sample_v[i][:inputs_length_sample_v[i]])
                        targets_sample_words = data_generator.ids_to_words(
                            targets_sample_v[i][1:targets_length_sample_v[i]])
                        generated_words = data_generator.ids_to_words(
                            generated_v[i])
                        if '<EOS>' in generated_words:
                            eos_index = generated_words.index('<EOS>')
                            generated_words = generated_words[:eos_index + 1]
                        logging.info('Input: {}'.format(
                            ' '.join(inputs_sample_words)))
                        logging.info('Target: {}'.format(
                            ' '.join(targets_sample_words)))
                        logging.info('Generated: {}'.format(
                            ' '.join(generated_words)))
                    logging.info('-' * 60)

                if global_step_v % 500 == 0:
                    save_path = os.path.join(save_dir, 'model.ckpt')
                    real_save_path = saver.save(sess=sess,
                                                save_path=save_path,
                                                global_step=global_step_v)
                    logging.info(
                        'Saved the checkpoint to: {}'.format(real_save_path))
Example #18
def main(args):
    print("Starting training with parameters:", vars(args))

    # Load the data
    print("Loading data...")
    timestamp = time()

    # load different aligned SSMs
    multiple_ssms_data = [
        load_ssm_string(args.data),
        #load_ssm_phonetics(args.data),
    ]
    channels = len(multiple_ssms_data)
    print("Found", channels, "SSM channels")

    segment_borders = load_segment_borders(args.data)

    # ADDITIONAL FEATURES FOR AFTER CONVOLUTION
    # token_count_feat = load_linewise_feature(args.data, 'token_count')
    # token_count_feat.set_index(['id'], inplace=True)
    # syllable_count_feat = load_linewise_feature(args.data, 'syllable_count')
    # syllable_count_feat.set_index(['id'], inplace=True)
    char_count_feat = load_linewise_feature(args.data, 'char_count')
    char_count_feat.set_index(['id'], inplace=True)

    if not args.genre:
        train_borders, test_borders = load_segment_borders_watanabe(args.data)
    else:
        # load train/test for some genre only (training is always on whole Watanabe train set)
        train_borders, test_borders = load_segment_borders_for_genre(
            args.data, args.genre)

    train_borders_set = set(train_borders.id)
    test_borders_set = set(test_borders.id)
    train_test_borders_set = train_borders_set.union(test_borders_set)
    print("Done in %.1fs" % tdiff(timestamp))

    # Figure out the maximum ssm size
    print("Gathering dataset statistics...")
    timestamp = time()
    max_ssm_size = 0
    counter = 0
    for ssm_obj in multiple_ssms_data[0].itertuples():
        current_id = ssm_obj.id
        # skip ids not in training or test
        if not current_id in train_test_borders_set:
            continue

        counter += 1
        max_ssm_size = max(max_ssm_size, ssm_obj.ssm.shape[0])
    print("Done in %.1fs (%.2fk items, max ssm size: %d)" %
          (tdiff(timestamp), k(counter), max_ssm_size))

    # Producing training set
    train_buckets = dict()
    test_buckets = dict()
    print("Producing training set...")
    counter = 0
    filtered = 0
    timestamp = time()
    max_ssm_size = min(max_ssm_size, args.max_ssm_size)

    #allow indexed access to dataframes
    for elem in multiple_ssms_data:
        elem.set_index(['id'], inplace=True)

    for borders_obj in segment_borders.itertuples():
        counter += 1

        # temp. speedup for debugging
        #if counter % 100 != 0:
        #    continue

        current_id = borders_obj.id

        #skip ids not in training or test
        if not current_id in train_test_borders_set:
            continue

        ssm_elems = []
        for single_ssm in multiple_ssms_data:
            ssm_elems.append(single_ssm.loc[current_id].ssm)

        ssm_size = ssm_elems[0].shape[0]

        # Reporting
        if counter % 10000 == 0:
            print("  processed %3.0fk items (%4.1fs, filt.: %4.1fk)" %
                  (k(counter), tdiff(timestamp), k(filtered)))
            timestamp = time()

        # Filter out too small or too large ssm
        if ssm_size < args.min_ssm_size or ssm_size > args.max_ssm_size:
            filtered += 1
            continue

        # Sentences are grouped into buckets to improve performance
        bucket_size = ssm_size
        if not args.buckets:
            bucket_size = max_ssm_size
        bucket_id = int(math.ceil(math.log2(bucket_size)))

        # one tensor for one song
        ssm_tensor = tensor_from_multiple_ssms(ssm_elems, 2**bucket_id,
                                               args.window_size)
        # concatenate all added features at axis=1 here
        added_features = np.concatenate(
            (
                #window_features(token_count_feat.loc[current_id].feat_val),
                #window_features(syllable_count_feat.loc[current_id].feat_val),
                window_features(char_count_feat.loc[current_id].feat_val), ),
            axis=1)
        added_feats_count = added_features.shape[1]

        ssm_labels = labels_from_label_array(borders_obj.borders, ssm_size)

        # fill train/test buckets according to definition files
        if current_id in train_borders_set:
            add_to_buckets(train_buckets, bucket_id, ssm_tensor,
                           added_features, ssm_labels)
        else:
            assert current_id in test_borders_set, 'id ' + current_id + ' is neither in train nor in test!'
            add_to_buckets(test_buckets, bucket_id, ssm_tensor, added_features,
                           ssm_labels)

    del multiple_ssms_data
    del added_features
    del segment_borders
    del train_borders
    del test_borders
    del train_borders_set
    del test_borders_set
    del train_test_borders_set

    # Compacting buckets and printing statistics
    print("Training set buckets:")
    train_buckets = compact_buckets(train_buckets)
    print("Test set buckets:")
    test_buckets = compact_buckets(test_buckets)

    # Define the neural network
    # nn = Dense(window_size=args.window_size, ssm_size=2 ** next(train_buckets.keys().__iter__()))
    nn = NoPadding1Conv(window_size=args.window_size,
                        ssm_size=2**next(train_buckets.keys().__iter__()),
                        added_features_size=added_feats_count,
                        channels=channels)
    # nn = MnistLike(window_size=args.window_size, ssm_size=2 ** next(train_buckets.keys().__iter__()), channels=channels)

    # Defining optimisation problem
    g_global_step = tf.train.get_or_create_global_step()
    g_train_op = slim.optimize_loss(loss=nn.g_loss,
                                    global_step=g_global_step,
                                    learning_rate=None,
                                    optimizer=tf.train.AdamOptimizer(),
                                    clip_gradients=5.0)

    # Logging
    summary_writer = tf.summary.FileWriter(logdir=path.join(
        args.output, 'log'),
                                           graph=tf.get_default_graph())
    g_summary = tf.summary.merge_all()

    saver = tf.train.Saver(max_to_keep=10)

    with tf.Session() as sess:
        # Checkpoint restore / variable initialising
        save_path = path.join(args.output, 'model.ckpt')
        latest_checkpoint = tf.train.latest_checkpoint(args.output)
        if latest_checkpoint is None:
            print("Initializing variables")
            timestamp = time()
            tf.get_variable_scope().set_initializer(
                tf.random_normal_initializer(mean=0.0, stddev=0.01))
            tf.global_variables_initializer().run()
            print("Done in %.2fs" % tdiff(timestamp))
        else:
            print("Restoring from checkpoint variables")
            timestamp = time()
            saver.restore(sess=sess, save_path=latest_checkpoint)
            print("Done in %.2fs" % tdiff(timestamp))

        print()
        timestamp = time()
        global_step_v = 0
        avg_loss = 0.0

        eval_precisions = []
        eval_recalls = []
        eval_fscores = []

        # Training loop
        for epoch in range(args.max_epoch):
            for bucket_id in train_buckets:
                for batch_X, batch_X_added_feats, batch_Y in feed(
                        train_buckets[bucket_id], args.batch_size):
                    # Single training step
                    summary_v, global_step_v, loss_v, _ = sess.run(
                        fetches=[
                            g_summary, g_global_step, nn.g_loss, g_train_op
                        ],
                        feed_dict={
                            nn.g_in: batch_X,
                            nn.g_labels: batch_Y,
                            nn.g_dprob: 0.6,
                            nn.g_added_features: batch_X_added_feats
                        })
                    summary_writer.add_summary(summary=summary_v,
                                               global_step=global_step_v)
                    avg_loss += loss_v

                    # Reporting
                    if global_step_v % args.report_period == 0:
                        print("Iter %d" % global_step_v)
                        print("  epoch: %.0f, avg.loss: %.4f, iter/s: %.4f" %
                              (epoch, avg_loss / args.report_period,
                               args.report_period / tdiff(timestamp)))
                        timestamp = time()
                        avg_loss = 0.0

                    # Evaluation
                    if global_step_v % (args.report_period * 10) == 0:
                        tp = 0
                        fp = 0
                        fn = 0
                        for bucket_id in test_buckets:
                            for test_X, text_X_added_feats, true_Y in feed(
                                    test_buckets[bucket_id], args.batch_size):
                                pred_Y = nn.g_results.eval(
                                    feed_dict={
                                        nn.g_in: test_X,
                                        nn.g_dprob: 1.0,
                                        nn.g_added_features: text_X_added_feats
                                    })
                                try:
                                    _, cur_fp, cur_fn, cur_tp = confusion_matrix(
                                        true_Y, pred_Y).ravel()
                                    tp += cur_tp
                                    fp += cur_fp
                                    fn += cur_fn
                                except Exception as e:
                                    print(e)
                                    print(
                                        confusion_matrix(true_Y,
                                                         pred_Y).ravel())
                                    print(confusion_matrix(true_Y, pred_Y))

                        current_precision = precision(tp, fp) * 100
                        current_recall = recall(tp, fn) * 100
                        current_fscore = f1(tp, fp, fn) * 100
                        eval_precisions.append(current_precision)
                        eval_recalls.append(current_recall)
                        eval_fscores.append(current_fscore)
                        print("  P: %.2f%%, R: %.2f%%, F1: %.2f%%" %
                              (current_precision, current_recall,
                               current_fscore))

                    # Checkpointing
                    if global_step_v % 10000 == 0:
                        real_save_path = saver.save(sess=sess,
                                                    save_path=save_path,
                                                    global_step=global_step_v)
                        print("Saved the checkpoint to: %s" % real_save_path)
                        print('precisions:', eval_precisions)
                        print('recalls:', eval_recalls)
                        print('fscores:', eval_fscores)

        real_save_path = saver.save(sess=sess,
                                    save_path=save_path,
                                    global_step=global_step_v)
        print("Saved the checkpoint to: %s" % real_save_path)
        print('total precisions:', eval_precisions)
        print('total recalls:', eval_recalls)
        print('total fscores:', eval_fscores)
        print('--------------')
        n = 10
        print('n =', n)
        print('avg. of last n precisions:',
              np.round(np.median(eval_precisions[-n:]), 1), '+-',
              np.round(np.std(eval_precisions[-n:]), 1), '%')
        print('avg. of last n recalls   :',
              np.round(np.median(eval_recalls[-n:]), 1), '+-',
              np.round(np.std(eval_recalls[-n:]), 1), '%')
        print('avg. of last n fscores   :',
              np.round(np.median(eval_fscores[-n:]), 1), '+-',
              np.round(np.std(eval_fscores[-n:]), 1), '%')
Example #19
        activities_loss = -tf.reduce_mean(activities_in_one_hot * tf.log(activities_preds + EPS))

        activities_avg_preds = tf.reduce_mean(tf.reshape(activities_preds, [B,T,c.num_activities]), [1])
        activities_avg_labels = tf.to_int32(tf.argmax(activities_avg_preds, 1))

        activities_avg_accuracy = tf.reduce_mean(tf.to_float(tf.equal(activities_avg_labels,
                                                                      activities_in[:,5])))

    sequence_vars = tf.get_collection(tf.GraphKeys.VARIABLES, 'ActNet/sequence')

  with tf.variable_scope('train'):
    global_step = slim.create_global_step()
    learning_rate = c.train_learning_rate
    total_loss = activities_loss + c.actions_loss_weight * actions_loss
    train_op = slim.optimize_loss(total_loss,
                                  global_step,
                                  learning_rate,
                                  tf.train.AdamOptimizer)
  train_vars = tf.get_collection(tf.GraphKeys.VARIABLES, 'train')


# TODO: generate the tag
print('finished building the model %s' % c.out_model_path)
pickle.dump(c, open(c.out_config_path, 'wb'))

