Example #1
 def load(self, name=None, only_lstm=False):
     self.init_layers()
     if only_lstm:
         saver = tfe.Saver(self.lstm.variables)
     else:
         saver = tfe.Saver(
             [t for var in self.to_update for t in var.variables])
     name = name if name else self.name
     save_path = 'models/' + name + '/'
     saver.restore(save_path)
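For orientation: tfe.Saver is tf.contrib.eager.Saver from TensorFlow 1.x eager execution. It is constructed from a list of variables and then writes or reads a checkpoint at a given file prefix. A minimal save/restore round trip, as a sketch assuming TF 1.x with eager execution enabled and an illustrative variable and checkpoint path (not taken from the examples on this page), could look like this:

import os
import tensorflow as tf
import tensorflow.contrib.eager as tfe

tf.enable_eager_execution()

# Illustrative variable and checkpoint prefix.
w = tfe.Variable(tf.zeros((2, 2)), name='w')
os.makedirs('checkpoints', exist_ok=True)
ckpt_prefix = os.path.join('checkpoints', 'demo')

tfe.Saver([w]).save(ckpt_prefix)     # write the current value of w
w.assign(tf.ones((2, 2)))            # change w in memory
tfe.Saver([w]).restore(ckpt_prefix)  # w is restored to zeros from the checkpoint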
Example #2
def test(embedding_matrix, entity_num, entity_embedding_dim, rnn_hidden_size,
         vocab_size, start_token, max_sent_num, p1, p1_mask, entity_keys,
         keys_mask, encoder_path, decoder_path, eos_ind):
    encoder = Model.BasicRecurrentEntityEncoder(
        embedding_matrix=embedding_matrix,
        max_entity_num=entity_num,
        entity_embedding_dim=entity_embedding_dim)

    temp_entity_cell, temp_entities = encoder([p1, p1_mask], entity_keys)

    print("temp_entities shape", temp_entities.shape)

    decoder = Model.RNNRecurrentEntitiyDecoder(
        embedding_matrix=embedding_matrix,
        rnn_hidden_size=rnn_hidden_size,
        entity_cell=temp_entity_cell,
        vocab_size=vocab_size,
        max_sent_num=max_sent_num,
        entity_embedding_dim=entity_embedding_dim)

    # training the model for one step just to initialize all variables
    # (p2 and p2_mask are assumed to be defined in the enclosing scope;
    #  they are not parameters of test())
    decoder_inputs_train = [True, temp_entities, vocab_size, start_token]
    labels = [p2, p2_mask]
    decoder(inputs=decoder_inputs_train,
            keys=entity_keys,
            keys_mask=keys_mask,
            training=True,
            labels=labels)
    max_sent_num = tf.shape(p1)[1]
    max_sent_len = tf.shape(p1)[2]

    # restoring saved models
    checkpoint_dir_encoder = encoder_path
    os.makedirs(checkpoint_dir_encoder, exist_ok=True)
    checkpoint_prefix_encoder = os.path.join(checkpoint_dir_encoder, 'ckpt')
    tfe.Saver(encoder.variables).restore(checkpoint_prefix_encoder)

    checkpoint_dir_decoder = decoder_path
    os.makedirs(checkpoint_dir_decoder, exist_ok=True)
    checkpoint_prefix_decoder = os.path.join(checkpoint_dir_decoder, 'ckpt')
    tfe.Saver(decoder.variables).restore(checkpoint_prefix_decoder)

    entity_cell, entity_hiddens = encoder([p1, p1_mask], entity_keys)
    # print("entity_hiddens shape:", entity_hiddens)
    decoder_inputs_test = [
        entity_hiddens, max_sent_num, max_sent_len, eos_ind, start_token
    ]
    generated_prgrph, second_prgrph_entities = decoder(
        inputs=decoder_inputs_test,
        keys=entity_keys,
        keys_mask=keys_mask,
        training=False,
        return_last=False)
    print(generated_prgrph)
    print(second_prgrph_entities.shape)
Example #3
def main(args):
    if args.src_npy is None:
        print('Supply src_npy')
        return 0
    if args.dst_npy is None:
        print('Supply dst_npy')
        return 0

    model = Autoencoder()
    dummyx = tf.zeros((5, 64, 64, 3), dtype=tf.float32)
    _ = model(dummyx, verbose=True)
    saver = tfe.Saver(model.variables)
    saver.restore(args.snapshot)
    model.summary()

    nuclei = np.load(args.src_npy)
    print(nuclei.shape, nuclei.dtype, nuclei.min(), nuclei.max())

    if args.shuffle:
        print('Shuffling')
        np.random.shuffle(nuclei)

    n_images = nuclei.shape[0]
    n_batches = n_images // args.batch

    nuclei = np.array_split(nuclei, n_batches)
    print('Split into {} batches'.format(len(nuclei)))

    if args.n_batches is not None:
        subset_batches = min(n_batches, args.n_batches)
        print('Subsetting {} batches'.format(subset_batches))
        nuclei = nuclei[:subset_batches]

    if args.draw:
        fig, axs = plt.subplots(5, 5, figsize=(5, 5))

    all_feat = []
    for k, batch in enumerate(nuclei):
        batch = (batch / 255.).astype(np.float32)
        batch_hat, features = model(tf.constant(batch, dtype=tf.float32),
                                    return_z=True,
                                    training=False)
        all_feat.append(features)

        if k % 50 == 0:
            print('batch {:06d}'.format(k))

        if args.draw:
            if k % 10 == 0:
                savebase = os.path.join(args.save, '{:05d}'.format(k))
                draw_result(batch,
                            batch_hat.numpy(),
                            fig,
                            axs,
                            savebase=savebase)

    all_feat = np.concatenate(all_feat, axis=0)
    print('all_feat', all_feat.shape)

    np.save(args.dst_npy, all_feat)
Example #4
 def save(self, name=None):
     name = name if name else self.name
     save_path = 'models/' + name + '/'
     if not os.path.isdir(save_path):
         os.makedirs(save_path)
     saver = tfe.Saver([t for var in self.to_update for t in var.variables])
     saver.save(save_path)
Example #5
def checkpoint_load(_checkpoint_path, neural_kb, optimizer):
    logger.info('Loading model...')

    logger.info('   neural kb and optimizer')
    checkpoint_model_prefix = os.path.join(_checkpoint_path, "model/")
    model_saver_path = tf.train.latest_checkpoint(checkpoint_model_prefix)

    # old format compatibility
    if os.path.exists(os.path.join(_checkpoint_path, "optim/")):
        import tensorflow.contrib.eager as tfe
        checkpoint_optim_prefix = os.path.join(_checkpoint_path, "optim/")
        optim_checkpoint_path = tf.train.latest_checkpoint(
            checkpoint_optim_prefix)
        if optim_checkpoint_path is not None:
            optim_checkpoint = tfe.Checkpoint(
                optimizer=optimizer,
                optimizer_step=tf.train.get_or_create_global_step())
            optim_checkpoint.restore(optim_checkpoint_path)
            logger.info('   optimiser')
        else:
            logger.info(
                "   ....couldn't find optim/, ignoring it (loading old model)."
            )

        model_saver = tfe.Saver(neural_kb.variables)
        model_saver.restore(model_saver_path)

    else:
        model_saver = tf.train.Saver(neural_kb.variables +
                                     optimizer.variables() +
                                     [tf.train.get_or_create_global_step()])
        model_saver.restore(None, model_saver_path)

    logger.info('... loading done.')
Example #6
 def save(self, name=None):
     name = name if name else self.name
     save_path = 'models/' + name + '/'
     if not os.path.isdir(save_path):
         os.makedirs(save_path)
     saver = tfe.Saver(self.encoder_cell.variables)
     saver.save(save_path)
Example #7
    def __init__(self, cfg, net, trainingset, valset, resume):
        self.cfg = cfg
        self.net = net
        self.trainingset = trainingset
        self.valset = valset

        #self.optimizer = tf.train.AdamOptimizer(learning_rate=self.cfg.LEARNING_RATE)
        self.optimizer = tf.train.MomentumOptimizer(
            learning_rate=self.cfg.LEARNING_RATE, momentum=self.cfg.MOMENTUM)

        self.global_step = tf.train.get_or_create_global_step()

        self.epoch = tfe.Variable(0,
                                  name='epoch',
                                  dtype=tf.float32,
                                  trainable=False)

        self.writer = tf.contrib.summary.create_summary_file_writer(
            self.cfg.SUMMARY_PATH)

        self.all_variables = (self.net.variables + self.optimizer.variables() +
                              [self.global_step] + [self.epoch])

        if resume:
            tfe.Saver(self.all_variables).restore(
                tf.train.latest_checkpoint(self.cfg.CKPT_PATH))
Example #8
 def save(self):
     """ save current weight of layers
     """
     tfe.Saver(self.variables).save(self.checkpoint_directory,
                                    global_step=self.global_step)
     print("saved step %d in %s" %
           (self.global_step, self.checkpoint_directory))
Example #9
 def restore(self):
     if self.saver is None:
         if len(self.get_variables()) == 0:
             in_ = tf.zeros((1, 256), tf.float32)
             self.forward(in_, self.zero_state(1))
         self.saver = tfe.Saver(self.get_variables())
     self.saver.restore(self.saver_path)
Example #10
def train():
    batch_size = 10
    num_epoches = 2
    data_size = 500 * 10
    display_iter = 100
    dir = r"E:\code\python\deeplearning\tensorflow1.x\data\mnist_digits_images"
    save_dir = r"E:\\code\\python\\deeplearning\\tensorflow1.x\\data\\ck\\"
    dataset = get_dataset(directory=dir, size=[28, 28], batch_size=batch_size)
    iterator = dataset.make_one_shot_iterator()
    data = iterator.get_next()
    model = MNISTModel(name='net')
    global_step = tf.train.get_or_create_global_step()
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
    grad_fn = tfe.implicit_gradients(compute_loss)

    while global_step * batch_size / data_size < num_epoches:
        step = int(global_step * batch_size / data_size)
        x, y = tf.cast(data[0], dtype=tf.float32), data[1]
        grads_and_vars = grad_fn(model, x, y)
        optimizer.apply_gradients(
            grads_and_vars=grads_and_vars,
            global_step=tf.train.get_or_create_global_step())
        # gather the variables to be saved
        if global_step % display_iter == 0:
            all_variables = (model.variables + optimizer.variables() +
                             [global_step])
            tfe.Saver(all_variables).save(save_dir,
                                          global_step=global_step)  # checkpoint file
        print("Epoch:{}, Iteration:{}, loss:{}".format(
            step, global_step, compute_loss(model, x, y)))
        global_step = tf.train.get_or_create_global_step()
Example #11
def load_model(model_folder,
               model_id,
               model_class,
               training_data_length=1024,
               training_key_length=57):

    # load model features (number of layers, nodes)
    with open('./' + model_folder + '/' + model_id + '_features') as f:
        new_model_features = pickle.load(f)

    # Initialize variables by running a single training iteration
    tf.reset_default_graph()
    optimizer = tf.train.AdamOptimizer(new_model_features.learining_rate)
    model = model_class(new_model_features)

    dummy_data = np.ones([10, training_data_length])

    X_tensor = tf.constant(dummy_data)
    y_tensor = tf.constant(np.ones([10, training_key_length]))
    dummy_train_dataset = tf.data.Dataset.from_tensor_slices(
        (X_tensor, y_tensor))
    dummy_test_dataset = (dummy_data, np.ones([10, training_key_length]))

    _, _ = model.fit_batch(dummy_train_dataset,
                           dummy_test_dataset,
                           optimizer,
                           num_epochs=1,
                           verbose=1,
                           print_errors=False)

    # Restore saved variables
    saver = tfe.Saver(model.variables)
    saver.restore('./' + model_folder + '/' + model_id)

    return model, new_model_features.scaler
Example #12
    def save(self, itn=None):
        if self.saver is None:
            self.saver = tfe.Saver(self.get_variables())

        saver_path = self.saver_path
        if itn is not None:
            saver_path += "_{}".format(str(itn).zfill(4))
        self.saver.save(saver_path)
Example #13
def write_checkpoint(model, global_step, model_dir, training_id):
    """Write a snapshot of the current model to disk."""
    print("Writing model checkpoint on step %s" % global_step.numpy())
    checkpoint_dir = os.path.join(model_dir, CHECKPOINT_DIR, training_id)
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    tfe.Saver(model.variables).save(checkpoint_prefix, global_step=global_step)
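Checkpoints written this way can be picked up again with tf.train.latest_checkpoint, as several of the later examples do. A matching read-side sketch, assuming the same model_dir / CHECKPOINT_DIR / training_id layout as above (the function name is illustrative):

def read_checkpoint(model, model_dir, training_id):
    """Restore the newest checkpoint written by write_checkpoint, if any."""
    checkpoint_dir = os.path.join(model_dir, CHECKPOINT_DIR, training_id)
    latest = tf.train.latest_checkpoint(checkpoint_dir)
    if latest is not None:
        tfe.Saver(model.variables).restore(latest)
    return latest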
Example #14
def main(_):
    assert tfe.num_gpus() > 0, 'Make sure the GPU device exists'
    device_name = '/gpu:{}'.format(args.cuda_device)
    print('\n==> ==> ==> Using device {}'.format(device_name))

    # Load the dataset
    train_ds, val_ds = [
        dataset_generator(
            mode,
            conf.input_size,
            num_epochs=1,
            batch_size=conf.batch_size,
            buffer_size=10000)  # TODO edit this when in real training
        for mode in ['train', 'val']
    ]

    # Create the model and optimizer
    model = RetinaNet()
    optimizer = tf.train.RMSPropOptimizer(conf.learning_rate)

    # Define the path to the TensorBoard summary
    train_dir, val_dir = [
        os.path.join(conf.summary_dir, mode) for mode in ['train', 'val']
    ]
    tf.gfile.MakeDirs(conf.summary_dir)

    train_summary_writer = tf.contrib.summary.create_summary_file_writer(
        train_dir, flush_millis=10000, name='train')
    val_summary_writer = tf.contrib.summary.create_summary_file_writer(
        val_dir, flush_millis=10000, name='val')

    checkpoint_prefix = os.path.join(conf.checkpoint_dir, 'ckpt')

    with tfe.restore_variables_on_create(
            tf.train.latest_checkpoint(conf.checkpoint_dir)):
        with tf.device(device_name):
            epoch = tfe.Variable(1., name='epoch')
            best_loss = tfe.Variable(tf.float32.max, name='best_loss')
            print('==> ==> ==> Start training from epoch {:.0f}...\n'.format(
                epoch.numpy()))

            while epoch <= conf.num_epochs + 1:
                gs = tf.train.get_or_create_global_step()
                with train_summary_writer.as_default():
                    train_one_epoch(model, optimizer, train_ds, epoch.numpy())
                with val_summary_writer.as_default():
                    eval_loss = validate(model, val_ds, epoch.numpy())

                # Save the best loss
                if eval_loss < best_loss:
                    best_loss.assign(
                        eval_loss)  # use assign(); rebinding best_loss would not update the checkpointed variable
                    all_variables = (model.variables + optimizer.variables() +
                                     [gs] + [epoch] + [best_loss])
                    tfe.Saver(all_variables).save(checkpoint_prefix,
                                                  global_step=gs)

                epoch.assign_add(1)
Example #15
 def save_model(self, step):
     """ Function to save trained model.
     """
     makedirs(
         os.path.join(self.checkpoint_directory, GripperNetwork.MODEL_NAME))
     tfe.Saver(self.variables).save(os.path.join(
         self.checkpoint_directory, GripperNetwork.MODEL_NAME + self.suffix,
         GripperNetwork.MODEL_NAME),
                                    global_step=step)
Example #16
def train(num_episodes=1000,
          save_every=100,
          checkpoint_dir="checkpoints",
          tensorboard_dir="tensorboard",
          tboard_every=10,
          find_target_prop=0):
    pol = Policy()
    writer = tf.contrib.summary.create_file_writer(tensorboard_dir)
    for j in range(1, num_episodes + 1):
        random_secret = random.randint(0, config.max_guesses - 1)
        e = Episode(pol, random_secret, find_target_prop, True)
        history = e.generate()

        print("Episode:{}, length: {}".format(j, len(history)))

        G = -1

        optimizer = \
            tf.train.GradientDescentOptimizer(
                learning_rate=config.reinforce_alpha*G)

        for i in reversed(range(1, len(history))):
            history_so_far = history[:i]
            next_action, _ = history[i]
            with tfe.GradientTape() as tape:
                action_logits = pol(history_so_far, with_softmax=False)
                loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=tf.one_hot(tf.convert_to_tensor([next_action]),
                                      config.max_guesses),
                    logits=action_logits)

            grads = tape.gradient(loss, pol.variables)
            optimizer.apply_gradients(zip(grads, pol.variables))

            G -= 1
            optimizer._learning_rate = G * config.reinforce_alpha
            optimizer._learning_rate_tensor = None
            # hack. Should be able to pass a callable as learning_rate, see
            # https://www.tensorflow.org/api_docs/python/tf/train/GradientDescentOptimizer#args
            # can I perhaps submit a PR to fix this bug?

            sys.stdout.write("{}/{}\r".format(len(history) - i, len(history)))

        if j % save_every == 0 or j == num_episodes:
            saver = tfe.Saver(pol.named_variables)
            save_path = os.path.join(
                checkpoint_dir,
                "episode{}".format(str(j).zfill(len(str(num_episodes)))))
            saver.save(save_path)

        if j % tboard_every == 0:
            with writer.as_default():
                with tf.contrib.summary.always_record_summaries():
                    tf.contrib.summary.scalar('total_return',
                                              tf.convert_to_tensor([G]),
                                              step=j)
    return pol
Example #17
 def restore_model(self):
     with tf.device(self.device):
         # Run the model once to initialize variables
         dummy_input = tf.constant(tf.zeros((1, 5, 24)))
         dummy_pred = self.predict(dummy_input)
         # Restore the variables of the model
         saver = tfe.Saver(self.variables)
         saver.restore(tf.train.latest_checkpoint
                       (self.checkpoint_directory))
Example #18
 def restore_model(self):
     # restore the model: run it once on dummy data first so the variables are created
     with tf.device(self.device):
         dummy_input = tf.constant(tf.zeros((1, 1)))
         dummy_length = tf.constant(1, shape=(1, ))
         dummy_pred = self.predict(dummy_input, dummy_length, False)
         saver = tfe.Saver(self.variables)
         saver.restore(tf.train.latest_checkpoint(
             self.checkpoint_directory))
Example #19
 def load(self, name=None, only_lstm=False):
     x = [np.zeros((25, 300))] * 32
     sos = np.zeros((32, 300), dtype=np.float64)
     state = self.decoder_cell.zero_state(32, dtype=tf.float64)
     outputs = np.zeros((32, 25, 150), dtype=np.float64)
     self.forward(sos,
                  state,
                  x,
                  list(range(2, 34, 1)),
                  outputs,
                  training=True)
     if only_lstm:
         saver = tfe.Saver(self.decoder_cell.variables)
     else:
         saver = tfe.Saver(self.decoder_cell.variables +
                           self.word_predictor.variables)
     name = name if name else self.name
     save_path = 'models/' + name + '/'
     saver.restore(save_path)
Example #20
    def load(self, global_step="latest"):
        self.build()

        saver = tfe.Saver(self.variables)
        if global_step == "latest":
            saver.restore(tf.train.latest_checkpoint(self.checkpoint_directory))
            self.global_step = int(tf.train.latest_checkpoint(self.checkpoint_directory).split('/')[-1][1:])
        else:
            saver.restore(self.checkpoint_directory + "-" + str(global_step))
            self.global_step = global_step
Example #21
    def test_save_restore(self):
        pol = Policy()
        episode = [(0, 0), (1, 0), (2, 3)]
        expected = pol(episode).numpy()
        with tempfile.TemporaryDirectory() as tdir:
            path = os.path.join(tdir, "checkpt")

            saver = tfe.Saver(pol.named_variables)
            saver.save(path)

            pol2 = Policy()
            def diff():
                actual = pol2(episode).numpy()
                return np.linalg.norm(actual-expected)

            self.assertGreater(diff(), 0.0001)
            saver = tfe.Saver(pol2.named_variables)
            saver.restore(path)
            self.assertGreaterEqual(0.00001, diff())
Example #22
 def load(self,
          x,
          sos,
          state,
          enc_output,
          folder_where_saved="decoder_model/"):
     self.forward(x, sos, (state, enc_output))
     saver = tfe.Saver(self.decoder_cell.variables)
     saver.restore(folder_where_saved)
     print('Successfully loaded Decoder Model')
Example #23
 def restore_model(self):
     """ Function to restore trained model.
     """
     self((tf.zeros((1,) + self.input_dims), [5]), training=False)
     try:
         saver = tfe.Saver(self.variables)
         saver.restore(
             tf.train.latest_checkpoint(
                 os.path.join(self.checkpoint_directory, MnistClassifier.MODEL_NAME + self.suffix)))
     except ValueError:
         print('RotateNet model cannot be found.')
Example #24
    def load(self, global_step="latest"):
        dummy_input = tf.constant(tf.zeros((1,) + self.input_dim))
        dummy_pred = self.call(dummy_input, True)

        saver = tfe.Saver(self.variables)
        if global_step == "latest":
            saver.restore(tf.train.latest_checkpoint(self.checkpoint_directory))
            self.global_step = int(tf.train.latest_checkpoint(self.checkpoint_directory).split('/')[-1][1:])
        else:
            saver.restore(self.checkpoint_directory + "-" + str(global_step))
            self.global_step = global_step
Example #25
def train_gp(dataset, args):
    """Train a GP model and return it. This function uses Tensorflow's eager execution.

    Args:
        dataset: a NamedTuple that contains information about the dataset
        args: parameters in form of a dictionary
    Returns:
        trained GP
    """

    # Set checkpoint path
    if args['save_dir']:
        out_dir = Path(args['save_dir']) / Path(args['model_name'])
        tf.gfile.MakeDirs(str(out_dir))
    else:
        out_dir = Path(mkdtemp())  # Create temporary directory
    checkpoint_prefix = out_dir / Path('model.ckpt')
    step_counter = tf.train.get_or_create_global_step()

    # Restore from existing checkpoint
    with tfe.restore_variables_on_create(tf.train.latest_checkpoint(out_dir)):
        gp, hyper_params = util.construct_from_flags(args, dataset,
                                                     dataset.inducing_inputs)
        optimizer, update_learning_rate = util.get_optimizer(args)

    step = 0
    # shuffle and repeat for the required number of epochs
    train_data = dataset.train_fn().shuffle(50_000).repeat(
        args['eval_epochs']).batch(args['batch_size'])
    while step < args['train_steps']:
        start = time.time()
        # take *at most* (train_steps - step) batches so that we don't run longer than `train_steps`
        fit(gp, optimizer, train_data.take(args['train_steps'] - step),
            step_counter, hyper_params, update_learning_rate, args)
        end = time.time()
        step = step_counter.numpy()
        print(
            f"Train time for the last {args['eval_epochs']} epochs (global step {step}):"
            f" {end - start:0.2f}s")
        evaluate(gp,
                 dataset.test_fn().batch(args['batch_size']), dataset.metric)
        all_variables = (gp.get_all_variables() + optimizer.variables() +
                         [step_counter] + hyper_params)
        # TODO: don't ignore the 'chkpnt_steps' flag
        ckpt_path = tfe.Saver(all_variables).save(checkpoint_prefix,
                                                  global_step=step_counter)
        print(f"Saved checkpoint in '{ckpt_path}'")

    if args['plot'] or args['preds_path']:  # Create predictions
        tf.reset_default_graph()
        mean, var = predict(dataset.xtest, tf.train.latest_checkpoint(out_dir),
                            dataset, args)
        util.post_training(mean, var, out_dir, dataset, args)
    return gp
Example #26
def main(_):
    tfe.enable_eager_execution()

    (device, data_format) = ('/gpu:0', 'channels_first')
    if FLAGS.no_gpu or tfe.num_gpus() <= 0:
        (device, data_format) = ('/cpu:0', 'channels_last')
    print('Using device %s, and data format %s.' % (device, data_format))

    # Load the datasets
    train_ds = dataset.train(FLAGS.data_dir).shuffle(60000).batch(
        FLAGS.batch_size)
    test_ds = dataset.test(FLAGS.data_dir).batch(FLAGS.batch_size)

    # Create the model and optimizer
    model = mnist.Model(data_format)
    optimizer = tf.train.MomentumOptimizer(FLAGS.lr, FLAGS.momentum)

    if FLAGS.output_dir:
        # Create directories to which summaries will be written
        # tensorboard --logdir=<output_dir>
        # can then be used to see the recorded summaries.
        train_dir = os.path.join(FLAGS.output_dir, 'train')
        test_dir = os.path.join(FLAGS.output_dir, 'eval')
        tf.gfile.MakeDirs(FLAGS.output_dir)
    else:
        train_dir = None
        test_dir = None
    summary_writer = tf.contrib.summary.create_file_writer(train_dir,
                                                           flush_millis=10000)
    test_summary_writer = tf.contrib.summary.create_file_writer(
        test_dir, flush_millis=10000, name='test')
    checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')

    # Train and evaluate for 10 epochs.
    with tf.device(device):
        for epoch in range(1, 11):
            with tfe.restore_variables_on_create(
                    tf.train.latest_checkpoint(FLAGS.checkpoint_dir)):
                global_step = tf.train.get_or_create_global_step()
                start = time.time()
                with summary_writer.as_default():
                    train(model, optimizer, train_ds, FLAGS.log_interval)
                end = time.time()
                print('\nTrain time for epoch #%d (global step %d): %f' %
                      (epoch, global_step.numpy(), end - start))
            with test_summary_writer.as_default():
                test(model, test_ds)
            all_variables = (model.variables + optimizer.variables() +
                             [global_step])
            tfe.Saver(all_variables).save(checkpoint_prefix,
                                          global_step=global_step)
Example #27
    def train(self):
        """
		Training procedure
		"""
        start_time = time.time()
        step_time = 0.0

        with self.writer.as_default():
            with tf.contrib.summary.record_summaries_every_n_global_steps(
                    self.cfg.DISPLAY_STEP):

                for e in range(self.epoch.numpy(), self.cfg.EPOCHS):
                    tf.assign(self.epoch, e)
                    for (batch_i, (images, labels)) in enumerate(
                            tfe.Iterator(self.trainingset.dataset)):
                        self.global_step = tf.train.get_global_step()
                        step = self.global_step.numpy() + 1

                        step_start_time = int(round(time.time() * 1000))

                        self.optimizer.minimize(
                            lambda: self.loss('train', images, labels),
                            global_step=self.global_step)

                        step_end_time = int(round(time.time() * 1000))
                        step_time += step_end_time - step_start_time

                        if (step % self.cfg.DISPLAY_STEP) == 0:
                            l = self.loss('train', images, labels)
                            a = self.accuracy('train', images, labels).numpy()
                            print(
                                'Epoch: {:03d} Step/Batch: {:09d} Step mean time: {:04d}ms \nLoss: {:.7f} Training accuracy: {:.4f}'
                                .format(e, step, int(step_time / step), l, a))

                        if (step % self.cfg.VALIDATION_STEP) == 0:
                            val_images, val_labels = tfe.Iterator(
                                self.valset.dataset).next()
                            l = self.loss('val', val_images, val_labels)
                            a = self.accuracy('val', val_images,
                                              val_labels).numpy()
                            int_time = time.time() - start_time
                            print(
                                'Elapsed time: {} --- Loss: {:.7f} Validation accuracy: {:.4f}'
                                .format(ut.format_time(int_time), l, a))

                        if (step % self.cfg.SAVE_STEP) == 0:
                            tfe.Saver(self.all_variables).save(
                                os.path.join(self.cfg.CKPT_PATH, 'net.ckpt'),
                                global_step=self.global_step)
                            print('Variables saved')
Example #28
    def __init__(self, cfg, net, testset):

        self.cfg = cfg
        self.net = net
        self.testset = testset

        # dummy input to create the tf variables
        _ = self.net(
            tf.random_uniform([
                1, self.cfg.IMG_SHAPE[0], self.cfg.IMG_SHAPE[1],
                self.cfg.IMG_SHAPE[2]
            ]))

        tfe.Saver(self.net.variables).restore(
            tf.train.latest_checkpoint(self.cfg.CKPT_PATH))
Example #29
    def load(self, global_step="latest"):
        dummy_input = tf.zeros((1, self.input_dim))
        dummy_mu, dummy_sigma = self.encoding(dummy_input)
        dummy_z = self.sampling_z(dummy_mu, dummy_sigma)
        dummy_ret = self.decoding(dummy_z)

        saver = tfe.Saver(self.variables)
        if global_step == "latest":
            saver.restore(tf.train.latest_checkpoint(self.checkpoint_directory))
            self.global_step = int(tf.train.latest_checkpoint(self.checkpoint_directory).split('/')[-1][1:])
        else:
            saver.restore(self.checkpoint_directory + "-" + str(global_step))
            self.global_step = int(global_step)

        print("load %s" % self.global_step)
Example #30
    def load_specific_checkpoint(self, step_number):
        # Run the model once to initialize variables
        initialshape = list(self.state_shape)
        initialshape[0] = 1
        initialshape = tuple(initialshape)
        dummy_input = tf.constant(tf.zeros(initialshape))
        dummy_pred = self.predict(dummy_input, training=False)
        # Restore the variables of the model
        saver = tfe.Saver(self.variables)
        name = self.checkpoint_directory + "-" + str(step_number)
        from colorama import Fore, Style
        print(Fore.CYAN + "loading " + name)

        print(Style.RESET_ALL)
        saver.restore(name)
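All of the snippets above target the TF 1.x tf.contrib.eager API. In TensorFlow 2.x, tf.contrib (and with it tfe.Saver) was removed; the closest replacement is object-based checkpointing with tf.train.Checkpoint. A minimal sketch, assuming TF 2.x and an illustrative layer and optimizer:

import tensorflow as tf

# Illustrative trackable objects; layers, models and optimizers can all be
# attached to a Checkpoint by keyword.
model = tf.keras.layers.Dense(4)
optimizer = tf.keras.optimizers.Adam()

ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
manager = tf.train.CheckpointManager(ckpt, directory='checkpoints', max_to_keep=3)

save_path = manager.save()               # write a numbered checkpoint
ckpt.restore(manager.latest_checkpoint)  # restore the most recent one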