def test_build_subnetwork(self, builder_params, want_name):
    with tf.Graph().as_default() as g, self.test_session(graph=g) as sess:
      data = np.concatenate([
          np.ones((1, _IMAGE_DIM, _IMAGE_DIM, 1)), 2. * np.ones(
              (1, _IMAGE_DIM, _IMAGE_DIM, 1))
      ])
      features = {"x": tf.constant(data)}
      labels = tf.constant([0, 1])
      training = True
      mode = tf.estimator.ModeKeys.TRAIN
      head = tf.contrib.estimator.binary_classification_head(
          loss_reduction=tf.losses.Reduction.SUM)
      ensemble = None
      name = None
      subnetwork = None
      builders = []
      for builder_param in builder_params:
        builders.append(
            _builder(checkpoint_dir=self.test_subdirectory, **builder_param))
      for idx, builder in enumerate(builders):
        name = builder.name
        # Pass the subnetworks of previous builders to the next builder.
        with tf.variable_scope("subnetwork_{}".format(idx)):
          subnetwork = builder.build_subnetwork(
              features=features,
              logits_dimension=head.logits_dimension,
              training=training,
              iteration_step=tf.train.get_or_create_global_step(),
              summary=_FakeSummary(),
              previous_ensemble=ensemble)
          logits = subnetwork.logits
          weighted_subnetworks = []
          if ensemble:
            logits += ensemble.logits
            weighted_subnetworks = ensemble.weighted_subnetworks
          ensemble = adanet.Ensemble(
              weighted_subnetworks=weighted_subnetworks + [
                  adanet.WeightedSubnetwork(
                      name=None,
                      logits=logits,
                      weight=None,
                      subnetwork=subnetwork)
              ],
              logits=logits,
              bias=0.)

      estimator_spec = head.create_estimator_spec(
          features=features,
          labels=labels,
          mode=mode,
          train_op_fn=lambda loss: tf.no_op(),
          logits=ensemble.logits)
      sess.run(tf.global_variables_initializer())
      train_op = builders[-1].build_subnetwork_train_op(
          subnetwork,
          estimator_spec.loss,
          var_list=None,
          labels=labels,
          iteration_step=tf.train.get_or_create_global_step(),
          summary=_FakeSummary(),
          previous_ensemble=ensemble)
      for _ in range(10):
        sess.run(train_op)
      self.assertEqual(want_name, name)
      self.assertGreater(sess.run(estimator_spec.loss), 0.0)
Example No. 2
def training_graph(opts, training_data):
    train_graph = tf.Graph()

    with train_graph.as_default():

        dataset, train_iterator, placeholders = training_data.get_dataset(
            opts, is_training=True)
        infeed = ipu_infeed_queue.IPUInfeedQueue(
            dataset, "training_dataset_infeed", 0)

        with ipu_scope('/device:IPU:0'):

            def comp_fn():
                def body(total_loss_, sum_rmse_metric, *args, **kwargs):
                    data_tensors = args
                    observed_ratings = data_tensors[0]
                    loss, rmse_metric, apply_grads_ = graph_builder(opts,
                                                                    observed_ratings=observed_ratings,
                                                                    learning_rate=placeholders["learning_rate"],
                                                                    type='TRAIN')
                    with tf.control_dependencies([apply_grads_]):
                        return total_loss_ + loss, sum_rmse_metric + rmse_metric

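                # Run `body` for opts.batches_per_step iterations over the infeed
                # queue, accumulating the total loss and the summed RMSE metric.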
                return loops.repeat(opts.batches_per_step,
                                    body,
                                    [tf.constant(0, tf.float32),
                                     tf.constant(0, tf.float32)],
                                    infeed)

            total_loss, sum_rmse_metric = ipu_compiler.compile(comp_fn, [])

        rmse = sum_rmse_metric / opts.batches_per_step
        loss = total_loss / opts.batches_per_step

        tf.summary.scalar("loss", loss)
        tf.summary.scalar("learning_rate", placeholders["learning_rate"])
        tf.summary.scalar("RMSE/train", rmse)

        if opts.compiler_report:
            ipu_ops.ipu_compile_summary('compile_summary', loss)

        train_summary = tf.summary.merge_all()
        train_saver = tf.train.Saver()

        ipu_utils.move_variable_initialization_to_cpu()
        train_init = tf.global_variables_initializer()

    train_writer = tf.summary.FileWriter(
        opts.logs_path + '/train',
        graph=train_graph,
        flush_secs=30)

    ipu_options = util.get_config(opts, profiling=opts.compiler_report)
    ipu_utils.configure_ipu_system(ipu_options)
    train_sess = tf.Session(graph=train_graph)

    return GraphOps(train_graph,
                    train_sess,
                    train_init,
                    [loss, train_summary, rmse],
                    placeholders,
                    infeed,
                    train_saver,
                    train_writer)
Example No. 3
    def benchmark_model(self,
                        warmup_runs,
                        bm_runs,
                        num_threads,
                        trace_filename=None):
        """Benchmark model."""
        if self.tensorrt:
            print('Using tensorrt ', self.tensorrt)
            graphdef = self.freeze_model()

        if num_threads > 0:
            print('num_threads for benchmarking: {}'.format(num_threads))
            sess_config = tf.ConfigProto(
                intra_op_parallelism_threads=num_threads,
                inter_op_parallelism_threads=1)
        else:
            sess_config = tf.ConfigProto()

        # Disable dependency optimization (2 == rewriter_config_pb2.RewriterConfig.OFF).
        sess_config.graph_options.rewrite_options.dependency_optimization = 2
        if self.use_xla:
            sess_config.graph_options.optimizer_options.global_jit_level = (
                tf.OptimizerOptions.ON_2)

        with tf.Graph().as_default(), tf.Session(config=sess_config) as sess:
            inputs = tf.placeholder(tf.float32,
                                    name='input',
                                    shape=self.inputs_shape)
            output = self.build_model(inputs)

            img = np.random.uniform(size=self.inputs_shape)

            sess.run(tf.global_variables_initializer())
            if self.tensorrt:
                fetches = [inputs.name] + [i.name for i in output]
                goutput = self.convert_tr(graphdef, fetches)
                inputs, output = goutput[0], goutput[1:]

            if not self.use_xla:
                # Don't use tf.group because XLA removes the whole graph for tf.group.
                output = tf.group(*output)
            else:
                output = tf.add_n([tf.reduce_sum(x) for x in output])

            output_name = [output.name]
            input_name = inputs.name
            graphdef = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_name)

        with tf.Graph().as_default(), tf.Session(config=sess_config) as sess:
            tf.import_graph_def(graphdef, name='')

            for i in range(warmup_runs):
                start_time = time.time()
                sess.run(output_name, feed_dict={input_name: img})
                logging.info('Warm up: {} {:.4f}s'.format(
                    i,
                    time.time() - start_time))

            print('Start benchmark runs total={}'.format(bm_runs))
            start = time.perf_counter()
            for i in range(bm_runs):
                sess.run(output_name, feed_dict={input_name: img})
            end = time.perf_counter()
            inference_time = (end - start) / bm_runs
            print('Per batch inference time: ', inference_time)
            print('FPS: ', self.batch_size / inference_time)

            if trace_filename:
                run_options = tf.RunOptions()
                run_options.trace_level = tf.RunOptions.FULL_TRACE
                run_metadata = tf.RunMetadata()
                sess.run(output_name,
                         feed_dict={input_name: img},
                         options=run_options,
                         run_metadata=run_metadata)
                logging.info('Dumping trace to %s', trace_filename)
                trace_dir = os.path.dirname(trace_filename)
                if not tf.io.gfile.exists(trace_dir):
                    tf.io.gfile.makedirs(trace_dir)
                with tf.io.gfile.GFile(trace_filename, 'w') as trace_file:
                    trace = timeline.Timeline(
                        step_stats=run_metadata.step_stats)
                    trace_file.write(
                        trace.generate_chrome_trace_format(show_memory=True))
Example No. 4
def main():
    with tf.Graph().as_default():

        with tf.Session() as sess:

            np.random.seed(seed=SEED)

            if USE_SPLIT_DATASET:
                dataset_tmp = facenet.get_dataset(DATA_DIR)
                train_set, test_set = split_dataset(dataset_tmp,
                                                    MIN_NROF_IMAGES_PER_CLASS,
                                                    NROF_BATCHES_PER_EPOCH)
                if MODE == 'TRAIN':
                    dataset = train_set
                elif MODE == 'CLASSIFY':
                    dataset = test_set
            else:
                dataset = facenet.get_dataset(DATA_DIR)

            # Check that there is at least one training image per class
            for cls in dataset:
                assert len(cls.image_paths) > 0, \
                    'There must be at least one image for each class in the dataset'

            paths, labels = facenet.get_image_paths_and_labels(dataset)

            print('Number of classes: %d' % len(dataset))
            print('Number of images: %d' % len(paths))

            # Load the model
            print('Loading feature extraction model')
            facenet.load_model(MODEL)

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")
            phase_train_placeholder = tf.get_default_graph(
            ).get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            # Run forward pass to calculate embeddings
            print('Calculating features for images')
            nrof_images = len(paths)
            nrof_batches_per_epoch = int(
                math.ceil(1.0 * nrof_images / BATCH_SIZE))
            emb_array = np.zeros((nrof_images, embedding_size))
            for i in range(nrof_batches_per_epoch):
                start_index = i * BATCH_SIZE
                end_index = min((i + 1) * BATCH_SIZE, nrof_images)
                paths_batch = paths[start_index:end_index]
                images = facenet.load_data(paths_batch, False, False,
                                           IMAGE_SIZE)
                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False
                }
                emb_array[start_index:end_index, :] = sess.run(
                    embeddings, feed_dict=feed_dict)

            classifier_filename_exp = os.path.expanduser(CLASSIFIER_FILENAME)

            if MODE == 'TRAIN':
                # Train classifier
                print('Training classifier')
                model = SVC(kernel='linear', probability=True)
                model.fit(emb_array, labels)

                # Create a list of class names
                class_names = [cls.name.replace('_', ' ') for cls in dataset]

                # Saving classifier model
                with open(classifier_filename_exp, 'wb') as outfile:
                    pickle.dump((model, class_names), outfile)
                print('Saved classifier model to file "%s"' %
                      classifier_filename_exp)

            elif MODE == 'CLASSIFY':
                # Classify images
                print('Testing classifier')
                with open(classifier_filename_exp, 'rb') as infile:
                    (model, class_names) = pickle.load(infile)

                print('Loaded classifier model from file "%s"' %
                      classifier_filename_exp)

                predictions = model.predict_proba(emb_array)
                best_class_indices = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[
                    np.arange(len(best_class_indices)), best_class_indices]

                for i in range(len(best_class_indices)):
                    print('%4d  %s: %.3f' %
                          (i, class_names[best_class_indices[i]],
                           best_class_probabilities[i]))

                accuracy = np.mean(np.equal(best_class_indices, labels))
                print('Accuracy: %.3f' % accuracy)
Example No. 5
 def train(self,
           sampling: str = 'user uniform',
           epochs: int = 5,
           batch_size: int = 256,
           epoch_sample_limit: int = None,
           model_path: str = None):
     assert isinstance(sampling, str)
     assert isinstance(epochs, int)
     assert isinstance(batch_size, int)
     with tf.Graph().as_default():
         u, i, j = self.build_graph()
         self.__saver = tf.train.Saver()
         if epoch_sample_limit is not None:
             assert isinstance(epoch_sample_limit, int)
             self.epoch_sample_limit = epoch_sample_limit
         batch_limit = self.epoch_sample_limit // batch_size + 1
         self.__sess = tf.Session(config=self.tf_config)
         sampler = None
         if sampling == 'user uniform':
             sampler = self._uniform_user_sampling
         with self.__sess.as_default():
             self.__sess.run(tf.global_variables_initializer())
             if model_path is not None:
                 assert isinstance(model_path, str)
                 tprint(
                     "Initialize weights with the previous trained model")
                 self.import_embeddings(model_path)
             tprint(
                 'Training parameters: lu=%.6f, li=%.6f, lj=%.6f, lb=%.6f' %
                 (self.lu, self.li, self.lj, self.lb))
             tprint('Learning rate is %.6f, regularization mode is %s' %
                    (self.lr, self.mode))
             tprint(
                 'Training for %d epochs of %d batches using %s sampler' %
                 (epochs, batch_limit, sampling))
             if self.fue is not None:
                 tprint('Initialize user embeddings')
                 self.__sess.run(tf.assign(self.__ue, self.fue))
             if self.fie is not None:
                 tprint('Initialize item embeddings')
                 self.__sess.run(tf.assign(self.__ie, self.fie))
             if self.fib is not None:
                 tprint('Initialize item biases')
                 self.__sess.run(tf.assign(self.__ib, self.fib.ravel()))
             for eid in range(epochs):
                 total_time = 0
                 bno = 1
                 for ub, ib, jb in sampler(batch_size):
                     t1 = time.time()
                     _, loss = self.__sess.run([self.solver, self.obj],
                                               feed_dict={
                                                   u: ub,
                                                   i: ib,
                                                   j: jb
                                               })
                     t2 = time.time() - t1
                     sys.stderr.write(
                         '\rEpoch=%3d, batch=%6d, loss=%8.4f, time=%4.4fs' %
                         (eid + 1, bno, loss, t2))
                     total_time += t2
                     bno += 1
                     if bno == batch_limit:
                         break
                  sys.stderr.write(' ... total time elapsed %8.4fs' %
                                   (total_time))
                 sys.stderr.flush()
                 print()
         self.fue = self.__sess.run(self.__ue)
         self.fie = self.__sess.run(self.__ie)
         self.fib = self.__sess.run(tf.reshape(self.__ib, (-1, 1)))
  def testMultipleConvMaskAdded(self, pruning_method):

    tf.reset_default_graph()
    g = tf.Graph()
    with g.as_default():
      number_of_layers = 5

      kernel_size = [3, 3]
      base_depth = 4
      depth_step = 7

      input_tensor = tf.ones((8, self.height, self.width, base_depth))

      top_layer = input_tensor

      for ix in range(number_of_layers):
        units = base_depth + (ix + 1) * depth_step
        top_layer = pruning_layers.sparse_conv2d(
            x=top_layer,
            units=units,
            kernel_size=kernel_size,
            is_training=False,
            sparsity_technique=pruning_method)

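      # Layer ix creates parameters of shape
      # [kernel_size[0], kernel_size[1], base_depth + ix * depth_step,
      #  base_depth + (ix + 1) * depth_step]; the assertions below check this
      # for each sparsity technique.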
      if pruning_method == 'variational_dropout':
        theta_logsigma2 = tf.get_collection(
            vd.layers.THETA_LOGSIGMA2_COLLECTION)
        self.assertLen(theta_logsigma2, number_of_layers)

        utils.add_vd_pruning_summaries(theta_logsigma2, threshold=3.0)

        dkl_loss_1 = utils.variational_dropout_dkl_loss(
            reg_scalar=1,
            start_reg_ramp_up=0,
            end_reg_ramp_up=1000,
            warm_up=False,
            use_tpu=False)
        dkl_loss_1 = tf.reshape(dkl_loss_1, [1])

        dkl_loss_2 = utils.variational_dropout_dkl_loss(
            reg_scalar=5,
            start_reg_ramp_up=0,
            end_reg_ramp_up=1000,
            warm_up=False,
            use_tpu=False)
        dkl_loss_2 = tf.reshape(dkl_loss_2, [1])

        for ix in range(number_of_layers):
          self.assertListEqual(theta_logsigma2[ix][0].get_shape().as_list(), [
              kernel_size[0], kernel_size[1], base_depth + ix * depth_step,
              base_depth + (ix + 1) * depth_step
          ])

        init_op = tf.global_variables_initializer()

        with self.test_session() as sess:
          sess.run(init_op)
          if pruning_method == 'variational_dropout':
            loss_1, loss_2 = sess.run([dkl_loss_1, dkl_loss_2])

            self.assertGreater(loss_2, loss_1)
      elif pruning_method == 'l0_regularization':
        theta_logalpha = tf.get_collection(
            l0.layers.THETA_LOGALPHA_COLLECTION)
        self.assertLen(theta_logalpha, number_of_layers)

        utils.add_l0_summaries(theta_logalpha)

        l0_norm_loss_1 = utils.l0_regularization_loss(
            reg_scalar=1,
            start_reg_ramp_up=0,
            end_reg_ramp_up=1000,
            warm_up=False,
            use_tpu=False)
        l0_norm_loss_1 = tf.reshape(l0_norm_loss_1, [1])

        l0_norm_loss_2 = utils.l0_regularization_loss(
            reg_scalar=5,
            start_reg_ramp_up=0,
            end_reg_ramp_up=1000,
            warm_up=False,
            use_tpu=False)
        l0_norm_loss_2 = tf.reshape(l0_norm_loss_2, [1])

        for ix in range(number_of_layers):
          self.assertListEqual(theta_logalpha[ix][0].get_shape().as_list(), [
              kernel_size[0], kernel_size[1], base_depth + ix * depth_step,
              base_depth + (ix + 1) * depth_step
          ])

        init_op = tf.global_variables_initializer()

        with self.test_session() as sess:
          sess.run(init_op)
          loss_1, loss_2 = sess.run([l0_norm_loss_1, l0_norm_loss_2])
          self.assertGreater(loss_2, loss_1)
      else:
        mask = tf.get_collection(core.MASK_COLLECTION)
        for ix in range(number_of_layers):
          self.assertListEqual(mask[ix].get_shape().as_list(), [
              kernel_size[0], kernel_size[1], base_depth + ix * depth_step,
              base_depth + (ix + 1) * depth_step
          ])
Example No. 7
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import pickle
import tensorflow.compat.v1 as tf
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
'''Start!'''
with tf.Graph().as_default():
    '''Set the log and save paths'''
    logs_path = 'logs_noise_cancel_machine'
    save_path = 'save_noise_cancel_machine'
    '''Prepare the training data'''
    with open('./data/noise_canceling_dataset.pickle', 'rb') as f:
        (x_train, y_train), (x_test, y_test) = pickle.load(f)
    '''Construct the neural network'''
    # Set the network parameters
    img_size = 256
    regulation = 0.01
    learning_rate = 0.001
    batch_size = 10
    dropout_rate = 0.1

    # Declare placeholders
    input_data = tf.placeholder(tf.float32, [None, img_size, img_size, 3],
                                name='input_data')
    input_label = tf.placeholder(tf.float32, [None, img_size, img_size, 3],
                                 name='input_label')

    # Set the trainable parameters
    W1 = tf.Variable(tf.random_normal([3, 3, 3, 128],
    def __init__(self,
                 A,
                 X,
                 Y,
                 num_hidden_feat,
                 learning_rate=5e-2,
                 gamma=1e-3,
                 idx_gpu="/gpu:2"):

        self.num_hidden_feat = num_hidden_feat
        self.learning_rate = learning_rate
        self.gamma = gamma
        with tf.Graph().as_default() as g:
            self.graph = g

            with tf.device(idx_gpu):
                # definition of constant matrices
                self.A = convert_coo_to_sparse_tensor(A.tocoo())
                self.X = tf.constant(X, dtype=tf.float32)
                self.Y = tf.constant(Y, dtype=tf.float32)

                self.W0 = tf.get_variable(
                    "W0",
                    shape=[X.shape[1], self.num_hidden_feat],
                    initializer=tf.contrib.layers.xavier_initializer(),
                )
                self.W1 = tf.get_variable(
                    "W1",
                    shape=[self.num_hidden_feat, Y.shape[1]],
                    initializer=tf.contrib.layers.xavier_initializer(),
                )

                # placeholder definition
                self.idx_nodes = tf.placeholder(tf.int32)
                self.keep_prob = tf.placeholder(tf.float32)

                # model definition
                self.l_input = tf.nn.dropout(self.X, self.keep_prob)

                self.X0_tilde = tf.sparse_tensor_dense_matmul(
                    self.A, self.l_input)
                self.X0 = tf.matmul(self.X0_tilde, self.W0)
                self.X0 = tf.nn.relu(self.X0)
                self.X0 = tf.nn.dropout(self.X0, self.keep_prob)

                self.X1_tilde = tf.sparse_tensor_dense_matmul(self.A, self.X0)
                self.logits = tf.matmul(self.X1_tilde, self.W1)

                self.l_out = tf.gather(self.logits, self.idx_nodes)
                self.c_Y = tf.gather(self.Y, self.idx_nodes)

                # loss function definition
                self.l2_reg = tf.nn.l2_loss(self.W0)
                self.data_loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(logits=self.l_out,
                                                            labels=self.c_Y))
                self.loss = self.data_loss + self.gamma * self.l2_reg

                # solver definition
                self.optimizer = tf.train.AdamOptimizer(
                    learning_rate=self.learning_rate)
                self.opt_step = self.optimizer.minimize(self.loss)

                # predictions and accuracy extraction
                self.c_predictions = tf.argmax(tf.nn.softmax(self.l_out), 1)
                self.accuracy = tf.contrib.metrics.accuracy(
                    self.c_predictions, tf.argmax(self.c_Y, 1))

                # gradients computation
                self.trainable_variables = tf.trainable_variables()
                self.var_grad = tf.gradients(self.loss,
                                             tf.trainable_variables())
                self.norm_grad = frobenius_norm(
                    tf.concat([tf.reshape(g, [-1]) for g in self.var_grad], 0))

                # session creation
                config = tf.ConfigProto(allow_soft_placement=True)
                config.gpu_options.allow_growth = True
                self.session = tf.Session(config=config)

                # session initialization
                init = tf.global_variables_initializer()
                self.session.run(init)
Example No. 9
    def build_graph(self):
        """Builds the neural network graph."""

        # define graph
        self.g = tf.Graph()
        with self.g.as_default():
            self.sess = tf.Session()
            self.x = tf.placeholder(shape=[None, 24, 24, 3], dtype=tf.float32)
            self.y = tf.placeholder(shape=[None, NUM_CLASSES],
                                    dtype=tf.float32)

            # conv1
            with tf.variable_scope('conv1') as scope:
                kernel = variable_on_cpu('weights', shape=[5, 5, 3, 64])
                conv = tf.nn.conv2d(self.x,
                                    kernel, [1, 1, 1, 1],
                                    padding='SAME')
                biases = variable_on_cpu('biases', [64])
                pre_activation = tf.nn.bias_add(conv, biases)
                conv1 = tf.nn.relu(pre_activation, name=scope.name)

            # pool1
            pool1 = tf.nn.max_pool(conv1,
                                   ksize=[1, 3, 3, 1],
                                   strides=[1, 2, 2, 1],
                                   padding='SAME',
                                   name='pool1')
            # norm1
            norm1 = tf.nn.lrn(pool1,
                              4,
                              bias=1.0,
                              alpha=0.001 / 9.0,
                              beta=0.75,
                              name='norm1')

            # conv2
            with tf.variable_scope('conv2') as scope:
                kernel = variable_on_cpu('weights', shape=[5, 5, 64, 64])
                conv = tf.nn.conv2d(norm1,
                                    kernel, [1, 1, 1, 1],
                                    padding='SAME')
                biases = variable_on_cpu('biases', [64])
                pre_activation = tf.nn.bias_add(conv, biases)
                conv2 = tf.nn.relu(pre_activation, name=scope.name)

            # norm2
            norm2 = tf.nn.lrn(conv2,
                              4,
                              bias=1.0,
                              alpha=0.001 / 9.0,
                              beta=0.75,
                              name='norm2')
            # pool2
            pool2 = tf.nn.max_pool(norm2,
                                   ksize=[1, 3, 3, 1],
                                   strides=[1, 2, 2, 1],
                                   padding='SAME',
                                   name='pool2')

            # local3
            with tf.variable_scope('local3') as scope:
                # Move everything into depth so we can perform a single matrix multiply.
                # images.get_shape().as_list()[0] = batchsize
                # reshape = tf.keras.layers.Flatten()(pool2)
                reshape = tf.reshape(pool2, [tf.shape(self.x)[0], 6 * 6 * 64])
                dim = reshape.get_shape()[1].value
                weights = variable_on_cpu('weights', shape=[dim, 384])
                biases = variable_on_cpu('biases', [384])
                local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases,
                                    name=scope.name)

            # local4
            with tf.variable_scope('local4') as scope:
                weights = variable_on_cpu('weights', shape=[384, 192])
                biases = variable_on_cpu('biases', [192])
                self.local4 = tf.nn.relu(tf.matmul(local3, weights) + biases,
                                         name=scope.name)

            # linear layer(WX + b),
            # We don't apply softmax here because
            # tf.nn.sparse_softmax_cross_entropy_with_logits
            # accepts the unscaled logits
            # and performs the softmax internally for efficiency.
            with tf.variable_scope('softmax_linear') as scope:
                weights = variable_on_cpu('weights', [192, NUM_CLASSES])
                biases = variable_on_cpu('biases', [NUM_CLASSES])
                self.softmax_linear = tf.add(tf.matmul(self.local4, weights),
                                             biases,
                                             name=scope.name)

            print('loading the network ...')
            saver_network = tf.train.Saver()
            # Restores from checkpoint
            saver_network.restore(
                self.sess, os.path.join(PATH_MODEL, 'model.ckpt-100000'))
            print('Graph successfully loaded.')
def convert(checkpoint_from_path,
            checkpoint_to_path,
            num_heads,
            name_replacements,
            permutations,
            exclude_patterns=None):
  """Migrates the names of variables within a checkpoint.

  Args:
    checkpoint_from_path: Path to source checkpoint to be read in.
    checkpoint_to_path: Path to checkpoint to be written out.
    num_heads: The number of heads of the model.
    name_replacements: A list of tuples of the form (match_str, replace_str)
      describing variable names to adjust.
    permutations: A list of tuples of the form (match_str, permutation)
      describing permutations to apply to given variables. Note that match_str
      should match the original variable name, not the replaced one.
    exclude_patterns: A list of string patterns to exclude variables from
      checkpoint conversion.

  Returns:
    A dictionary that maps the new variable names to the Variable objects.
    A dictionary that maps the old variable names to the new variable names.
  """
  with tf.Graph().as_default():
    tf.logging.info("Reading checkpoint_from_path %s", checkpoint_from_path)
    reader = tf.train.NewCheckpointReader(checkpoint_from_path)
    name_shape_map = reader.get_variable_to_shape_map()
    new_variable_map = {}
    conversion_map = {}
    for var_name in name_shape_map:
      if exclude_patterns and _has_exclude_patterns(var_name, exclude_patterns):
        continue
      # Get the original tensor data.
      tensor = reader.get_tensor(var_name)

      # Look up the new variable name, if any.
      new_var_name = _bert_name_replacement(var_name, name_replacements)

      # See if we need to reshape the underlying tensor.
      new_shape = None
      if num_heads > 0:
        new_shape = _get_new_shape(new_var_name, tensor.shape, num_heads)
      if new_shape:
        tf.logging.info("Veriable %s has a shape change from %s to %s",

                        var_name, tensor.shape, new_shape)
        tensor = np.reshape(tensor, new_shape)

      # See if we need to permute the underlying tensor.
      permutation = _get_permutation(var_name, permutations)
      if permutation:
        tensor = np.transpose(tensor, permutation)

      # Create a new variable with the possibly-reshaped or transposed tensor.
      var = tf.Variable(tensor, name=var_name)

      # Save the variable into the new variable map.
      new_variable_map[new_var_name] = var

      # Keep a list of converter variables for sanity checking.
      if new_var_name != var_name:
        conversion_map[var_name] = new_var_name

    saver = tf.train.Saver(new_variable_map)

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      tf.logging.info("Writing checkpoint_to_path %s", checkpoint_to_path)
      saver.save(sess, checkpoint_to_path)

  tf.logging.info("Summary:")
  tf.logging.info("  Converted %d variable name(s).", len(new_variable_map))
  tf.logging.info("  Converted: %s", str(conversion_map))
Example No. 11
    def train(self,
              env_fn,
              hparams,
              simulated,
              save_continuously,
              epoch,
              sampling_temp=1.0,
              num_env_steps=None,
              env_step_multiplier=1,
              eval_env_fn=None,
              report_fn=None,
              model_save_fn=None):
        assert sampling_temp == 1.0 or hparams.learning_rate == 0.0, \
            "Sampling with non-1 temperature does not make sense during training."

        if not save_continuously:
            # We do not save model, as that resets frames that we need at restarts.
            # But we need to save at the last step, so we set it very high.
            hparams.save_models_every_epochs = 1000000

        if simulated:
            simulated_str = "sim"
        else:
            simulated_str = "real"
        name_scope = "ppo_{}{}".format(simulated_str, epoch + 1)
        event_dir = os.path.join(self.base_event_dir, "ppo_summaries",
                                 str(epoch) + simulated_str)

        with tf.Graph().as_default():
            with tf.name_scope(name_scope):
                with tf.variable_scope(tf.get_variable_scope(),
                                       reuse=tf.AUTO_REUSE):
                    env = env_fn(in_graph=True)
                    (train_summary_op, eval_summary_op,
                     initializers) = (_define_train(
                         env,
                         hparams,
                         eval_env_fn,
                         sampling_temp,
                         distributional_size=self._distributional_size,
                         distributional_subscale=self._distributional_subscale,
                         distributional_threshold=self._distributional_threshold,
                         epoch=epoch if simulated else -1,
                         frame_stack_size=self.frame_stack_size,
                         force_beginning_resets=simulated))

                if num_env_steps is None:
                    iteration_increment = hparams.epochs_num
                else:
                    iteration_increment = int(
                        math.ceil(num_env_steps /
                                  (env.batch_size * hparams.epoch_length)))
                iteration_increment *= env_step_multiplier

                self._num_completed_iterations += iteration_increment

                restarter = Restarter("policy", self.agent_model_dir,
                                      self._num_completed_iterations)
                if restarter.should_skip:
                    return

                if hparams.lr_decay_in_final_epoch:
                    if epoch != self.total_num_epochs - 1:
                        # Extend the warmup period to the end of this epoch.
                        hparams.learning_rate_warmup_steps = restarter.target_global_step
                    else:
                        if self._lr_decay_start is None:
                            # Stop the warmup at the beginning of this epoch.
                            self._lr_decay_start = \
                                restarter.target_global_step - iteration_increment
                        hparams.learning_rate_warmup_steps = self._lr_decay_start

                _run_train(hparams,
                           event_dir,
                           self.agent_model_dir,
                           restarter,
                           train_summary_op,
                           eval_summary_op,
                           initializers,
                           epoch,
                           report_fn=report_fn,
                           model_save_fn=model_save_fn)
Example No. 12
    def __init__(self,
                 config,
                 batch_size,
                 checkpoint_dir_or_path=None,
                 var_name_substitutions=None,
                 session_target='',
                 **sample_kwargs):
        if tf.gfile.IsDirectory(checkpoint_dir_or_path):
            checkpoint_path = tf.train.latest_checkpoint(
                checkpoint_dir_or_path)
        else:
            checkpoint_path = checkpoint_dir_or_path
        self._config = copy.deepcopy(config)
        self._config.data_converter.set_mode('infer')
        self._config.hparams.batch_size = batch_size
        with tf.Graph().as_default():
            model = self._config.model
            model.build(self._config.hparams,
                        self._config.data_converter.output_depth,
                        is_training=False)
            # Input placeholders
            self._temperature = tf.placeholder(tf.float32, shape=())

            if self._config.hparams.z_size:
                self._z_input = tf.placeholder(
                    tf.float32,
                    shape=[batch_size, self._config.hparams.z_size])
            else:
                self._z_input = None

            if self._config.data_converter.control_depth > 0:
                self._c_input = tf.placeholder(
                    tf.float32,
                    shape=[None, self._config.data_converter.control_depth])
            else:
                self._c_input = None

            self._inputs = tf.placeholder(
                tf.float32,
                shape=[
                    batch_size, None, self._config.data_converter.input_depth
                ])
            self._controls = tf.placeholder(
                tf.float32,
                shape=[
                    batch_size, None, self._config.data_converter.control_depth
                ])
            self._inputs_length = tf.placeholder(
                tf.int32,
                shape=[batch_size] +
                list(self._config.data_converter.length_shape))
            self._max_length = tf.placeholder(tf.int32, shape=())
            # Outputs
            self._outputs, self._decoder_results = model.sample(
                batch_size,
                max_length=self._max_length,
                z=self._z_input,
                c_input=self._c_input,
                temperature=self._temperature,
                **sample_kwargs)
            if self._config.hparams.z_size:
                q_z = model.encode(self._inputs, self._inputs_length,
                                   self._controls)
                self._mu = q_z.loc
                self._sigma = q_z.scale.diag
                self._z = q_z.sample()

            var_map = None
            if var_name_substitutions is not None:
                var_map = {}
                for v in tf.global_variables():
                    var_name = v.name[:-2]  # Strip ':0' suffix.
                    for pattern, substitution in var_name_substitutions:
                        var_name = re.sub(pattern, substitution, var_name)
                    if var_name != v.name[:-2]:
                        tf.logging.info('Renaming `%s` to `%s`.', v.name[:-2],
                                        var_name)
                    var_map[var_name] = v

            # Restore graph
            self._sess = tf.Session(target=session_target)
            saver = tf.train.Saver(var_map)
            if (os.path.exists(checkpoint_path)
                    and tarfile.is_tarfile(checkpoint_path)):
                tf.logging.info('Unbundling checkpoint.')
                with tempfile.TemporaryDirectory() as temp_dir:
                    tar = tarfile.open(checkpoint_path)
                    tar.extractall(temp_dir)
                    # Assume only a single checkpoint is in the directory.
                    for name in tar.getnames():
                        if name.endswith('.index'):
                            checkpoint_path = os.path.join(
                                temp_dir, name[0:-6])
                            break
                    saver.restore(self._sess, checkpoint_path)
            else:
                saver.restore(self._sess, checkpoint_path)
def main(unused_argv):

  train_steps = FLAGS.train_steps
  num_episode = FLAGS.num_episode
  trainee_model = FLAGS.trainee_model

  best_validation_loss = 10

  batch_size = 1000
  train_size = 50000
  valid_size = 10000
  test_batch_num = 10
  if trainee_model == 'resnet':
    test_size = 1000
  else:
    test_size = 10000

  fashion_mnist = keras.datasets.fashion_mnist
  (train_images, train_labels), (test_images,
                                 test_labels) = fashion_mnist.load_data()
  train_images = train_images / 255.0
  test_images = test_images / 255.0

  # Different input tensor shape for different trainee models.
  if trainee_model == 'mlp':
    num_pixels = train_images.shape[1] * train_images.shape[2]
    x_train = train_images.reshape(train_images.shape[0],
                                   num_pixels).astype('float32')
    train_set = x_train[0:train_size, :]
    train_label = train_labels[0:train_size].astype('int64')
    valid_set = x_train[train_size:, :]
    valid_label = train_labels[train_size:].astype('int64')

    test_set = test_images.reshape(test_images.shape[0],
                                   num_pixels).astype('float32')
    test_label = test_labels.astype('int64')
  elif trainee_model == 'cnn' or trainee_model == 'resnet':
    x_train = train_images.reshape(train_images.shape[0],
                                   train_images.shape[1],
                                   train_images.shape[2], 1).astype('float32')
    train_set = x_train[0:train_size, :, :, :]
    train_label = train_labels[0:train_size].astype('int64')
    valid_set = x_train[train_size:, :, :, :]
    valid_label = train_labels[train_size:].astype('int64')

    test_set = test_images.reshape(test_images.shape[0],
                                   test_images.shape[1],
                                   test_images.shape[2], 1).astype('float32')
    test_label = test_labels.astype('int64')

  init_observation = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
  observation_dim = init_observation.shape[0]
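  # The 9 observation features (see new_observation below) are: validation loss,
  # prediction log-variance EMA, prediction-change log-variance EMA, training
  # loss, mean/variance of w1, mean/variance of w2, and the learning-rate action.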

  adaptive_tuner_sess = tf.Session()
  adaptive_tuner = AdaptiveTuner(
      sess=adaptive_tuner_sess,
      n_features=observation_dim,
      n_actions=1,
      actor_lr=FLAGS.actor_learning_rate,
      critic_lr=FLAGS.critic_learning_rate,
      proposed_learning_rate=FLAGS.default_learning_rate)

  # Log the training process.
  running_reward = 0
  valid_loss_matrix = np.zeros((num_episode, train_steps))
  train_loss_matrix = np.zeros((num_episode, train_steps))
  valid_accuracy_matrix = np.zeros((num_episode, train_steps))
  test_loss_matrix = np.zeros((num_episode, train_steps))
  test_accuracy_matrix = np.zeros((num_episode, train_steps))
  reward_matrix = np.zeros((num_episode, train_steps))
  observation_matrix = np.zeros((num_episode, train_steps, observation_dim))
  learning_rate_matrix = np.zeros((num_episode, train_steps))
  running_reward_array = np.zeros(num_episode)

  gnn = tf.Graph()
  with gnn.as_default():

    # Prepare train/valid/test split for Fashion Mnist
    dataset = tf.data.Dataset.from_tensor_slices(
        (train_set, train_label)).repeat().batch(batch_size)
    train_iter = tf.data.make_one_shot_iterator(dataset)
    feature, label = train_iter.get_next()
    valid_dataset = tf.data.Dataset.from_tensor_slices(
        (valid_set, valid_label)).repeat().batch(valid_size)
    valid_iter = tf.data.make_one_shot_iterator(valid_dataset)
    valid_feature, valid_label = valid_iter.get_next()
    test_dataset = tf.data.Dataset.from_tensor_slices(
        (test_set, test_label)).repeat().batch(test_size)
    test_iter = tf.data.make_one_shot_iterator(test_dataset)
    test_feature, test_label = test_iter.get_next()

    learned_learning_rate = tf.placeholder(tf.float32, shape=[])

    # Build trainee model.
    if trainee_model == 'mlp':
      train_logits = build_mlp_model(feature, reuse=False)
      valid_logits = build_mlp_model(valid_feature, reuse=True)
      test_logits = build_mlp_model(test_feature, reuse=True)
    elif trainee_model == 'cnn':
      train_logits = build_cnn_model(feature, reuse=False)
      valid_logits = build_cnn_model(valid_feature, reuse=True)
      test_logits = build_cnn_model(test_feature, reuse=True)
    elif trainee_model == 'resnet':
      resnet_size = 18
      resnet_18 = resnet_model.FastCifar10Model(
          resnet_size=resnet_size, data_format='channels_first')
      train_logits = resnet_18(feature, True)
      train_logits = tf.cast(train_logits, tf.float32)
      valid_logits = resnet_18(valid_feature, False)
      valid_logits = tf.cast(valid_logits, tf.float32)
      test_logits = resnet_18(test_feature, False)
      test_logits = tf.cast(test_logits, tf.float32)
    else:
      raise ValueError('Wrong trainee_model flag value.')

    prediction = tf.nn.softmax(train_logits)
    valid_prediction = tf.nn.softmax(valid_logits)
    test_prediction = tf.nn.softmax(test_logits)

    if trainee_model == 'resnet':
      w1 = tf.get_default_graph().get_tensor_by_name(
          'resnet_model/dense/kernel:0')
      w2 = tf.get_default_graph().get_tensor_by_name(
          'resnet_model/dense/bias:0')
    else:
      w1 = tf.get_default_graph().get_tensor_by_name('w1/kernel:0')
      w2 = tf.get_default_graph().get_tensor_by_name('w2/kernel:0')

    mean_w1, var_w1 = tf.nn.moments(w1, axes=[0, 1])
    if trainee_model == 'resnet':
      mean_w2, var_w2 = tf.nn.moments(w2, axes=[0])
    else:
      mean_w2, var_w2 = tf.nn.moments(w2, axes=[0, 1])

    loss = tf.losses.sparse_softmax_cross_entropy(
        labels=label, logits=train_logits)

    train_op = tf.train.AdamOptimizer(
        learning_rate=learned_learning_rate).minimize(loss)

    if trainee_model == 'resnet':
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      train_op = tf.group([train_op, update_ops])

    valid_loss = tf.losses.sparse_softmax_cross_entropy(
        labels=valid_label, logits=valid_logits)
    valid_accuracy = compute_accuracy(valid_prediction, valid_label)
    test_loss = tf.losses.sparse_softmax_cross_entropy(
        labels=test_label, logits=test_logits)
    test_accuracy = compute_accuracy(test_prediction, test_label)

  for i_episode in range(num_episode):

    obs_t, action_t, reward_t, obs_t_1 = [], [], [], []
    best_target_valid_loss = 10.0
    with tf.Session(config=tf.ConfigProto(), graph=gnn) as sess:
      sess.run(tf.global_variables_initializer())
      observation = init_observation
      prev_prediction = np.random.rand(batch_size, 10)
      prediction_log_var_ema = 0
      prediction_change_log_var_ema = 0
      reward = 0
      num_reward = 0
      average_reward = 0
      reward_sum = 0
      keep_lr_interval = 1
      exp_decay = 0.8
      action = 0.0
      track_reward = []
      adaptive_tuner.proposed_learning_rate = FLAGS.default_learning_rate
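      # A new learning rate (action) is chosen every keep_lr_interval steps and
      # during the final few steps; otherwise the previous action is reused.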
      for i in range(train_steps):
        if keep_lr_interval == 1 or i > train_steps - 10:
          action, action_multiplier = adaptive_tuner.choose_action(observation)
          keep_lr_interval = FLAGS.keep_lr_interval
          learning_rate_matrix[i_episode, i] = action

          _, loss_value, prediction_value, valid_loss_value, valid_accuracy_value, mean_w1_value, var_w1_value, mean_w2_value, var_w2_value = (
              sess.run([
                  train_op, loss, prediction, valid_loss, valid_accuracy,
                  mean_w1, var_w1, mean_w2, var_w2
              ],
                       feed_dict={learned_learning_rate: action}))

          if valid_loss_value < best_target_valid_loss:
            best_target_valid_loss = valid_loss_value
            if trainee_model == 'resnet':
              total_test_loss = 0.0
              total_test_accuracy = 0.0
              for _ in range(test_batch_num):
                batch_test_loss_value, batch_test_accuracy_value = sess.run(
                    [test_loss, test_accuracy])
                total_test_loss += batch_test_loss_value
                total_test_accuracy += batch_test_accuracy_value
              test_loss_value = total_test_loss / test_batch_num
              test_accuracy_value = total_test_accuracy / test_batch_num
            else:
              test_loss_value, test_accuracy_value = sess.run(
                  [test_loss, test_accuracy])

            test_loss_matrix[i_episode, i] = test_loss_value
            test_accuracy_matrix[i_episode, i] = test_accuracy_value

          train_loss_matrix[i_episode, i] = loss_value
          valid_loss_matrix[i_episode, i] = valid_loss_value
          valid_accuracy_matrix[i_episode, i] = valid_accuracy_value
          diff_prediction = prediction_value - prev_prediction

          if prediction_log_var_ema == 0:
            prediction_log_var_ema = np.log(np.var(prediction_value))
          else:
            prediction_log_var_ema = exp_decay * prediction_log_var_ema + (
                1 - exp_decay) * np.log(np.var(prediction_value) + LOG_BASE)

          if prediction_change_log_var_ema == 0:
            prediction_change_log_var_ema = np.log(np.var(diff_prediction))
          else:
            prediction_change_log_var_ema = exp_decay * prediction_change_log_var_ema + (
                1 - exp_decay) * np.log(np.var(diff_prediction) + LOG_BASE)

          # Collect all state observations.
          new_observation = np.array([
              valid_loss_value, prediction_log_var_ema,
              prediction_change_log_var_ema, loss_value, mean_w1_value,
              var_w1_value, mean_w2_value, var_w2_value, action
          ])
          observation_matrix[i_episode, i, :] = observation
          # Different reward baselines: the reward is the baseline minus the
          # current validation loss (the 'Ratio' option instead uses a
          # reciprocal of the validation loss).
          if FLAGS.reward_baseline == 'Fixed':
            reward = FLAGS.fixed_baseline_value - valid_loss_value
          elif FLAGS.reward_baseline == 'Average':
            reward_sum += valid_loss_value
            num_reward += 1
            average_reward = float(reward_sum) / num_reward
            reward = average_reward - valid_loss_value
          elif FLAGS.reward_baseline == 'Exponential':
            if average_reward == 0:
              average_reward = valid_loss_value
            else:
              average_reward = average_reward * exp_decay + (
                  valid_loss_value * (1 - exp_decay))
            reward = average_reward - valid_loss_value
          elif FLAGS.reward_baseline == 'Ratio':
            reward = FLAGS.reward_numerator / valid_loss_value - FLAGS.fixed_baseline_value
          else:
            raise ValueError('Wrong reward_baseline flag value.')

          reward_matrix[i_episode, i] = reward

          track_reward.append(reward)
          obs_t.append(observation)
          action_t.append(np.squeeze(action_multiplier))
          reward_t.append(reward)
          obs_t_1.append(new_observation)
          observation = new_observation
          prev_prediction = prediction_value
        else:
          keep_lr_interval -= 1
          sess.run([train_op], feed_dict={learned_learning_rate: action})

        if i == train_steps - 1:
          current_best_validation_loss = valid_loss_matrix[i_episode, i]
          if current_best_validation_loss < best_validation_loss:
            best_validation_loss = current_best_validation_loss

          obs_t_ts, action_t_ts, reward_t_ts, obs_t_1_ts = np.vstack(
              obs_t), np.vstack(action_t), np.vstack(reward_t), np.vstack(
                  obs_t_1)
          value_t = adaptive_tuner.get_value(obs_t_1_ts)
          td_reward = reward_t_ts + GAMMA * value_t
          adaptive_tuner.update(obs_t_ts, action_t_ts, td_reward)
          obs_t, action_t, reward_t, obs_t_1 = [], [], [], []
          episode_reward = sum(track_reward)
          if running_reward == 0:
            running_reward = episode_reward
          else:
            running_reward = running_reward * 0.99 + episode_reward * 0.01
          running_reward_array[i_episode] = running_reward
Example No. 14
    def generate_text(self, input_text):
        model_name = '124M_TRAINED'  #String, which model to use
        seed = None  #Integer seed for random number generators, fix seed to reproduce results
        nsamples = 1  #Number of samples to return total
        batch_size = 1  #Number of batches (only affects speed/memory).  Must divide nsamples.
        length = 150  #Number of tokens in generated text, if None (default), is determined by model hyperparameters
        temperature = 1  #Float value controlling randomness in Boltzmann
        # distribution. Lower temperature results in less random completions. As the
        # temperature approaches zero, the model will become deterministic and
        # repetitive. Higher temperature results in more random completions.
        top_k = 40  # Integer value controlling diversity. 1 means only 1 word is
        # considered for each step (token), resulting in deterministic completions,
        # while 40 means 40 words are considered at each step. 0 (default) is a
        # special setting meaning no restrictions. 40 generally is a good value.
        top_p = 1
        #:models_dir : path to parent folder containing model subfolders (i.e. contains the <model_name> folder)
        self.response = ""

        if batch_size is None:
            batch_size = 1
        assert nsamples % batch_size == 0
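        # e.g. nsamples=4 with batch_size=2 would run two sampling passes of two
        # samples each (illustrative values; both are 1 here).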

        # this is going to give us our current location where our generator.py is
        cur_path = os.path.dirname(__file__) + "/models/" + model_name
        enc = encoder.get_encoder(model_name,
                                  os.path.dirname(__file__) + "/models")
        hparams = model.default_hparams()
        with open(os.path.join(cur_path, 'hparams.json')) as f:
            hparams.override_from_dict(json.load(f))

        if length is None:
            length = hparams.n_ctx // 2
        elif length > hparams.n_ctx:
            raise ValueError("Can't get samples longer than window size: %s" %
                             hparams.n_ctx)

        with tf.Session(graph=tf.Graph()) as sess:
            context = tf.placeholder(tf.int32, [batch_size, None])
            np.random.seed(seed)
            tf.set_random_seed(seed)
            output = sample.sample_sequence(hparams=hparams,
                                            length=length,
                                            context=context,
                                            batch_size=batch_size,
                                            temperature=temperature,
                                            top_k=top_k,
                                            top_p=top_p)

            saver = tf.train.Saver()
            ckpt = tf.train.latest_checkpoint(cur_path)
            saver.restore(sess, ckpt)

            context_tokens = enc.encode(input_text)
            generated = 0
            for _ in range(nsamples // batch_size):
                out = sess.run(output,
                               feed_dict={
                                   context:
                                   [context_tokens for _ in range(batch_size)]
                               })[:, len(context_tokens):]
                for i in range(batch_size):
                    generated += 1
                    # our response/generated text
                    text = enc.decode(out[i])
                    self.response = text
        # return generated text
        return self.response
  def testCompressionOpInterface(self):
    with tf.Graph().as_default():
      with self.cached_session() as sess:
        compression_hparams = ("name=cifar10_compression,"
                               "begin_compression_step=1000,"
                               "end_compression_step=120000,"
                               "compression_frequency=10,"
                               "compression_option=1,"
                               "update_option=0")
        global_step = tf.compat.v1.get_variable("global_step", initializer=30)
        c = compression_op.CompressionOp(
            spec=compression_op.CompressionOp.get_default_hparams().parse(
                compression_hparams),
            global_step=global_step)
        # Need to add initial value for a_matrix so that we would know what
        # to expect back.
        a_matrix_init = np.array([[1.0, 1.0, 1.0], [1.0, 0, 0], [1.0, 0, 0]])
        a_matrix = tf.compat.v1.get_variable(
            "a_matrix",
            initializer=a_matrix_init.astype(np.float32),
            dtype=tf.float32)
        matrix_compressor = compression_op.LowRankDecompMatrixCompressor(
            spec=compression_op.LowRankDecompMatrixCompressor
            .get_default_hparams().parse("num_rows=3,num_cols=3,rank=200"))

        [a_matrix_compressed, a_matrix_update_op] = c.get_apply_compression_op(
            a_matrix, matrix_compressor, scope="my_scope")

        tf.compat.v1.global_variables_initializer().run()
        self.assertAllEqual(
            np.all(np.abs(np.linalg.norm(c.a_matrix_tfvar.eval())) < 0.00001),
            False)
        self.assertAllEqual(
            np.all(np.abs(np.linalg.norm(c.b_matrix_tfvar.eval())) < 0.00001),
            True)
        self.assertAllEqual(
            np.all(np.abs(np.linalg.norm(c.c_matrix_tfvar.eval())) < 0.00001),
            True)

        tf.compat.v1.assign(global_step, 1001).eval()
        sess.run(a_matrix_update_op)
        a_matrix_compressed.eval()
        self.assertEqual(c._global_step.eval(), 1001)
        self.assertAlmostEqual(c.alpha.eval(), 0.99)
        self.assertEqual(c._last_alpha_update_step.eval(), 1001)
        self.assertAllEqual(
            np.array([
                np.linalg.norm(c.a_matrix_tfvar.eval()),
                np.linalg.norm(c.b_matrix_tfvar.eval()),
                np.linalg.norm(c.c_matrix_tfvar.eval())
            ]) > 0, [True, True, True])

        self.assertAllEqual(
            np.all(np.abs(np.linalg.norm(c.b_matrix_tfvar.eval())) < 0.00001),
            False)
        self.assertAllEqual(
            np.all(np.abs(np.linalg.norm(c.c_matrix_tfvar.eval())) < 0.00001),
            False)

        [b_matrix,
         c_matrix] = matrix_compressor.static_matrix_compressor(a_matrix_init)
        # since the matrices may match up to signs, we take absolute values.
        self.assertAllEqual(
            np.linalg.norm(np.abs(b_matrix) - np.abs(c.b_matrix_tfvar.eval())) <
            0.00001, True)
        self.assertAllEqual(
            np.linalg.norm(np.abs(c_matrix) - np.abs(c.c_matrix_tfvar.eval())) <
            0.00001, True)
        self.assertAllEqual(
            np.all(np.abs(np.linalg.norm(c.b_matrix_tfvar.eval())) < 0.00001),
            False)
        self.assertAllEqual(
            np.all(np.abs(np.linalg.norm(c.c_matrix_tfvar.eval())) < 0.00001),
            False)

        print("before 1002 step, c.alpha is ", c.alpha.eval())
        tf.compat.v1.assign(global_step, 1001).eval()
        sess.run(a_matrix_update_op)
        a_matrix_compressed.eval()
        print("after 1002 step, c.alpha is ", c.alpha.eval())
        self.assertEqual(c._global_step.eval(), 1001)
        self.assertAlmostEqual(c.alpha.eval(), 0.99)
        self.assertEqual(c._last_alpha_update_step.eval(), 1001)
        self.assertAllEqual(
            np.all([
                np.linalg.norm(c.a_matrix_tfvar.eval()),
                np.linalg.norm(c.b_matrix_tfvar.eval()),
                np.linalg.norm(c.c_matrix_tfvar.eval())
            ]) > 0, True)

        print("before 2000 step, alpha is ", c.alpha.eval())
        tf.compat.v1.assign(global_step, 2000).eval()
        a_matrix_update_op.eval()
        a_matrix_compressed.eval()
        print("after 2000 step, alpha is ", c.alpha.eval())
        self.assertEqual(c._global_step.eval(), 2000)
        self.assertAlmostEqual(c.alpha.eval(), 0.98)
        self.assertEqual(c._last_alpha_update_step.eval(), 2000)
        self.assertAllEqual(
            np.array([
                np.linalg.norm(c.a_matrix_tfvar.eval()),
                np.linalg.norm(c.b_matrix_tfvar.eval()),
                np.linalg.norm(c.c_matrix_tfvar.eval())
            ]) > 0, [True, True, True])
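The factor checks above compare absolute values because a low-rank decomposition is only determined up to sign. A minimal NumPy sketch of that property, using an SVD stand-in rather than the library's static_matrix_compressor:

import numpy as np

a = np.array([[1.0, 1.0, 1.0], [1.0, 0.0, 0.0], [1.0, 0.0, 0.0]])
u, s, vt = np.linalg.svd(a, full_matrices=False)
k = 2  # assumed rank for this sketch
b = u[:, :k] * s[:k]  # plays the role of b_matrix
c = vt[:k, :]         # plays the role of c_matrix
assert np.linalg.norm(a - b.dot(c)) < 1e-6
# Flipping the sign of one column of b and the matching row of c leaves the
# product unchanged, which is why the assertions compare np.abs(...) of factors.
b[:, 0] *= -1.0
c[0, :] *= -1.0
assert np.linalg.norm(a - b.dot(c)) < 1e-6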
Example #16
            if not checkpoint:
                raise ValueError(
                    "No checkpoint file found in: {}".format(checkpoint))

        saver.restore(sess, checkpoint)

        #sess.run(tf.initialize_all_variables())
        '''
        for op in tf.get_default_graph().get_operations():
            print(op.name, op.op_def.name)
        for var in tf.global_variables():
            print(var.name)
            # if "convolutional_alexnet/conv1/weights" in var.name:
            if "detection/biases" in var.name:
                print(sess.run(var))
        '''

        # extract frozen graph
        # http://workpiles.com/2016/07/tensorflow-protobuf-dump/
        frozen_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(), ["upsample/final_result"])

        frozen_graph = tf.Graph()
        with frozen_graph.as_default():
            tf.import_graph_def(frozen_graph_def)
            tf.train.write_graph(frozen_graph_def,
                                 model_save_dir,
                                 'whole_model_scale' + str(args.scale) + '.pb',
                                 as_text=False)
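A hedged sketch of loading the exported frozen graph back for inference; the .pb path is an assumption, and the output tensor name comes from the export call above:

import tensorflow as tf

pb_path = 'whole_model_scale2.pb'  # assumed path; the export above writes under model_save_dir
graph_def = tf.GraphDef()
with tf.gfile.GFile(pb_path, 'rb') as f:
    graph_def.ParseFromString(f.read())
with tf.Graph().as_default() as g:
    tf.import_graph_def(graph_def, name='')
    output = g.get_tensor_by_name('upsample/final_result:0')
    # A tf.Session(graph=g) could then run `output` with the model's input feeds.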
Example #17
def train(images, labels, ckpt_path, dropout=False):
  """
  This function contains the loop that actually trains the model.
  :param images: a numpy array with the input data
  :param labels: a numpy array with the output labels
  :param ckpt_path: a path (including name) where model checkpoints are saved
  :param dropout: Boolean, whether to use dropout or not
  :return: True if everything went well
  """

  # Check training data
  assert len(images) == len(labels)
  assert images.dtype == np.float32
  assert labels.dtype == np.int32

  # Set default TF graph
  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)

    # Declare data placeholder
    train_data_node = _input_placeholder()

    # Create a placeholder to hold labels
    train_labels_shape = (FLAGS.batch_size,)
    train_labels_node = tf.placeholder(tf.int32, shape=train_labels_shape)

    print("Done Initializing Training Placeholders")

    # Build a Graph that computes the logits predictions from the placeholder
    if FLAGS.deeper:
      logits = inference_deeper(train_data_node, dropout=dropout)
    else:
      logits = inference(train_data_node, dropout=dropout)

    # Calculate loss
    loss = loss_fun(logits, train_labels_node)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = train_op_fun(loss, global_step)

    # Create a saver.
    saver = tf.train.Saver(tf.global_variables())

    print("Graph constructed and saver created")

    # Build an initialization operation to run below.
    init = tf.global_variables_initializer()

    # Create and init sessions
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)) #NOLINT(long-line)
    sess.run(init)

    print("Session ready, beginning training loop")

    # Initialize the number of batches
    data_length = len(images)
    nb_batches = math.ceil(data_length / FLAGS.batch_size)

    for step in xrange(FLAGS.max_steps):
      # for debug, save start time
      start_time = time.time()

      # Current batch number
      batch_nb = step % nb_batches

      # Current batch start and end indices
      start, end = utils.batch_indices(batch_nb, data_length, FLAGS.batch_size)

      # Prepare dictionary to feed the session with
      feed_dict = {train_data_node: images[range(start, end)],
                   train_labels_node: labels[range(start, end)]}

      # Run training step
      _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

      # Compute duration of training step
      duration = time.time() - start_time

      # Sanity check
      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      # Echo loss once in a while
      if step % 100 == 0:
        num_examples_per_step = FLAGS.batch_size
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = float(duration)

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print (format_str % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch))

      # Save the model checkpoint periodically.
      if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        saver.save(sess, ckpt_path, global_step=step)

  return True
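The training loop above delegates slicing to utils.batch_indices; a plausible sketch of that helper (assumed behavior, not the verified implementation: a contiguous slice that is shifted back so the final batch stays within the data):

def batch_indices_sketch(batch_nb, data_length, batch_size):
  # Assumed behavior of utils.batch_indices.
  start = int(batch_nb * batch_size)
  end = int((batch_nb + 1) * batch_size)
  if end > data_length:
    shift = end - data_length
    start -= shift
    end -= shift
  return start, end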
Example #18
def model_inference(model_fn, model_dir, checkpoint_path, data_fn, hparams,
                    examples_path, output_dir, summary_writer, master,
                    preprocess_examples, shuffle_examples):
    """Runs inference for the given examples."""
    tf.logging.info('model_dir=%s', model_dir)
    tf.logging.info('checkpoint_path=%s', checkpoint_path)
    tf.logging.info('examples_path=%s', examples_path)
    tf.logging.info('output_dir=%s', output_dir)

    estimator = train_util.create_estimator(model_fn,
                                            model_dir,
                                            hparams,
                                            master=master)

    transcription_data = functools.partial(
        data_fn,
        examples=examples_path,
        preprocess_examples=preprocess_examples,
        is_training=False,
        shuffle_examples=shuffle_examples,
        skip_n_initial_records=0)

    input_fn = infer_util.labels_to_features_wrapper(transcription_data)

    start_time = time.time()
    infer_times = []
    num_frames = []

    file_num = 0

    all_metrics = collections.defaultdict(list)

    for predictions in estimator.predict(input_fn,
                                         checkpoint_path=checkpoint_path,
                                         yield_single_examples=False):

        # Remove batch dimension for convenience.
        for k in predictions.keys():
            if predictions[k].shape[0] != 1:
                raise ValueError(
                    'All predictions must have batch size 1, but shape of '
                    '{} was: {}'.format(k, predictions[k].shape[0]))
            predictions[k] = predictions[k][0]

        end_time = time.time()
        infer_time = end_time - start_time
        infer_times.append(infer_time)
        num_frames.append(predictions['frame_predictions'].shape[0])
        tf.logging.info(
            'Infer time %f, frames %d, frames/sec %f, running average %f',
            infer_time, num_frames[-1], num_frames[-1] / infer_time,
            np.sum(num_frames) / np.sum(infer_times))

        tf.logging.info('Scoring sequence %s', predictions['sequence_ids'])

        sequence_prediction = music_pb2.NoteSequence.FromString(
            predictions['sequence_predictions'])
        sequence_label = music_pb2.NoteSequence.FromString(
            predictions['sequence_labels'])

        # Make filenames UNIX-friendly.
        filename_chars = six.ensure_text(predictions['sequence_ids'], 'utf-8')
        filename_chars = [c if c.isalnum() else '_' for c in filename_chars]
        filename_safe = ''.join(filename_chars).rstrip()
        filename_safe = '{:04d}_{}'.format(file_num, filename_safe[:200])
        file_num += 1
        output_file = os.path.join(output_dir, filename_safe + '.mid')
        tf.logging.info('Writing inferred midi file to %s', output_file)
        midi_io.sequence_proto_to_midi_file(sequence_prediction, output_file)

        label_output_file = os.path.join(output_dir,
                                         filename_safe + '_label.mid')
        tf.logging.info('Writing label midi file to %s', label_output_file)
        midi_io.sequence_proto_to_midi_file(sequence_label, label_output_file)

        # Also write a pianoroll showing acoustic model output vs labels.
        pianoroll_output_file = os.path.join(output_dir,
                                             filename_safe + '_pianoroll.png')
        tf.logging.info('Writing acoustic logit/label file to %s',
                        pianoroll_output_file)
        # Calculate frames based on the sequence. Includes any postprocessing done
        # to turn raw onsets/frames predictions into the final sequence.
        # TODO(fjord): This work is duplicated in metrics.py.
        sequence_frame_predictions = sequences_lib.sequence_to_pianoroll(
            sequence_prediction,
            frames_per_second=data.hparams_frames_per_second(hparams),
            min_pitch=constants.MIN_MIDI_PITCH,
            max_pitch=constants.MAX_MIDI_PITCH).active
        with tf.gfile.GFile(pianoroll_output_file, mode='w') as f:
            imageio.imwrite(f,
                            infer_util.posterior_pianoroll_image(
                                predictions['onset_probs'],
                                predictions['onset_labels'],
                                predictions['frame_probs'],
                                predictions['frame_labels'],
                                sequence_frame_predictions),
                            format='png')

        # Update histogram and current scalar for metrics.
        with tf.Graph().as_default(), tf.Session().as_default():
            for k, v in predictions.items():
                if not k.startswith('metrics/'):
                    continue
                all_metrics[k].extend(v)
                histogram_name = k + '_histogram'
                metric_summary = tf.summary.histogram(histogram_name,
                                                      all_metrics[k])
                summary_writer.add_summary(metric_summary.eval(),
                                           global_step=file_num)
                scalar_name = k
                metric_summary = tf.summary.scalar(scalar_name,
                                                   np.mean(all_metrics[k]))
                summary_writer.add_summary(metric_summary.eval(),
                                           global_step=file_num)
            summary_writer.flush()

        start_time = time.time()

    # Write final mean values for all metrics.
    with tf.Graph().as_default(), tf.Session().as_default():
        for k, v in all_metrics.items():
            final_scalar_name = 'final/' + k
            metric_summary = tf.summary.scalar(final_scalar_name,
                                               np.mean(all_metrics[k]))
            summary_writer.add_summary(metric_summary.eval())
        summary_writer.flush()
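The summary_writer passed into model_inference is assumed to be a TF1 tf.summary.FileWriter pointed at the inference output directory; a minimal construction sketch:

summary_writer = tf.summary.FileWriter('/tmp/infer_summaries')  # path assumed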
Example #19
def run_train(scope):
    """Trains a network.

  Args:
    scope: the scope of variables in this function
  """
    with tf.Graph().as_default():
        with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks)):
            to_gray = True
            if 'sem' in FLAGS.network_id:
                to_gray = False
            batch_frames, batch_labels = get_samples(to_gray, 'train')
            batch_hmg_prediction, _ = predict_homography(
                batch_frames,
                network_id=FLAGS.network_id,
                is_training=True,
                scope=scope)

            if FLAGS.loss == 'hier_l2':
                for level in range(FLAGS.num_level):
                    delta_level = FLAGS.num_level - level - 1
                    scale = 2**delta_level
                    l2 = tf.losses.mean_squared_error(
                        batch_labels / scale, batch_hmg_prediction[level])
                    slim.summaries.add_scalar_summary(l2, 'l2%d' % delta_level,
                                                      'losses')
            elif FLAGS.loss == 'hier_ld':
                for level in range(FLAGS.num_level):
                    delta_level = FLAGS.num_level - level - 1
                    scale = 2**delta_level
                    diff = tf.reshape(
                        batch_labels / scale - batch_hmg_prediction[level],
                        [FLAGS.batch_size, 4, 2])
                    l2d = tf.reduce_mean(
                        tf.sqrt(tf.reduce_sum(tf.square(diff), 2)))
                    tf.losses.add_loss(l2d)
                    slim.summaries.add_scalar_summary(l2d,
                                                      'l2%d' % delta_level,
                                                      'losses')
            else:
                l2 = tf.losses.mean_squared_error(
                    batch_labels, batch_hmg_prediction[FLAGS.num_level - 1])
            slim.summaries.add_scalar_summary(slim.losses.get_total_loss(),
                                              'loss', 'losses')

            global_step = slim.get_or_create_global_step()
            learning_rate_decay = tf.train.exponential_decay(
                learning_rate=FLAGS.learning_rate,
                global_step=global_step,
                decay_steps=FLAGS.lr_decay_steps,
                decay_rate=FLAGS.lr_decay_rate,
                staircase=True)
            optimizer = tf.train.AdamOptimizer(learning_rate_decay)

            is_chief = (FLAGS.task == 0)
            train_op = slim.learning.create_train_op(
                slim.losses.get_total_loss(), optimizer=optimizer)
            saver = tf.train.Saver(max_to_keep=20)
            if FLAGS.level_wise == 0:
                variables_to_restore = []
                for i in range(0, FLAGS.num_level - 1):
                    variables = slim.get_variables(scope='%s/level%d' %
                                                   (scope, i))
                    variables_to_restore = variables_to_restore + variables
                init_fn = slim.assign_from_checkpoint_fn(
                    FLAGS.model_path, variables_to_restore)
            elif 'sem' in FLAGS.network_id:
                variables_to_restore = slim.get_variables(scope='vgg_16')
                init_fn = slim.assign_from_checkpoint_fn(
                    FLAGS.vgg_model_path, variables_to_restore)
            else:
                init_fn = None
            slim.learning.train(train_op=train_op,
                                logdir=FLAGS.train_dir,
                                save_summaries_secs=60,
                                save_interval_secs=600,
                                saver=saver,
                                number_of_steps=FLAGS.max_step,
                                master=FLAGS.master,
                                is_chief=is_chief,
                                init_fn=init_fn)
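For reference, a sketch of the staircased schedule configured above (the closed form of tf.train.exponential_decay with staircase=True):

def staircase_lr(step, learning_rate, decay_rate, decay_steps):
    # learning_rate * decay_rate ** floor(step / decay_steps)
    return learning_rate * decay_rate ** (step // decay_steps)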
Example #20
def AdaBatch(gpu_id,
             input_reader,
             model_type,
             training_epochs,
             batch_size,
             lr_boundaries,
             lr_values,
             optimizer_type,
             update_method,
             warm_up_period,
             s_e=100.0,
             pretrain=0,
             log_dir="log"):
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    text_log = []
    text_log.append(
        "epoch, time(s), learning rate, minibatch loss, minibatch error, test loss, test error"
    )

    num_train_images = input_reader.num_train_images
    num_val_images = input_reader.num_val_images
    num_label = input_reader.num_classes
    image_shape = [input_reader.width, input_reader.height, input_reader.depth]

    train_batch_patcher = patcher.BatchPatcher(num_train_images,
                                               batch_size,
                                               num_label,
                                               s_e=s_e,
                                               update_method=update_method)
    validation_batch_patcher = patcher.BatchPatcher(
        num_val_images, batch_size, num_label, update_method=update_method)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.visible_device_list = str(gpu_id)
    config.gpu_options.allow_growth = True
    graph = tf.Graph()

    with graph.as_default():
        with tf.device('/gpu:' + str(gpu_id)):
            with tf.Session(config=config) as sess:

                # Input Graph Generation #############################################################################
                t_ids, t_images, t_labels = input_reader.data_read(batch_size,
                                                                   train=True)
                v_ids, v_images, v_labels = input_reader.data_read(batch_size,
                                                                   train=False)

                # Model Graph Construction ###########################################################################
                if model_type == "DenseNet-25-12":
                    model = DenseNet(25, 12, image_shape, num_label,
                                     batch_size, batch_size)
                elif model_type == "WideResNet16-8":
                    model = WideResNet(16, 8, image_shape, num_label,
                                       batch_size, batch_size)

                train_loss_op, train_accuracy_op, train_op, _, train_distance_op = model.build_train_op(
                    lr_boundaries, lr_values, optimizer_type)
                test_loss_op, test_accuracy_op, _ = model.build_test_op()

                # Data load in memory ###############################################################################
                print("start to load data set.")
                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(coord=coord)
                train_batch_patcher.bulk_load_in_memory(
                    sess, t_ids, t_images, t_labels)
                validation_batch_patcher.bulk_load_in_memory(
                    sess, v_ids, v_images, v_labels)

                start_time = time.time()
                # Model Initialization ###########################################################################
                # init params: we share the initial epochs. See paper.
                if pretrain != 0:
                    start_time = time.time()
                    saver = tf.train.Saver()
                    file_dir = "init_weight/" + input_reader.dataset_name + "/" + model_type + "_" + optimizer_type + "_lr=" + str(
                        lr_values[0]) + "_e=" + str(pretrain) + "/"
                    minus_start_time = 0
                    with open(file_dir + "text_log.csv") as f:
                        for line in f:
                            print(line, end="")
                            text_log.append(line.rstrip())
                            minus_start_time = line.split(",")[1]
                    start_time = start_time - float(minus_start_time)
                    saver.restore(sess, file_dir + "model.ckpt")

                    for i in range(train_batch_patcher.num_iters_per_epoch):
                        ids, images, labels = train_batch_patcher.get_init_mini_batch(
                            i)
                        distance = sess.run(train_distance_op,
                                            feed_dict={
                                                model.train_image_placeholder:
                                                images,
                                                model.train_label_placeholder:
                                                labels
                                            })
                        train_batch_patcher.update_prob_table(ids, distance)
                    print(train_batch_patcher.prob_table.table)

                    print("shared weight is successfully loaded")
                else:
                    sess.run(tf.global_variables_initializer())

                # Training Process ###################################################################################

                for epoch in range(pretrain, training_epochs):

                    if epoch < warm_up_period:
                        is_warm_up = True
                    else:
                        is_warm_up = False

                    # (1) Mini-batch loss and error along with network updates
                    avg_mini_loss = 0.0
                    avg_mini_acc = 0.0
                    for i in range(train_batch_patcher.num_iters_per_epoch):
                        # Next batch depends on the method: {Ada_Boundary, Ada-Hard, Ada-Uniform}
                        ids, images, labels = train_batch_patcher.get_next_mini_batch(
                            num_of_sample=batch_size, is_warm_up=is_warm_up)
                        mini_loss, mini_acc, _, distance = sess.run(
                            [
                                train_loss_op, train_accuracy_op, train_op,
                                train_distance_op
                            ],
                            feed_dict={
                                model.train_image_placeholder: images,
                                model.train_label_placeholder: labels
                            })
                        train_batch_patcher.update_prob_table(ids, distance)
                        avg_mini_loss += mini_loss
                        avg_mini_acc += mini_acc
                    avg_mini_loss /= train_batch_patcher.num_iters_per_epoch
                    avg_mini_acc /= train_batch_patcher.num_iters_per_epoch

                    # (2) Compute training loss and error
                    avg_train_loss = 0.0
                    avg_train_acc = 0.0
                    for i in range(train_batch_patcher.num_iters_per_epoch):
                        ids, images, labels = train_batch_patcher.get_init_mini_batch(
                            i)
                        train_loss, train_acc = sess.run(
                            [test_loss_op, test_accuracy_op],
                            feed_dict={
                                model.test_image_placeholder: images,
                                model.test_label_placeholder: labels
                            })
                        avg_train_loss += train_loss
                        avg_train_acc += train_acc
                    avg_train_loss /= train_batch_patcher.num_iters_per_epoch
                    avg_train_acc /= train_batch_patcher.num_iters_per_epoch

                    # (3) Validation (or test) loss and error
                    avg_val_loss = 0.0
                    avg_val_acc = 0.0
                    for i in range(
                            validation_batch_patcher.num_iters_per_epoch):
                        ids, images, labels = validation_batch_patcher.get_init_mini_batch(
                            i)
                        val_loss, val_acc = sess.run(
                            [test_loss_op, test_accuracy_op],
                            feed_dict={
                                model.test_image_placeholder: images,
                                model.test_label_placeholder: labels
                            })
                        avg_val_loss += val_loss
                        avg_val_acc += val_acc
                    avg_val_loss /= validation_batch_patcher.num_iters_per_epoch
                    avg_val_acc /= validation_batch_patcher.num_iters_per_epoch

                    # Log Writing ####################################################################################
                    cur_lr = sess.run(model.learning_rate)
                    print((epoch + 1), ", ", int(time.time() - start_time),
                          ", ", cur_lr, ", ", avg_mini_loss, ", ",
                          (1.0 - avg_mini_acc), ", ", avg_train_loss, ", ",
                          (1.0 - avg_train_acc), ", ", avg_val_loss, ", ",
                          (1.0 - avg_val_acc))
                    text_log.append(
                        str(epoch + 1) + ", " +
                        str(int(time.time() - start_time)) + ", " +
                        str(cur_lr) + ", " + str(avg_mini_loss) + ", " +
                        str(1.0 - avg_mini_acc) + ", " + str(avg_train_loss) +
                        ", " + str(1.0 - avg_train_acc) + ", " +
                        str(avg_val_loss) + ", " + str(1.0 - avg_val_acc))

                coord.request_stop()
                coord.join(threads)
                sess.close()

        # Log Flushing
        f = open(log_dir + "/text_log.csv", "w")
        for text in text_log:
            f.write(text + "\n")
        f.close()
Example #21
    def __init__(self, bert_config, tokenizer, **kwargs):
        device = get_device(**kwargs)
        _graph = tf.Graph()
        with _graph.as_default():
            with tf.device(device):
                self.X = tf.placeholder(tf.int32, [None, None])
                self.segment_ids = tf.placeholder(tf.int32, [None, None])
                self.top_p = tf.placeholder(tf.float32, None)
                self.top_k = tf.placeholder(tf.int32, None)
                self.k = tf.placeholder(tf.int32, None)
                self.temperature = tf.placeholder(tf.float32, None)
                self.indices = tf.placeholder(tf.int32, [None, None])
                self.MASK = tf.placeholder(tf.int32, [None, None])
                self._tokenizer = tokenizer

                self.model = modeling.BertModel(
                    config=bert_config,
                    is_training=False,
                    input_ids=self.X,
                    input_mask=self.MASK,
                    use_one_hot_embeddings=False,
                )
                self.logits = self.model.get_pooled_output()
                output_layer = self.model.get_sequence_output()
                embedding = self.model.get_embedding_table()

                with tf.variable_scope('cls/predictions'):
                    with tf.variable_scope('transform'):
                        input_tensor = tf.layers.dense(
                            output_layer,
                            units=bert_config.hidden_size,
                            activation=modeling.get_activation(
                                bert_config.hidden_act),
                            kernel_initializer=modeling.create_initializer(
                                bert_config.initializer_range),
                        )
                        input_tensor = modeling.layer_norm(input_tensor)
                    output_bias = tf.get_variable(
                        'output_bias',
                        shape=[bert_config.vocab_size],
                        initializer=tf.zeros_initializer(),
                    )
                    logits = tf.matmul(input_tensor,
                                       embedding,
                                       transpose_b=True)
                    self._logits = tf.nn.bias_add(logits, output_bias)
                    self._log_softmax = tf.nn.log_softmax(self._logits)

                logits = tf.gather_nd(self._logits, self.indices)
                logits = logits / self.temperature

                def nucleus():
                    return top_p_logits(logits, self.top_p)

                def select_k():
                    return top_k_logits(logits, self.top_k)

                logits = tf.cond(self.top_p > 0, nucleus, select_k)
                self.samples = tf.multinomial(logits,
                                              num_samples=self.k,
                                              output_dtype=tf.int32)

                self._sess = generate_session(_graph, **kwargs)
                self._sess.run(tf.global_variables_initializer())
                var_lists = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                              scope='bert')
                cls = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                        scope='cls')
                self._saver = tf.train.Saver(var_list=var_lists + cls)
                attns = _extract_attention_weights(
                    bert_config.num_hidden_layers, tf.get_default_graph())
                self.attns = attns
Example #22
# Download the model here in case it was not downloaded or put in the correct location.
if not os.path.exists(MODEL_FILE):
    opener = urllib.request.URLopener()
    opener.retrieve(
        'http://download.tensorflow.org/models/object_detection/' + MODEL_FILE,
        MODEL_FILE)
    tar_file = tarfile.open(MODEL_FILE)
    for file in tar_file.getmembers():
        file_name = os.path.basename(file.name)
        if 'frozen_inference_graph.pb' in file_name:
            tar_file.extract(file, os.getcwd())

# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'

detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

# Create the mapping from label index to human-readable object label (e.g. 'Person')
PATH_TO_LABELS = os.path.join('object_detection', 'data',
                              'mscoco_label_map.pbtxt')
#PATH_TO_LABELS='/home/pi/final_project/v1/fa20-cs498it-lab1-master/src/adas/object_detection/data/mscoco_label_map.pbtxt'
#categories = label_map_util.create_categories_from_labelmap(PATH_TO_LABELS, use_display_name=True

sys.path.append('')
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS, use_display_name=True)
def train_collect_eval(collect_env,
                       eval_env,
                       test_env,
                       policy_class,
                       run_agent_fn=run_env.run_env,
                       train_fn=train_q.train_q,
                       do_collect_eval=True,
                       file_patterns='',
                       get_data_fn=input_data.get_data,
                       onpolicy=True,
                       num_collect=100,
                       num_eval=100,
                       num_test=100,
                       data_format='tfrecord',
                       eval_frequency=5,
                       root_dir=None,
                       task=0,
                       master='',
                       ps_tasks=0):
    """Runs synchronous train, collect, eval loop.

  This loop instantiates the policy instance from policy_class. The policy
  manages its own tf.Session. The train function may create its own session for
  the purpose of updating its variables.

  train_fn reuses graph created by policy, to avoid having to
  configure the same neural net twice (one for policy and one for training.)

  Args:
    collect_env: (gym.Env) Gym environment to collect data from (and train the
      policy on).
    eval_env: (gym.Env) Gym environment to evaluate the policy on. Can be
      another instance of collect_env, or a different environment if one wishes
      to evaluate generalization capability. The only constraint is that the
      action and observation spaces have to be equivalent. If None, eval_env
      is not evaluated.
    test_env: (gym.Env) Another environment to evaluate on.  Either another
      instance of collect_env, or a different environment to evaluate
      generalization.
    policy_class: Policy class that we want to train.
    run_agent_fn: (Optional) Python function that executes the interaction of
      the policy with the environment. Defaults to run_env.run_env.
    train_fn: (Optional) Python function that trains the policy. Defaults to
      train_q.train_q.
    do_collect_eval: If True, performs data collection using the trained policy.
    file_patterns: (str) Comma-separated regex of file patterns to train on.
      This is used to instantiate the file-backed "replay buffer".
    get_data_fn: (Optional) Python function that fetches data from files.
    onpolicy: (bool) If True, appends data from policy_collect directory.
    num_collect: (int) Number of episodes to collect & evaluate from
      collect_env.
    num_eval: (int) Number of episodes to collect & evaluate from eval_env.
    num_test: (int) Number of episodes to collect & evaluate from test_env.
    data_format: (string) File extension of input data files.
    eval_frequency: (int) How many times we run eval/test vs. collect.
      Evaluating is costly compared to training, so we can speed up iteration
      time by not evaluating every time we collect.
    root_dir: (str) Root directory for this training trial. Training directory,
      eval directory are subdirectories of root_dir.
    task: (int) Optional worker task for distributed training. Defaults to solo
      master task on a single machine
    master: (int) Optional address of master worker. Specify this when doing
      distributed training.
    ps_tasks: (int) Optional number of parameter-server tasks. Used only for
      distributed TF training jobs.

  Raises:
    ValueError: If ps_tasks > 0 (implies distributed training) while
      do_collect_eval is set to True.
  """
    # Spaces do not implement `==` operator. Convert to strings to check
    # compatibility between training & eval env representation.
    if ((collect_env and eval_env) and
        (str(collect_env.observation_space), str(collect_env.action_space)) !=
        (str(eval_env.observation_space), str(eval_env.action_space))):
        raise ValueError('Collect and Eval environments have incompatible '
                         'observation or action dimensions.')
    if ps_tasks > 0 and do_collect_eval:
        raise ValueError(
            'Collecting data not supported by distributed training jobs')
    if onpolicy:
        file_patterns += ',' + os.path.join(root_dir, 'policy_collect',
                                            '*.%s' % data_format)
    train_dir = os.path.join(root_dir, 'train')
    it = 0
    while True:
        tf.reset_default_graph()
        # Re-fresh the source of data.
        with tf.Graph().as_default():
            with tf.device(tf.train.replica_device_setter(ps_tasks)):
                policy = policy_class()
                if train_fn:
                    dataset = get_data_fn(file_patterns=file_patterns)
                    step, done = train_fn(dataset,
                                          policy,
                                          log_dir=train_dir,
                                          reuse=True,
                                          task=task,
                                          master=master)
                else:
                    step, done = 0, True
                if train_fn:
                    tf.logging.info('Evaluating policy at step %d' % step)
                    ckpt = tf.train.latest_checkpoint(train_dir)
                    tf.logging.info('Restoring model variables from %s' % ckpt)
                    policy.restore(ckpt)
                    if ckpt:
                        step = int(ckpt.split('.ckpt-')[-1])
                if onpolicy:
                    run_agent_fn(collect_env,
                                 policy=policy,
                                 global_step=step,
                                 root_dir=root_dir,
                                 task=task,
                                 num_episodes=num_collect,
                                 tag='collect')

                if it % eval_frequency == 0:
                    if eval_env:
                        run_agent_fn(eval_env,
                                     policy=policy,
                                     global_step=step,
                                     root_dir=root_dir,
                                     task=task,
                                     explore_schedule=None,
                                     num_episodes=num_eval,
                                     tag='eval')
                    if test_env:
                        run_agent_fn(test_env,
                                     policy=policy,
                                     global_step=step,
                                     root_dir=root_dir,
                                     task=task,
                                     explore_schedule=None,
                                     num_episodes=num_test,
                                     tag='test')

                it += 1
            if done:
                tf.logging.info('Train-Collect-Eval completed.')
                break
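A hypothetical invocation, kept as comments because the environments, policy class and paths below are assumptions rather than part of the original:

# import gym
# train_collect_eval(
#     collect_env=gym.make('CartPole-v0'),
#     eval_env=gym.make('CartPole-v0'),
#     test_env=None,
#     policy_class=MyPolicyClass,  # assumed policy implementation
#     root_dir='/tmp/train_collect_eval_trial')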
Example #24
    def generate_encoded_samples(self, data_dir, tmp_dir, dataset_split):
        """Generate samples of the encoded frames with possible extra data.

    By default this function just encodes the numpy array returned as "frame"
    from `self.generate_samples` into a PNG image. Override this function to
    get other encodings on disk.

    Args:
      data_dir: final data directory. Typically only used in this method to copy
        over user-supplied vocab files if there are extra fields needing them.
      tmp_dir: temporary directory that you can use for downloading and scratch.
      dataset_split: problem.DatasetSplit, which data split to generate samples
        for (for example, training and evaluation).

    Yields:
      Sample: dict<str feature_name, feature value> which is in disk encoding.

    Raises:
      ValueError: if the frame has a different number of channels than required.
    """
        writer = None

        with tf.Graph().as_default():
            image_t = tf.placeholder(dtype=tf.uint8, shape=(None, None, None))
            encoded_image_t = tf.image.encode_png(image_t)
            with tf.Session() as sess:
                for features in self.generate_samples(data_dir, tmp_dir,
                                                      dataset_split):
                    unencoded_frame = features.pop("frame")
                    self.validate_frame(unencoded_frame)
                    height, width, _ = unencoded_frame.shape
                    encoded_frame = sess.run(
                        encoded_image_t, feed_dict={image_t: unencoded_frame})
                    features["image/encoded"] = [encoded_frame]
                    features["image/format"] = ["png"]
                    features["image/height"] = [height]
                    features["image/width"] = [width]

                    has_debug_image = "image/debug" in features
                    if has_debug_image:
                        unencoded_debug = features.pop("image/debug")
                        encoded_debug = sess.run(
                            encoded_image_t,
                            feed_dict={image_t: unencoded_debug})
                        features["image/encoded_debug"] = [encoded_debug]

                    if self.debug_dump_frames_path:
                        # Defer creating debug writer until we know debug_dump_frames_path.
                        if writer is None:
                            if not tf.gfile.Exists(
                                    self.debug_dump_frames_path):
                                tf.gfile.MkDir(self.debug_dump_frames_path)
                            writer = debug_video_writer_factory(
                                self.debug_dump_frames_path)
                        img = unencoded_debug if has_debug_image else unencoded_frame
                        encoded_img = encoded_debug if has_debug_image else encoded_frame
                        writer.write(img, encoded_img)

                    yield features

        if self.debug_dump_frames_path:
            writer.finish_to_disk()
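The docstring above suggests overriding this method to get other encodings on disk; a hedged sketch of a JPEG variant (the class name is an assumption, and the debug-dump handling is omitted):

class JpegEncodedVideoProblem(object):  # would subclass the problem class above
    def generate_encoded_samples(self, data_dir, tmp_dir, dataset_split):
        with tf.Graph().as_default():
            image_t = tf.placeholder(dtype=tf.uint8, shape=(None, None, None))
            encoded_image_t = tf.image.encode_jpeg(image_t)
            with tf.Session() as sess:
                for features in self.generate_samples(data_dir, tmp_dir,
                                                      dataset_split):
                    frame = features.pop("frame")
                    height, width, _ = frame.shape
                    features["image/encoded"] = [
                        sess.run(encoded_image_t, feed_dict={image_t: frame})]
                    features["image/format"] = ["jpeg"]
                    features["image/height"] = [height]
                    features["image/width"] = [width]
                    yield features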
Example #25
#import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

from build_cnn import build_cnn
from cnn_utils import load, predict
from std_mnist import X_test_centered, y_test

# 2019.12.07 add
## Define random seed
random_seed = 123
np.random.seed(random_seed)

## create a new graph
## and build the model
g2 = tf.Graph()
with g2.as_default():
    tf.set_random_seed(random_seed)
    ## build the graph
    build_cnn()

    ## saver:
    saver = tf.train.Saver()

np.set_printoptions(precision=2, suppress=True)

with tf.Session(graph=g2) as sess:
    load(saver, sess, epoch=20, path='./model/')

    print(predict(sess, X_test_centered[:10], return_proba=False))
Example #26
def train(create_tensor_dict_fn,
          create_model_fn,
          train_config,
          master,
          task,
          num_clones,
          worker_replicas,
          clone_on_cpu,
          ps_tasks,
          worker_job_name,
          is_chief,
          train_dir,
          graph_hook_fn=None):
  """Training function for detection models.

  Args:
    create_tensor_dict_fn: a function to create a tensor input dictionary.
    create_model_fn: a function that creates a DetectionModel and generates
                     losses.
    train_config: a train_pb2.TrainConfig protobuf.
    master: BNS name of the TensorFlow master to use.
    task: The task id of this training instance.
    num_clones: The number of clones to run per machine.
    worker_replicas: The number of work replicas to train with.
    clone_on_cpu: True if clones should be forced to run on CPU.
    ps_tasks: Number of parameter server tasks.
    worker_job_name: Name of the worker job.
    is_chief: Whether this replica is the chief replica.
    train_dir: Directory to write checkpoints and training summaries to.
    graph_hook_fn: Optional function that is called after the inference graph is
      built (before optimization). This is helpful to perform additional changes
      to the training graph such as adding FakeQuant ops. The function should
      modify the default graph.

  Raises:
    ValueError: If both num_clones > 1 and train_config.sync_replicas is true.
  """

  detection_model = create_model_fn()
  data_augmentation_options = [
      preprocessor_builder.build(step)
      for step in train_config.data_augmentation_options]

  with tf.Graph().as_default():
    # Build a configuration specifying multi-GPU and multi-replicas.
    deploy_config = model_deploy.DeploymentConfig(
        num_clones=num_clones,
        clone_on_cpu=clone_on_cpu,
        replica_id=task,
        num_replicas=worker_replicas,
        num_ps_tasks=ps_tasks,
        worker_job_name=worker_job_name)

    # Place the global step on the device storing the variables.
    with tf.device(deploy_config.variables_device()):
      global_step = slim.create_global_step()

    if num_clones != 1 and train_config.sync_replicas:
      raise ValueError('In Synchronous SGD mode num_clones must '
                       'be 1. Found num_clones: {}'.format(num_clones))
    batch_size = train_config.batch_size // num_clones
    if train_config.sync_replicas:
      batch_size //= train_config.replicas_to_aggregate

    with tf.device(deploy_config.inputs_device()):
      input_queue = create_input_queue(
          batch_size, create_tensor_dict_fn,
          train_config.batch_queue_capacity,
          train_config.num_batch_queue_threads,
          train_config.prefetch_queue_capacity, data_augmentation_options)

    # Gather initial summaries.
    # TODO(rathodv): See if summaries can be added/extracted from global tf
    # collections so that they don't have to be passed around.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
    global_summaries = set([])

    model_fn = functools.partial(_create_losses,
                                 create_model_fn=create_model_fn,
                                 train_config=train_config)
    clones = model_deploy.create_clones(deploy_config, model_fn, [input_queue])
    first_clone_scope = clones[0].scope

    if graph_hook_fn:
      with tf.device(deploy_config.variables_device()):
        graph_hook_fn()

    # Gather update_ops from the first clone. These contain, for example,
    # the updates for the batch_norm variables created by model_fn.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    with tf.device(deploy_config.optimizer_device()):
      training_optimizer, optimizer_summary_vars = optimizer_builder.build(
          train_config.optimizer)
      for var in optimizer_summary_vars:
        tf.summary.scalar(var.op.name, var, family='LearningRate')

    sync_optimizer = None
    if train_config.sync_replicas:
      training_optimizer = tf.train.SyncReplicasOptimizer(
          training_optimizer,
          replicas_to_aggregate=train_config.replicas_to_aggregate,
          total_num_replicas=worker_replicas)
      sync_optimizer = training_optimizer

    with tf.device(deploy_config.optimizer_device()):
      regularization_losses = (None if train_config.add_regularization_loss
                               else [])
      total_loss, grads_and_vars = model_deploy.optimize_clones(
          clones, training_optimizer,
          regularization_losses=regularization_losses)
      total_loss = tf.check_numerics(total_loss, 'LossTensor is inf or nan.')

      # Optionally multiply bias gradients by train_config.bias_grad_multiplier.
      if train_config.bias_grad_multiplier:
        biases_regex_list = ['.*/biases']
        grads_and_vars = variables_helper.multiply_gradients_matching_regex(
            grads_and_vars,
            biases_regex_list,
            multiplier=train_config.bias_grad_multiplier)

      # Optionally freeze some layers by setting their gradients to be zero.
      if train_config.freeze_variables:
        grads_and_vars = variables_helper.freeze_gradients_matching_regex(
            grads_and_vars, train_config.freeze_variables)

      # Optionally clip gradients
      if train_config.gradient_clipping_by_norm > 0:
        with tf.name_scope('clip_grads'):
          grads_and_vars = slim.learning.clip_gradient_norms(
              grads_and_vars, train_config.gradient_clipping_by_norm)

      # Create gradient updates.
      grad_updates = training_optimizer.apply_gradients(grads_and_vars,
                                                        global_step=global_step)
      update_ops.append(grad_updates)
      update_op = tf.group(*update_ops, name='update_barrier')
      with tf.control_dependencies([update_op]):
        train_tensor = tf.identity(total_loss, name='train_op')

    # Add summaries.
    for model_var in slim.get_model_variables():
      global_summaries.add(tf.summary.histogram('ModelVars/' +
                                                model_var.op.name, model_var))
    for loss_tensor in tf.losses.get_losses():
      global_summaries.add(tf.summary.scalar('Losses/' + loss_tensor.op.name,
                                             loss_tensor))
    global_summaries.add(
        tf.summary.scalar('Losses/TotalLoss', tf.losses.get_total_loss()))

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                       first_clone_scope))
    summaries |= global_summaries

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries), name='summary_op')

    # Soft placement allows placing on CPU ops without GPU implementation.
    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)

    # Save checkpoints regularly.
    keep_checkpoint_every_n_hours = train_config.keep_checkpoint_every_n_hours
    saver = tf.train.Saver(
        keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)

    # Create ops required to initialize the model from a given checkpoint.
    init_fn = None
    if train_config.fine_tune_checkpoint:
      if not train_config.fine_tune_checkpoint_type:
        # train_config.from_detection_checkpoint field is deprecated. For
        # backward compatibility, fine_tune_checkpoint_type is set based on
        # from_detection_checkpoint.
        if train_config.from_detection_checkpoint:
          train_config.fine_tune_checkpoint_type = 'detection'
        else:
          train_config.fine_tune_checkpoint_type = 'classification'
      var_map = detection_model.restore_map(
          fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type,
          load_all_detection_checkpoint_vars=(
              train_config.load_all_detection_checkpoint_vars))
      available_var_map = (variables_helper.
                           get_variables_available_in_checkpoint(
                               var_map, train_config.fine_tune_checkpoint,
                               include_global_step=False))
      init_saver = tf.train.Saver(available_var_map)
      def initializer_fn(sess):
        init_saver.restore(sess, train_config.fine_tune_checkpoint)
      init_fn = initializer_fn

    slim.learning.train(
        train_tensor,
        logdir=train_dir,
        master=master,
        is_chief=is_chief,
        session_config=session_config,
        startup_delay_steps=train_config.startup_delay_steps,
        init_fn=init_fn,
        summary_op=summary_op,
        number_of_steps=(
            train_config.num_steps if train_config.num_steps else None),
        save_summaries_secs=120,
        sync_optimizer=sync_optimizer,
        saver=saver)
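A quick numeric sketch of the per-clone batch arithmetic described above (example values assumed):

# train_config.batch_size = 64, num_clones = 4      -> 64 // 4 = 16 per clone
# with sync_replicas and replicas_to_aggregate = 2  -> 16 // 2 = 8 per clone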
Example #27
    def evaluate(self, input_fn, checkpoint_path=None):
        if not tf.train.latest_checkpoint(checkpoint_path):
            raise ValueError("Could not find trained model at %s" %
                             checkpoint_path)

        with tf.Graph().as_default():
            features, labels = self._get_features_and_labels_from_input_fn(
                input_fn, ModeKeys.EVAL)
            spec, model = self._get_model_spec(features, labels, ModeKeys.EVAL)

            # Track the average loss by default
            eval_metric_ops = spec.eval_metric_ops or {}
            if model_fn_lib.LOSS_METRIC_KEY not in eval_metric_ops:
                loss_metric = tf.metrics.mean(spec.loss)
                eval_metric_ops[model_fn_lib.LOSS_METRIC_KEY] = loss_metric

            # Create the real eval op
            update_ops, eval_dict = _extract_metric_update_ops(eval_metric_ops)
            update_ops.extend(model._train_ops)
            eval_op = tf.group(*update_ops)

            # Also track the global step
            if tf.GraphKeys.GLOBAL_STEP in eval_dict:
                raise ValueError(
                    'Metric with name `global_step` is not allowed, because '
                    'Estimator already defines a default metric with the '
                    'same name.')
            eval_dict[tf.GraphKeys.GLOBAL_STEP] = \
                tf.train.get_or_create_global_step()

            # Prepare the session creator.
            scaffold = tf.train.Scaffold()
            session_creator = tf.train.ChiefSessionCreator(
                scaffold=scaffold, checkpoint_dir=checkpoint_path)

            # Prepare hooks
            all_hooks = list(spec.evaluation_hooks) or []
            final_ops_hook = tf.train.FinalOpsHook(eval_dict)
            all_hooks.append(final_ops_hook)

            # Evaluate over dataset
            self._bridge.connect()
            try:
                with tf.train.MonitoredSession(session_creator=session_creator,
                                               hooks=all_hooks) as sess:
                    iter_id = 0
                    while not sess.should_stop():
                        self._bridge.start(iter_id)
                        logging.debug('after bridge start.')
                        start_time = time.time()
                        sess.run(eval_op)
                        end_time = time.time()
                        metrics.emit_timer(name="iter_timer",
                                           value=end_time - start_time,
                                           tags={})
                        logging.debug('after session run.')
                        self._bridge.commit()
                        logging.debug('after bridge commit.')
                        iter_id += 1
            finally:
                self._bridge.terminate()

            # Print result
            logging.info('Metrics for iteration %d: %s', iter_id,
                         _dict_to_str(final_ops_hook.final_ops_values))
            return final_ops_hook.final_ops_values
  def testApplyCompression(self):
    with tf.Graph().as_default():
      with self.cached_session():
        compression_hparams = ("name=cifar10_compression,"
                               "begin_compression_step=1000,"
                               "end_compression_step=120000,"
                               "compression_frequency=100,"
                               "compression_option=1")
        compression_op_spec = (
            compression_op.CompressionOp.get_default_hparams().parse(
                compression_hparams))
        compressor_spec = (
            compression_op.LowRankDecompMatrixCompressor.get_default_hparams()
            .parse("num_rows=5,num_cols=5,rank=200"))
        matrix_compressor = compression_op.LowRankDecompMatrixCompressor(
            spec=compressor_spec)

        global_step = tf.compat.v1.get_variable("global_step", initializer=30)

        apply_comp = compression_op.ApplyCompression(
            scope="default_scope",
            compression_spec=compression_op_spec,
            compressor=matrix_compressor,
            global_step=global_step)
        # Need to add initial value for a_matrix so that we would know what
        # to expect back.
        a_matrix_init = np.outer(np.array([1., 2., 3.]), np.array([4., 5., 6.]))
        a_matrix = tf.compat.v1.get_variable(
            "a_matrix",
            initializer=a_matrix_init.astype(np.float32),
            dtype=tf.float32)
        a_matrix_compressed = apply_comp.apply_compression(
            a_matrix, scope="first_compressor")
        c = apply_comp._compression_ops[0]

        a_matrix2 = tf.compat.v1.get_variable(
            "a_matrix2",
            initializer=a_matrix_init.astype(np.float32),
            dtype=tf.float32)
        _ = apply_comp.apply_compression(a_matrix2, scope="second_compressor")
        c2 = apply_comp._compression_ops[1]

        _ = apply_comp.all_update_op()

        tf.compat.v1.global_variables_initializer().run()
        _ = a_matrix_compressed.eval()
        self.assertEqual(c._global_step.eval(), 30)
        self.assertEqual(c.alpha.eval(), 1.0)
        self.assertEqual(c2.alpha.eval(), 1.0)
        self.assertEqual(c._last_alpha_update_step.eval(), -1)
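        # At step 30 (before begin_compression_step) only a_matrix holds the
        # weights; the low-rank factors b and c are still zero.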
        self.assertAllEqual(
            np.array([
                np.linalg.norm(c.a_matrix_tfvar.eval()),
                np.linalg.norm(c.b_matrix_tfvar.eval()),
                np.linalg.norm(c.c_matrix_tfvar.eval())
            ]) > 0, [True, False, False])

        self.assertAllEqual(
            np.all(np.abs(np.linalg.norm(c.a_matrix_tfvar.eval())) < 0.00001),
            False)
        self.assertAllEqual(
            np.all(np.abs(np.linalg.norm(c.b_matrix_tfvar.eval())) < 0.00001),
            True)
        self.assertAllEqual(
            np.all(np.abs(np.linalg.norm(c.c_matrix_tfvar.eval())) < 0.00001),
            True)
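        # Advance past begin_compression_step; the next update decrements
        # alpha and populates the low-rank factors b and c.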
        tf.compat.v1.assign(global_step, 1001).eval()
        # apply_comp_update_op.run()
        apply_comp._all_update_op.run()
        _ = a_matrix_compressed.eval()
        self.assertEqual(c._global_step.eval(), 1001)
        self.assertAlmostEqual(c.alpha.eval(), 0.99)
        self.assertEqual(c._last_alpha_update_step.eval(), 1001)
        self.assertAllEqual(
            np.array([
                np.linalg.norm(c.a_matrix_tfvar.eval()),
                np.linalg.norm(c.b_matrix_tfvar.eval()),
                np.linalg.norm(c.c_matrix_tfvar.eval())
            ]) > 0, [True, True, True])
        self.assertAllEqual(
            np.all(np.abs(np.linalg.norm(c.b_matrix_tfvar.eval())) < 0.00001),
            False)
        self.assertAllEqual(
            np.all(np.abs(np.linalg.norm(c.c_matrix_tfvar.eval())) < 0.00001),
            False)

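        # The in-graph factors should match the compressor's offline (static)
        # factorization of the same matrix, up to sign.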
        [b_matrix,
         c_matrix] = matrix_compressor.static_matrix_compressor(a_matrix_init)

        self.assertAllEqual(
            np.linalg.norm(np.abs(b_matrix) - np.abs(c.b_matrix_tfvar.eval())) <
            0.00001, True)
        self.assertAllEqual(
            np.linalg.norm(np.abs(c_matrix) - np.abs(c.c_matrix_tfvar.eval())) <
            0.00001, True)
        self.assertAllEqual(
            np.all(np.abs(np.linalg.norm(c.b_matrix_tfvar.eval())) < 0.00001),
            False)
        self.assertAllEqual(
            np.all(np.abs(np.linalg.norm(c.c_matrix_tfvar.eval())) < 0.00001),
            False)

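        # Re-running the update at the same step leaves alpha unchanged, since
        # _last_alpha_update_step is already 1001.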
        tf.compat.v1.assign(global_step, 1001).eval()
        apply_comp._all_update_op.run()
        _ = a_matrix_compressed.eval()
        self.assertEqual(c._global_step.eval(), 1001)
        self.assertAlmostEqual(c.alpha.eval(), 0.99)
        self.assertEqual(c._last_alpha_update_step.eval(), 1001)
        self.assertAllEqual(
            np.array([
                np.linalg.norm(c.a_matrix_tfvar.eval()),
                np.linalg.norm(c.b_matrix_tfvar.eval()),
                np.linalg.norm(c.c_matrix_tfvar.eval())
            ]) > 0, [True, True, True])

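        # One more compression_frequency interval (step 2000): alpha drops
        # again, for both compression ops.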
        tf.compat.v1.assign(global_step, 2000).eval()
        apply_comp._all_update_op.run()
        _ = a_matrix_compressed.eval()
        self.assertEqual(c._global_step.eval(), 2000)
        self.assertAlmostEqual(c.alpha.eval(), 0.98)
        self.assertAlmostEqual(c2.alpha.eval(), 0.98)
        self.assertEqual(c._last_alpha_update_step.eval(), 2000)
        self.assertAllEqual(
            np.array([
                np.linalg.norm(c.a_matrix_tfvar.eval()),
                np.linalg.norm(c.b_matrix_tfvar.eval()),
                np.linalg.norm(c.c_matrix_tfvar.eval())
            ]) > 0, [True, True, True])
Example #29
    def train(self,
              input_fn,
              checkpoint_path=None,
              save_checkpoint_steps=None,
              save_checkpoint_secs=None):
        if self._cluster_spec is not None:
            device_fn = tf.train.replica_device_setter(
                worker_device="/job:worker/task:%d" % self._worker_rank,
                merge_devices=True,
                cluster=self._cluster_spec)
            cluster_def = self._cluster_spec.as_cluster_def()
            local_address = self._cluster_spec.job_tasks('worker')[
                self._worker_rank]
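            # Each worker runs its own single-task "local" server and connects
            # to it directly; the full cluster is still visible to the session
            # through config.cluster_def below.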
            server = tf.train.Server(tf.train.ClusterSpec(
                {'local': {
                    0: local_address
                }}),
                job_name='local',
                task_index=0)
            target = 'grpc://' + local_address
        else:
            device_fn = None
            cluster_def = None
            target = None

        config = tf.ConfigProto(cluster_def=cluster_def)
        config.inter_op_parallelism_threads = 4
        config.intra_op_parallelism_threads = 4
        config.experimental.share_session_state_in_clusterspec_propagation \
            = True
        tf.config.set_soft_device_placement(False)

        with tf.Graph().as_default() as g:
            with tf.device(device_fn):
                features, labels = self._get_features_and_labels_from_input_fn(
                    input_fn, ModeKeys.TRAIN)
                spec, _ = self._get_model_spec(features, labels, ModeKeys.TRAIN)

            # Explicitly add a Saver
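            # MonitoredTrainingSession's default scaffold picks up savers from
            # the SAVERS collection, so registering one here controls how
            # checkpoints are written.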
            if not tf.get_collection(tf.GraphKeys.SAVERS):
                saver = tf.train.Saver(
                    sharded=True,
                    defer_build=True,
                    save_relative_paths=True)  # Must set for portability
                tf.add_to_collection(tf.GraphKeys.SAVERS, saver)

            self._bridge.connect()

            try:
                with tf.train.MonitoredTrainingSession(
                    master=target,
                    config=config,
                    is_chief=(self._worker_rank == 0),
                    checkpoint_dir=checkpoint_path,
                    save_checkpoint_steps=save_checkpoint_steps,
                    save_checkpoint_secs=save_checkpoint_secs,
                    hooks=spec.training_hooks) as sess:
                    iter_id = 0
                    while not sess.should_stop():
                        self._bridge.start(iter_id)
                        logging.debug('after bridge start.')
                        sess.run(spec.train_op, feed_dict={})
                        logging.debug('after session run.')
                        self._bridge.commit()
                        logging.debug('after bridge commit.')
                        iter_id += 1
                if self._cluster_spec is not None:
                    self._cheif_barriar(is_chief=(self._worker_rank == 0))
            finally:
                self._bridge.terminate()

        return self
Example #30
def main(unused_argv):

  # pylint:disable=invalid-name
  # Reason:
  #   The following variables have names that pylint considers invalid, so we
  #   disable the warning:
  #   - variables with class-style (capitalized) names, e.g. G_z.

  del unused_argv

  use_gaussian_pretrained_model = FLAGS.use_gaussian_pretrained_model

  gen_ckpt_dir = FLAGS.gen_ckpt_dir
  inception_ckpt_dir = FLAGS.inception_ckpt_dir

  # TF init
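  # The generator and the classifier each get their own graph and session so
  # the two restored checkpoints cannot collide.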
  tf.reset_default_graph()
  # - generative model
  graph_gan = tf.Graph()
  with graph_gan.as_default():
    sess_gan = tf.Session(graph=graph_gan)
    if use_gaussian_pretrained_model:
      saver_gan = tf.train.import_meta_graph(
          os.path.join(gen_ckpt_dir, '..', 'infer', 'infer.meta'))
      saver_gan.restore(sess_gan, os.path.join(gen_ckpt_dir, 'model.ckpt'))
    else:
      saver_gan = tf.train.import_meta_graph(
          os.path.join(gen_ckpt_dir, 'infer.meta'))
      saver_gan.restore(sess_gan, os.path.join(gen_ckpt_dir, 'model.ckpt'))
  # - classifier (inception)
  graph_class = tf.Graph()
  with graph_class.as_default():
    sess_class = tf.Session(graph=graph_class)
    saver_class = tf.train.import_meta_graph(
        os.path.join(inception_ckpt_dir, 'infer.meta'))
    saver_class.restore(
        sess_class, os.path.join(inception_ckpt_dir, 'best_acc-103005'))

  # Generate: Tensor symbols
  z = graph_gan.get_tensor_by_name('z:0')
  G_z = graph_gan.get_tensor_by_name('G_z:0')[:, :, 0]
  # G_z_spec = graph_gan.get_tensor_by_name('G_z_spec:0')
  # Classification: Tensor symbols
  x = graph_class.get_tensor_by_name('x:0')
  scores = graph_class.get_tensor_by_name('scores:0')

  # Sample something AND classify them

  output_dir = FLAGS.latent_dir

  tf.gfile.MakeDirs(output_dir)

  np.random.seed(19260817)
  total_per_label = FLAGS.total_per_label
  top_per_label = FLAGS.top_per_label
  group_by_label = [[] for _ in range(10)]
  batch_size = 200
  hidden_dim = 100

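  # Sampling loop: draw latent batches, decode them with the GAN, classify the
  # outputs, and keep only the most confident samples per predicted label
  # until every label has been seen total_per_label times.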
  with tqdm(desc='min label count', unit=' #', total=total_per_label) as pbar:
    label_count = [0] * 10
    last_min_label_count = 0
    while True:
      min_label_count = min(label_count)
      pbar.update(min_label_count - last_min_label_count)
      last_min_label_count = min_label_count

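      # The latent prior should match the one used to train the generator:
      # standard normal for the Gaussian-pretrained model, uniform [-1, 1)
      # otherwise.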
      if use_gaussian_pretrained_model:
        _z = np.random.randn(batch_size, hidden_dim)
      else:
        _z = (np.random.rand(batch_size, hidden_dim) * 2.) - 1.
      # _G_z, _G_z_spec = sess_gan.run([G_z, G_z_spec], {z: _z})
      _G_z = sess_gan.run(G_z, {z: _z})
      _x = _G_z
      _scores = sess_class.run(scores, {x: _x})
      _max_scores = np.max(_scores, axis=1)
      _labels = np.argmax(_scores, axis=1)
      for i in range(batch_size):
        label = _labels[i]

        group_by_label[label].append((_max_scores[i], (_z[i], _G_z[i])))
        label_count[label] += 1

        if len(group_by_label[label]) >= top_per_label * 2:
          # remove unneeded tails
          group_by_label[label].sort(key=operator.itemgetter(0), reverse=True)
          group_by_label[label] = group_by_label[label][:top_per_label]

      if last_min_label_count >= total_per_label:
        break

  for label in range(10):
    group_by_label[label].sort(key=operator.itemgetter(0), reverse=True)
    group_by_label[label] = group_by_label[label][:top_per_label]

  # Output a few of the top samples as WAV audio files
  image_output_dir = os.path.join(output_dir, 'sample_image')
  tf.gfile.MakeDirs(image_output_dir)

  for label in range(10):
    group_by_label[label].sort(key=operator.itemgetter(0), reverse=True)
    index = 0
    for confidence, (_, this_G_z) in group_by_label[label][:10]:
      output_basename = 'predlabel=%d_index=%02d_confidence=%.6f' % (
          label, index, confidence)
      wavfile.write(
          filename=os.path.join(
              image_output_dir, output_basename + '_sound.wav'),
          rate=16000,
          data=this_G_z)
      index += 1  # advance the per-label sample index so filenames stay unique

  # Make Numpy arrays and save everything as an npz file
  array_label, array_z, array_G_z = [], [], []
  for label in range(10):
    for _, blob in group_by_label[label]:
      this_z, this_G_z = blob[:2]
      array_label.append(label)
      array_z.append(this_z)
      array_G_z.append(this_G_z)
  array_label = np.array(array_label, dtype='i')
  array_z = np.array(array_z)
  array_G_z = np.array(array_G_z)

  np.savez(
      os.path.join(output_dir, 'data_train.npz'),
      label=array_label,
      z=array_z,
      G_z=array_G_z,
  )