Beispiel #1
1
def main():
    """Build a small LSTM graph with a squared-error loss and run it.

    Command-line arguments (read from ``argv``):
      argv[1]: forwarded unchanged as the second argument of ``run_model``.
      argv[2]: comma-separated names of the graph tensors to fetch.
      argv[3]: the literal string 'True' turns the last ``run_model``
        argument on; any other value turns it off.
    """
    tf.set_random_seed(10)
    with tf.Session() as sess:
        rnn_cell = tf.nn.rnn_cell.LSTMCell(10)

        # Zero initial state for a batch of 4 sequences.
        initial_state = rnn_cell.zero_state(4, dtype=tf.float32)

        inputs = tf.Variable(tf.random_uniform(shape=(4, 30, 100)), name='input')
        inputs = tf.identity(inputs, "input_node")

        # NOTE(review): with LSTMCell's default tuple state, `state` is
        # presumably an LSTMStateTuple (c, h), not a single
        # [batch_size, cell_state_size] tensor -- confirm.
        outputs, state = tf.nn.dynamic_rnn(rnn_cell, inputs, initial_state=initial_state, dtype=tf.float32)

        y1 = tf.identity(outputs, 'outputs')
        y2 = tf.identity(state, 'state')

        # All-ones targets for the outputs and for the final state.
        t1 = tf.ones([4, 30, 10])
        t2 = tf.ones([4, 10])

        loss = tf.reduce_sum((y1 - t1) * (y1 - t1)) + tf.reduce_sum((y2 - t2) * (y2 - t2))
        tf.identity(loss, name="lstm_loss")
        # tf.summary.FileWriter('/tmp/log', tf.get_default_graph())

        # Build a real list: on Python 3 `map` returns a one-shot iterator,
        # which would be exhausted after a single pass inside run_model.
        graph = tf.get_default_graph()
        net_outputs = [graph.get_tensor_by_name(name)
                       for name in argv[2].split(',')]
        run_model(net_outputs, argv[1], None, argv[3] == 'True')
  def testGradient(self):
    """Check the gradient error of extract_image_patches for every test case."""
    # Fix the graph-level seed so that runs are reproducible.
    seed = 42
    tf.set_random_seed(seed)

    with self.test_session():
      for case in self._TEST_CASES:
        # Re-seed NumPy so each case draws its input from the same stream.
        np.random.seed(seed)
        input_shape = case['in_shape']
        input_tensor = tf.constant(np.random.random(input_shape),
                                   dtype=tf.float32)

        for pad_mode in ('VALID', 'SAME'):
          patches = tf.extract_image_patches(
              input_tensor,
              case['ksizes'],
              case['strides'],
              case['rates'],
              pad_mode)
          patches_shape = patches.get_shape().as_list()

          grad_err = tf.test.compute_gradient_error(
              input_tensor, input_shape, patches, patches_shape)

          print('extract_image_patches gradient err: %.4e' % grad_err)
          self.assertLess(grad_err, 1e-4)
Beispiel #3
0
 def testAtrousFullyConvolutionalValues(self):
   """Compare atrous dense features with the nominal-rate network.

   For each output stride the small ResNet is run with that stride, its
   output is subsampled by ``nominal_stride // output_stride`` and compared
   with the output of the same network (variables reused) run without an
   explicit output stride.
   """
   nominal_stride = 32
   for output_stride in [4, 8, 16, 32, None]:
     with slim.arg_scope(resnet_utils.resnet_arg_scope()), \
         tf.Graph().as_default(), \
         self.test_session() as sess:
       tf.set_random_seed(0)
       images = create_test_input(2, 81, 81, 3)
       # Dense extraction at the requested stride, subsampled afterwards.
       dense, _ = self._resnet_small(images, None,
                                     is_training=False,
                                     global_pool=False,
                                     output_stride=output_stride)
       factor = 1 if output_stride is None else nominal_stride // output_stride
       dense = resnet_utils.subsample(dense, factor)
       # Share the weights between the two networks.
       tf.get_variable_scope().reuse_variables()
       # Reference features at the nominal network rate.
       reference, _ = self._resnet_small(images, None,
                                         is_training=False,
                                         global_pool=False)
       sess.run(tf.global_variables_initializer())
       self.assertAllClose(dense.eval(), reference.eval(),
                           atol=1e-4, rtol=1e-4)
Beispiel #4
0
  def testResumeTrainAchievesRoughlyTheSameLoss(self):
    """Train repeatedly into one logdir and check the loss stays small."""
    base_dir = tempfile.mkdtemp(prefix=self.get_temp_dir())
    logdir = os.path.join(base_dir, 'tmp_logs')
    number_of_steps = [300, 301, 305]

    # Every round uses a fresh graph but the same logdir.
    for round_index, target_steps in enumerate(number_of_steps):
      with tf.Graph().as_default():
        tf.set_random_seed(round_index)
        features = tf.constant(self._inputs, dtype=tf.float32)
        targets = tf.constant(self._labels, dtype=tf.float32)

        predictions = LogisticClassifier(features)
        slim.losses.log_loss(predictions, targets)
        total_loss = slim.losses.get_total_loss()

        sgd = tf.train.GradientDescentOptimizer(learning_rate=1.0)
        train_op = slim.learning.create_train_op(total_loss, sgd)

        final_loss = slim.learning.train(
            train_op,
            logdir,
            number_of_steps=target_steps,
            log_every_n_steps=10)
        self.assertIsNotNone(final_loss)
        self.assertLess(final_loss, .015)
def gradient_memory_mbs():
  """Runs one training step and reports peak GPU memory.

  Resets the default graph, builds the train op and loss, tags the first
  output of every op matching "block_layer" as a checkpoint, runs the
  initializer and one training step, and prints timing and memory figures.

  Returns:
    Peak memory reported for '/gpu:0', divided by 1e6 (printed as MB).

  Raises:
    AssertionError: if the whole function takes 100 seconds or longer.
  """
  start_time0 = time.perf_counter()
  start_time = start_time0
  tf.reset_default_graph()
  tf.set_random_seed(1)
  
  train_op, loss = create_train_op_and_loss()
  print("Graph construction: %.2f ms" %(1000*(time.perf_counter()-start_time)))

  g = tf.get_default_graph()
  ops = g.get_operations()
  
  # Register the first output of each "block_layer" op in the
  # "checkpoints" collection.
  for op in ge.filter_ops_from_regex(ops, "block_layer"):
    tf.add_to_collection("checkpoints", op.outputs[0])

  sess = create_session()
  # NOTE(review): `sessrun` and `run_metadata` are not defined in this
  # function; presumably module-level helpers/globals -- confirm that
  # `sessrun` uses the session created above.
  sessrun(tf.global_variables_initializer())
  start_time = time.perf_counter()
  sessrun(train_op)
  # NOTE(review): the timer is restarted here, so "Compute time" below covers
  # only the loss evaluation, not the training step -- confirm this is intended.
  start_time = time.perf_counter()
  print("loss %f"%(sess.run(loss),))
  
  print("Compute time: %.2f ms" %(1000*(time.perf_counter()-start_time)))

  mem_use = mem_util.peak_memory(run_metadata)['/gpu:0']/1e6
  print("Memory used: %.2f MB "%(mem_use))
  total_time = time.perf_counter()-start_time0
  # Guard against the whole measurement taking 100 s or more.
  assert total_time < 100
  return mem_use
  def testSplitApplyMerge(self):
    """Train the split/apply/merge toy model and check the learned routing."""
    # Fixed seed for repeatability; SGD tends to jump around, even here.
    tf.set_random_seed(1)

    with self.test_session() as sess:
      # Sampling is used to train REINFORCE.
      with st.value_type(st.SampleAndReshapeValue(n=1)):
        route_selection, routing_loss, final_loss = (
            build_split_apply_merge_model())

      train_step = tf.train.GradientDescentOptimizer(1.0).minimize(final_loss)

      tf.global_variables_initializer().run()

      for step in range(10):
        # One loss + inference + update step; the toy problem converges fast.
        fetches = [routing_loss,
                   final_loss,
                   tf.identity(route_selection),
                   train_step]
        routing_loss_v, final_loss_v, route_selection_v, _ = sess.run(fetches)
        print(
            "Iteration %d, routing loss: %s, final_loss: %s, "
            "route selection: %s"
            % (step, routing_loss_v, final_loss_v, route_selection_v))

      # Only the values from the last iteration are checked.
      self.assertAllEqual([0, 0, 1, 1], route_selection_v)
      self.assertAllClose([0.0, 0.0, 0.0, 0.0], routing_loss_v)
      self.assertAllClose(0.0, final_loss_v)
Beispiel #7
0
  def testNoneGlobalStep(self):
    """A train op built with global_step=None must not advance the step."""
    with tf.Graph().as_default():
      tf.set_random_seed(0)
      features = tf.constant(self._inputs, dtype=tf.float32)
      targets = tf.constant(self._labels, dtype=tf.float32)

      predictions = BatchNormClassifier(features)
      slim.losses.log_loss(predictions, targets)
      total_loss = slim.losses.get_total_loss()
      sgd = tf.train.GradientDescentOptimizer(learning_rate=1.0)

      train_op = slim.learning.create_train_op(
          total_loss, sgd, global_step=None)

      step_var = slim.get_or_create_global_step()

      with tf.Session() as sess:
        # Initialize every variable before training.
        sess.run(tf.global_variables_initializer())

        for _ in range(10):
          sess.run([train_op])
        step_value = step_var.eval()
        # train_op does not use the global step, so it must still be 0.
        self.assertAllClose(step_value, 0)
Beispiel #8
0
  def testEmptyUpdateOps(self):
    """With update_ops=[] the batch-norm moving statistics must not change."""
    with tf.Graph().as_default():
      tf.set_random_seed(0)
      features = tf.constant(self._inputs, dtype=tf.float32)
      targets = tf.constant(self._labels, dtype=tf.float32)

      predictions = BatchNormClassifier(features)
      slim.losses.log_loss(predictions, targets)
      total_loss = slim.losses.get_total_loss()
      sgd = tf.train.GradientDescentOptimizer(learning_rate=1.0)

      train_op = slim.learning.create_train_op(
          total_loss, sgd, update_ops=[])

      mean_var = tf.contrib.framework.get_variables_by_name('moving_mean')[0]
      variance_var = tf.contrib.framework.get_variables_by_name(
          'moving_variance')[0]

      with tf.Session() as sess:
        # Initialize every variable before reading the statistics.
        sess.run(tf.global_variables_initializer())
        initial_mean, initial_variance = sess.run([mean_var, variance_var])
        # Right after initialization: moving_mean == 0, moving_variance == 1.
        self.assertAllClose(initial_mean, [0] * 4)
        self.assertAllClose(initial_variance, [1] * 4)

        for _ in range(10):
          sess.run([train_op])
        trained_mean = mean_var.eval()
        trained_variance = variance_var.eval()
        # update_ops were skipped, so the moving statistics are unchanged.
        self.assertAllClose(trained_mean, [0] * 4)
        self.assertAllClose(trained_variance, [1] * 4)
Beispiel #9
0
    def __init__(self):
        """Load MNIST (with retries) and build the network, loss and metrics.

        Raises:
            ValueError: if the data set could not be read in 10 attempts; the
                last exception is attached as the second argument.
        """
        # Import data: up to 10 attempts, sleeping 5 s after each failure.
        last_error = None
        for _attempt in range(10):
            try:
                self.mnist = input_data.read_data_sets(
                    "/tmp/tensorflow/mnist/input_data", one_hot=True)
            except Exception as exc:
                last_error = exc
                time.sleep(5)
            else:
                last_error = None
                break
        if last_error:
            raise ValueError("Failed to import data", last_error)

        # Seed the graph, then build the layers.
        tf.set_random_seed(0)

        self.x = tf.placeholder(tf.float32, [None, 784], name="x")
        self.y_ = tf.placeholder(tf.float32, [None, 10], name="y_")
        logits, self.keep_prob = deepnn(self.x)

        # Loss and optimizer attributes.
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            labels=self.y_, logits=logits)
        self.loss = tf.reduce_mean(cross_entropy)
        self.optimizer = tf.train.AdamOptimizer(1e-4)
        self.variables = ray_tf_utils.TensorFlowVariables(
            self.loss, tf.get_default_session())

        # Test accuracy: fraction of rows whose arg-max matches the label.
        is_correct = tf.equal(tf.argmax(logits, 1), tf.argmax(self.y_, 1))
        self.accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
def initialize_parameters():
    """
    Create the weight and bias variables of a three-layer network.

    Variable shapes:
                        W1 : [25, 12288]
                        b1 : [25, 1]
                        W2 : [12, 25]
                        b2 : [12, 1]
                        W3 : [6, 12]
                        b3 : [6, 1]

    Weights use a Xavier initializer with seed 1; biases start at zero.

    Returns:
    parameters -- a dictionary of tensors containing W1, b1, W2, b2, W3, b3
    """

    tf.set_random_seed(1)  # fixed graph seed so the "random" values are reproducible

    # (weight name, bias name, layer outputs, layer inputs), in creation order.
    layer_specs = (
        ("W1", "b1", 25, 12288),
        ("W2", "b2", 12, 25),
        ("W3", "b3", 6, 12),
    )

    parameters = {}
    for weight_name, bias_name, n_out, n_in in layer_specs:
        parameters[weight_name] = tf.get_variable(
            weight_name, [n_out, n_in],
            initializer=tf.contrib.layers.xavier_initializer(seed=1))
        parameters[bias_name] = tf.get_variable(
            bias_name, [n_out, 1], initializer=tf.zeros_initializer())

    return parameters
Beispiel #11
0
  def construct_graph(self, training, seed):
    """Builds the model in a fresh graph and wraps it in a TensorflowGraph.

    Args:
      training: If truthy a training cost is added; otherwise output ops are
        added and the loss is None.
      seed: Graph-level random seed, or None to leave the graph unseeded.

    Returns:
      A TensorflowGraph object.
    """
    graph = tf.Graph()

    # Cache of TensorFlow scopes, to prevent '_1' appended scope names
    # when subclass-overridden methods use the same scopes.
    name_scopes = {}

    # Build the model inside the new graph.
    with graph.as_default():
      if seed is not None:
        tf.set_random_seed(seed)
      output, labels, weights = self.build(graph, name_scopes, training)

    loss = None
    if training:
      loss = self.add_training_cost(graph, name_scopes, output, labels, weights)
    else:
      output = self.add_output_ops(graph, output)  # add softmax heads

    return TensorflowGraph(
        graph=graph,
        # No session yet; lazily created by _get_shared_session().
        session=None,
        name_scopes=name_scopes,
        output=output,
        labels=labels,
        weights=weights,
        loss=loss)
Beispiel #12
0
def wide_model(numeric_input, category_input, vocabs):
    """Build the wide part of the model and return its logits.

    Every categorical column gets its own 8-wide embedding table. The
    looked-up embeddings are concatenated along axis 1, joined with the
    numeric features and multiplied by a single weight column "W".

    Args:
        numeric_input: numeric features; assumed 2-D, as axis 1 is
            concatenated with the embeddings -- TODO confirm.
        category_input: categorical ids; column ``i`` is looked up in the
            i-th embedding table.
        vocabs: per-column vocabulary sizes (first dimension of each table).

    Returns:
        The result of multiplying the concatenated features by "W".
    """
    columns_first = tf.transpose(category_input)
    category_sum = None
    # Append each embedded categorical column to the running concatenation.
    for i, vocab_size in enumerate(vocabs):
        # (Partitioners were tried here in the past and are disabled.)
        table = tf.get_variable(
            "wideem" + str(i), [vocab_size, 8],
            initializer=tf.contrib.layers.xavier_initializer())
        # Pick column i of the categorical input.
        column = tf.gather(columns_first, [i])[0]

        # Equivalent to a one-hot row times the table: selects one row per id.
        embedded = embedding_ops.embedding_lookup_unique(table, column)

        category_sum = (embedded if category_sum is None
                        else tf.concat([category_sum, embedded], 1))

    # NOTE(review): the graph seed is set only here, after the embedding
    # tables were created -- confirm that is intended.
    tf.set_random_seed(1)
    input_width = numeric_input.shape[1] + category_sum.shape[1]
    w = tf.get_variable("W", [input_width, 1],
                        initializer=tf.contrib.layers.xavier_initializer())
    return tf.matmul(tf.concat([numeric_input, category_sum], 1), w)
Beispiel #13
0
  def testGradientWithZeroWeight(self):
    """Gradients of a zero-weighted pairwise squared error must not be NaN."""
    with tf.Graph().as_default():
      tf.set_random_seed(0)

      inputs = tf.ones((2, 3))
      weights = tf.get_variable('weights',
                                shape=[3, 4],
                                initializer=tf.truncated_normal_initializer())
      predictions = tf.matmul(inputs, weights)

      optimizer = tf.train.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
      # Predictions are compared with themselves and the loss weight is 0.
      loss = tf.contrib.losses.mean_pairwise_squared_error(
          predictions,
          predictions,
          0)

      gradients_to_variables = optimizer.compute_gradients(loss)

      # tf.initialize_all_variables() is deprecated; use its replacement.
      init_op = tf.global_variables_initializer()

      with self.test_session() as sess:
        sess.run(init_op)
        for grad, _ in gradients_to_variables:
          np_grad = sess.run(grad)
          self.assertFalse(np.isnan(np_grad).any())
Beispiel #14
0
  def _do_sampling(self, logits, num_samples, sampler):
    """Draws samples with `sampler` and returns per-class frequencies.

    Args:
      logits: Numpy ndarray of shape [batch_size, num_classes].
      num_samples: Int; number of samples to draw.
      sampler: A sampler function that takes (1) a [batch_size, num_classes]
        Tensor, (2) num_samples and returns a [batch_size, num_samples] Tensor.

    Returns:
      Frequencies from sampled classes; shape [batch_size, num_classes].
    """
    with self.test_session() as sess:
      tf.set_random_seed(1618)
      sample_op = sampler(tf.constant(logits), num_samples)
      samples = sess.run(sample_op)

    batch_size, num_classes = logits.shape
    freqs_mat = []
    for row in range(batch_size):
      counts = dict(collections.Counter(samples[row, :]))
      row_freqs = []
      for cls in range(num_classes):
        if cls in counts:
          row_freqs.append(counts[cls] * 1. / num_samples)
        else:
          # Classes that were never drawn get frequency 0.
          row_freqs.append(0)
      freqs_mat.append(row_freqs)

    return freqs_mat
Beispiel #15
0
def main(_):
    """Create the environment and the actor/critic networks, then train.

    The NumPy, TensorFlow and environment seeds are all set to RANDOM_SEED.
    When GYM_MONITOR_EN is set the environment is wrapped in a gym Monitor,
    recording video only if RENDER_ENV is also set.

    Args:
        _: Unused positional argument.

    Raises:
        AssertionError: if the action bounds are not symmetric around zero.
    """
    with tf.Session() as sess:

        env = gym.make(ENV_NAME)
        np.random.seed(RANDOM_SEED)
        tf.set_random_seed(RANDOM_SEED)
        env.seed(RANDOM_SEED)

        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high
        # Ensure the action bound is symmetric. Reduce with np.all: asserting
        # the raw element-wise comparison raises ValueError as soon as the
        # action space has more than one dimension.
        assert np.all(env.action_space.high == -env.action_space.low)

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             ACTOR_LEARNING_RATE, TAU)

        critic = CriticNetwork(sess, state_dim, action_dim,
                               CRITIC_LEARNING_RATE, TAU, actor.get_num_trainable_vars())

        if GYM_MONITOR_EN:
            if not RENDER_ENV:
                # Monitor without video recording.
                env = wrappers.Monitor(
                    env, MONITOR_DIR, video_callable=False, force=True)
            else:
                env = wrappers.Monitor(env, MONITOR_DIR, force=True)

        train(sess, env, actor, critic)

        if GYM_MONITOR_EN:
            env.monitor.close()
Beispiel #16
0
    def __init__(self, input_dim=None, output_dim=1, init_path=None, opt_algo='gd', learning_rate=1e-2, l2_weight=0,
                 random_seed=None):
        """Build a logistic-regression graph over sparse inputs and open a session.

        Args:
            input_dim: first dimension of the weight matrix ``w``.
            output_dim: second dimension of ``w`` and the size of the bias ``b``.
            init_path: forwarded to ``utils.init_var_map``.
            opt_algo: optimizer name forwarded to ``utils.get_optimizer``.
            learning_rate: learning rate forwarded to ``utils.get_optimizer``.
            l2_weight: multiplier of the L2 term added to the loss.
            random_seed: graph-level seed, or None to leave the graph unseeded.
        """
        Model.__init__(self)
        var_specs = [('w', [input_dim, output_dim], 'xavier', dtype),
                     ('b', [output_dim], 'zero', dtype)]
        self.graph = tf.Graph()
        with self.graph.as_default():
            if random_seed is not None:
                tf.set_random_seed(random_seed)
            self.X = tf.sparse_placeholder(dtype)
            self.y = tf.placeholder(dtype)
            # Initialize the variables w and b.
            self.vars = utils.init_var_map(var_specs, init_path)

            weight = self.vars['w']
            bias = self.vars['b']
            xw = tf.sparse_tensor_dense_matmul(self.X, weight)
            logits = tf.reshape(xw + bias, [-1])
            self.y_prob = tf.sigmoid(logits)

            # Mean sigmoid cross-entropy plus an L2 term computed on `xw`.
            cross_entropy = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y, logits=logits))
            self.loss = cross_entropy + l2_weight * tf.nn.l2_loss(xw)
            self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

            # Let the session grow its GPU memory on demand.
            session_config = tf.ConfigProto()
            session_config.gpu_options.allow_growth = True
            self.sess = tf.Session(config=session_config)
            tf.global_variables_initializer().run(session=self.sess)
Beispiel #17
0
  def testSampleMultipleTimes(self):
    """Two identically seeded mixtures must produce the same samples."""
    # 5 component mixture.
    logits = [-10.0, -5.0, 0.0, 5.0, 10.0]
    mus = [-5.0, 0.0, 5.0, 4.0, 20.0]
    sigmas = [0.1, 5.0, 3.0, 0.2, 4.0]

    n = 100

    def _sample_mixture(cat_name, mixture_name):
      # Re-seed the graph, rebuild the mixture from scratch and sample it.
      tf.set_random_seed(654321)
      components = [
          tfd.Normal(loc=mu, scale=sigma) for mu, sigma in zip(mus, sigmas)
      ]
      cat = tfd.Categorical(logits, dtype=tf.int32, name=cat_name)
      mixture = tfd.Mixture(
          cat,
          components,
          name=mixture_name,
          use_static_graph=self.use_static_graph)
      return self.evaluate(mixture.sample(n, seed=123456))

    samples1 = _sample_mixture("cat1", "mixture1")
    samples2 = _sample_mixture("cat2", "mixture2")

    self.assertAllClose(samples1, samples2)
Beispiel #18
0
 def _runSamplingBenchmark(self, name, create_distribution, use_gpu,
                           num_components, batch_size, num_features,
                           sample_size):
   """Benchmark sampling from a mixture and log the measured wall time.

   Args:
     name: prefix of the benchmark name.
     create_distribution: callable building the mixture from
       num_components, batch_size and num_features.
     use_gpu: place the ops on "/device:GPU:0" if truthy, else on "/cpu:0".
     num_components: forwarded to create_distribution.
     batch_size: forwarded to create_distribution.
     num_features: forwarded to create_distribution.
     sample_size: argument of the mixture's sample() call.
   """
   session_config = tf.ConfigProto()
   session_config.allow_soft_placement = True
   np.random.seed(127)
   with tf.Session(config=session_config, graph=tf.Graph()) as sess:
     tf.set_random_seed(0)
     device_name = "/device:GPU:0" if use_gpu else "/cpu:0"
     with tf.device(device_name):
       mixture = create_distribution(
           num_components=num_components,
           batch_size=batch_size,
           num_features=num_features)
       sample_op = mixture.sample(sample_size).op
       sess.run(tf.global_variables_initializer())
       benchmark_name = (
           "%s_%s_components_%d_batch_%d_features_%d_sample_%d" %
           (name, use_gpu, num_components, batch_size, num_features,
            sample_size))
       reported = self.run_op_benchmark(
           sess,
           sample_op,
           min_iters=10,
           name=benchmark_name)
       # One tab-separated row: configuration followed by the wall time.
       row_format = "\t".join(["%s", "%d", "%d", "%d", "%d", "%g"])
       tf.logging.vlog(2, row_format % (
           use_gpu, num_components, batch_size, num_features, sample_size,
           reported["wall_time"]))
Beispiel #19
0
def main(_):
  """Seed all RNGs, build the training model from FLAGS and train it.

  Args:
    _: Unused positional argument.

  Raises:
    ValueError: if legacy mode is requested with a sequence length below 3.
  """
  # One fixed seed for TensorFlow, NumPy and `random`, for repeatability.
  seed = 8964
  tf.set_random_seed(seed)
  np.random.seed(seed)
  random.seed(seed)

  if FLAGS.legacy_mode and FLAGS.seq_length < 3:
    raise ValueError('Legacy mode supports sequence length > 2 only.')

  # Create the checkpoint directory on first use.
  if not gfile.Exists(FLAGS.checkpoint_dir):
    gfile.MakeDirs(FLAGS.checkpoint_dir)

  model_kwargs = dict(
      data_dir=FLAGS.data_dir,
      is_training=True,
      learning_rate=FLAGS.learning_rate,
      beta1=FLAGS.beta1,
      reconstr_weight=FLAGS.reconstr_weight,
      smooth_weight=FLAGS.smooth_weight,
      ssim_weight=FLAGS.ssim_weight,
      icp_weight=FLAGS.icp_weight,
      batch_size=FLAGS.batch_size,
      img_height=FLAGS.img_height,
      img_width=FLAGS.img_width,
      seq_length=FLAGS.seq_length,
      legacy_mode=FLAGS.legacy_mode)
  train_model = model.Model(**model_kwargs)

  train(train_model,
        FLAGS.pretrained_ckpt,
        FLAGS.checkpoint_dir,
        FLAGS.train_steps,
        FLAGS.summary_freq)
  def construct_graph(self, training, seed):
    """Builds the progressive lattice in a fresh graph.

    Args:
      training: If truthy the per-task training costs are added; otherwise
        the loss is None.
      seed: Graph-level random seed, or None to leave the graph unseeded.

    Returns:
      A TensorflowGraph object.
    """
    graph = tf.Graph()

    # Cache of TensorFlow scopes, to prevent '_1' appended scope names
    # when subclass-overridden methods use the same scopes.
    name_scopes = {}

    # Everything below is created inside the new graph.
    with graph.as_default():
      if seed is not None:
        tf.set_random_seed(seed)
      # The features placeholder is not used further in this method.
      _features, labels, weights = self.add_placeholders(graph, name_scopes)
      outputs = self.add_progressive_lattice(graph, name_scopes, training)

      loss = None
      if training:
        loss = self.add_task_training_costs(
            graph, name_scopes, outputs, labels, weights)

    return TensorflowGraph(
        graph=graph,
        # No session yet; lazily created by _get_shared_session().
        session=None,
        name_scopes=name_scopes,
        output=outputs,
        labels=labels,
        weights=weights,
        loss=loss)
Beispiel #21
0
    def testProbabilitiesCanBeChanged(self):
        """Feeding different target probabilities must change the sampled class."""
        # Set up graph.
        tf.set_random_seed(1234)
        lbl1 = 0
        lbl2 = 3
        # This cond allows the necessary class queues to be populated.
        label = tf.cond(tf.greater(0.5, tf.random_uniform([])), lambda: tf.constant(lbl1), lambda: tf.constant(lbl2))
        val = [np.array([1, 4]) * label]
        probs = tf.placeholder(tf.float32, shape=[5])
        batch_size = 2

        data_batch, labels = tf.contrib.training.stratified_sample_unknown_dist(val, label, probs, batch_size)

        with self.test_session() as sess:
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            def _expect_batches(distribution, expected_example, expected_labels):
                # Draw five batches under `distribution` and check each one.
                for _ in range(5):
                    [data], lbls = sess.run([data_batch, labels], feed_dict={probs: distribution})
                    for data_example in data:
                        self.assertListEqual(expected_example, list(data_example))
                    self.assertListEqual(expected_labels, list(lbls))

            # All probability mass on class 0.
            _expect_batches([1, 0, 0, 0, 0], [0, 0], [0, 0])

            # Now change distribution and expect different output.
            _expect_batches([0, 0, 0, 1, 0], [3, 12], [3, 3])

            coord.request_stop()
            coord.join(threads)
  def _train_model(self, checkpoint_dir, num_steps):
    """Trains a simple logistic classifier and writes checkpoints.

    Note that the data has been configured such that after around 300 steps,
    the model has memorized the dataset (e.g. we can expect %100 accuracy).

    Args:
      checkpoint_dir: The directory where the checkpoint is written to.
      num_steps: The number of steps to train for.
    """
    with tf.Graph().as_default():
      tf.set_random_seed(0)
      features = tf.constant(self._inputs, dtype=tf.float32)
      targets = tf.constant(self._labels, dtype=tf.float32)

      predictions = logistic_classifier(features)
      loss = tf.contrib.losses.log_loss(predictions, targets)

      sgd = tf.train.GradientDescentOptimizer(learning_rate=1.0)
      train_op = tf.contrib.training.create_train_op(loss, sgd)

      # Stop after `num_steps` steps.
      stop_hook = tf.train.StopAtStepHook(num_steps)
      loss = tf.contrib.training.train(
          train_op, checkpoint_dir, hooks=[stop_hook])
Beispiel #23
0
  def testCreateOnecloneWithPS(self):
    """One clone with one PS task: gradients on the worker, variables on the PS."""
    graph = tf.Graph()
    with graph.as_default():
      tf.set_random_seed(0)
      features = tf.constant(self._inputs, dtype=tf.float32)
      targets = tf.constant(self._labels, dtype=tf.float32)

      model_fn = BatchNormClassifier
      model_args = (features, targets)
      deploy_config = model_deploy.DeploymentConfig(num_clones=1,
                                                    num_ps_tasks=1)

      # No variables exist before the clones are created.
      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, model_args)
      self.assertEqual(len(slim.get_variables()), 5)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(len(update_ops), 2)

      sgd = tf.train.GradientDescentOptimizer(learning_rate=1.0)
      total_loss, grads_and_vars = model_deploy.optimize_clones(clones, sgd)
      self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
      self.assertEqual(total_loss.op.name, 'total_loss')
      for grad, var in grads_and_vars:
        self.assertDeviceEqual(grad.device, '/job:worker/device:GPU:0')
        self.assertDeviceEqual(var.device, '/job:ps/task:0/CPU:0')
Beispiel #24
0
  def testCreateMulticloneWithPS(self):
    """Two clones with two PS tasks: check variable and clone placement."""
    graph = tf.Graph()
    with graph.as_default():
      tf.set_random_seed(0)
      features = tf.constant(self._inputs, dtype=tf.float32)
      targets = tf.constant(self._labels, dtype=tf.float32)

      model_fn = BatchNormClassifier
      clone_args = (features, targets)
      deploy_config = model_deploy.DeploymentConfig(num_clones=2,
                                                    num_ps_tasks=2)

      # No variables exist before the clones are created.
      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      self.assertEqual(len(slim.get_variables()), 5)
      # Variables are expected to alternate between the two PS tasks.
      for index, variable in enumerate(slim.get_variables()):
        self.assertDeviceEqual(
            variable.device, '/job:ps/task:%d/device:CPU:0' % (index % 2))
        self.assertDeviceEqual(variable.device, variable.value().device)
      self.assertEqual(len(clones), 2)
      for index, clone in enumerate(clones):
        expected_op_name = (
            'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % index)
        self.assertEqual(clone.outputs.op.name, expected_op_name)
        self.assertEqual(clone.scope, 'clone_%d/' % index)
        self.assertDeviceEqual(clone.device, '/job:worker/device:GPU:%d' % index)
Beispiel #25
0
  def testTrainWithTrace(self):
    """Training with trace_every_n_steps must write the trace files."""
    base_dir = tempfile.mkdtemp(prefix=self.get_temp_dir())
    logdir = os.path.join(base_dir, 'tmp_logs')
    with tf.Graph().as_default():
      tf.set_random_seed(0)
      features = tf.constant(self._inputs, dtype=tf.float32)
      targets = tf.constant(self._labels, dtype=tf.float32)

      predictions = LogisticClassifier(features)
      slim.losses.log_loss(predictions, targets)
      total_loss = slim.losses.get_total_loss()
      tf.summary.scalar('total_loss', total_loss)

      sgd = tf.train.GradientDescentOptimizer(learning_rate=1.0)
      train_op = slim.learning.create_train_op(total_loss, sgd)

      final_loss = slim.learning.train(
          train_op,
          logdir,
          number_of_steps=300,
          log_every_n_steps=10,
          trace_every_n_steps=100)
    self.assertIsNotNone(final_loss)
    # A trace file is expected for steps 1, 101 and 201.
    for trace_step in [1, 101, 201]:
      trace_path = os.path.join(logdir, 'tf_trace-%d.json' % trace_step)
      self.assertTrue(os.path.isfile(trace_path))
Beispiel #26
0
  def testCreateLogisticClassifier(self):
    """A single LogisticClassifier clone: check variables, scope and devices."""
    graph = tf.Graph()
    with graph.as_default():
      tf.set_random_seed(0)
      features = tf.constant(self._inputs, dtype=tf.float32)
      targets = tf.constant(self._labels, dtype=tf.float32)

      model_fn = LogisticClassifier
      clone_args = (features, targets)
      deploy_config = model_deploy.DeploymentConfig(num_clones=1)

      # No variables exist before the clones are created.
      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      only_clone = clones[0]
      self.assertEqual(len(slim.get_variables()), 2)
      for variable in slim.get_variables():
        self.assertDeviceEqual(variable.device, 'CPU:0')
        self.assertDeviceEqual(variable.value().device, 'CPU:0')
      self.assertEqual(only_clone.outputs.op.name,
                       'LogisticClassifier/fully_connected/Sigmoid')
      # A single clone lives in the empty scope.
      self.assertEqual(only_clone.scope, '')
      self.assertDeviceEqual(only_clone.device, 'GPU:0')
      self.assertEqual(len(slim.losses.get_losses()), 1)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(update_ops, [])
Beispiel #27
0
  def testCreateMulticlone(self):
    """Four clones without PS: check variables, scopes, devices, update ops."""
    graph = tf.Graph()
    with graph.as_default():
      tf.set_random_seed(0)
      features = tf.constant(self._inputs, dtype=tf.float32)
      targets = tf.constant(self._labels, dtype=tf.float32)

      model_fn = BatchNormClassifier
      clone_args = (features, targets)
      num_clones = 4
      deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones)

      # No variables exist before the clones are created.
      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      self.assertEqual(len(slim.get_variables()), 5)
      for variable in slim.get_variables():
        self.assertDeviceEqual(variable.device, 'CPU:0')
        self.assertDeviceEqual(variable.value().device, 'CPU:0')
      self.assertEqual(len(clones), num_clones)
      for index, clone in enumerate(clones):
        expected_op_name = (
            'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % index)
        self.assertEqual(clone.outputs.op.name, expected_op_name)
        # Each clone contributes two update ops under its own scope.
        clone_update_ops = tf.get_collection(
            tf.GraphKeys.UPDATE_OPS, clone.scope)
        self.assertEqual(len(clone_update_ops), 2)
        self.assertEqual(clone.scope, 'clone_%d/' % index)
        self.assertDeviceEqual(clone.device, 'GPU:%d' % index)
Beispiel #28
0
def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
    """Train a model with `learn_fn` and check the reward it collects.

    Args:
        env_fn: zero-argument callable creating the environment.
        learn_fn: callable taking the vectorized env and returning a model.
        min_reward_fraction: the summed reward must exceed this fraction of
            `n_trials`.
        n_trials: number of environment steps to run.

    Raises:
        AssertionError: if the summed reward is too low.
    """
    def _make_seeded_env():
        seeded = env_fn()
        seeded.seed(0)
        return seeded

    np.random.seed(0)
    env = DummyVecEnv([_make_seeded_env])
    session_config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Graph().as_default():
        with tf.Session(config=session_config).as_default():
            tf.set_random_seed(0)
            model = learn_fn(env)
            sum_rew = 0
            # Start "done" so the first iteration resets the environment.
            done = True
            for _ in range(n_trials):
                if done:
                    obs = env.reset()
                    state = model.initial_state
                if state is None:
                    a, v, _, _ = model.step(obs)
                else:
                    # A non-None state is passed back into the model.
                    a, v, state, _ = model.step(obs, S=state, M=[False])
                obs, rew, done, _ = env.step(a)
                sum_rew += float(rew)
            print("Reward in {} trials is {}".format(n_trials, sum_rew))
            assert sum_rew > min_reward_fraction * n_trials, \
                'sum of rewards {} is less than {} of the total number of trials {}'.format(sum_rew, min_reward_fraction, n_trials)
Beispiel #29
0
def main(hps):
    """Set up Horovod, the data pipeline and the model, then train or infer.

    Args:
        hps: Hyper-parameter object. ``seed``, ``logdir`` and ``inference``
            are read from it here; ``train_its``, ``test_its`` and
            ``full_test_its`` are stored on it.
    """
    # Initialize Horovod.
    hvd.init()

    # Create tensorflow session
    sess = tensorflow_session()

    # Seed TensorFlow and NumPy with a value that differs per Horovod rank.
    worker_seed = hvd.rank() + hvd.size() * hps.seed
    tf.set_random_seed(worker_seed)
    np.random.seed(worker_seed)

    # Get data and set train_its and valid_its
    train_iterator, test_iterator, data_init = get_data(hps, sess)
    hps.train_its, hps.test_its, hps.full_test_its = get_its(hps)

    # Create log dir. makedirs(exist_ok=True) avoids the check-then-create race
    # when several workers start at once, and also creates missing parents.
    logdir = os.path.abspath(hps.logdir) + "/"
    os.makedirs(logdir, exist_ok=True)

    # Create model. The module is imported under an alias so the local
    # ``model`` variable does not shadow it.
    import model as model_module
    model = model_module.model(sess, hps, train_iterator, test_iterator, data_init)

    # Initialize visualization functions
    visualise = init_visualizations(hps, model, logdir)

    if hps.inference:
        infer(sess, model, hps, test_iterator)
    else:
        # Perform training
        train(sess, model, hps, logdir, visualise)
	def __init__(self, env, discount = 0.90, learning_rate = 0.008):
		"""Build the value-prediction network and open a session for it.

		Args:
			env: Environment object; its ``observation_space.high`` and
				``action_space.n`` attributes are read here.
			discount: Discount factor, stored on the instance.
			learning_rate: Learning rate handed to the Adam optimizer.
		"""
		self.env = env
		self.observation_space = env.observation_space
		self.action_space = env.action_space
		self.action_space_n = self.action_space.n
		self.n_input = len(self.observation_space.high)
		self.n_hidden_1 = 20
		# Learning parameters
		self.learning_rate = learning_rate
		self.discount = discount
		self.num_epochs = 20
		self.batch_size = 32
		self.graph = tf.Graph()
		# Per the original note, the network is a multi-layer perceptron with one
		# hidden layer of tanh units; the layers themselves are assembled in
		# self.multilayer_perceptron.
		with self.graph.as_default():
			tf.set_random_seed(1234)
			self.weights = {
				'h1': tf.Variable(tf.random_normal([self.n_input, self.n_hidden_1])),
				'out': tf.Variable(tf.random_normal([self.n_hidden_1, 1])),
			}
			self.biases = {
				'b1': tf.Variable(tf.random_normal([self.n_hidden_1])),
				'out': tf.Variable(tf.random_normal([1])),
			}
			# State input
			self.state_input = self.x = tf.placeholder("float", [None, self.n_input])
			# Target return
			self.return_input = tf.placeholder("float")
			self.value_pred = self.multilayer_perceptron(self.state_input, self.weights, self.biases)
			self.loss = tf.reduce_mean(tf.pow(self.value_pred - self.return_input, 2))
			self.optim = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
			# global_variables_initializer replaces the deprecated
			# initialize_all_variables.
			init = tf.global_variables_initializer()
		print("Value Graph Constructed")
		self.sess = tf.Session(graph = self.graph)
		self.sess.run(init)
Beispiel #31
0
def init_tf(config_dict=None):
    """Create a default TensorFlow session unless one is already active.

    Args:
        config_dict: Optional configuration mapping forwarded to
            ``create_session``. ``None`` (the default) means an empty dict; a
            fresh dict is made per call so no state is shared between calls.
    """
    if tf.get_default_session() is not None:
        return
    if config_dict is None:
        config_dict = {}
    # Draw the graph-level seed from NumPy's global random state.
    tf.set_random_seed(np.random.randint(1 << 31))
    create_session(config_dict, force_as_default=True)
Beispiel #32
0
    def __init__(self, params):
        """Create the logger, TF session, gym environment and agent.

        ``params`` is read for 'logdir', 'use_gpu', 'which_gpu', 'seed',
        'env_name', 'ep_len', 'agent_class' and 'agent_params'. The 'ep_len'
        entry and the 'discrete', 'ac_dim' and 'ob_dim' entries of
        'agent_params' are written back into it.
        """
        # ---- Setup: params, logger, TF session ----
        self.params = params
        self.logger = Logger(self.params['logdir'])
        self.sess = create_tf_session(self.params['use_gpu'],
                                      which_gpu=self.params['which_gpu'])

        # ---- Random seeds ----
        seed = self.params['seed']
        tf.set_random_seed(seed)
        np.random.seed(seed)

        # ---- Environment ----
        self.env = gym.make(self.params['env_name'])
        self.env.seed(seed)

        # Use the environment's own episode limit when 'ep_len' is falsy.
        if not self.params['ep_len']:
            self.params['ep_len'] = self.env.spec.max_episode_steps

        # Record whether the action space is discrete or continuous.
        agent_params = self.params['agent_params']
        discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
        agent_params['discrete'] = discrete

        # Observation and action sizes.
        ob_dim = self.env.observation_space.shape[0]
        if discrete:
            ac_dim = self.env.action_space.n
        else:
            ac_dim = self.env.action_space.shape[0]
        agent_params['ac_dim'] = ac_dim
        agent_params['ob_dim'] = ob_dim

        # Frames per second: taken from the simulation timestep when the env
        # lists a 'model' attribute, otherwise from the wrapped env's metadata.
        if 'model' in dir(self.env):
            self.fps = 1 / self.env.model.opt.timestep
        else:
            self.fps = self.env.env.metadata['video.frames_per_second']

        # ---- Agent ----
        agent_class = self.params['agent_class']
        self.agent = agent_class(self.sess, self.env,
                                 self.params['agent_params'])

        # ---- Initialize every TF variable created so far ----
        self.sess.run(tf.global_variables_initializer())
Beispiel #33
0
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt

# Fix the TensorFlow graph-level seed and the NumPy seed.
tf.set_random_seed(1)
np.random.seed(1)

BATCH_SIZE = 50
LR = 0.001              # learning rate

mnist = input_data.read_data_sets('./data', one_hot=True)  # the images have already been normalized to the range (0, 1)
# Keep only the first 2000 test examples.
test_x = mnist.test.images[:2000]
test_y = mnist.test.labels[:2000]

# plot one example
print(mnist.train.images.shape)     # (55000, 28 * 28)
print(mnist.train.labels.shape)   # (55000, 10)
plt.imshow(mnist.train.images[0].reshape((28, 28)), cmap='gray')
plt.title('%i' % np.argmax(mnist.train.labels[0])); plt.show()

# NOTE(review): the fed value is divided by 255 here although the comment above
# says the data is already in (0, 1) - confirm this extra scaling is intended.
tf_x = tf.placeholder(tf.float32, [None, 28*28]) / 255.
image = tf.reshape(tf_x, [-1, 28, 28, 1])              # (batch, height, width, channel)
tf_y = tf.placeholder(tf.int32, [None, 10])            # input y

# CNN
conv1 = tf.layers.conv2d(   # shape (28, 28, 1)
    inputs=image,
    filters=16,
    kernel_size=5,
    strides=1,
Beispiel #34
0
#multi variable
import tensorflow as tf
# Fix the graph-level seed so the random initial weights are repeatable.
tf.set_random_seed(777)

# Six training examples with two features each.
x_data = [[1,2],
          [2,3],
          [3, 1],
          [4,3],
          [5,3],
          [6,2]]

# One binary label per training example.
y_data = [[0],
          [0],
          [0],
          [1],
          [1],
          [1]
            ]


x = tf.placeholder(tf.float32, shape=[None,2])
y = tf.placeholder(tf.float32, shape=[None,1])

w = tf.Variable(tf.random_normal([2, 1]), name= 'weight')
                                # The first dimension of w must equal the number of columns of x.
b = tf.Variable(tf.random_normal([1]), name= 'bias')

hypothesis = tf.sigmoid(tf.matmul(x, w) + b) # wx+b
# Shapes: [None, 2] x [2, 1] -> [None, 1]

# cost = tf.reduce_mean(tf.square(hypothesis - y))
Beispiel #35
0
from random import randint, uniform
import time
import matplotlib.pyplot as plt
from keras.losses import sparse_categorical_crossentropy
from keras.preprocessing.image import ImageDataGenerator
import tensorflow as t
import random
import cv2
import keras.backend as K
import tensorflow as tf
import pandas as pd
from keras_preprocessing import image
import random as rn
np.random.seed(42)
rn.seed(12345)
tf.set_random_seed(1234)

# -----------------------------------------------------------------------------------------------
# import the essential functions required for computation
# sys.path.insert(0, os.path.expanduser('~//CNN_networks'))
# sys.export PYTHONPATH=/home/yaurehman2/PycharmProjects/face_anti_sp_newidea

print(sys.path)
from cnn_networks.VGG16_A_GAP_dual_inp import cnn_hybrid_color_single
from ess_func import read_pairs, sample_people, prewhiten, store_loss, hog_to_tensor, custom_loss

# -----------------------------------------------------------------------------------------------


def main(args):
    # set the image parameters
@author: vp999274
"""

# keras module for building LSTM 
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from keras.preprocessing.text import Tokenizer
from keras.callbacks import EarlyStopping
from keras.models import Sequential
import keras.utils as ku 
import pickle

# set seeds for reproducibility
from tensorflow import set_random_seed
from numpy.random import seed
set_random_seed(2)
seed(1)

import pandas as pd
import numpy as np
import string, os 

import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter(action='ignore', category=FutureWarning)


curr_dir = 'input_data_headlines/'
all_headlines = []
#reading all files from the directory
for filename in os.listdir(curr_dir):
    def _asLoomOpTest(self, max_depth):
        """Check ops wrapped with ``model_utils.as_loom_op`` inside a Loom.

        Builds a Loom with four wrapped ops (``f``, ``g``, ``h1``, ``h2``) at
        the given ``max_depth``, asserts the names of the variables they
        create, wires five outputs through a weaver and compares the evaluated
        results with values computed directly from the NumPy inputs.
        """
        with tf.Graph().as_default() as graph:
            # Fixed seeds for both the TF variable initializer and the NumPy inputs.
            tf.set_random_seed(8)
            np.random.seed(7)
            ts = loom.TypeShape(tf.int64, ())
            initializer = tf.random_uniform_initializer(dtype=tf.int64,
                                                        minval=0,
                                                        maxval=1 << 60)

            @model_utils.as_loom_op([ts, ts], ts)
            def f(x, y):
                # Use a variable to make sure variable sharing works correctly
                rand = tf.get_variable('rand',
                                       shape=(),
                                       dtype=tf.int64,
                                       initializer=initializer)
                return rand - x - y

            @model_utils.as_loom_op([ts, ts], [ts, ts])
            def g(x, y):
                # Test multiple outputs
                return x - y, x + y

            def make_h():
                # Ensure that we can reuse names for different calls to as_loom_op.
                # Also test that the name argument to as_loom_op works.
                @model_utils.as_loom_op([ts], ts, name='h')
                def not_h(x):
                    v = tf.get_variable('yo',
                                        shape=(),
                                        dtype=tf.int64,
                                        initializer=initializer)
                    return x + v

                return not_h

            # Make two h's to ensure they make separate variables
            h1 = make_h()
            h2 = make_h()

            simple_loom = loom.Loom(named_ops={
                'f': f,
                'g': g,
                'h1': h1,
                'h2': h2
            },
                                    max_depth=max_depth)
            # Exactly three variables are expected: one for f and one per h instance.
            self.assertEqual(['f/rand', 'h/yo', 'h_1/yo'],
                             [v.op.name for v in tf.global_variables()])

            # Use f twice and (g,h1,h2) once each
            weaver = simple_loom.make_weaver()
            x, y, z = np.random.randint(1 << 60, size=3)
            wx, wy, wz = weaver(x), weaver(y), weaver(z)
            weaver.add_output(weaver.f(wx, weaver.f(wy, wz)))
            # NOTE(review): g returns (x - y, x + y), so the names `plus` and
            # `minus` look swapped; the assertions on out[1] and out[2] below
            # follow g's actual output order.
            plus, minus = weaver.g(wx, wy)
            weaver.add_output(plus)
            weaver.add_output(minus)
            weaver.add_output(weaver.h1(wx))
            weaver.add_output(weaver.h2(wx))

            with self.test_session(graph=graph):
                tf.global_variables_initializer().run()
                out = simple_loom.output_tensor(ts).eval(
                    weaver.build_feed_dict())
                self.assertEqual(out.shape, (5, ))
                # out[0] works only if variables are shared between layers:
                #   rand - x - (rand - y - z) = y + z - x
                self.assertEqual(out[0], y + z - x)
                # out[1] and out[2] are simple
                self.assertEqual(out[1], x - y)
                self.assertEqual(out[2], x + y)
                # out[3] and out[4] should use different random variables
                self.assertNotEqual(out[3], out[4])
from keras.utils import to_categorical
from keras.callbacks import Callback
from sklearn.metrics import roc_auc_score

from mmoe import MMoE

SEED = 1

# Fix numpy seed for reproducibility
np.random.seed(SEED)

# Fix random seed for reproducibility
random.seed(SEED)

# Fix TensorFlow graph-level seed for reproducibility
tf.set_random_seed(SEED)
tf_session = tf.Session(graph=tf.get_default_graph())
K.set_session(tf_session)


# Simple callback to print out ROC-AUC
class ROCCallback(Callback):
    def __init__(self, training_data, validation_data, test_data):
        self.train_X = training_data[0]
        self.train_Y = training_data[1]
        self.validation_X = validation_data[0]
        self.validation_Y = validation_data[1]
        self.test_X = test_data[0]
        self.test_Y = test_data[1]

    def on_train_begin(self, logs={}):
Beispiel #39
0
import argparse
import sys
import tempfile

import tensorflow as tf
import numpy as np
from imgaug import augmenters as iaa
from utils import *
from taylor_batch_norm_unshared import taylor
np.random.seed(0)
tf.set_random_seed(0)

clip = False

def lr(iteration):
    """Return the learning rate to use at training step ``iteration``.

    The schedule is piecewise constant: 1e-3 below step 10000, 1e-4 below
    step 15000, 1e-5 below step 20000, and 1e-4 from step 20000 onwards.
    """
    # (exclusive upper bound, rate) pairs, checked in ascending order.
    schedule = ((10000, 1e-3), (15000, 1e-4), (20000, 1e-5))
    for upper_bound, rate in schedule:
        if iteration < upper_bound:
            return rate
    return 1e-4

def deepnn(x,n_classes):
    is_train = tf.placeholder(tf.bool)
    keep_prob = tf.placeholder(tf.float32)

    h1 = tf.layers.conv2d(x, filters=32, kernel_size=(3, 3), padding='SAME', kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.00005))
    h = taylor(h1, k=2, is_train=is_train, name="Ac/1")
Beispiel #40
0
def main(_):
    """Build the classifier graph, train it for 20000 steps and log accuracy.

    Data comes from ``FMNIST_aug`` and the network from ``deepnn``. The loss is
    softmax cross-entropy plus an L1 penalty on every tensor whose name
    contains 'activation_coeff', plus the collected kernel regularisation
    loss. Every 100 steps the accuracy on the current training batch and on the
    test set is printed.

    Args:
        _: Unused positional argument.
    """
    # Import data
    mnist = FMNIST_aug()
    mnist.data_augmentation()
    n_classes = 10

    x = tf.placeholder(tf.float32, [None, 28, 28, 1])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, n_classes])

    print("Gradient Clipping Status: "+str(clip))

    learning_rate = tf.placeholder(tf.float32)
    # Build the graph for the deep net
    y_conv, is_train, keep_prob = deepnn(x, n_classes)

    with tf.name_scope('Loss'):
        def include_activation(name):
            return ('activation_coeff' in name)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                            logits=y_conv)
        cross_entropy = tf.reduce_mean(cross_entropy)
        graph = tf.get_default_graph()
        # First output of every op whose output name marks it as an activation coefficient.
        temp = [op.values()[0] for op in graph.get_operations() if ((len(op.values()) >= 1) and (include_activation(op.values()[0].name)))]
        # No trailing comma here: a stray comma used to turn this into a
        # 1-tuple, which made the total loss a shape-(1,) tensor, not a scalar.
        regl1_loss = 0.01 * tf.add_n([tf.reduce_sum(0.01* tf.abs(tf.cast(v, tf.float32))) for v in temp])
        # The L2 variant is built but not used in the loss (only L1 is selected below).
        regl2_loss =  0.01 * tf.add_n([tf.reduce_sum(0.01*tf.nn.l2_loss(tf.cast(v, tf.float32))) for v in temp])
        reg_loss = regl1_loss
        w_loss = tf.losses.get_regularization_loss()

    with tf.name_scope('Adam_optimizer'):
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # Run the collected update ops together with every training step.
        with tf.control_dependencies(update_ops):
            if clip:
                optimizer = tf.train.AdamOptimizer(learning_rate)
                gradients, variables = zip(*optimizer.compute_gradients(cross_entropy+reg_loss+w_loss))
                gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
                train_step = optimizer.apply_gradients(zip(gradients, variables))
            else:
                # Adam handles the data loss and activation penalty; plain
                # gradient descent handles the kernel regularisation loss.
                train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy+reg_loss)
                reg_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(w_loss)
                train_step = tf.group(train_step, reg_step)

    with tf.name_scope('Accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)

    graph_location = tempfile.mkdtemp()
    print('Saving graph to: %s' % graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())
    # The writer is not used again, so flush and release it right away.
    train_writer.close()
    acc = []
    # NOTE(review): this seed is set after the graph has been built; confirm it
    # still has the intended effect.
    tf.set_random_seed(0)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(20000):
            batch = mnist.next_train_batch(64, augment=False)
            if i % 100 == 0:
                train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], is_train: True, keep_prob: 0.8})
                a = []
                for batchx, batchy in iterate_minibatches(mnist.get_test_images(), mnist.get_test_labels(), 1000):
                    a.append(accuracy.eval(feed_dict={x: batchx, y_: batchy, is_train: False, keep_prob: 1.0}))
                print('Step %d, Training accuracy %g, Testing accuracy %g' % (i, train_accuracy, np.mean(a)))
                acc.append(a)

            train_step.run(feed_dict={x: batch[0], y_: batch[1], is_train: True, learning_rate: lr(i), keep_prob: 0.8})
from tqdm import tqdm

from ccks_all.cut_text import train_text_dic, all_text_dic, kb_all_text_dic, load_cut_text

from keras import backend as K

from ccks_all.modeling.bertmodel.preprocess import PairTokenizer, BertPreProcess, Preprocess, BertNerProcess
from ccks_all.modeling.utils import Metrics

# NOTE(review): presumably maximum query / document lengths - confirm against usage.
max_q = 30
max_d = 450

# Seed Python's, NumPy's and TensorFlow's random generators with the same value.
seed = 123
random.seed(seed)
np.random.seed(seed)
tf.set_random_seed(seed)

# Path template for the training data; '{}' is filled in with a file name.
root_dir = r"D:\data\biendata\ccks2019_el\ccks_train_data\{}"

# Directory holding the pre-trained BERT files (config, checkpoint and vocabulary).
bert_dir = r'D:\data\bert\chinese-bert_chinese_wwm_L-12_H-768_A-12'

config_path = bert_dir + r"\bert_config.json"
checkpoint_path = bert_dir + r"\bert_model.ckpt"
dict_path = bert_dir + r'\vocab.txt'
# The BERT model is loaded at module level, i.e. as soon as this code runs.
bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path)

# train_dir = r"C:\python3workspace\kera_ner_demo\ccks_ner\modeling\pair_model\dt\m3\{}"
# Path template for this model's outputs (log directory and best checkpoint).
model_dir = r"D:\data\biendata\ccks2019_el\entityclf\m21\{}"
log_filepath = model_dir.format(r"log")
model_path = model_dir.format(r"best_model.hdf5")
    def train(
        self,
        training_data: "TrainingData",
        cfg: Optional["RasaNLUModelConfig"] = None,
        **kwargs: Any,
    ) -> None:
        """Train the embedding intent classifier on a data set.

        Preprocesses ``training_data``, optionally splits off an evaluation
        set, builds a fresh TensorFlow graph with the training ops, runs the
        training loop in a new session and finally builds the prediction
        graph. Returns early, after logging an error, when the data does not
        contain enough distinct labels.
        """

        logger.debug("Started training embedding classifier.")

        # Seed NumPy before the data is preprocessed.
        np.random.seed(self.random_seed)

        session_data = self.preprocess_train_data(training_data)

        # Guard clause: nothing to train without enough distinct labels.
        if not self._check_enough_labels(session_data):
            logger.error("Can not train a classifier. "
                         "Need at least 2 different classes. "
                         "Skipping training of classifier.")
            return

        # Hold out an evaluation set only when one was requested.
        eval_session_data = None
        if self.evaluate_on_num_examples:
            session_data, eval_session_data = train_utils.train_val_split(
                session_data,
                self.evaluate_on_num_examples,
                self.random_seed,
                label_key="label_ids",
            )

        self.graph = tf.Graph()
        with self.graph.as_default():
            # Graph-level seed for this freshly created graph.
            tf.set_random_seed(self.random_seed)

            # The batch size is fed at run time so that it can be increased.
            batch_size_in = tf.placeholder(tf.int64)

            iterator_and_init_ops = train_utils.create_iterator_init_datasets(
                session_data,
                eval_session_data,
                batch_size_in,
                self.batch_in_strategy,
                label_key="label_ids",
            )
            self._iterator, train_init_op, eval_init_op = iterator_and_init_ops

            self._is_training = tf.placeholder_with_default(False, shape=())

            loss, acc = self._build_tf_train_graph(session_data)

            # Optimizer: Adam with its default settings.
            optimizer = tf.train.AdamOptimizer()
            self._train_op = optimizer.minimize(loss)

            # Run the training loop in a new session on this graph.
            self.session = tf.Session(config=self._tf_config)
            train_utils.train_tf_dataset(
                train_init_op,
                eval_init_op,
                batch_size_in,
                loss,
                acc,
                self._train_op,
                self.session,
                self._is_training,
                self.epochs,
                self.batch_in_size,
                self.evaluate_on_num_examples,
                self.evaluate_every_num_epochs,
            )

            # Add the prediction part of the graph once training is done.
            self.pred_confidence = self._build_tf_pred_graph(session_data)
Beispiel #43
0
def main():
    """Train the classifier with the TNAR losses alongside a conditional VAE.

    Parses the command-line arguments, creates a log directory, builds the
    TensorFlow graph (classifier, conditional VAE, the two adversarial
    perturbation branches and the summaries) and then, for ``args.epoch``
    epochs, alternates classifier and VAE updates. After every epoch the test
    accuracy is computed and summaries are written; a checkpoint is saved
    every 10 epochs.
    """
    args = parser.parse_args()
    print(args)
    # The log directory name encodes the main hyper-parameters plus a timestamp.
    log_dir = args.logdir + '/' + str(args.numX) + '/ep1st' + str(args.epsilon1) + '_ep2nd' + str(args.epsilon2) \
              + '_zeta' + str(args.zeta) + '_seed' + str(args.seed) + '_' + str(datetime.now())
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    # Keep a copy of the arguments next to the logs.
    with open(os.path.join(log_dir, 'args.txt'), 'w') as f:
        f.write(str(args))
    # set random seed
    tf.set_random_seed(args.seed)
    np.random.seed(args.seed)
    # set gpu
    deviceIDs = GPUtil.getAvailable(order='first',
                                    limit=4,
                                    maxLoad=0.1,
                                    maxMemory=0.1,
                                    excludeID=[],
                                    excludeUUID=[])
    print('Unloaded gpu:', deviceIDs)
    # NOTE(review): deviceIDs[0] raises IndexError if the list above is empty.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(deviceIDs[0])

    # dataset: each pixel value lies in the range 0~1
    # Dataset = np.load(os.environ['HOME'] + '/datasets/fashion/fashion.npz')
    Dataset = np.load('fashion.npz')
    Xul_train = Dataset['xtrain'][0:60000]  # shape is (60000, 784)
    Yul_train = Dataset['ytrain'][0:60000]
    X_test = Dataset['xtest']
    Y_test = Dataset['ytest']
    # mask = np.random.choice(60000, 500, False)
    # The labelled subset is simply the first args.numX training examples.
    mask = np.arange(0, args.numX)
    X_train = Xul_train[mask]
    Y_train = Yul_train[mask]
    # Print how many labelled examples have a 1 in each of the 10 label columns.
    for i in range(10):
        print('class:', i, np.sum((Y_train[:, i] == 1)))

    # build graph
    lr = tf.placeholder(tf.float32, [], name='lr')
    x = tf.placeholder(tf.float32, [None, 784], name='x')
    y = tf.placeholder(tf.float32, [None, 10], name='y')
    x_ul = tf.placeholder(tf.float32, [None, 784], name='xul')

    vae = cVAE(args.latent_dim)
    net = Net()

    # Classifier outputs for the labelled batch and for the unlabelled batch.
    out = net.classifier(x)
    out_ul = net.classifier(x_ul)

    # The VAE is conditioned on the classifier output of the unlabelled batch.
    mu, logvar = vae.encode(x_ul, out_ul)
    z = vae.reparamenterize(mu, logvar)
    x_recon = vae.decode(z, out_ul)
    # Samples decoded from latent codes drawn from a standard normal.
    x_gen = vae.decode(
        tf.random_normal([tf.shape(out_ul)[0], args.latent_dim]), out_ul)

    # conditional vae graph
    vae_loss = vae.BCE(x_recon, x_ul, mu, logvar) + vae.KLD(mu, logvar)
    vae_weight_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        scope='cvae')
    # Only variables under the 'cvae' scope are updated by the VAE step.
    vae_train_step = tf.train.AdamOptimizer(lr).minimize(
        vae_loss, var_list=vae_weight_list)

    # TNAR graph
    # r0 is an all-zero latent offset; gradients with respect to it are taken below.
    r0 = tf.zeros_like(z, name='zero_holder')  # [128 100]
    x_recon_r0 = vae.decode(z + r0, out_ul)  # [128 784]
    diff2 = 0.5 * tf.reduce_sum((x_recon - x_recon_r0)**2, axis=1)  # [128,]
    # tf.gradients(y, x) equals tf.gradients(tf.reduce_sum(y), x); its shape is the same as x's shape
    diffJaco = tf.gradients(diff2, r0)[0]  # [128 100]

    def normalizevector(r):
        # Scale by the largest absolute entry first, then divide by the L2 norm
        # (both with a small constant added to avoid division by zero).
        r /= (1e-12 + tf.reduce_max(tf.abs(r), axis=1, keepdims=True))
        return r / tf.sqrt(tf.reduce_sum(r**2, axis=1, keepdims=True) + 1e-6)

    # power method
    r_adv = normalizevector(tf.random_normal(shape=tf.shape(z)))
    for j in range(1):
        r_adv = 1e-6 * r_adv
        x_r = vae.decode(z + r_adv, out_ul)
        out_r = net.classifier(x_r - x_recon + x_ul)
        kl = net.kldivergence(out_ul, out_r)
        # Gradient of the KL term with respect to the small latent perturbation.
        r_adv = tf.stop_gradient(tf.gradients(kl, r_adv)[0]) / 1e-6
        r_adv = normalizevector(r_adv)
        # begin cg
        # Four conjugate-gradient iterations; Bpk is obtained by differentiating
        # diffJaco * pk with respect to r0.
        rk = r_adv + 0
        pk = rk + 0
        xk = tf.zeros_like(rk)
        for k in range(4):
            Bpk = tf.stop_gradient(tf.gradients(diffJaco * pk, r0)[0])
            pkBpk = tf.reduce_sum(pk * Bpk, axis=1, keepdims=True)
            rk2 = tf.reduce_sum(rk * rk, axis=1, keepdims=True)
            # The step size is forced to zero once the squared residual is <= 1e-8.
            alphak = (rk2 / (pkBpk + 1e-8)) * tf.cast((rk2 > 1e-8), tf.float32)
            xk += alphak * pk
            rk -= alphak * Bpk
            betak = tf.reduce_sum(rk * rk, axis=1,
                                  keepdims=True) / (rk2 + 1e-8)
            pk = rk + betak * pk
        # end cg
        r_adv = normalizevector(xk)
    # Decode the perturbed latent code; r_x is the resulting input-space perturbation.
    x_adv = vae.decode(z + r_adv * args.epsilon1, out_ul)
    r_x = x_adv - x_recon
    out_adv = net.classifier(x_ul + r_x)
    r_x = normalizevector(r_x)

    # Second perturbation, built directly in input space.
    r_adv_orth = normalizevector(tf.random_normal(shape=tf.shape(x_ul)))
    for j in range(1):
        r_adv_orth1 = 1e-6 * r_adv_orth
        out_r = net.classifier(x_ul + r_adv_orth1)
        kl = net.kldivergence(out_ul, out_r)
        r_adv_orth1 = tf.stop_gradient(tf.gradients(kl, r_adv_orth1)[0]) / 1e-6
        # Subtract zeta times the component of r_adv_orth along r_x and add
        # zeta times r_adv_orth itself to the gradient direction.
        r_adv_orth = r_adv_orth1 - args.zeta * (
            tf.reduce_sum(r_x * r_adv_orth, axis=1, keepdims=True) *
            r_x) + args.zeta * r_adv_orth
        r_adv_orth = normalizevector(r_adv_orth)
    out_adv_orth = net.classifier(x_ul + r_adv_orth * args.epsilon2)  ###

    # TNAR loss
    vat_loss = net.kldivergence(tf.stop_gradient(out_ul), out_adv)
    vat_loss_orth = net.kldivergence(tf.stop_gradient(out_ul), out_adv_orth)
    en_loss = net.crossentropy(out_ul, out_ul)
    ce_loss = net.crossentropy(y, out)
    total_loss = ce_loss + args.coef_vat1 * vat_loss + args.coef_vat2 * vat_loss_orth + args.coef_ent * en_loss

    # Only variables under the 'net' scope are updated by the classifier step.
    weight_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='net')
    train_step = tf.train.AdamOptimizer(lr).minimize(total_loss,
                                                     var_list=weight_list)

    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(out, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # summary
    # NOTE(review): vat_loss_orth is part of total_loss but has no scalar summary.
    train_summary_list = [
        tf.summary.scalar('loss/total', total_loss),
        tf.summary.scalar('loss/crossentropy', ce_loss),
        tf.summary.scalar('loss/entropy', en_loss),
        tf.summary.scalar('loss/vat', vat_loss),
        tf.summary.scalar('acc/train', accuracy),
    ]
    image_summary_list = [
        tf.summary.image('x',
                         tf.reshape(x_ul, [-1, 28, 28, 1]),
                         max_outputs=32),
        tf.summary.image('x_recon',
                         tf.reshape(x_recon, [-1, 28, 28, 1]),
                         max_outputs=32),
        tf.summary.image('x_gen',
                         tf.reshape(x_gen, [-1, 28, 28, 1]),
                         max_outputs=32),
        tf.summary.image('x_adv',
                         tf.reshape(x_adv, [-1, 28, 28, 1]),
                         max_outputs=32),
        tf.summary.image('r_adv',
                         tf.reshape(x_adv - x_recon, [-1, 28, 28, 1]),
                         max_outputs=32),
        tf.summary.image('r_adv_orth',
                         tf.reshape(r_adv_orth * args.epsilon2,
                                    [-1, 28, 28, 1]),
                         max_outputs=32),
        tf.summary.image('x_adv_orth',
                         tf.reshape(x_ul + r_adv_orth * args.epsilon2,
                                    [-1, 28, 28, 1]),
                         max_outputs=32)
    ]
    # The test accuracy is computed in Python and fed back in through this placeholder.
    _acc = tf.placeholder(tf.float32, name='acc_summary')
    test_summary_list = [tf.summary.scalar('acc/test', _acc)]
    train_summary_merged = tf.summary.merge(train_summary_list +
                                            image_summary_list)
    test_summary_merged = tf.summary.merge(test_summary_list)

    saver = tf.train.Saver(max_to_keep=100)

    # optimization
    gpu_options = tf.GPUOptions(allow_growth=True)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # NOTE(review): the writer is never closed explicitly in this function.
        writer = tf.summary.FileWriter(log_dir, sess.graph)
        sess.run(tf.global_variables_initializer(), feed_dict={lr: args.lr})
        for ep in range(args.epoch):
            # Constant learning rate up to lr_decay_epoch, then a linear decay
            # that would reach zero at args.epoch.
            if ep < args.lr_decay_epoch:
                decayed_lr = args.lr
            else:
                decayed_lr = args.lr * (
                    args.epoch - ep) / float(args.epoch - args.lr_decay_epoch)

            for i in range(args.iter_per_epoch):
                # Sample a labelled and an unlabelled mini-batch without replacement.
                mask = np.random.choice(len(X_train), args.batch_size, False)
                mask_ul = np.random.choice(len(Xul_train), args.batch_size_ul,
                                           False)
                # optimize cls
                _, loss_ce, loss_vat, loss_vat2, train_summary = sess.run(
                    [
                        train_step, ce_loss, vat_loss, vat_loss_orth,
                        train_summary_merged
                    ],
                    feed_dict={
                        x: X_train[mask],
                        y: Y_train[mask],
                        x_ul: Xul_train[mask_ul],
                        lr: decayed_lr,
                        vae.is_train: False
                    })
                # optimize vae
                _, loss_vae = sess.run([vae_train_step, vae_loss],
                                       feed_dict={
                                           x_ul: Xul_train[mask_ul],
                                           lr: decayed_lr,
                                           vae.is_train: True
                                       })

            # Average the accuracy over 20 test batches of 500 examples each.
            acc = 0
            for j in range(20):
                acc += sess.run(accuracy,
                                feed_dict={
                                    x: X_test[500 * j:500 * (j + 1)],
                                    y: Y_test[500 * j:500 * (j + 1)]
                                })
            acc /= 20
            test_summary = sess.run(test_summary_merged,
                                    feed_dict={
                                        x: X_test[0:128],
                                        y: Y_test[0:128],
                                        _acc: acc
                                    })
            # train_summary and the printed losses come from the last iteration
            # of the epoch.
            # NOTE(review): they are unbound if args.iter_per_epoch is 0.
            writer.add_summary(train_summary, ep)
            writer.add_summary(test_summary, ep)
            print('epoch', ep, 'ce', loss_ce, 'vat1', loss_vat, 'vat2',
                  loss_vat2, 'vae', loss_vae)
            print('epoch', ep, 'acc', acc)

            if ep % 10 == 0:
                saver.save(sess, os.path.join(log_dir, 'model'), ep)
    print(args)
Beispiel #44
0
# These two flags hold integers, so they are declared with DEFINE_integer;
# declaring them as strings would turn command-line overrides into str values
# while the defaults stayed ints.
flags.DEFINE_integer("occlude_start_row", 18, "image row to start occlusion")
flags.DEFINE_integer("num_generated_images", 9, "number of images to generate")

# Debug
flags.DEFINE_boolean("is_train", True, "training or testing")
flags.DEFINE_string("log_level", "INFO", "log level [DEBUG, INFO, WARNING, ERROR, CRITICAL]")
flags.DEFINE_integer("random_seed", 123, "random seed for python")

conf = flags.FLAGS

# logging: the root logger takes its level from the log_level flag
logger = logging.getLogger()
logger.setLevel(conf.log_level)

# random seed: TensorFlow and NumPy share the random_seed flag
tf.set_random_seed(conf.random_seed)
np.random.seed(conf.random_seed)


def validate_parameters(conf):
  """Raise ``ValueError`` unless ``conf.data`` names a supported data set."""
  supported = ("mnist", "color-mnist", "cifar")
  if conf.data in supported:
    return
  message = "Configuration parameter 'data' is '{}'. Must be one of [mnist, color-mnist, cifar]"
  raise ValueError(message.format(conf.data))


def preprocess(q_levels):
  
  def preprocess_fcn(images, labels):      
    # Create the target pixels from the image. Quantize the scalar pixel values into q_level indices.
    target_pixels = np.clip(((images * q_levels).astype('int64')), 0, q_levels - 1) # [N,H,W,C]
    return (images, target_pixels)
# Scale the flattened test inputs by 1/255.
X_test = X_test_flatten/255.
# Convert training and test labels to one hot matrices
Y_train = one_hot_encoding(Y_train_orig, 6)
Y_test = one_hot_encoding(Y_test_orig, 6)


# Report set sizes (second axis of X) and the array shapes.
print ("train set size" + str(X_train.shape[1]))
print ("test set size" + str(X_test.shape[1]))
print ("X_train shape " + str(X_train.shape))
print ("Y_train shape " + str(Y_train.shape))
print ("X_test shape " + str(X_test.shape))
print ("Y_test shape " + str(Y_test.shape))
print("START TRAINING")

ops.reset_default_graph()                  # Clears the default graph stack and resets the global default graph, so the model can be rerun without overwriting tf variables
tf.set_random_seed(1)                      # 1 to keep consistent results
(n_x, m) = X_train.shape                 # (n_x: input size, m : number of examples in the train set)
n_y = Y_train.shape[0]                    # n_y : output size
costs = []                                       # To keep track of the cost
epoch_n = []
learning_rate = 0.0001
num_epochs = 1500
minibatch_size = 32
seed=3


### CREATE PLACEHOLDERS
# Features are on the first axis; the second axis is left unspecified (None).
X = tf.placeholder(dtype = tf.float32, shape = [n_x, None], name = "X")
Y = tf.placeholder(dtype = tf.float32, shape = [n_y, None], name = "Y")

###INITIALIZE PARAMETERS
Beispiel #46
0
# Lab 11 MNIST and Deep learning CNN
import tensorflow as tf
import random

from tensorflow.examples.tutorials.mnist import input_data
tf.set_random_seed(777)  # reproducibility

# Load MNIST with one-hot encoded labels from the "MNIST_data" directory.
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

# hyper parameters
learning_rate = 0.001
# NOTE(review): "trainig_epochs" is misspelled; left as-is because later code
# outside this fragment may refer to this exact name.
trainig_epochs = 15
batch_size = 100

# dropout (keep_prob) rate 0.7~0.5 on training, but should be 1 for testing
keep_prob = tf.placeholder(tf.float32)

# input place holders: flattened 784-value images and 10-way one-hot labels
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])

# img 28x28x1 (black/white)
# The number of input images is not known in advance, so that dimension is set to -1.
X_img = tf.reshape(X, [-1, 28, 28, 1])

# L1 ImageIn shape = (?, 28, 28, 1)
# 3x3 kernels, 1 input channel, 32 output channels.
W1 = tf.Variable(tf.random_normal([3, 3, 1, 32], stddev=0.01))
#   Conv        -> (?, 28, 28, 32)
#   Pool        -> (?, 14, 14, 32)   (the pooling op is not part of this fragment)
L1 = tf.nn.conv2d(X_img, W1, strides=[1, 1, 1, 1], padding='SAME')
L1 = tf.nn.relu(L1)
Beispiel #47
0
#     print(action)
#     observation, reward, done = env.step(action)


def one_hot(num, maximum):
    """Return a zero-filled array of size `maximum` with a 1 stored at index `num`."""
    encoded = np.zeros(maximum)
    encoded[num] = 1
    return encoded


# def main(_):
# Build the environment and network inside one TensorFlow session.
with tf.Session() as sess:
    train_step = 0
    env = env2D(ENVIRONMENT)
    # Seed NumPy and TensorFlow before the network ops are created below.
    np.random.seed(RANDOM_SEED)
    tf.set_random_seed(RANDOM_SEED)
    # NOTE(review): assuming `random` is the stdlib module, random.seed()
    # returns None, so this line seeds Python's RNG but stores None in
    # env.seed (overwriting whatever attribute or method env2D defines there).
    # Confirm what env2D expects before changing it.
    env.seed = random.seed(RANDOM_SEED)

    # Network input/output sizes are read from the environment.
    state_dim = env.observation_flat().shape[0]
    action_dim = env.action_space.shape[0]

    # create network
    net = Network(sess, state_dim, action_dim, LEARNING_RATE, TAU)

    # train(sess, env, net)

    # Initialise all variables only after the network has been built.
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    # Summary writer for SUMMARY_DIR; the session graph is passed so it is logged too.
    writer = tf.summary.FileWriter(SUMMARY_DIR, sess.graph)
Beispiel #48
0
from model.model_fn import model_fn
from model.training import train_and_evaluate


# Command-line interface: where the experiment lives, where the data lives,
# and an optional checkpoint to warm-start from.
parser = argparse.ArgumentParser()
parser.add_argument('--model_dir', default='experiments/base_model',
                    help="Experiment directory containing params.json")
parser.add_argument('--data_dir', default='data/processed_data',
                    help="Directory containing the dataset")
parser.add_argument('--restore_from', default=None,
                    help="Optional, directory or file containing weights to reload before training")


if __name__ == '__main__':
    # Set the random seed for the whole graph for reproducible experiments
    tf.set_random_seed(230)

    # Load the parameters from json file
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    # NOTE(review): this check is an `assert`, so it is skipped when Python
    # runs with -O.
    assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    # Check that we are not overwriting some previous experiment
    # Comment these lines if you are developing your model and don't care about overwriting
    # (the check is currently disabled)
    # model_dir_has_best_weights = os.path.isdir(os.path.join(args.model_dir, "best_weights"))
    # overwritting = model_dir_has_best_weights and args.restore_from is None
    # assert not overwritting, "Weights found in model_dir, aborting to avoid overwrite"

    # Set the logger: log to train.log inside the experiment directory
    set_logger(os.path.join(args.model_dir, 'train.log'))
    # NOTE(review): `args.scaling`, `random_seed`, `gpu_id` and `path_train`
    # are defined outside this fragment.
    scaling = args.scaling

    ### initialize random seed generator of numpy
    np.random.seed(random_seed)
    
    #--------------------------------------------------------------------
    # import keras and its backend (e.g., tensorflow)
    #--------------------------------------------------------------------
    # The CUDA environment variables are set before TensorFlow is imported below.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
    if gpu_id >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
    else:
        # A negative gpu_id sets an empty device list.
        os.environ["CUDA_VISIBLE_DEVICES"] = ''
    os.environ['TF_CPP_MIN_LOG_LEVEL']='2'  # suppress warning messages
    import tensorflow as tf
    tf.set_random_seed(random_seed)  # initialize random seed generator of tensorflow
    from keras.layers import Dense, Dropout
    from keras.models import Sequential, load_model
    
    train_df = pd.read_csv(path_train, header=0) # pass header=0 to be able to replace existing names
    # The first 520 columns are taken as the features.
    train_AP_features = scale(np.asarray(train_df.iloc[:,0:520]).astype(float), axis=1) # convert integer to float and scale jointly (axis=1)
    # REFPOINT is the string concatenation of SPACEID and RELATIVEPOSITION.
    train_df['REFPOINT'] = train_df.apply(lambda row: str(int(row['SPACEID'])) + str(int(row['RELATIVEPOSITION'])), axis=1) # add a new column
    
    # map reference points to sequential IDs per building-floor before building labels
    blds = np.unique(train_df[['BUILDINGID']])
    flrs = np.unique(train_df[['FLOOR']])    
    for bld in blds:
        for flr in flrs:
            # Rows belonging to this (building, floor) pair.
            cond = (train_df['BUILDINGID']==bld) & (train_df['FLOOR']==flr)
            # `idx` gives, for each selected row, the position of its REFPOINT
            # among the sorted unique REFPOINT values of this pair.
            _, idx = np.unique(train_df.loc[cond, 'REFPOINT'], return_inverse=True) # refer to numpy.unique manual
            train_df.loc[cond, 'REFPOINT'] = idx
Beispiel #50
0
def main():
    """Train a probabilistic matrix factorisation model on MovieLens-1M with chunked HMC.

    The latent factors are stored as non-trainable variables split into chunks
    of `chunk_size` rows. Each epoch walks over every `u` chunk and then every
    `v` chunk: the chunk is copied into a trainable candidate variable, one HMC
    sampling op is run on it, and the result is written back. After each epoch
    the training RMSE is printed; validation and test RMSE are printed every
    `eval_freq` epochs.

    Relies on module-level names defined elsewhere in the file (`dataset`,
    `conf`, `pmf`, `zs`, `select_from_corpus`, `average_rmse_over_batches`).
    """
    np.random.seed(1234)
    tf.set_random_seed(1237)
    M, N, train_data, valid_data, test_data, user_movie, user_movie_score, \
        movie_user, movie_user_score = dataset.load_movielens1m_mapped(
            os.path.join(conf.data_dir, 'ml-1m.zip'))

    # set configurations and hyper parameters
    D = 30
    batch_size = 100000
    K = 8
    n_epochs = 500
    eval_freq = 10

    # paralleled: round N and M up to a multiple of chunk_size so that every
    # chunk variable has the same shape
    chunk_size = 50
    N = (N + chunk_size - 1) // chunk_size
    N *= chunk_size
    M = (M + chunk_size - 1) // chunk_size
    M *= chunk_size

    # Selection
    neighbor_u = tf.placeholder(tf.int32, shape=[None], name="neighbor_u")
    neighbor_v = tf.placeholder(tf.int32, shape=[None], name="neighbor_v")
    select_u = tf.placeholder(tf.int32, shape=[None], name="select_u")
    select_v = tf.placeholder(tf.int32, shape=[None], name="select_v")
    alpha_u = 1.0
    alpha_v = 1.0
    alpha_pred = 0.2 / 4.0

    # Define samples as variables, one [K, chunk_size, D] variable per chunk
    Us = []
    Vs = []
    for i in range(N // chunk_size):
        ui = tf.get_variable('u_chunk_%d' % i,
                             shape=[K, chunk_size, D],
                             initializer=tf.random_normal_initializer(0, 0.1),
                             trainable=False)
        Us.append(ui)
    for i in range(M // chunk_size):
        vi = tf.get_variable('v_chunk_%d' % i,
                             shape=[K, chunk_size, D],
                             initializer=tf.random_normal_initializer(0, 0.1),
                             trainable=False)
        Vs.append(vi)
    # Full factor tensors, assembled from the chunks along axis 1.
    U = tf.concat(Us, axis=1)
    V = tf.concat(Vs, axis=1)

    n = tf.placeholder(tf.int32, shape=[], name='n')
    m = tf.placeholder(tf.int32, shape=[], name='m')
    model = pmf(n, m, D, K, select_u, select_v, alpha_u, alpha_v, alpha_pred)

    # prediction: ratings are mapped to [0, 1] via (r - 1) / 4 and the RMSE is
    # scaled back by 4
    true_rating = tf.placeholder(tf.float32, shape=[None], name='true_rating')
    normalized_rating = (true_rating - 1.0) / 4.0
    pred_rating = model.observe(u=U, v=V)["r"]
    pred_rating = tf.reduce_mean(pred_rating, axis=0)
    rmse = tf.sqrt(tf.reduce_mean(
        tf.square(pred_rating - normalized_rating))) * 4

    # One HMC sampler per factor side, configured identically.
    hmc_u = zs.HMC(step_size=1e-3,
                   n_leapfrogs=10,
                   adapt_step_size=None,
                   target_acceptance_rate=0.9)
    hmc_v = zs.HMC(step_size=1e-3,
                   n_leapfrogs=10,
                   adapt_step_size=None,
                   target_acceptance_rate=0.9)
    # Factors of the opposite side that are held fixed while one side is sampled.
    target_u = tf.gather(U, neighbor_u, axis=1)
    target_v = tf.gather(V, neighbor_v, axis=1)

    # Trainable scratch variables holding the chunk currently being sampled.
    candidate_sample_u = tf.get_variable(
        'cand_sample_chunk_u',
        shape=[K, chunk_size, D],
        initializer=tf.random_normal_initializer(0, 0.1),
        trainable=True)
    candidate_sample_v = tf.get_variable(
        'cand_sample_chunk_v',
        shape=[K, chunk_size, D],
        initializer=tf.random_normal_initializer(0, 0.1),
        trainable=True)

    def log_joint(bn):
        """Sum the log-probabilities of u, v and r over their last axis."""
        log_pu, log_pv = bn.cond_log_prob(['u', 'v'])  # [K, N], [K, M]
        log_pr = bn.cond_log_prob('r')  # [K, batch]
        log_pu = tf.reduce_sum(log_pu, axis=-1)
        log_pv = tf.reduce_sum(log_pv, axis=-1)
        log_pr = tf.reduce_sum(log_pr, axis=-1)
        return log_pu + log_pv + log_pr

    model.log_joint = log_joint

    sample_u_op, sample_u_info = hmc_u.sample(model, {
        "r": normalized_rating,
        "v": target_v
    }, {"u": candidate_sample_u})
    sample_v_op, sample_v_info = hmc_v.sample(model, {
        "r": normalized_rating,
        "u": target_u
    }, {"v": candidate_sample_v})

    candidate_idx_u = tf.placeholder(tf.int32,
                                     shape=[chunk_size],
                                     name='cand_u_chunk')
    candidate_idx_v = tf.placeholder(tf.int32,
                                     shape=[chunk_size],
                                     name='cand_v_chunk')
    candidate_u = tf.gather(U, candidate_idx_u, axis=1)  # [K, chunk_size, D]
    candidate_v = tf.gather(V, candidate_idx_v, axis=1)  # [K, chunk_size, D]

    # Ops copying the selected chunk into the candidate variables ...
    trans_cand_U = tf.assign(candidate_sample_u, candidate_u)
    trans_cand_V = tf.assign(candidate_sample_v, candidate_v)

    # ... and ops writing the sampled candidate back into chunk i.
    trans_us_cand = []
    for i in range(N // chunk_size):
        trans_us_cand.append(tf.assign(Us[i], candidate_sample_u))
    trans_vs_cand = []
    for i in range(M // chunk_size):
        trans_vs_cand.append(tf.assign(Vs[i], candidate_sample_v))

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Defined once here instead of being re-created on every epoch; it only
        # closes over objects that already exist at this point.
        def _eval(phase, data, batch_size):
            """Print the size-weighted RMSE of `data`, evaluated in batches."""
            rmses = []
            sizes = []
            time_eval = -time.time()
            n_iters = (data.shape[0] + batch_size - 1) // batch_size
            for t in range(n_iters):
                # Columns 0, 1, 2 of `data` feed select_u, select_v, true_rating.
                su = data[t * batch_size:(t + 1) * batch_size, 0]
                sv = data[t * batch_size:(t + 1) * batch_size, 1]
                tr = data[t * batch_size:(t + 1) * batch_size, 2]
                re = sess.run(rmse,
                              feed_dict={
                                  select_u: su,
                                  select_v: sv,
                                  n: N,
                                  m: M,
                                  true_rating: tr
                              })
                rmses.append(re)
                sizes.append(tr.shape[0])
            time_eval += time.time()
            print('>>> {} ({:.1f}s): rmse = {}'.format(
                phase, time_eval, average_rmse_over_batches(rmses, sizes)))

        for epoch in range(1, n_epochs + 1):
            epoch_time = -time.time()
            # Sweep over the u chunks: load chunk -> one HMC step -> store chunk.
            for i in range(N // chunk_size):
                nv, sv, tr, ssu, ssv = select_from_corpus(
                    i * chunk_size, (i + 1) * chunk_size, user_movie,
                    user_movie_score)
                _ = sess.run(trans_cand_U,
                             feed_dict={
                                 candidate_idx_u:
                                 list(
                                     range(i * chunk_size,
                                           (i + 1) * chunk_size))
                             })
                _ = sess.run(sample_u_op,
                             feed_dict={
                                 neighbor_v: sv,
                                 true_rating: tr,
                                 select_u: ssu,
                                 select_v: ssv,
                                 n: chunk_size,
                                 m: nv
                             })
                _ = sess.run(trans_us_cand[i])
            # Same sweep over the v chunks; note the swapped ssv/ssu unpacking order.
            for i in range(M // chunk_size):
                nu, su, tr, ssv, ssu = select_from_corpus(
                    i * chunk_size, (i + 1) * chunk_size, movie_user,
                    movie_user_score)
                _ = sess.run(trans_cand_V,
                             feed_dict={
                                 candidate_idx_v:
                                 list(
                                     range(i * chunk_size,
                                           (i + 1) * chunk_size))
                             })
                _ = sess.run(sample_v_op,
                             feed_dict={
                                 neighbor_u: su,
                                 true_rating: tr,
                                 select_u: ssu,
                                 select_v: ssv,
                                 n: nu,
                                 m: chunk_size
                             })
                _ = sess.run(trans_vs_cand[i])
            epoch_time += time.time()
            print("Epoch {}: {:.1f}s".format(epoch, epoch_time))

            _eval("Train", train_data, batch_size)

            if epoch % eval_freq == 0:
                _eval("Validation", valid_data, batch_size)
                _eval("Test", test_data, batch_size)
def reset_graph(seed = 42):
    """Replace the default TensorFlow graph and re-seed TensorFlow and NumPy.

    Makes results reproducible across runs: the default graph is reset first,
    then `seed` is applied to TensorFlow and to NumPy, in that order.
    """
    tf.reset_default_graph()
    for apply_seed in (tf.set_random_seed, np.random.seed):
        apply_seed(seed)
Beispiel #52
0
def run(data_dir, data_fname, split_seed, ntrain_div_classes,
        attr_normalization, alpha, eps, topk, ppr_normalization, hidden_size,
        nlayers, weight_decay, dropout, lr, max_epochs, batch_size,
        batch_mult_val, eval_step, run_val, early_stop, patience,
        nprop_inference, inf_fraction):
    '''
    Run training and inference.

    Loads the dataset, computes top-k PPR matrices for the training (and
    optionally validation) nodes, trains a PPRGo model, predicts labels and
    returns accuracy, F1, timing and memory statistics.

    Parameters
    ----------
    data_dir:
        Directory containing .npz data files.
    data_fname:
        Name of .npz data file.
    split_seed:
        Seed for splitting the dataset into train/val/test.
    ntrain_div_classes:
        Number of training nodes divided by number of classes.
    attr_normalization:
        Attribute normalization. Not used in the paper.
    alpha:
        PPR teleport probability.
    eps:
        Stopping threshold for ACL's ApproximatePR.
    topk:
        Number of PPR neighbors for each node.
    ppr_normalization:
        Adjacency matrix normalization for weighting neighbors.
    hidden_size:
        Size of the MLP's hidden layer.
    nlayers:
        Number of MLP layers.
    weight_decay:
        Weight decay used for training the MLP.
    dropout:
        Dropout used for training.
    lr:
        Learning rate.
    max_epochs:
        Maximum number of epochs (exact number if no early stopping).
    batch_size:
        Batch size for training.
    batch_mult_val:
        Multiplier for validation batch size.
    eval_step:
        Accuracy is evaluated after every this number of steps.
    run_val:
        Evaluate accuracy on validation set during training.
    early_stop:
        Use early stopping.
    patience:
        Patience for early stopping.
    nprop_inference:
        Number of propagation steps during inference
    inf_fraction:
        Fraction of nodes for which local predictions are computed during inference.

    Returns
    -------
    dict
        Keys 'accuracy_{train,val,test}' (in percent), 'f1_{train,val,test}'
        (macro-averaged), 'time_loading', 'time_preprocessing',
        'time_training', 'time_inference', 'time_logits', 'time_propagation',
        'gpu_memory', 'memory' and 'nepochs'.
    '''

    # --- Stage 1: load data -------------------------------------------------
    start = time.time()
    (adj_matrix, attr_matrix, labels, train_idx, val_idx,
     test_idx) = utils.get_data(f"{data_dir}/{data_fname}",
                                seed=split_seed,
                                ntrain_div_classes=ntrain_div_classes,
                                normalize_attr=attr_normalization)
    # Feature dimension: prefer an `n_columns` attribute if the matrix type
    # offers one, otherwise fall back to the second entry of `.shape`.
    try:
        d = attr_matrix.n_columns
    except AttributeError:
        d = attr_matrix.shape[1]
    # Number of classes, taken as the largest label value plus one.
    nc = labels.max() + 1
    time_loading = time.time() - start
    logging.info('Loading done.')

    # --- Stage 2: PPR preprocessing ----------------------------------------
    # compute the ppr vectors for train/val nodes using ACL's ApproximatePR
    start = time.time()
    topk_train = ppr.topk_ppr_matrix(adj_matrix,
                                     alpha,
                                     eps,
                                     train_idx,
                                     topk,
                                     normalization=ppr_normalization)
    if run_val:
        topk_val = ppr.topk_ppr_matrix(adj_matrix,
                                       alpha,
                                       eps,
                                       val_idx,
                                       topk,
                                       normalization=ppr_normalization)
    else:
        # No validation during training: skip the validation PPR matrix.
        topk_val = None
    time_preprocessing = time.time() - start
    logging.info('Preprocessing done.')

    # --- Stage 3: build and train the model --------------------------------
    start = time.time()
    # Fresh graph with a fixed graph-level seed.
    tf.reset_default_graph()
    tf.set_random_seed(0)

    model = pprgo.PPRGo(d,
                        nc,
                        hidden_size,
                        nlayers,
                        lr,
                        weight_decay,
                        dropout,
                        sparse_features=type(attr_matrix) is not np.ndarray)

    # NOTE(review): the session is created without a `with` block and is never
    # closed in this function; it is still needed below for prediction and
    # for the GPU memory query.
    sess = tf.compat.v1.Session()
    with sess.as_default():
        tf.compat.v1.global_variables_initializer().run()
        # NOTE(review): `ex` is neither a parameter nor a local of this
        # function; it has to exist at module level - confirm.
        nepochs, loss_hist, acc_hist, f1_hist = pprgo.train(
            sess=sess,
            model=model,
            attr_matrix=attr_matrix,
            train_idx=train_idx,
            val_idx=val_idx,
            topk_train=topk_train,
            topk_val=topk_val,
            labels=labels,
            max_epochs=max_epochs,
            batch_size=batch_size,
            batch_mult_val=batch_mult_val,
            eval_step=eval_step,
            early_stop=early_stop,
            patience=patience,
            ex=ex)
    time_training = time.time() - start
    logging.info('Training done.')

    # --- Stage 4: inference -------------------------------------------------
    start = time.time()
    predictions, time_logits, time_propagation = model.predict(
        sess=sess,
        adj_matrix=adj_matrix,
        attr_matrix=attr_matrix,
        alpha=alpha,
        nprop=nprop_inference,
        inf_fraction=inf_fraction,
        ppr_normalization=ppr_normalization)
    time_inference = time.time() - start
    logging.info('Inference done.')

    # --- Stage 5: metrics ---------------------------------------------------
    # Accuracies are reported in percent; F1 scores are macro-averaged.
    results = {
        'accuracy_train':
        100 * accuracy_score(labels[train_idx], predictions[train_idx]),
        'accuracy_val':
        100 * accuracy_score(labels[val_idx], predictions[val_idx]),
        'accuracy_test':
        100 * accuracy_score(labels[test_idx], predictions[test_idx]),
        'f1_train':
        f1_score(labels[train_idx], predictions[train_idx], average='macro'),
        'f1_val':
        f1_score(labels[val_idx], predictions[val_idx], average='macro'),
        'f1_test':
        f1_score(labels[test_idx], predictions[test_idx], average='macro'),
    }

    # Op whose evaluated value is stored under 'gpu_memory' below.
    gpu_max_bytes = tf.contrib.memory_stats.MaxBytesInUse()
    results.update({
        'time_loading': time_loading,
        'time_preprocessing': time_preprocessing,
        'time_training': time_training,
        'time_inference': time_inference,
        'time_logits': time_logits,
        'time_propagation': time_propagation,
        'gpu_memory': sess.run(gpu_max_bytes),
        'memory': utils.get_max_memory_bytes(),
        'nepochs': nepochs,
    })
    return results
Beispiel #53
0
def mnist_model(features, labels, mode, params):
    """Estimator ``model_fn`` for a small convolutional MNIST classifier.

    Args:
        features: dict whose "images" entry holds the input image batch.
        labels: class indices for the batch (not used in PREDICT mode).
        mode: one of the `tf.estimator.ModeKeys` values.
        params: dict with "optimizer" (an optimizer class) and "learning_rate".

    Returns:
        A `tf.estimator.EstimatorSpec` for the requested mode.
    """
    # Network, as specified by the exercise:
    # - convolutional layer with 8 channels, kernel size 3 and ReLU activation
    # - max pooling layer with pool size 2 and stride 2
    # - convolutional layer with 16 channels, kernel size 3 and ReLU activation
    # - max pooling layer with pool size 2 and stride 2
    # - flattening layer
    # - dense layer with 256 neurons and ReLU activation
    # - dense layer with 10 neurons and no activation
    # The exercise does not specify padding, so the layer default is used.
    hidden = tf.layers.conv2d(features["images"], filters=8, kernel_size=3, activation=tf.nn.relu)
    hidden = tf.layers.max_pooling2d(hidden, pool_size=2, strides=2)
    hidden = tf.layers.conv2d(hidden, filters=16, kernel_size=3, activation=tf.nn.relu)
    hidden = tf.layers.max_pooling2d(hidden, pool_size=2, strides=2)
    hidden = tf.layers.flatten(hidden)
    hidden = tf.layers.dense(hidden, 256, activation=tf.nn.relu)
    logits = tf.layers.dense(hidden, 10, activation=None)

    predictions = tf.argmax(logits, axis=1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Prediction needs neither labels nor a loss.
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    # Shared by TRAIN and EVAL.
    eval_metric_ops = {"accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions)}

    if mode == tf.estimator.ModeKeys.TRAIN:
        # The optimizer class and its learning rate both come from `params`.
        optimizer_class = params.get("optimizer", None)
        optimizer = optimizer_class(learning_rate=params.get("learning_rate", None))
        train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          eval_metric_ops=eval_metric_ops)

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)


# Script entry point: parse arguments, build the Estimator and load MNIST.
if __name__ == "__main__":
    import argparse
    import datetime
    import os
    import re

    # Fix random seed
    np.random.seed(42)
    tf.set_random_seed(42)

    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch_size", default=50, type=int, help="Batch size.")
    parser.add_argument("--epochs", default=3, type=int, help="Number of epochs.")
    parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
    args = parser.parse_args()

    # Create logdir name: script name, timestamp, and a compact "key=value"
    # list in which every argument name is reduced to the first character of
    # each underscore-separated part (e.g. batch_size -> bs).
    args.logdir = "logs/{}-{}-{}".format(
        os.path.basename(__file__),
        datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
        ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", key), value) for key, value in sorted(vars(args).items())))
    )
    if not os.path.exists("logs"): os.mkdir("logs") # TF 1.6 will do this by itself

    # Construct the model; the same thread limit is applied to inter-op and
    # intra-op parallelism, and the estimator gets its own random seed.
    model = tf.estimator.Estimator(
        model_fn=mnist_model,
        model_dir=args.logdir,
        config=tf.estimator.RunConfig(tf_random_seed=42,
                                      session_config=tf.ConfigProto(inter_op_parallelism_threads=args.threads,
                                                                    intra_op_parallelism_threads=args.threads)),
        params={
            "optimizer": tf.train.AdamOptimizer,
            "learning_rate": 0.001,
        })

    # Load the data
    # NOTE: the name `mnist` is first bound to the imported module and then
    # rebound to the loaded datasets object.
    from tensorflow.examples.tutorials import mnist
    mnist = mnist.input_data.read_data_sets(".", reshape=False, seed=42)

    # Train
    # (the loop body lies outside this fragment)
    for i in range(args.epochs):
Beispiel #54
0
def setRandomSeed(seed):
    """Seed Python's `random` module and TensorFlow with the same value.

    Applies `seed` to `random.seed` and then to `tf.set_random_seed` so that
    both sources of randomness are fixed.
    """
    for apply_seed in (random.seed, tf.set_random_seed):
        apply_seed(seed)
Beispiel #55
0
import datetime as dt
import statsmodels.api as sm
import itertools
plt.style.use('seaborn-white')
from statsmodels.tsa.arima_model import ARIMA
from tensorflow.keras.layers import Dense
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
# Print the TensorFlow and Keras versions in use.
print(tf.__version__)
print(keras.__version__)

# Seeding for reproducibility.
np.random.seed(1337)
# NOTE(review): this only creates a Python variable named PYTHONHASHSEED; it
# does not set the environment variable of that name.
PYTHONHASHSEED = 0
# NOTE(review): the two calls below both set the TensorFlow seed to 1337 -
# presumably two spellings of the same API; confirm and keep one.
tf.random.set_random_seed(1337)
tf.set_random_seed(1337)

featSelect = 0

#%% load data
finalDataDf = pd.read_csv('data/MyData12.csv')
finalData = finalDataDf.values
# All columns except the last are features; the last column is the target.
X = finalData[:, :-1]
y = finalData[:, -1]

#%% feature normalization  and preprocess
testRate = 0.2
# The first (1 - testRate) share of the rows is used for training.
numTrain = int(len(X) * (1 - testRate))
numTest = 1

# Per-feature mean computed on the training rows only.
m = X[:numTrain].mean(axis=0)
Beispiel #56
0
def model(X_train,
          Y_train,
          X_test,
          Y_test,
          learning_rate=0.009,
          num_epochs=100,
          minibatch_size=64,
          print_cost=True,
          isPlot=True):
    """
    Train a three-layer convolutional network with TensorFlow.
    CONV2D -> RELU -> MAXPOOL -> CONV2D -> RELU -> MAXPOOL -> FLATTEN -> FULLYCONNECTED

    Training stops when the average minibatch cost of an epoch drops to 0.4 or
    below, or after `num_epochs` epochs, whichever comes first.

    Args:
        X_train - training data, shape (None, 64, 64, 3)
        Y_train - labels of the training data, shape (None, n_y = 6)
        X_test - test data, shape (None, 64, 64, 3)
        Y_test - labels of the test data, shape (None, n_y = 6)
        learning_rate - learning rate
        num_epochs - maximum number of passes over the whole data set
        minibatch_size - size of each minibatch
        print_cost - whether to print the cost (every 5 epochs)
        isPlot - whether to plot the cost curve

    Returns:
        train_accuracy - float, accuracy on the training set
        test_accuracy - float, accuracy on the test set
        parameters - the learned parameters
    """
    ops.reset_default_graph()  # allows re-running the model without overwriting tf variables
    tf.set_random_seed(1)  # fixed graph-level seed for reproducible results
    seed = 3  # seed passed to the minibatch shuffling
    (m, n_H0, n_W0, n_C0) = X_train.shape
    n_y = Y_train.shape[1]
    costs = []

    # Create placeholders with the dimensions of the data
    X, Y = create_placeholders(n_H0, n_W0, n_C0, n_y)

    # Initialise the parameters
    parameters = initialize_parameters()

    # Forward propagation
    Z3 = forward_propagation(X, parameters)

    # Compute the cost
    cost = compute_cost(Z3, Y)

    # Back-propagation is provided by the framework; we only pick an optimizer
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)

    # Initialiser for all global variables
    init = tf.global_variables_initializer()

    # Start running
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Initialise the variables
        sess.run(init)
        # Iterate over the data set; minibatch_cost starts at 1 so the loop is
        # entered at least once
        minibatch_cost = 1
        epoch = 0
        while minibatch_cost > 0.4 and epoch < num_epochs:
            minibatch_cost = 0
            num_minibatches = int(m / minibatch_size)  # number of full minibatches
            # A different seed every epoch gives a different shuffle
            seed = seed + 1
            minibatches = cnn_utils.random_mini_batches(
                X_train, Y_train, minibatch_size, seed)

            # Process every minibatch
            for minibatch in minibatches:
                # Select one minibatch
                (minibatch_X, minibatch_Y) = minibatch
                # Minimise the cost on this minibatch
                _, temp_cost = sess.run([optimizer, cost],
                                        feed_dict={
                                            X: minibatch_X,
                                            Y: minibatch_Y
                                        })

                # Accumulate the average minibatch cost of this epoch
                minibatch_cost += temp_cost / num_minibatches

            # Optionally print the cost
            if print_cost:
                # Print once every 5 epochs
                if epoch % 5 == 0:
                    print("当前是第 " + str(epoch) + " 代,成本值为:" +
                          str(minibatch_cost))

            # Record the cost of every epoch
            costs.append(minibatch_cost)
            epoch += 1

        # Training finished: plot the cost curve
        if isPlot:
            plt.plot(np.squeeze(costs))
            plt.ylabel('cost')
            plt.xlabel('iterations (per tens)')
            plt.title("Learning rate =" + str(learning_rate))
            plt.show()

        # Prediction: compare the arg-max of the logits with the arg-max of
        # the one-hot labels (tf.argmax replaces the deprecated tf.arg_max)
        predict_op = tf.argmax(Z3, 1)
        corrent_prediction = tf.equal(predict_op, tf.argmax(Y, 1))

        # Accuracy is the mean of the per-example hits
        accuracy = tf.reduce_mean(tf.cast(corrent_prediction, "float"))
        # Note: this prints the tensor object itself, not an evaluated number
        print("corrent_prediction accuracy= " + str(accuracy))

        train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
        test_accuracy = accuracy.eval({X: X_test, Y: Y_test})

        print("训练集准确度:" + str(train_accuracy))
        print("测试集准确度:" + str(test_accuracy))

        return (train_accuracy, test_accuracy, parameters)
    def __init__(self, layer1_dim, layer2_dim, layer3_dim, layer4_dim, x_dim1, x_dim2,
                 y_dim, learning_rate, data_num):
        """Store the hyper-parameters and build the training graph.

        Creates the input placeholders, the network output (via `self.MLP()`),
        an accuracy op, a softmax cross-entropy loss, an Adam optimizer and the
        corresponding training op.
        """
        # Fixed graph-level seed so that the same random sequences are generated.
        tf.set_random_seed(1)

        # ---- input parameters ----
        # Kept on `self` so that MLP() can read them; otherwise MLP() would
        # have to take them as explicit arguments, e.g. MLP(layer1_dim).
        self.layer1_dim, self.layer2_dim = layer1_dim, layer2_dim
        self.layer3_dim, self.layer4_dim = layer3_dim, layer4_dim
        self.x_dim1, self.x_dim2 = x_dim1, x_dim2
        self.y_dim = y_dim
        self.learning_rate = learning_rate
        self.data_num = data_num

        # ---- input data ----
        # Training data: two feature inputs (xa, xb), the label y, and the
        # dropout keep probability.
        self.dropout_keep = tf.placeholder(dtype=tf.float32, name='dropout_keep')
        self.xa = tf.placeholder(tf.float32, shape=(None, self.x_dim1), name='xa-input')
        self.xb = tf.placeholder(tf.float32, shape=(None, self.x_dim2), name='xb-input')
        self.y = tf.placeholder(tf.float32, shape=(None, self.y_dim), name='y-input')

        # ---- batch norm ----
        # Not active. An earlier variant built a clipped-log cross-entropy loss
        # plus the 'loss' collection (L2 regularisation) under the UPDATE_OPS
        # control dependencies when `self.is_batch_norm` was set.

        # ---- graph structure ----
        # Predicted label scores and their softmax.
        self.y_pred = self.MLP()
        self.y_pred_softmax = tf.nn.softmax(self.y_pred)

        # Accuracy: share of rows whose arg-max prediction equals the arg-max label.
        hits = tf.equal(tf.argmax(self.y_pred_softmax, 1), tf.argmax(self.y, 1))
        self.acc = tf.reduce_mean(tf.cast(hits, tf.float32))

        # ---- model training ----
        # `loss` and `loss_metric` are two separate but identically defined ops.
        # Earlier experiments (clipped-log cross-entropy, mean squared error,
        # an asymmetric weighted error) are no longer used.
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.y_pred, labels=self.y))
        self.loss_metric = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.y_pred, labels=self.y))

        # Optimizer: Adam. Alternatives tried before: RMSProp, Adadelta,
        # Adagrad, Ftrl, plain gradient descent, Momentum, ProximalAdagrad and
        # ProximalGradientDescent.
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate, name='optimizer')

        self.train_op = self.optimizer.minimize(self.loss, name='train_op')
def main():
    """Create the model and start the training.

    Builds the queue-based input pipeline, the DeepLab-ResNet network and a
    pixel-wise softmax loss with L2 weight decay, then trains for
    ``args.num_steps`` steps with three momentum optimizers (conv layers at
    1x, final-layer weights at 10x and final-layer biases at 20x the decayed
    learning rate).  The learning rate follows
    ``base_lr * (1 - step / num_steps) ** power``.

    A checkpoint is saved every ``args.save_pred_every`` steps.  When training
    finishes, the per-step losses are passed to ``plotLoss`` and the list
    ``[last loss value, total duration in seconds]`` is written to the path
    held in the module-level name ``savingOutputDir``.

    The queue-runner threads are stopped and the session is closed even if
    training raises.
    """

    start = time.time()
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            input_size,
            args.random_scale,
            args.random_mirror,
            args.ignore_label,
            coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    # Create network.
    net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes)

    # Predictions.
    raw_output = net.layers['fc1_voc12']

    # Restore every variable, except the 'fc' ones when --not_restore_last is set.
    restore_var = [v for v in tf.global_variables() if 'fc' not in v.name or not args.not_restore_last]
    # Variables whose names contain 'beta' or 'gamma' are excluded from training.
    all_trainable = [v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name]
    fc_trainable = [v for v in all_trainable if 'fc' in v.name]
    conv_trainable = [v for v in all_trainable if 'fc' not in v.name]  # lr * 1.0
    fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name]  # lr * 10.0
    fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name]  # lr * 20.0
    assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable))
    assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

    # Predictions: ignoring all predictions with labels greater or equal than n_classes
    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes,
                               one_hot=False)  # [batch_size, h, w]
    raw_gt = tf.reshape(label_proc, [-1, ])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    prediction = tf.gather(raw_prediction, indices)

    # Pixel-wise softmax loss plus L2 weight decay on every 'weights' variable.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
    l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
    reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ])
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    # `axis` replaces the deprecated `dim` keyword (consistent with tf.argmax above).
    pred = tf.expand_dims(raw_output_up, axis=3)

    # Define loss and optimisation parameters.
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
    opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)
    opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum)

    # Compute all gradients in one pass, then slice them back per variable group.
    grads = tf.gradients(reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
    grads_conv = grads[:len(conv_trainable)]
    grads_fc_w = grads[len(conv_trainable): (len(conv_trainable) + len(fc_w_trainable))]
    grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]

    train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
    train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
    train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))

    train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    threads = []
    losses = []
    # Stays None when args.num_steps is 0, so the summary below never hits an
    # unbound name.
    loss_value = None
    try:
        sess.run(init)

        # Saver for storing checkpoints of the model.
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

        # Load variables if the checkpoint is provided.
        if args.restore_from is not None:
            loader = tf.train.Saver(var_list=restore_var)
            load(loader, sess, args.restore_from)

        # Start queue threads.
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)

        # Iterate over training steps.
        for step in range(args.num_steps):
            start_time = time.time()
            feed_dict = {step_ph: step}

            if step % args.save_pred_every == 0:
                # The input batch and the upsampled prediction are still
                # evaluated on snapshot steps, but their values are not used.
                loss_value, _, _, _, _ = sess.run(
                    [reduced_loss, image_batch, label_batch, pred, train_op],
                    feed_dict=feed_dict)
                save(saver, sess, args.snapshot_dir, step)
            else:
                loss_value, _ = sess.run([reduced_loss, train_op], feed_dict=feed_dict)
            duration = time.time() - start_time
            print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))
            losses.append(loss_value)
        plotLoss(losses)
    finally:
        # Always stop the input threads and release the session, including
        # when a training step raised.
        try:
            coord.request_stop()
            coord.join(threads)
        finally:
            sess.close()

    end = time.time()
    print("Duration taken to complete the training process is {} seconds.".format(end - start))

    # Last loss value and total wall-clock duration.
    savingParams = [loss_value, end - start]

    # write saved parameters from training
    with open(savingOutputDir, 'w') as f:
        print(savingParams, file=f)
    def __init__(self, num_vocabulary, batch_size, emb_dim, hidden_dim, sequence_length, start_token):
        """Build the generator graph: free-running sampling and teacher-forced pretraining.

        Args:
            num_vocabulary: number of distinct tokens; size of the embedding
                table and of the one-hot targets.
            batch_size: number of sequences per batch (fixed in the graph).
            emb_dim: dimensionality of the token embeddings.
            hidden_dim: width of the initial hidden state ``h0``.
            sequence_length: number of tokens generated / consumed per sequence.
            start_token: token id fed as the first input of every sequence.

        Attributes created here:
            x: int32 placeholder ``[batch_size, sequence_length]`` with the
                target token ids.
            gen_x: sampled token ids, ``[batch_size, sequence_length]``.
            gen_o: TensorArray holding, per step, the probability assigned to
                the sampled token (``[batch_size]`` each).
            g_predictions: teacher-forced softmax outputs,
                ``[batch_size, sequence_length, num_vocabulary]``.
            pretrain_loss: scalar cross-entropy of ``x`` under
                ``g_predictions``, averaged over all tokens.
            out_loss: per-sequence summed cross-entropy, ``[batch_size]``.
        """
        self.num_vocabulary = num_vocabulary
        self.batch_size = batch_size
        self.emb_dim = emb_dim
        self.hidden_dim = hidden_dim
        self.sequence_length = sequence_length
        self.start_token = tf.constant([start_token] * self.batch_size, dtype=tf.int32)
        self.g_params = []
        self.temperature = 1.0

        with tf.variable_scope('generator'):
            tf.set_random_seed(1234)
            self.g_embeddings = tf.Variable(
                tf.random_normal([self.num_vocabulary, self.emb_dim], 0.0, 1.0, seed=123314154))
            self.g_params.append(self.g_embeddings)
            self.g_recurrent_unit = self.create_recurrent_unit(self.g_params)  # maps h_tm1 to h_t for generator
            self.g_output_unit = self.create_output_unit(self.g_params)  # maps h_t to o_t (output token logits)

        # placeholder definition
        self.x = tf.placeholder(tf.int32, shape=[self.batch_size,
                                                 self.sequence_length])  # sequence of tokens generated by generator

        # processed for batch
        with tf.device("/cpu:0"):
            tf.set_random_seed(1234)
            self.processed_x = tf.transpose(tf.nn.embedding_lookup(self.g_embeddings, self.x),
                                            perm=[1, 0, 2])  # seq_length x batch_size x emb_dim

        # initial states
        self.h0 = tf.zeros([self.batch_size, self.hidden_dim])
        # NOTE(review): stacking a single tensor looks like a leftover from a
        # variant that stacked several state tensors -- kept as-is; confirm.
        self.h0 = tf.stack(self.h0)

        # generator on initial randomness
        gen_o = tensor_array_ops.TensorArray(dtype=tf.float32, size=self.sequence_length,
                                             dynamic_size=False, infer_shape=True)
        gen_x = tensor_array_ops.TensorArray(dtype=tf.int32, size=self.sequence_length,
                                             dynamic_size=False, infer_shape=True)

        def _g_recurrence(i, x_t, h_tm1, gen_o, gen_x):
            """One free-running step: sample a token and feed its embedding back in."""
            h_t = self.g_recurrent_unit(x_t, h_tm1)  # hidden_memory_tuple
            o_t = self.g_output_unit(h_t)  # batch x vocab , logits not prob
            log_prob = tf.log(tf.nn.softmax(o_t))
            next_token = tf.cast(tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]), tf.int32)
            x_tp1 = tf.nn.embedding_lookup(self.g_embeddings, next_token)  # batch x emb_dim
            # Probability the model assigned to the token it just sampled.
            gen_o = gen_o.write(i, tf.reduce_sum(
                tf.multiply(tf.one_hot(next_token, self.num_vocabulary, 1.0, 0.0), tf.nn.softmax(o_t)),
                1))  # [batch_size] , prob
            gen_x = gen_x.write(i, next_token)  # indices, batch_size
            return i + 1, x_tp1, h_t, gen_o, gen_x

        _, _, _, self.gen_o, self.gen_x = control_flow_ops.while_loop(
            cond=lambda i, _1, _2, _3, _4: i < self.sequence_length,
            body=_g_recurrence,
            loop_vars=(tf.constant(0, dtype=tf.int32),
                       tf.nn.embedding_lookup(self.g_embeddings, self.start_token), self.h0, gen_o, gen_x)
        )

        self.gen_x = self.gen_x.stack()  # seq_length x batch_size
        self.gen_x = tf.transpose(self.gen_x, perm=[1, 0])  # batch_size x seq_length

        # supervised pretraining for generator
        g_predictions = tensor_array_ops.TensorArray(
            dtype=tf.float32, size=self.sequence_length,
            dynamic_size=False, infer_shape=True)

        ta_emb_x = tensor_array_ops.TensorArray(
            dtype=tf.float32, size=self.sequence_length)
        ta_emb_x = ta_emb_x.unstack(self.processed_x)

        def _pretrain_recurrence(i, x_t, h_tm1, g_predictions):
            """One teacher-forced step: predict from x_t, then feed the true token i."""
            h_t = self.g_recurrent_unit(x_t, h_tm1)
            o_t = self.g_output_unit(h_t)
            g_predictions = g_predictions.write(i, tf.nn.softmax(o_t))  # batch x vocab_size
            x_tp1 = ta_emb_x.read(i)
            return i + 1, x_tp1, h_t, g_predictions

        _, _, _, self.g_predictions = control_flow_ops.while_loop(
            cond=lambda i, _1, _2, _3: i < self.sequence_length,
            body=_pretrain_recurrence,
            loop_vars=(tf.constant(0, dtype=tf.int32),
                       tf.nn.embedding_lookup(self.g_embeddings, self.start_token),
                       self.h0, g_predictions))

        self.g_predictions = tf.transpose(
            self.g_predictions.stack(), perm=[1, 0, 2])  # batch_size x seq_length x vocab_size

        # One-hot targets and log-probabilities, flattened to
        # (batch_size * seq_length) x vocab_size and shared by both losses.
        flat_targets = tf.one_hot(tf.to_int32(tf.reshape(self.x, [-1])), self.num_vocabulary, 1.0, 0.0)
        # Clip before the log: a probability that underflows to 0 would give
        # log(0) = -inf, and 0 * -inf = NaN would poison the whole loss.
        flat_log_predictions = tf.log(
            tf.clip_by_value(tf.reshape(self.g_predictions, [-1, self.num_vocabulary]), 1e-20, 1.0))

        # pretraining loss: mean cross-entropy per token
        self.pretrain_loss = -tf.reduce_sum(
            flat_targets * flat_log_predictions) / (self.sequence_length * self.batch_size)

        # cross-entropy summed over the tokens of each sequence
        self.out_loss = tf.reduce_sum(
            tf.reshape(
                -tf.reduce_sum(flat_targets * flat_log_predictions, 1),
                [-1, self.sequence_length]
            ), 1
        )  # batch_size
def model(X_train,
          Y_train,
          X_test,
          Y_test,
          learning_rate=0.0001,
          num_epochs=1500,
          minibatch_size=32,
          print_cost=True):
    """
    Implements a three-layer tensorflow neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX.

    Arguments:
    X_train -- training set inputs, of shape (input size = 12288, number of training examples = 1080)
    Y_train -- training set labels, of shape (output size = 6, number of training examples = 1080)
    X_test -- test set inputs, of shape (input size = 12288, number of test examples = 120)
    Y_test -- test set labels, of shape (output size = 6, number of test examples = 120)
    learning_rate -- learning rate of the optimization
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    print_cost -- True to print the cost every 100 epochs and to record it
                  every 5 epochs for the plot

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """

    ops.reset_default_graph(
    )  # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)  # to keep consistent results
    seed = 3  # to keep consistent results
    # X_train must be 2-D: (input size, number of examples).
    n_x, _ = X_train.shape
    n_y = Y_train.shape[0]  # n_y : output size
    costs = []  # To keep track of the cost

    # Create Placeholders of shape (n_x, n_y)
    X, Y = create_placeholders(n_x, n_y)

    # Initialize parameters
    parameters = initialize_parameters()

    # Forward propagation: Build the forward propagation in the tensorflow graph
    Z3 = forward_propagation(X, parameters)

    # Cost function: Add cost function to tensorflow graph
    cost = compute_cost(Z3, Y)

    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer.
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)

    # Initialize all the variables
    init = tf.global_variables_initializer()

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:

        # Run the initialization
        sess.run(init)

        # Do the training loop
        for epoch in range(num_epochs):

            # A new seed every epoch so the minibatch split differs each time.
            seed = seed + 1
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size,
                                              seed)

            # Average over the minibatches actually run instead of
            # int(m / minibatch_size): that count is wrong if the helper
            # yields a trailing partial batch (presumably it does -- verify)
            # and is zero when there are fewer examples than minibatch_size.
            cost_sum = 0.
            num_minibatches = 0

            for minibatch_X, minibatch_Y in minibatches:

                # IMPORTANT: The line that runs the graph on a minibatch.
                # Runs both the "optimizer" and the "cost" on one minibatch.
                _, minibatch_cost = sess.run([optimizer, cost],
                                             feed_dict={
                                                 X: minibatch_X,
                                                 Y: minibatch_Y
                                             })

                cost_sum += minibatch_cost
                num_minibatches += 1

            # Cost related to this epoch (0. if there were no minibatches).
            epoch_cost = cost_sum / num_minibatches if num_minibatches else 0.

            # Print the cost every 100 epochs, record it every 5 epochs
            if print_cost and epoch % 100 == 0:
                print("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost and epoch % 5 == 0:
                costs.append(epoch_cost)

        # plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        # One point is recorded every 5 epochs (see the loop above).
        plt.xlabel('iterations (per fives)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

        # lets save the parameters in a variable
        parameters = sess.run(parameters)
        print("Parameters have been trained!")

        # Calculate the correct predictions (argmax over axis 0, the class axis)
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))

        # Calculate accuracy on the test set
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))

        return parameters