Beispiel #1
0
  def create_loss():
    """Creates the loss to be optimized.

    Builds the training dataset and model from the enclosing `config`,
    evaluates the bound selected by `config.bound`, and registers the scalar
    summaries "train_ll_per_seq" and "train_ll_per_t".

    Returns:
      bound: A float Tensor containing the value of the bound that is
        being optimized. It is the mean per-timestep bound when
        `config.normalize_by_seq_len` is set, otherwise the mean
        per-sequence bound.
      loss: A float Tensor that when differentiated yields the gradients
        to apply to the model. Should be optimized via gradient descent.
        It is the negation of `bound`.

    Raises:
      ValueError: If `config.bound` is not "elbo", "iwae" or "fivo".
    """
    inputs, targets, lengths, model = create_dataset_and_model(
        config, split="train", shuffle=True, repeat=True)
    # Compute lower bounds on the log likelihood.
    if config.bound == "elbo":
      # The ELBO is obtained from the IWAE routine with a single sample.
      ll_per_seq, _, _, _ = bounds.iwae(
          model, (inputs, targets), lengths, num_samples=1)
    elif config.bound == "iwae":
      ll_per_seq, _, _, _ = bounds.iwae(
          model, (inputs, targets), lengths, num_samples=config.num_samples)
    elif config.bound == "fivo":
      ll_per_seq, _, _, _, _ = bounds.fivo(
          model, (inputs, targets), lengths, num_samples=config.num_samples,
          resampling_criterion=bounds.ess_criterion)
    else:
      # Without this branch an unknown bound would only surface later as an
      # unbound `ll_per_seq`, which hides the real configuration mistake.
      raise ValueError(
          "Unknown bound %r; expected one of 'elbo', 'iwae', 'fivo'."
          % (config.bound,))
    # Compute loss scaled by number of timesteps.
    ll_per_t = tf.reduce_mean(ll_per_seq / tf.to_float(lengths))
    ll_per_seq = tf.reduce_mean(ll_per_seq)

    tf.summary.scalar("train_ll_per_seq", ll_per_seq)
    tf.summary.scalar("train_ll_per_t", ll_per_t)

    if config.normalize_by_seq_len:
      return ll_per_t, -ll_per_t
    else:
      return ll_per_seq, -ll_per_seq
    def __init__(self,
                 sess,
                 dataset_name='facades',
                 checkpoint_dir=None):
        """Assemble the generators, discriminators, losses, summaries and saver.

        Args:
            sess: session object, stored as ``self.sess``.
            dataset_name: stored as ``self.dataset_name``.
            checkpoint_dir: stored as ``self.checkpoint_dir``.
        """
        self.sess = sess
        self.dataset_name = dataset_name
        self.checkpoint_dir = checkpoint_dir

        # A single placeholder carries both images stacked on the channel
        # axis: channels 0-2 form image A, channels 3-5 form image B.
        batch_shape = [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, 3 + 3]
        self.real_data = tf.placeholder(tf.float32, batch_shape, name='input_images')
        self.real_A = self.real_data[:, :, :, :3]
        self.real_B = self.real_data[:, :, :, 3:6]

        # Forward translations, then the round trips through the other
        # generator (the second call under each name passes reuse=True).
        self.fake_B = generator(self.real_A, name="generatorA2B")
        self.fake_A = generator(self.real_B, name="generatorB2A")
        self.fake_B_fake_A = generator(self.fake_B, reuse=True, name="generatorB2A")
        self.fake_A_fake_B = generator(self.fake_A, reuse=True, name="generatorA2B")

        # Each discriminator scores the real image first, then the fake one.
        self.DA_real = discriminator(self.real_A, reuse=False, name="descriminatorA")
        self.DB_real = discriminator(self.real_B, reuse=False, name="descriminatorB")
        self.DA_fake = discriminator(self.fake_A, reuse=True, name="descriminatorA")
        self.DB_fake = discriminator(self.fake_B, reuse=True, name="descriminatorB")

        def adversarial_term(logits, make_labels):
            # Mean sigmoid cross-entropy of `logits` against constant labels
            # built by `make_labels` (tf.ones_like or tf.zeros_like).
            return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                logits=logits, labels=make_labels(logits)))

        def weighted_l1(lhs, rhs):
            # Mean absolute difference scaled by the fixed factor 100.
            return 100 * tf.reduce_mean(tf.abs(lhs - rhs))

        # Generator objectives: fool the discriminator, reconstruct the
        # source image after a round trip, and match the paired target.
        self.g_loss_a2b = (adversarial_term(self.DB_fake, tf.ones_like)
                           + weighted_l1(self.real_A, self.fake_B_fake_A)
                           + weighted_l1(self.real_B, self.fake_B))
        self.g_loss_b2a = (adversarial_term(self.DA_fake, tf.ones_like)
                           + weighted_l1(self.real_B, self.fake_A_fake_B)
                           + weighted_l1(self.real_A, self.fake_A))
        self.g_loss = self.g_loss_a2b + self.g_loss_b2a

        # Discriminator objective: fakes labelled 0, reals labelled 1, for
        # both discriminators.
        self.d_loss = (adversarial_term(self.DB_fake, tf.zeros_like)
                       + adversarial_term(self.DB_real, tf.ones_like)
                       + adversarial_term(self.DA_fake, tf.zeros_like)
                       + adversarial_term(self.DA_real, tf.ones_like))

        # Scalar summaries for the losses.
        self.d_loss_sum = tf.summary.scalar("d_loss", self.d_loss)
        self.g_loss_sum = tf.summary.scalar("g_loss", self.g_loss)
        self.g_loss_a2b_sum = tf.summary.scalar("g_loss_a2b", self.g_loss_a2b)
        self.g_loss_b2a_sum = tf.summary.scalar("g_loss_b2a", self.g_loss_b2a)

        # Image summaries for inputs, translations and round trips.
        self.real_A_sum = tf.summary.image("real_A", self.real_A)
        self.real_B_sum = tf.summary.image("real_B", self.real_B)
        self.fake_A_sum = tf.summary.image("fake_A", self.fake_A)
        self.fake_B_sum = tf.summary.image("fake_B", self.fake_B)
        self.fake_AB_sum = tf.summary.image("fake_AB", self.fake_A_fake_B)
        self.fake_BA_sum = tf.summary.image("fake_BA", self.fake_B_fake_A)

        discriminator_summaries = [self.d_loss_sum]
        generator_summaries = [
            self.g_loss_sum, self.g_loss_a2b_sum, self.g_loss_b2a_sum,
            self.real_A_sum, self.real_B_sum, self.fake_A_sum,
            self.fake_B_sum, self.fake_AB_sum, self.fake_BA_sum,
        ]
        self.d_sum = tf.summary.merge(discriminator_summaries)
        self.g_sum = tf.summary.merge(generator_summaries)

        # Split the trainable variables by the 'd_' / 'g_' substring in their
        # names.
        # NOTE(review): this relies on the naming used inside `generator` and
        # `discriminator`, which is not visible here - confirm.
        all_trainable = tf.trainable_variables()
        self.d_vars = [v for v in all_trainable if 'd_' in v.name]
        self.g_vars = [v for v in all_trainable if 'g_' in v.name]
        self.saver = tf.train.Saver(max_to_keep=5)
Beispiel #3
0
    def build_graph(self, nn_im_w, nn_im_h, num_colour_channels=3, weights=None, biases=None):
        """Build the conv-net graph, its training op and its evaluation metrics.

        Three conv layers feed a fully connected layer, dropout and a
        single-output readout. The loss is the root-mean-square error between
        the readout and ``self.y_``. All variables are initialised at the end.

        Args:
            nn_im_w: Width of the network input image.
            nn_im_h: Height of the network input image.
            num_colour_channels: Number of channels of the input image. It now
                also sizes the flat input placeholder, so values other than 3
                produce a consistent graph.
            weights: Optional list of five initial weight values (conv1,
                conv2, conv3, fully connected, readout); ``None`` entries are
                passed through to the layer builders.
            biases: Optional list of five initial bias values, same order.
        """
        num_outputs = 1 #ofc
        self.nn_im_w = nn_im_w
        self.nn_im_h = nn_im_h

        if weights is None:
            weights = [None, None, None, None, None]
        if biases is None:
            biases = [None, None, None, None, None]

        with tf.device('/cpu:0'):
            # Placeholder variables for the input image and output images.
            # The flat input length must agree with the reshape below, so it
            # is derived from num_colour_channels rather than a literal 3.
            self.x = tf.placeholder(tf.float32, shape=[None, nn_im_w*nn_im_h*num_colour_channels])
            self.y_ = tf.placeholder(tf.float32, shape=[None, num_outputs])
            self.threshold = tf.placeholder(tf.float32)

            # Build the convolutional and pooling layers
            conv1_output_channels = 32
            conv2_output_channels = 16
            conv3_output_channels = 8

            # NOTE(review): the builders below are assumed to append their
            # outputs to self.layers, which is indexed right after each call
            # - confirm against build_conv_layer.
            conv_layer_1_input = tf.reshape(self.x, [-1, nn_im_h, nn_im_w, num_colour_channels]) #The resized input image
            self.build_conv_layer(conv_layer_1_input, num_colour_channels, conv1_output_channels, initial_weights=weights[0], initial_biases=biases[0]) # layer 1
            self.build_conv_layer(self.layers[0][0], conv1_output_channels, conv2_output_channels, initial_weights=weights[1], initial_biases=biases[1])# layer 2
            self.build_conv_layer(self.layers[1][0], conv2_output_channels, conv3_output_channels, initial_weights=weights[2], initial_biases=biases[2])# layer 3

            # Build the fully connected layer
            # NOTE(review): the //8 presumes each of the three conv layers
            # halves the spatial size - confirm against build_conv_layer.
            convnet_output_w = nn_im_w//8
            convnet_output_h = nn_im_h//8

            fully_connected_layer_input = tf.reshape(self.layers[2][0], [-1, convnet_output_w * convnet_output_h * conv3_output_channels])
            self.build_fully_connected_layer(fully_connected_layer_input, convnet_output_w, convnet_output_h, conv3_output_channels, initial_weights=weights[3], initial_biases=biases[3])

            # The dropout stage and readout layer
            self.keep_prob, self.h_drop = self.dropout(self.layers[3][0])
            self.y_conv,_,_ = self.build_readout_layer(self.h_drop, num_outputs, initial_weights=weights[4], initial_biases=biases[4])

            # Root-mean-square error between target and readout.
            self.mean_error =  tf.sqrt(tf.reduce_mean(tf.square(self.y_ - self.y_conv)))
            self.train_step = tf.train.AdamOptimizer(1e-4).minimize(self.mean_error)

            # One minus the mean absolute difference to the rounded readout.
            self.accuracy = (1.0 - tf.reduce_mean(tf.abs(self.y_ - tf.round(self.y_conv))))


            # Targets count as positive at >= 0.5; predictions count as
            # positive at >= the fed threshold.
            positive_examples = tf.greater_equal(self.y_, 0.5)
            negative_examples = tf.logical_not(positive_examples)
            positive_classifications = tf.greater_equal(self.y_conv, self.threshold)
            negative_classifications = tf.logical_not(positive_classifications)

            self.true_positive = tf.reduce_sum(tf.cast(tf.logical_and(positive_examples, positive_classifications),tf.int32)) # count the examples that are positive and classified as positive
            self.false_positive = tf.reduce_sum(tf.cast(tf.logical_and(negative_examples, positive_classifications),tf.int32)) # count the examples that are negative but classified as positive

            self.true_negative = tf.reduce_sum(tf.cast(tf.logical_and(negative_examples, negative_classifications),tf.int32)) # count the examples that are negative and classified as negative
            self.false_negative = tf.reduce_sum(tf.cast(tf.logical_and(positive_examples, negative_classifications),tf.int32)) # count the examples that are positive but classified as negative

            self.positive_count = tf.reduce_sum(tf.cast(positive_examples, tf.int32)) # count the examples that are positive
            self.negative_count = tf.reduce_sum(tf.cast(negative_examples, tf.int32)) # count the examples that are negative

            # Laid out as [[TP, FP], [FN, TN]].
            self.confusion_matrix = tf.reshape(tf.pack([self.true_positive, self.false_positive, self.false_negative, self.true_negative]), [2,2])

        self.sess.run(tf.initialize_all_variables())
Beispiel #4
0
    def __init__(self, config):
        """Build a GRU sequence classifier graph from ``config``.

        Reads ``batch_size``, ``max_seq_len``, ``output_dim``,
        ``hidden_state_dim``, ``vocab_size`` and ``embedding_dim`` from
        ``config``. Exposes placeholders (``input``, ``labels``), the logits,
        softmax outputs, mean cross-entropy loss, accuracy and the merged
        summaries (``tb_info``) as attributes.
        """
        self.config = config

        # Token ids and integer class labels for one batch.
        input_shape = [self.config.batch_size, config.max_seq_len]
        self.input = tf.placeholder('int32', input_shape, name='input')
        self.labels = tf.placeholder('int64', [self.config.batch_size], name='labels')
        self.labels_one_hot = tf.one_hot(
            indices=self.labels,
            depth=config.output_dim,
            on_value=1.0,
            off_value=0.0,
            axis=-1)

        self.gru = GRUCell(config.hidden_state_dim)

        # Embedding table initialised uniformly in [-1, 1).
        embedding_init = tf.random_uniform(
            [config.vocab_size, config.embedding_dim], -1.0, 1.0)
        embeddings_we = tf.get_variable('word_embeddings', initializer=embedding_init)
        embed_input = tf.nn.embedding_lookup(embeddings_we, self.input)
        self.emb = embed_input

        # Split along the time axis into one tensor per step, dropping the
        # singleton time dimension of each slice.
        per_step = tf.split(1, config.max_seq_len, embed_input)
        inputs = [tf.squeeze(step, squeeze_dims=[1]) for step in per_step]

        outputs, last_slu_state = tf.nn.rnn(
            cell=self.gru,
            inputs=inputs,
            dtype=tf.float32,)

        # Linear projection of the final state onto the label space.
        projection_init = tf.random_uniform(
            [config.hidden_state_dim, config.output_dim], -1.0, 1.0)
        w_project = tf.get_variable('project2labels', initializer=projection_init)
        logits_bo = tf.matmul(last_slu_state, w_project)
        self.logits = logits_bo
        tf.histogram_summary('logits', logits_bo)

        self.probabilities = tf.nn.softmax(logits_bo)
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
            logits_bo, self.labels_one_hot)
        self.loss = tf.reduce_mean(per_example_loss)
        self.predict = tf.nn.softmax(logits_bo)

        # TensorBoard
        is_correct = tf.equal(tf.argmax(self.predict, 1), self.labels)
        self.accuracy = tf.reduce_mean(tf.cast(is_correct, 'float32'), name='accuracy')
        tf.scalar_summary('CCE loss', self.loss)
        tf.scalar_summary('Accuracy', self.accuracy)
        self.tb_info = tf.merge_all_summaries()
Beispiel #5
0
  def get_rebar_gradient(self):
    """Get the rebar gradient.

    Returns:
      A tuple `(total_grads, debug, variance_objective)`: the combined
      gradients/variables, a dict of debug tensors ('ELBO', 'etas',
      'variance_objective'), and the mean squared value of the vectorized
      combined gradients.
    """
    hardELBO, nvil_gradient, logQHard = self._create_hard_elbo()

    # Pick the control-variate builder according to the hyperparameters.
    if self.hparams.quadratic:
      build_control_variate = self._create_gumbel_control_variate_quadratic
    else:
      build_control_variate = self._create_gumbel_control_variate
    gumbel_cv, _ = build_control_variate(logQHard)

    # Gradients of the (negated, averaged) NVIL term.
    nvil_objective = tf.reduce_mean(-nvil_gradient)
    f_grads = self.optimizer_class.compute_gradients(nvil_objective)

    # Gradients of the averaged control variate, scaled per layer by eta.
    eta = {}
    cv_grads = self.optimizer_class.compute_gradients(tf.reduce_mean(gumbel_cv))
    h_grads, eta_statistics = self.multiply_by_eta_per_layer(cv_grads, eta)

    model_grads = U.add_grads_and_vars(f_grads, h_grads)

    # Construct the variance objective
    flat_grads = U.vectorize(model_grads, set_none_to_zero=True)
    variance_objective = tf.reduce_mean(tf.square(flat_grads))

    debug = {
        'ELBO': hardELBO,
        'etas': eta_statistics,
        'variance_objective': variance_objective,
    }
    return model_grads, debug, variance_objective
Beispiel #6
0
    def __init__(self):
        """Load MNIST (with retries) and build the network, loss and accuracy.

        Raises:
            ValueError: If the dataset could not be read after 10 attempts;
                the last exception is attached as the second argument.
        """
        # Import data, retrying up to 10 times with a 5 second pause.
        load_failure = None
        for _attempt in range(10):
            try:
                self.mnist = input_data.read_data_sets(
                    "/tmp/tensorflow/mnist/input_data", one_hot=True)
            except Exception as exc:
                load_failure = exc
                time.sleep(5)
            else:
                load_failure = None
                break
        if load_failure:
            raise ValueError("Failed to import data", load_failure)

        # Set seed and build layers
        tf.set_random_seed(0)

        self.x = tf.placeholder(tf.float32, [None, 784], name="x")
        self.y_ = tf.placeholder(tf.float32, [None, 10], name="y_")
        y_conv, self.keep_prob = deepnn(self.x)

        # Need to define loss and optimizer attributes
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=self.y_, logits=y_conv)
        self.loss = tf.reduce_mean(per_example_loss)
        self.optimizer = tf.train.AdamOptimizer(1e-4)
        self.variables = ray_tf_utils.TensorFlowVariables(
            self.loss, tf.get_default_session())

        # For evaluating test accuracy
        predicted_class = tf.argmax(y_conv, 1)
        true_class = tf.argmax(self.y_, 1)
        hits = tf.cast(tf.equal(predicted_class, true_class), tf.float32)
        self.accuracy = tf.reduce_mean(hits)
Beispiel #7
0
 def fprop_noscope(self, x):
     """Standardize `x` over axes 1 and 2, then scale by gamma and shift by beta."""
     axes = (1, 2)
     centered = x - tf.reduce_mean(x, axes, keep_dims=True)
     mean_square = tf.reduce_mean(tf.square(centered), axes, keep_dims=True)
     # The small constant keeps the square root (and the division) away
     # from zero.
     deviation = tf.sqrt(1e-7 + mean_square)
     normalized = centered / deviation
     return normalized * self.gamma + self.beta
  def testGradient(self):
    """Compares both Jacobians returned by compute_gradient for reduce_mean."""
    in_shape = [2, 3, 4, 2]
    values = np.arange(1.0, 49.0).reshape(in_shape).astype(np.float32)
    # Each case pairs the reduction axes with the shape of the reduced output.
    cases = (
        ([1, 2], [2, 2]),
        ([0, 1, 2, 3], [1]),
        ([], [2, 3, 4, 2]),
    )
    with self.test_session():
      t = tf.convert_to_tensor(values)
      for axes, out_shape in cases:
        reduced = tf.reduce_mean(t, axes)
        jacob_t, jacob_n = tf.test.compute_gradient(
            t, in_shape, reduced, out_shape, x_init_value=values, delta=1)
        self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
Beispiel #9
0
def standard_reg():
    """Return the weighted mean-square penalty over selected `net_params`.

    Sums `coefficient * mean(square(param))` for the 'sDW1' and 'sRW'
    entries, starting from a float32 zero constant.
    """
    # ('sDW2' was commented out in the original and stays excluded here.)
    weighted_params = (
        (standard_w_weight_reg, 'sDW1'),
        (regressor_w_weight_reg, 'sRW'),
    )
    penalty = tf.constant(0.0, dtype=tf.float32)
    for coefficient, key in weighted_params:
        penalty = penalty + coefficient * tf.reduce_mean(tf.square(net_params[key]))

    return penalty
Beispiel #10
0
                def get_losses(obj_mask):
                  """Get motion constraint loss.

                  Compares a depth estimated from the vertical extent of
                  the mask (channel 0 thresholded at 0.5) with the
                  predicted depth inside the mask, both divided by the
                  mean predicted depth, and returns the mean absolute
                  difference. Uses `i`, `depth_pred` and `self` from the
                  enclosing scope.
                  """
                  # Coordinates of mask pixels above 0.5; the first column
                  # is used as the vertical coordinate.
                  foreground = tf.greater(
                      obj_mask[:, :, 0], tf.constant(0.5, dtype=tf.float32))
                  coords = tf.where(foreground)  # Shape (num_true, 2=yx)

                  # Find height of segment.
                  seg_height = (tf.reduce_max(coords[:, 0]) -
                                tf.reduce_min(coords[:, 0]))

                  # Depth approximated from the focal term, the global
                  # scale variable and the segment height.
                  f_y = self.intrinsic_mat[i, 0, 1, 1]
                  scaled_focal = f_y * self.global_scale_var
                  approx_depth = scaled_focal / tf.to_float(seg_height)

                  # Predicted depth values that fall inside the mask.
                  mask_3d = tf.greater(
                      tf.reshape(obj_mask[:, :, 0],
                                 (self.img_height, self.img_width, 1)),
                      tf.constant(0.5, dtype=tf.float32))
                  reference_pred = tf.boolean_mask(depth_pred, mask_3d)

                  # Establish loss on approx_depth, a scalar, and
                  # reference_pred, our dense prediction. Normalize both to
                  # prevent degenerative depth shrinking.
                  global_mean_depth_pred = tf.reduce_mean(depth_pred)
                  normalized_reference = reference_pred / global_mean_depth_pred
                  normalized_approx = approx_depth / global_mean_depth_pred
                  spatial_err = tf.abs(normalized_reference - normalized_approx)
                  return tf.reduce_mean(spatial_err)
Beispiel #11
0
def variable_summaries(var, name, collection_key):
    """Register a set of TensorBoard summaries for one tensor.

    Under the name scope `name`, this logs the mean, element count, standard
    deviation, max, min, a histogram and the zero fraction of `var`.

    Args:
        - var: Tensor for variable from which we want to log.
        - name: Variable name.
        - collection_key: Collection to save the summary to, can be any key of
          `VAR_LOG_LEVELS`.

    Raises:
        ValueError: If `collection_key` is not a key of `VAR_LOG_LEVELS`.
    """
    if collection_key not in VAR_LOG_LEVELS:
        raise ValueError('"{}" not in `VAR_LOG_LEVELS`'.format(collection_key))
    collections = VAR_LOG_LEVELS[collection_key]

    with tf.name_scope(name):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean, collections)

        # Total number of elements, computed from the dynamic shape.
        num_params = tf.reduce_prod(tf.shape(var))
        tf.summary.scalar('num_params', num_params, collections)

        with tf.name_scope('stddev'):
            squared_deviation = tf.square(var - mean)
            stddev = tf.sqrt(tf.reduce_mean(squared_deviation))
        tf.summary.scalar('stddev', stddev, collections)

        tf.summary.scalar('max', tf.reduce_max(var), collections)
        tf.summary.scalar('min', tf.reduce_min(var), collections)
        tf.summary.histogram('histogram', var, collections)
        tf.summary.scalar('sparsity', tf.nn.zero_fraction(var), collections)
Beispiel #12
0
def soft_triplet_loss(anchor, positive, negative, extra=True, scope="soft_triplet_loss"):
    r"""Loss for triplet networks as described in the paper:
    `Deep Metric Learning using Triplet Network
    <https://arxiv.org/abs/1412.6622>`_ by Hoffer et al.

    It is a softmax loss using the Euclidean distances
    :math:`\|anchor-positive\|_2` and :math:`\|anchor-negative\|_2` as the
    two logits, with the negative distance (index 1) as the target class.

    Args:
        anchor (tf.Tensor): anchor feature vectors of shape [Batch, N].
        positive (tf.Tensor): features of positive match of the same shape.
        negative (tf.Tensor): features of negative match of the same shape.
        extra (bool): also return distances for pos and neg.
        scope (str): name scope under which the ops are created.

    Returns:
        tf.Tensor: triplet-loss as scalar (and optionally average_pos_dist, average_neg_dist)
    """

    # Keeps the square root (and its gradient) finite for identical vectors.
    eps = 1e-10
    with tf.name_scope(scope):
        d_pos = tf.sqrt(tf.reduce_sum(tf.square(anchor - positive), 1) + eps)
        d_neg = tf.sqrt(tf.reduce_sum(tf.square(anchor - negative), 1) + eps)

        logits = tf.stack([d_pos, d_neg], axis=1)
        # d_pos is already rank 1 ([Batch]), so the labels are built straight
        # from it. Squeezing first would collapse a batch of size 1 to a
        # scalar and break the label/logit rank check of the sparse
        # cross-entropy op.
        ones = tf.ones_like(d_pos, dtype="int32")

        loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=ones))

        if extra:
            pos_dist = tf.reduce_mean(d_pos, name='pos-dist')
            neg_dist = tf.reduce_mean(d_neg, name='neg-dist')
            return loss, pos_dist, neg_dist
        else:
            return loss
Beispiel #13
0
    def __init__(self, nA,
                 learning_rate,decay,grad_clip,entropy_beta,
                 state_shape=[84,84,4],
                 master=None, device_name='/gpu:0', scope_name='master'):
        """Build the shared block, policy and value heads on `device_name`.

        Without a `master`, only an RMSProp optimizer is created. With a
        `master`, the instance additionally gets a sync op, the loss, clipped
        gradients, and a train op that applies those gradients to the
        master's variables through the master's optimizer.

        Args:
            nA: number of actions (depth of the one-hot action encoding).
            learning_rate: learning rate for the RMSProp optimizer.
            decay: RMSProp decay.
            grad_clip: clip value passed to ``tf.clip_by_average_norm``.
            entropy_beta: weight of the entropy term in the loss.
            state_shape: shape of one state, without the batch dimension.
            master: optional master network providing ``optimizer`` and
                ``train_vars``.
            device_name: device on which the graph is placed.
            scope_name: scope name passed to the builder helpers.
        """
        with tf.device(device_name):
            self.state = tf.placeholder(tf.float32, [None] + state_shape)
            block, self.scope = ActorCritic._build_shared_block(self.state, scope_name)
            self.policy, self.log_softmax_policy = ActorCritic._build_policy(block, nA, scope_name)
            self.value = ActorCritic._build_value(block, scope_name)

            # Sorted by name so that this list lines up with the master's.
            scope_variables = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, self.scope.name)
            self.train_vars = sorted(scope_variables, key=lambda v: v.name)

            if master is None:
                # The master owns the optimizer and needs no loss.
                self.optimizer = tf.train.RMSPropOptimizer(
                    learning_rate, decay=decay, use_locking=True)
                return

            self.sync_op = self._sync_op(master)
            self.action = tf.placeholder(tf.int32, [None,])
            self.target_value = tf.placeholder(tf.float32, [None,])

            advantage = self.target_value - self.value
            entropy = tf.reduce_sum(
                -1. * self.policy * self.log_softmax_policy, axis=1)
            # Log-probability of the action actually taken.
            chosen_action = tf.one_hot(self.action, nA)
            log_p_s_a = tf.reduce_sum(
                self.log_softmax_policy * chosen_action, axis=1)

            # The advantage acts as a constant in the policy term.
            self.policy_loss = tf.reduce_mean(
                tf.stop_gradient(advantage) * log_p_s_a)
            self.entropy_loss = tf.reduce_mean(entropy)
            self.value_loss = tf.reduce_mean(advantage ** 2)

            loss = (-self.policy_loss
                    - entropy_beta * self.entropy_loss
                    + self.value_loss)
            self.gradients = tf.gradients(loss, self.train_vars)
            clipped_gs = [tf.clip_by_average_norm(grad, grad_clip)
                          for grad in self.gradients]
            # Local gradients are applied to the master's variables.
            self.train_op = master.optimizer.apply_gradients(
                zip(clipped_gs, master.train_vars))
    def build_graph(self, image, label):
        """Build the residual network and return the total training cost.

        Args:
            image: image batch; it is scaled by 1/255, normalized per channel
                and transposed with the permutation [0, 3, 1, 2].
            label: label tensor; ``argmax`` over axis 1 gives the class index.

        Returns:
            The tensor named 'cost': cross-entropy plus weight decay.
        """
        assert tf.test.is_gpu_available()

        # Per-channel normalization constants.
        channel_mean = tf.constant([0.4914, 0.4822, 0.4465], dtype=tf.float32)
        channel_std = tf.constant([0.2023, 0.1994, 0.2010], dtype=tf.float32)
        image = ((image / 255.0) - channel_mean) / channel_std
        image = tf.transpose(image, [0, 3, 1, 2])

        pytorch_default_init = tf.variance_scaling_initializer(
            scale=1.0 / 3, mode='fan_in', distribution='uniform')
        channels_first = argscope(
            [Conv2D, BatchNorm, GlobalAvgPooling], data_format='channels_first')
        conv_init = argscope(Conv2D, kernel_initializer=pytorch_default_init)
        with channels_first, conv_init:
            net = Conv2D('conv0', image, 64, kernel_size=3, strides=1, use_bias=False)
            for module_idx, num_blocks in enumerate(MODULE_SIZES):
                for block_idx in range(num_blocks):
                    # Only the first block of every module after the first
                    # one uses stride 2.
                    first_of_later_module = block_idx == 0 and module_idx > 0
                    stride = 2 if first_of_later_module else 1
                    with tf.variable_scope("res%d.%d" % (module_idx, block_idx)):
                        net = preactivation_block(net, FILTER_SIZES[module_idx], stride)
            net = GlobalAvgPooling('gap', net)
            logits = FullyConnected(
                'linear', net, CLASS_NUM,
                kernel_initializer=tf.random_normal_initializer(stddev=1e-3))

        per_example_ce = tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logits)
        ce_cost = tf.reduce_mean(per_example_ce, name='cross_entropy_loss')

        # 1.0 where the true class is not the top-1 prediction.
        single_label = tf.to_int32(tf.argmax(label, axis=1))
        top1_hit = tf.nn.in_top_k(logits, single_label, 1)
        wrong = tf.to_float(tf.logical_not(top1_hit), name='wrong_vector')
        # monitor training error
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'), ce_cost)
        add_param_summary(('.*/W', ['histogram']))

        # Weight decay: L2 cost gathered by regularize_cost with the pattern
        # '.*', scaled by WEIGHT_DECAY.
        l2_cost = regularize_cost('.*', tf.nn.l2_loss)
        wd_cost = tf.multiply(WEIGHT_DECAY, l2_cost, name='wd_cost')

        return tf.add_n([ce_cost, wd_cost], name='cost')
Beispiel #15
0
        def func_for_scan(prev_output, current_element):
            """Draw one weight sample and return the five mean log-terms.

            Both arguments are ignored; `x`, `encoder`, `decoder` and `self`
            come from the enclosing scope. The result stacks, in order, the
            means of log_px, log_pz, log_qz, log_pW and log_qW.
            """
            # Sample decoder weights  __, [1], [1]
            W, log_pW, log_qW = decoder.sample_weights()

            # Sample z   [P,B,Z], [P,B], [P,B]
            z, log_pz, log_qz = self.sample_z(x, encoder, decoder, W)

            # Flatten particles and batch for the decoder: [PB,Z]
            flat_rows = self.n_z_particles * self.batch_size
            z_flat = tf.reshape(z, [flat_rows, self.z_size])

            # Decode [PB,X], then restore the particle axis: [P,B,X]
            y_flat = decoder.feedforward(W, z_flat)
            y = tf.reshape(
                y_flat, [self.n_z_particles, self.batch_size, self.x_size])

            # Likelihood p(x|z)  [P,B]
            log_px = log_bern(x, y)

            log_terms = (log_px, log_pz, log_qz, log_pW, log_qW)
            return tf.stack([tf.reduce_mean(term) for term in log_terms])
    def create_z_cycloss(self, z, x_hat, encoder, generator):
        """Build the optional latent cycle-consistency loss.

        Two terms can be enabled through the config:

        * ``config.z_hat_lambda``: distance between ``z_hat`` and the
          re-encoding of ``x_hat``, scaled by ``config.z_hat_cycloss_lambda``.
        * ``config.z_cycloss_lambda``: distance between ``z`` and the
          re-encoding of the generated sample, scaled by that lambda.

        Returns:
            The sum of the enabled terms, a single term, or ``None`` when
            neither is enabled.
        """
        config = self.config
        ops = self.ops
        distance = config.distance or ops.lookup('l1_distance')

        if config.z_hat_lambda:
            z_hat_cycloss_lambda = config.z_hat_cycloss_lambda
            recode_z_hat = encoder.reuse(x_hat)
            # NOTE(review): `z_hat` is neither a parameter nor a local of
            # this method; unless it exists in an outer scope, this branch
            # raises NameError. The gate reads `z_hat_lambda` while the
            # weight reads `z_hat_cycloss_lambda` - confirm both are intended.
            z_hat_cycloss = tf.reduce_mean(distance(z_hat, recode_z_hat))
            z_hat_cycloss = z_hat_cycloss * z_hat_cycloss_lambda

        if config.z_cycloss_lambda:
            recode_z = encoder.reuse(generator.reuse(z))
            z_cycloss = tf.reduce_mean(distance(z, recode_z))
            z_cycloss_lambda = config.z_cycloss_lambda
            if z_cycloss_lambda is None:
                z_cycloss_lambda = 0
            z_cycloss = z_cycloss * z_cycloss_lambda

        # Combine whichever terms were built above.
        if config.z_hat_lambda and config.z_cycloss_lambda:
            return z_cycloss + z_hat_cycloss
        if config.z_cycloss_lambda:
            return z_cycloss
        if config.z_hat_lambda:
            return z_hat_cycloss
        return None
Beispiel #17
0
    def create_graph(self):
        """Build the embedding model inside the instance's graph.

        The looked-up embeddings of the input ids are averaged over axis 1
        and trained with a sampled softmax loss using Adagrad. A row-wise
        L2-normalized copy of the embedding table is stored as well.
        """
        with self.__graph.as_default():
            # One row of `window_size * 2` ids per example, one label each.
            self.__featurePlaceHolder = tf.placeholder(
                dtype=tf.int32, shape=[None, self.__window_size * 2])
            self.__labelPlaceHolder = tf.placeholder(dtype=tf.int32, shape=[None, 1])

            table_shape = [self.__vocabulary_size, self.__embedding_size]
            onehot_lookup_tables = tf.Variable(
                initial_value=tf.truncated_normal(shape=table_shape))

            embedding = tf.nn.embedding_lookup(
                params=onehot_lookup_tables, ids=self.__featurePlaceHolder)

            # Average the looked-up vectors of each example.
            projection_out = tf.reduce_mean(embedding, axis=1)

            softmax_weight = tf.Variable(
                initial_value=tf.truncated_normal(
                    shape=[self.__vocabulary_size, self.__embedding_size]))
            softmax_biases = tf.Variable(
                initial_value=tf.zeros([self.__vocabulary_size]))

            sampled_loss_per_batch = tf.nn.sampled_softmax_loss(
                weights=softmax_weight,
                biases=softmax_biases,
                inputs=projection_out,
                labels=self.__labelPlaceHolder,
                num_sampled=self.__num_sampled,
                num_classes=self.__vocabulary_size)

            self.__loss = tf.reduce_mean(sampled_loss_per_batch)
            self.__optimizer = tf.train.AdagradOptimizer(1.0).minimize(self.__loss)

            # Row-wise L2 norm of the embedding table.
            squared_sum = tf.reduce_sum(
                tf.square(onehot_lookup_tables), 1, keep_dims=True)
            norm = tf.sqrt(squared_sum)
            self.__normalized_embedding = onehot_lookup_tables / norm
Beispiel #18
0
    def __init__(self, num_features, num_output, l2_reg_lambda=0.0, neg_output=False):
        """Build a single linear layer with an activation and a squared loss.

        Args:
            num_features: width of the input placeholder.
            num_output: width of the target placeholder and of the layer.
            l2_reg_lambda: weight of the L2 penalty on the layer's W and b.
            neg_output: when true the activation is ELU, otherwise ReLU.
        """
        self.input_x = tf.placeholder(tf.float32, [None, num_features], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_output], name="input_y")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        with tf.name_scope("softmax"):
            weight_shape = [num_features, num_output]
            W = tf.Variable(tf.truncated_normal(weight_shape, stddev=0.1))
            b = tf.Variable(tf.constant(0.1, shape=[num_output]))

            self.raw_scores = tf.nn.xw_plus_b(self.input_x, W, b, name="scores")
            # The ELU op keeps the op name "tanh" it had in the original.
            if neg_output:
                self.scores = tf.nn.elu(self.raw_scores, name="tanh")
            else:
                self.scores = tf.nn.relu(self.raw_scores, name="relu")

            l2_loss = l2_loss + tf.nn.l2_loss(W)
            l2_loss = l2_loss + tf.nn.l2_loss(b)

        with tf.name_scope("loss"):
            # Element-wise squared error; its mean plus the penalty is the
            # training loss, the mean absolute error is kept separately.
            self.losses = tf.square(tf.sub(self.scores, self.input_y))
            absolute_error = tf.abs(tf.sub(self.scores, self.input_y))
            self.avgloss = tf.reduce_mean(absolute_error)
            self.loss = tf.reduce_mean(self.losses) + l2_reg_lambda * l2_loss
  def get(self, rewards, pads, values, final_values,
          log_probs, prev_log_probs, target_log_probs,
          entropies, logits):
    """Build the loss, baseline, training ops and merged summaries.

    Rewards and summed log-probabilities are masked by `1 - pads`, reshaped
    to `[seq_length, -1, self.num_samples]` and summed over the first axis.
    The baseline is the mean of reward-plus-bonus over axis 1. Only
    `rewards`, `pads` and `log_probs` are used; the other arguments are
    ignored.

    Returns:
      A tuple `(loss, raw_loss, baseline, gradient_ops, merged_summaries)`;
      `raw_loss` is currently the same tensor as `loss`.
    """
    seq_length = tf.shape(rewards)[0]
    grouped_shape = [seq_length, -1, self.num_samples]

    # Zero out padded steps before summing over time.
    not_pad = tf.reshape(1 - pads, grouped_shape)
    rewards = not_pad * tf.reshape(rewards, grouped_shape)
    log_probs = not_pad * tf.reshape(sum(log_probs), grouped_shape)

    total_rewards = tf.reduce_sum(rewards, 0)
    total_log_probs = tf.reduce_sum(log_probs, 0)

    bonus = self.get_bonus(total_rewards, total_log_probs)
    rewards_and_bonus = total_rewards + self.bonus_weight * bonus

    baseline = tf.reduce_mean(rewards_and_bonus, 1, keep_dims=True)

    # The centred reward is a constant with respect to the gradient.
    centred_reward = tf.stop_gradient(rewards_and_bonus - baseline)
    loss = tf.reduce_mean(-centred_reward * total_log_probs)
    raw_loss = loss  # TODO

    gradient_ops = self.training_ops(
        loss, learning_rate=self.learning_rate)

    tf.summary.histogram('log_probs', total_log_probs)
    tf.summary.histogram('rewards', total_rewards)
    tf.summary.scalar('avg_rewards', tf.reduce_mean(total_rewards))
    tf.summary.scalar('loss', loss)

    return loss, raw_loss, baseline, gradient_ops, tf.summary.merge_all()
Beispiel #20
0
def cnn_setup(x, y, keep_prob, lr, stddev):
    """Build a conv-conv-fc-fc classifier graph together with its training op.

    The input is reshaped to [-1, 28, 28, 1] and passed through two
    5x5 convolution + ReLU + 2x2 max-pool stages (32 then 64 filters), a
    1024-unit ReLU layer with dropout, and a final 10-way linear layer.

    The cross-entropy is computed from the logits with the fused
    `softmax_cross_entropy_with_logits` op rather than as
    `-sum(y * log(softmax(logits)))`, which yields NaN/inf as soon as a
    predicted probability underflows to zero.

    Args:
        x: input tensor; must be reshapeable to [-1, 28, 28, 1].
        y: target distribution per example with 10 columns (presumably
            one-hot labels -- confirm against the caller).
        keep_prob: keep probability forwarded to `tf.nn.dropout`.
        lr: learning rate for the Adam optimizer.
        stddev: forwarded to the `weight` helper for every weight tensor.

    Returns:
        A tuple `(train_op, accuracy, cross_entropy)`.
    """
    first_hidden = 32
    second_hidden = 64
    fc_hidden = 1024

    # First convolutional stage.
    W_conv1 = weight([5, 5, 1, first_hidden], stddev)
    b_conv1 = bias([first_hidden])
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    # Second convolutional stage.
    W_conv2 = weight([5, 5, first_hidden, second_hidden], stddev)
    b_conv2 = bias([second_hidden])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    # Fully connected layer with dropout; the flattened size assumes the two
    # pooling stages reduce 28x28 to 7x7.
    W_fc1 = weight([7 * 7 * second_hidden, fc_hidden], stddev)
    b_fc1 = bias([fc_hidden])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * second_hidden])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Output layer.
    W_fc2 = weight([fc_hidden, 10], stddev)
    b_fc2 = bias([10])
    logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    y_conv = tf.nn.softmax(logits)

    # Numerically stable equivalent of mean(-sum(y * log(softmax(logits)))).
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
    correct_pred = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y, 1))
    return (tf.train.AdamOptimizer(lr).minimize(cross_entropy),
            tf.reduce_mean(tf.cast(correct_pred, tf.float32)), cross_entropy)
 def test_expected_value(self):
   """Checks shape, dtype, positivity and moments of `random_rayleigh` draws.

   The sample mean and variance over the last axis are compared with
   `sqrt(pi / 2) * scale` and `0.5 * (4 - pi) * scale**2` respectively.
   """
   shape_ = np.array([2, int(1e3)], np.int32)
   shape = (tf.constant(shape_) if self.use_static_shape
            else tf.placeholder_with_default(shape_, shape=None))
   # This shape will require broadcasting before sampling.
   scale_ = np.linspace(0.1, 0.5, 3 * 2).astype(self.dtype).reshape(3, 2)
   scale = (tf.constant(scale_) if self.use_static_shape
            else tf.placeholder_with_default(scale_, shape=None))
   x = tfp.math.random_rayleigh(shape,
                                scale=scale[..., tf.newaxis],
                                dtype=self.dtype,
                                seed=42)
   self.assertEqual(self.dtype, x.dtype.as_numpy_dtype)
   final_shape_ = [3, 2, int(1e3)]
   if self.use_static_shape:
     self.assertAllEqual(final_shape_, x.shape)
   sample_mean = tf.reduce_mean(x, axis=-1, keepdims=True)
   sample_var = tf.reduce_mean(tf.squared_difference(
       x, sample_mean), axis=-1)
   [x_, sample_mean_, sample_var_] = self.evaluate([
       x, sample_mean[..., 0], sample_var])
   self.assertAllEqual(final_shape_, x_.shape)
   # Use the builtin `bool`: the `np.bool` alias was removed from NumPy.
   self.assertAllEqual(np.ones_like(x_, dtype=bool), x_ > 0.)
   self.assertAllClose(np.sqrt(np.pi / 2.) * scale_, sample_mean_,
                       atol=0.05, rtol=0.)
   self.assertAllClose(0.5 * (4. - np.pi) * scale_**2., sample_var_,
                       atol=0.05, rtol=0.)
  def _summarize_input(self, groundtruth_boxes_list, match_list):
    """Adds scalar summaries of per-image box and anchor counts.

    For each quantity the per-image counts are stacked into one tensor, cast
    to float and averaged over the batch. Four summaries are written: the
    number of groundtruth boxes, and the number of anchors that are matched
    (positive), unmatched (negative) and ignored.

    Args:
      groundtruth_boxes_list: a list of 2-D tensors of shape [num_boxes, 4]
        containing corners of the groundtruth boxes.
      match_list: a list of matcher.Match objects, one per image of the
        batch, with rows corresponding to groundtruth boxes and columns
        corresponding to anchors.
    """
    per_image_counts = (
        ('Input/AvgNumGroundtruthBoxesPerImage',
         tf.stack([tf.shape(boxes)[0] for boxes in groundtruth_boxes_list])),
        ('Input/AvgNumPositiveAnchorsPerImage',
         tf.stack([m.num_matched_columns() for m in match_list])),
        ('Input/AvgNumNegativeAnchorsPerImage',
         tf.stack([m.num_unmatched_columns() for m in match_list])),
        ('Input/AvgNumIgnoredAnchorsPerImage',
         tf.stack([m.num_ignored_columns() for m in match_list])),
    )
    for tag, counts in per_image_counts:
      tf.summary.scalar(tag, tf.reduce_mean(tf.to_float(counts)))
  def eval_summary(self, ground_truth, prediction):
    """
      Compute evaluation metrics (for EVAL mode).

    The mean classification and localization losses are registered as
    evaluation tensors; the returned value is their weighted combination
    divided by the number of positives (at least 1).

    Args:
      ground_truth: Ground truth, shape: (?, #priors, 4 + #classes).
      prediction: Dictionary of predicted tensors with keys
        'locs' (?, #priors, 4), 'confs' (?, #priors, #classes) and
        'logits' (?, #priors, #classes).
    Returns:
      Loss stub, shape: (1,).
    """
    gt_locs = ground_truth[:, :, :4]
    gt_classes = ground_truth[:, :, 4:]

    # Both per-prior losses have shape (batch_size, num_priors).
    per_prior_loc = self._localization_loss(gt_locs, prediction['locs'])
    per_prior_cls = self._classification_loss(gt_classes, prediction['logits'])

    # A prior counts as positive according to the class columns from index 5
    # on (column 4 is skipped -- presumably the background class).
    positives = tf.reduce_max(ground_truth[:, :, 5:], axis=-1)
    num_positives = tf.reduce_sum(positives)

    # Per-image sums, shape (batch_size,); localization only counts positives.
    loc_loss = tf.reduce_sum(per_prior_loc * positives, axis=-1)
    cls_loss = tf.reduce_sum(per_prior_cls, axis=-1)

    self.__add_evaluation({
      'total_classification_loss':  tf.reduce_mean(cls_loss),
      'total_localization_loss': tf.reduce_mean(loc_loss),
    })

    combined = cls_loss + self.loc_weight * loc_loss
    return tf.reduce_mean(combined) / tf.maximum(1.0, num_positives)
Beispiel #24
0
def _potential_scale_reduction_single_state(state, independent_chain_ndims):
  """potential_scale_reduction for one single state `Tensor`.

  Args:
    state: Value convertible to a `Tensor`. Axis 0 is treated as the sample
      axis and the following `independent_chain_ndims` axes as chain axes.
    independent_chain_ndims: Number of axes, immediately after the sample
      axis, that index independent chains.

  Returns:
    `((m + 1) / m) * (w + b_div_n) / w - (n - 1) / (m * n)`, where `n`, `m`,
    `w` and `b_div_n` are the quantities computed in the body.
  """
  with tf.name_scope(
      'potential_scale_reduction_single_state',
      values=[state, independent_chain_ndims]):
    # We assume exactly one leading dimension indexes e.g. correlated samples
    # from each Markov chain.
    state = tf.convert_to_tensor(state, name='state')
    sample_ndims = 1

    # Axis index ranges: samples first, then chains, then both together.
    sample_axis = tf.range(0, sample_ndims)
    chain_axis = tf.range(sample_ndims,
                          sample_ndims + independent_chain_ndims)
    sample_and_chain_axis = tf.range(
        0, sample_ndims + independent_chain_ndims)

    # Presumably n is the number of samples per chain and m the number of
    # chains -- confirm against `_axis_size`.
    n = _axis_size(state, sample_axis)
    m = _axis_size(state, chain_axis)

    # In the language of Brooks and Gelman (1998),
    # B / n is the between chain variance, the variance of the chain means.
    # W is the within sequence variance, the mean of the chain variances.
    b_div_n = _reduce_variance(
        tf.reduce_mean(state, sample_axis, keepdims=True),
        sample_and_chain_axis,
        biased=False)
    w = tf.reduce_mean(
        _reduce_variance(state, sample_axis, keepdims=True, biased=True),
        sample_and_chain_axis)

    # sigma^2_+ is an estimate of the true variance, which would be unbiased if
    # each chain was drawn from the target.  c.f. "law of total variance."
    sigma_2_plus = w + b_div_n

    return ((m + 1.) / m) * sigma_2_plus / w - (n - 1.) / (m * n)
Beispiel #25
0
    def init_opt(self):
        """Build the surrogate loss and mean-KL constraint and register them.

        Creates symbolic inputs for observations, actions, advantages, the
        policy's state info and the old distribution parameters, then passes
        the negated likelihood-ratio-weighted advantage as the loss and the
        mean KL (bounded by ``self.step_size``) as the constraint to
        ``self.optimizer.update_opt``. For recurrent policies every input has
        one extra leading axis and a ``valid`` mask weights the averages.

        Returns:
            An empty dict.
        """
        is_recurrent = int(self.policy.recurrent)
        # One leading axis for feed-forward policies, two for recurrent ones.
        n_lead = 1 + is_recurrent
        lead_shape = [None] * n_lead

        obs_var = self.env.observation_space.new_tensor_variable(
            'obs', extra_dims=n_lead)
        action_var = self.env.action_space.new_tensor_variable(
            'action', extra_dims=n_lead)
        advantage_var = tensor_utils.new_tensor(
            'advantage', ndim=n_lead, dtype=tf.float32)
        dist = self.policy.distribution

        old_dist_info_vars = {}
        for key, shape in dist.dist_info_specs:
            old_dist_info_vars[key] = tf.placeholder(
                tf.float32, shape=lead_shape + list(shape), name='old_%s' % key)
        old_dist_info_vars_list = [
            old_dist_info_vars[key] for key in dist.dist_info_keys]

        state_info_vars = {}
        for key, shape in self.policy.state_info_specs:
            state_info_vars[key] = tf.placeholder(
                tf.float32, shape=lead_shape + list(shape), name=key)
        state_info_vars_list = [
            state_info_vars[key] for key in self.policy.state_info_keys]

        valid_var = None
        if is_recurrent:
            valid_var = tf.placeholder(
                tf.float32, shape=[None, None], name="valid")

        dist_info_vars = self.policy.dist_info_sym(obs_var, state_info_vars)
        kl = dist.kl_sym(old_dist_info_vars, dist_info_vars)
        lr = dist.likelihood_ratio_sym(
            action_var, old_dist_info_vars, dist_info_vars)

        input_list = [obs_var, action_var, advantage_var]
        input_list = input_list + state_info_vars_list + old_dist_info_vars_list

        if is_recurrent:
            # Average only over the entries weighted by the valid mask.
            mean_kl = tf.reduce_sum(kl * valid_var) / tf.reduce_sum(valid_var)
            surr_loss = - tf.reduce_sum(
                lr * advantage_var * valid_var) / tf.reduce_sum(valid_var)
            input_list.append(valid_var)
        else:
            mean_kl = tf.reduce_mean(kl)
            surr_loss = - tf.reduce_mean(lr * advantage_var)

        self.optimizer.update_opt(
            loss=surr_loss,
            target=self.policy,
            leq_constraint=(mean_kl, self.step_size),
            inputs=input_list,
            constraint_name="mean_kl"
        )
        return {}
  def testSampleConsistentStats(self):
    """Compares sample statistics of an `Independent` with its analytic ones.

    Mean, variance, standard deviation and entropy estimated from 10000 draws
    must match the distribution's own values within relative tolerances; the
    mode must equal `loc`.
    """
    loc = np.float32([[-1., 1], [1, -1]])
    scale = np.float32([1., 0.5])
    num_draws = int(1e4)
    with self.test_session() as sess:
      base = tfd.MultivariateNormalDiag(
          loc=loc, scale_identity_multiplier=scale)
      ind = tfd.Independent(distribution=base, reinterpreted_batch_ndims=1)

      draws = ind.sample(num_draws, seed=42)
      sample_mean = tf.reduce_mean(draws, axis=0)
      sample_var = tf.reduce_mean(
          tf.squared_difference(draws, sample_mean), axis=0)
      sample_std = tf.sqrt(sample_var)
      sample_entropy = -tf.reduce_mean(ind.log_prob(draws), axis=0)

      empirical = [sample_mean, sample_var, sample_std, sample_entropy]
      analytic = [ind.mean(), ind.variance(), ind.stddev(), ind.entropy(),
                  ind.mode()]
      values = sess.run(empirical + analytic)

      # Relative tolerance per statistic: mean, variance, stddev, entropy.
      rtols = (0.02, 0.04, 0.02, 0.01)
      for got, want, rtol in zip(values[:4], values[4:8], rtols):
        self.assertAllClose(got, want, rtol=rtol, atol=0.)
      self.assertAllClose(loc, values[8], rtol=1e-6, atol=0.)
Beispiel #27
0
    def build_graph(self, image_pos):
        """Build the generator/discriminator graph and the Wasserstein losses.

        Sets ``self.d_loss`` (critic loss plus 10x the gradient penalty) and
        ``self.g_loss``, registers moving summaries, and finally calls
        ``self.collect_variables()``.

        Args:
            image_pos: batch of real images; rescaled here with
                ``x / 128.0 - 1`` (presumably from [0, 255] to roughly
                [-1, 1] -- TODO confirm the input range).
        """
        image_pos = image_pos / 128.0 - 1

        # Noise is sampled by default but can be overridden by feeding 'z'.
        z = tf.random_normal([self.batch, self.zdim], name='z_train')
        z = tf.placeholder_with_default(z, [None, self.zdim], name='z')

        with argscope([Conv2D, Conv2DTranspose, FullyConnected],
                      kernel_initializer=tf.truncated_normal_initializer(stddev=0.02)):
            with tf.variable_scope('gen'):
                image_gen = self.generator(z)
            tf.summary.image('generated-samples', image_gen, max_outputs=30)

            # Random per-example points on the segment between a real image
            # and a generated one; the penalty is evaluated at these points.
            alpha = tf.random_uniform(shape=[self.batch, 1, 1, 1],
                                      minval=0., maxval=1., name='alpha')
            interp = image_pos + alpha * (image_gen - image_pos)

            # The discriminator is applied three times inside one scope
            # (presumably sharing variables -- confirm in `discriminator`).
            with tf.variable_scope('discrim'):
                vecpos = self.discriminator(image_pos)
                vecneg = self.discriminator(image_gen)
                vec_interp = self.discriminator(interp)

        # the Wasserstein-GAN losses
        self.d_loss = tf.reduce_mean(vecneg - vecpos, name='d_loss')
        self.g_loss = tf.negative(tf.reduce_mean(vecneg), name='g_loss')

        # the gradient penalty loss
        # Per-example L2 norm of d(vec_interp)/d(interp) over axes 1-3,
        # penalized by its squared distance from 1.
        gradients = tf.gradients(vec_interp, [interp])[0]
        gradients = tf.sqrt(tf.reduce_sum(tf.square(gradients), [1, 2, 3]))
        gradients_rms = symbolic_functions.rms(gradients, 'gradient_rms')
        gradient_penalty = tf.reduce_mean(tf.square(gradients - 1), name='gradient_penalty')
        add_moving_summary(self.d_loss, self.g_loss, gradient_penalty, gradients_rms)

        # The summaries above report the un-penalized d_loss; the penalty is
        # added (weight 10) only afterwards.
        self.d_loss = tf.add(self.d_loss, 10 * gradient_penalty)

        self.collect_variables()
def get_train(train_ph_dict,var_dict,var_ph_dict):
    """Build the training op, total loss and prediction error.

    The error is the mean absolute value of
    ``q(state_0)[choice_0] - TRAIN_BETA * cont * max(q(state_1)) - reward_1``,
    where the first ``get_q`` call uses ``var_dict`` and the second uses
    ``var_ph_dict``. The loss adds a size-normalized L2 term over all
    variables in ``var_dict`` and a penalty on ``|mean(var_dict['b5'])|``.

    Returns:
        ``(train, loss, score_diff)``: the gradient-descent op (step size 0.5,
        restricted to ``var_dict`` values), the full loss, and the mean
        absolute error without regularization.
    """
    # Output of get_q for the chosen action (one of 9) in state_0.
    choice_mask = tf.one_hot(train_ph_dict['choice_0'], 9, axis=-1, dtype=tf.float32)
    chosen_q = tf.reduce_sum(
        choice_mask * get_q(train_ph_dict['state_0'], var_dict),
        reduction_indices=[1])

    # Best next-state value, gated by 'cont' and scaled by TRAIN_BETA.
    next_q = get_q(train_ph_dict['state_1'], var_ph_dict)
    best_next_q = tf.reduce_max(next_q, reduction_indices=[1])
    scaled_next_q = best_next_q * train_ph_dict['cont'] * tf.constant(TRAIN_BETA)

    # L2 term accumulated over every variable, normalized by element count.
    l2_total = tf.constant(0.0)
    n_cells = tf.constant(0.0)
    for var in var_dict.values():
        l2_total = l2_total + get_l2(var)
        n_cells = n_cells + tf.to_float(tf.size(var))
    l2_penalty = l2_total / n_cells
    l2_penalty = l2_penalty / tf.constant(ELEMENT_L2_FACTOR*ELEMENT_L2_FACTOR)
    l2_penalty = l2_penalty * tf.constant(L2_WEIGHT)

    # Absolute (not squared) error, averaged over the batch.
    residual = chosen_q - scaled_next_q - train_ph_dict['reward_1']
    score_diff = tf.reduce_mean(tf.abs(residual))

    bias_penalty = tf.abs(tf.reduce_mean(var_dict['b5'])) * tf.constant(L2_WEIGHT)
    loss = score_diff + l2_penalty
    loss = loss + (bias_penalty)

    train = tf.train.GradientDescentOptimizer(0.5).minimize(loss,var_list=var_dict.values())

    return train, loss, score_diff
Beispiel #29
0
def batchnormalize(X, eps=1e-8, g=None, b=None):
    """Normalize `X` with batch statistics, optionally applying scale and shift.

    For rank-4 input the statistics are taken over axes 0, 1 and 2; for
    rank-2 input over axis 0. The result is `(X - mean) / sqrt(var + eps)`.
    When both `g` and `b` are given they are reshaped to broadcast along the
    last axis and applied as `X * g + b`.

    Raises:
        NotImplementedError: if the static rank of `X` is neither 2 nor 4.
    """
    rank = X.get_shape().ndims
    if rank == 4:
        reduce_axes = [0, 1, 2]
        param_shape = [1, 1, 1, -1]
    elif rank == 2:
        reduce_axes = 0
        param_shape = [1, -1]
    else:
        raise NotImplementedError

    mean = tf.reduce_mean(X, reduce_axes)
    variance = tf.reduce_mean(tf.square(X - mean), reduce_axes)
    normalized = (X - mean) / tf.sqrt(variance + eps)

    # Scale and shift are only applied when both are supplied.
    if g is not None and b is not None:
        gain = tf.reshape(g, param_shape)
        shift = tf.reshape(b, param_shape)
        normalized = normalized * gain + shift

    return normalized
Beispiel #30
0
def calc_reward(outputs):
  """Build the classification reward, the combined cost and its train op.

  Only the last element of `outputs` is used. A softmax over a freshly
  created weight matrix gives class probabilities; the per-example reward is
  1 where the arg-max class equals the label and 0 otherwise. The objective
  sums the label-weighted log class probabilities and the reward-weighted log
  location probabilities, and the cost is its negated batch mean.

  Returns:
    A tuple (cost, reward, max_p_y, correct_y, train_op).
  """
  # look at ONLY THE END of the sequence
  final_output = tf.reshape(outputs[-1], (batch_size, cell_out_size))
  h_a_out = weight_variable((cell_out_size, n_classes))

  p_y = tf.nn.softmax(tf.matmul(final_output, h_a_out))
  max_p_y = tf.arg_max(p_y, 1)
  correct_y = tf.cast(labels_placeholder, tf.int64)

  # reward per example, and its batch average
  per_example_reward = tf.cast(tf.equal(max_p_y, correct_y), tf.float32)
  reward = tf.reduce_mean(per_example_reward)

  p_loc = tf.reshape(gaussian_pdf(mean_locs, sampled_locs),
                     (batch_size, glimpses * 2))
  reward_column = tf.reshape(per_example_reward, (batch_size, 1))

  class_term = tf.log(p_y + 1e-5) * onehot_labels_placeholder
  location_term = tf.log(p_loc + 1e-5) * reward_column
  per_example_objective = tf.reduce_sum(
      tf.concat(1, [class_term, location_term]), 1)
  cost = -tf.reduce_mean(per_example_objective, 0)

  train_op = tf.train.AdamOptimizer(lr).minimize(cost)

  return cost, reward, max_p_y, correct_y, train_op
Beispiel #31
0
    def generator(self, img_batch):
        """Build the encoder/decoder generator inside variable scope 'g_'.

        The encoder applies four conv2d + batch-norm + ReLU stages (called
        with d_w=2, d_h=2) ending in 1024 channels; the result is reshaped to
        [self.batch_size, 2, 2, 1024]. The decoder alternates nearest-neighbour
        resizing (to 4, 8, 16, then self.crop_size) with conv2d layers and
        finishes with tanh. Every intermediate tensor is stored on `self`, and
        each layer's static shape is printed while the graph is built.

        Args:
            img_batch: input image batch. NOTE(review): the reshape to
                2x2x1024 fixes the encoder's output size, so the input
                resolution is constrained -- confirm against `conv2d`'s
                padding and stride behaviour.

        Returns:
            A tuple `(self.fg_fg, gen_reg, variables)`: the tanh output, the
            mean squared difference between `img_batch` and that output, and
            the variables collected from the 'g_' scope.
        """
        with tf.variable_scope('g_') as vs:
            """ -----------------------------------------------------------------------------------
            ENCODER 
            ----------------------------------------------------------------------------------- """
            print('ENCODER')

            # NOTE(review): unlike the later layers, this batch_norm call is
            # given no explicit scope, so its variables get a default name.
            self.en_h0 = conv2d(img_batch, self.channels, 128, k_h=4, k_w=4, d_w=2, d_h=2, name="enc_conv1")
            self.en_h0 = tf.nn.relu(tf.contrib.layers.batch_norm(self.en_h0))
            add_activation_summary(self.en_h0)
            print(self.en_h0.get_shape().as_list())

            self.en_h1 = conv2d(self.en_h0, 128, 256, k_h=4, k_w=4, d_w=2, d_h=2, name="enc_conv2")
            self.en_h1 = tf.contrib.layers.batch_norm(self.en_h1, scope="enc_bn2")
            self.en_h1 = tf.nn.relu(self.en_h1)
            add_activation_summary(self.en_h1)
            print(self.en_h1.get_shape().as_list())

            self.en_h2 = conv2d(self.en_h1, 256, 512, k_h=4, k_w=4, d_w=2, d_h=2, name="enc_conv3")
            self.en_h2 = tf.contrib.layers.batch_norm(self.en_h2, scope="enc_bn3")
            self.en_h2 = tf.nn.relu(self.en_h2)
            add_activation_summary(self.en_h2)
            print(self.en_h2.get_shape().as_list())

            self.en_h3 = conv2d(self.en_h2, 512, 1024, k_h=4, k_w=4, d_w=2, d_h=2, name="enc_conv4")
            self.en_h3 = tf.contrib.layers.batch_norm(self.en_h3, scope="enc_bn4")
            self.en_h3 = tf.nn.relu(self.en_h3)
            add_activation_summary(self.en_h3)
            print(self.en_h3.get_shape().as_list())

            """ -----------------------------------------------------------------------------------
            GENERATOR 
            ----------------------------------------------------------------------------------- """
            print('GENERATOR')

            # The encoder output is used as the latent code in 2x2x1024 form.
            self.z_ = tf.reshape(self.en_h3, [self.batch_size, 2, 2, 1024])
            print(self.z_.get_shape().as_list())

            # Each decoder stage upsamples by nearest-neighbour resizing and
            # then applies a conv2d called with d_h=1, d_w=1.
            self.fg_h1 = tf.image.resize_images(self.z_, [4,4], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            self.fg_h1 = conv2d(self.fg_h1, 1024, 512, d_h=1, d_w=1, name="gen_conv1")
            self.fg_h1 = tf.nn.relu(tf.contrib.layers.batch_norm(self.fg_h1, scope='g_f_bn1'), name='g_f_relu1')
            add_activation_summary(self.fg_h1)
            print(self.fg_h1.get_shape().as_list())

            self.fg_h2 = tf.image.resize_images(self.fg_h1, [8,8], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            self.fg_h2 = conv2d(self.fg_h2, 512, 256, d_h=1, d_w=1, name="gen_conv2")
            self.fg_h2 = tf.nn.relu(tf.contrib.layers.batch_norm(self.fg_h2, scope='g_f_bn2'), name='g_f_relu2')
            add_activation_summary(self.fg_h2)
            print(self.fg_h2.get_shape().as_list())

            self.fg_h3 = tf.image.resize_images(self.fg_h2, [16,16], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            self.fg_h3 = conv2d(self.fg_h3, 256, 128, d_h=1, d_w=1, name="gen_conv3")
            self.fg_h3 = tf.nn.relu(tf.contrib.layers.batch_norm(self.fg_h3, scope='g_f_bn3'), name='g_f_relu3')
            add_activation_summary(self.fg_h3)
            print(self.fg_h3.get_shape().as_list())

            # Final stage: no batch norm, tanh output.
            self.fg_h4 = tf.image.resize_images(self.fg_h3, [self.crop_size,self.crop_size], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            self.fg_h4 = conv2d(self.fg_h4, 128, self.channels, d_h=1, d_w=1, name="gen_conv4")
            self.fg_fg = tf.nn.tanh(self.fg_h4, name='g_f_actication')
            print(self.fg_fg.get_shape().as_list())

            # Mean squared difference between the input and the output.
            gen_reg = tf.reduce_mean(tf.square(img_batch - self.fg_fg))

        variables = tf.contrib.framework.get_variables(vs)
        return self.fg_fg, gen_reg, variables
if __name__ == '__main__':
    tf.reset_default_graph()

    # Setup input, e.g. data that changes every batch
    X = tf.placeholder(tf.float32, [
        None, 32, 32, 3
    ])  # First dim is None, and gets set automatically based on batch size
    y = tf.placeholder(tf.int64, [None])
    is_training = tf.placeholder(tf.bool)

    # Construct model
    tf_pred = simple_model(X, y)

    # Define loss
    total_loss = tf.losses.hinge_loss(tf.one_hot(y, 10), logits=tf_pred)
    mean_loss = tf.reduce_mean(total_loss)

    # Define optimizer
    optimizer = tf.train.AdamOptimizer(5e-4)  # Set learning rate
    train_step = optimizer.minimize(mean_loss)

    data = data_utils.get_preprocessed_CIFAR10('datasets/cifar-10-batches-py',
                                               should_transpose=False)
    Xtr, ytr = data['X_train'], data['y_train']

    with tf.Session() as sess:
        with tf.device('/gpu:0'):
            sess.run(tf.global_variables_initializer())
            run_model(sess,
                      tf_pred,
                      X,
# Remainder of the network definition; `max1_output` comes from the first
# stage defined before this point.
relu1_output=new_relu_layer(input=max1_output,name='relu_layer1')
# Second stage: convolution (6 -> 16 channels, filter size 5), pool, ReLU.
conv2_ouput,weights_conv2=new_convolution_layer(input=relu1_output,num_input_channel=6,filter_size=5,num_filter=16,name='conv_layer2')
max2_output=new_pool_layer(input=conv2_ouput,name='maxpool_layer2')
relu2_output=new_relu_layer(input=max2_output,name='relu_layer2')
# Flatten every non-batch dimension into one feature axis for the dense layers.
num_features=relu2_output.get_shape()[1:4].num_elements()
layer_flat=tf.reshape(relu2_output,[-1,num_features])
fc1_output=new_fc_layer(input=layer_flat,num_inputs=num_features,num_outputs=128,name='fc_layer1')
relu3_output=new_relu_layer(input=fc1_output,name='relu_layer3')
fc2_output=new_fc_layer(input=relu3_output,num_inputs=128,num_outputs=10,name='fc_layer2')
# Class probabilities and the index of the most probable class.
with tf.variable_scope("Softmax"):
  y_pred=tf.nn.softmax(fc2_output)
  y_pred_class=tf.argmax(y_pred,dimension=1)

# The loss is computed from the raw `fc2_output` logits, not from `y_pred`.
with tf.variable_scope('entropy'):
  crossentropy=tf.nn.softmax_cross_entropy_with_logits(logits=fc2_output,labels=y_true)
  cost = tf.reduce_mean(crossentropy)

with tf.variable_scope('optimiser'):
  optimizer=tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)

# Fraction of examples whose predicted class equals `y_true_cls`.
with tf.name_scope("accuracy"):
    correct_prediction = tf.equal(y_pred_class, y_true_cls)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Two writers with separate log directories (by their names, one for training
# and one for validation summaries).
writer = tf.summary.FileWriter("Training_FileWriter/")
writer1 = tf.summary.FileWriter("Validation_FileWriter/")


# Add the cost and accuracy to summary
tf.summary.scalar('loss', cost)
tf.summary.scalar('accuracy', accuracy)
Beispiel #34
0
# `m`, `n` and `housing` are defined before this point (presumably the number
# of rows, the number of features and the loaded dataset -- confirm upstream).
print("{},{}".format(m, n))
# Prepend a column of ones so the bias is learned as part of theta.
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]
print(housing_data_plus_bias.shape)
# Standardize the features, then prepend the same column of ones.
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]
print(scaled_housing_data_plus_bias.shape)

learning_rate = 0.01

# Linear model: X has n + 1 columns to match the bias-augmented data.
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
# Parameters start uniformly in [-1, 1) with a fixed seed.
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name='theta')
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

# Run options requesting a full trace, plus the object that receives the metadata.
options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
run_metadata = tf.RunMetadata()

n_epochs = 10
batch_size = 100
# Round up so a final partial batch still counts as a batch.
n_batches = int(np.ceil(m / batch_size))

def fetch_batch(epoch, batch_index, batch_size):
    know = np.random.seed(epoch * n_batches +  batch_index)
    indices = np.random.randint(m, size=batch_size)
Beispiel #35
0
    def run(self, dataset):
        """Train the network on ``dataset.train`` and evaluate on ``dataset.test``.

        The default graph is reset, both data splits are standardized in place
        (each with its own global mean and standard deviation), the model from
        ``self.conv`` is trained with Adam for ``self.epochs`` epochs, and the
        accuracy is then averaged over full test batches.

        The session is closed even when training or evaluation raises.

        Args:
            dataset: object exposing ``train`` and ``test`` splits (each with
                ``data``, ``num_examples`` and ``next_batch(batch_size, i)``)
                and a ``shuffle()`` method. NOTE: ``dataset.train.data`` and
                ``dataset.test.data`` are overwritten with their standardized
                versions.

        Returns:
            Mean accuracy over the evaluated test batches.

        Raises:
            ZeroDivisionError: if the test split holds fewer examples than
                one batch, so that no test batch is evaluated.
        """
        tf.reset_default_graph()

        x = tf.placeholder(tf.float32, [None, self.n_input, self.n_input])
        y = tf.placeholder(tf.float32, [None, self.n_classes])

        # Standardize each split with its own global statistics.
        dataset.train.data = (
            dataset.train.data - np.mean(dataset.train.data)) / np.std(
                dataset.train.data)
        dataset.test.data = (
            dataset.test.data - np.mean(dataset.test.data)) / np.std(
                dataset.test.data)

        # Construct model
        pred = self.conv(x)
        result = tf.nn.softmax(pred)
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y))

        optimizer = tf.train.AdamOptimizer(
            learning_rate=self.lr).minimize(cost)

        saver = tf.train.Saver()

        # Initializing the variables
        init = tf.global_variables_initializer()

        # Launch the graph
        sess = tf.Session()
        try:
            sess.run(init)

            if self.load:
                saver.restore(sess, '/tmp/cnn')

            total_batch = int(dataset.train.num_examples / self.batch_size)

            # Training cycle
            for epoch in range(self.epochs):
                avg_cost = 0.

                dataset.shuffle()

                # Loop over all batches
                for i in range(total_batch):
                    batch_x, batch_y = dataset.train.next_batch(
                        self.batch_size, i)

                    _, c, r = sess.run([optimizer, cost, result],
                                       feed_dict={
                                           x: batch_x,
                                           y: batch_y
                                       })

                    # Compute average loss
                    avg_cost += c / total_batch

                if self.verbose:
                    print("Epoch:", '%04d' % (epoch + 1), "cost=",
                          "{:.9f}".format(avg_cost))

            if self.save:
                saver.save(sess, "/tmp/cnn")

            # Test model
            correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))

            # Calculate accuracy
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

            accs = []

            # Only full batches are evaluated; a trailing partial batch is
            # dropped by the integer truncation.
            total_test_batch = int(dataset.test.num_examples / self.batch_size)
            for i in range(total_test_batch):
                batch_x, batch_y = dataset.test.next_batch(self.batch_size, i)
                accs.append(
                    accuracy.eval({x: batch_x, y: batch_y}, session=sess))
        finally:
            sess.close()

        # Function-call form so the module also parses on Python 3.
        print(accs)

        return sum(accs) / float(len(accs))
Beispiel #36
0
def loop_encode_decode_stateful(seq_len, batch_size, vocab_size, input_tokens,
                                output_tokens, gen_encoder, gen_decoder,
                                enc_units, tf_ratio, train_test, s_stateful,
                                mut_freq, pos_variations_count, batch_step):
    """Run the encoder/decoder over consecutive windows and accumulate the loss.

    `input_tokens` is split along axis 1 into windows of `s_stateful`
    positions. For each window the encoder is called with the states carried
    over from the previous window, Gaussian noise is added to the decoder
    state, and the decoder is stepped once per position. At every position a
    class-weighted cross-entropy against `output_tokens` is added to the loss.

    `seq_len`, `vocab_size`, `tf_ratio`, `train_test`, `mut_freq` and
    `batch_step` are not read in this body. `enc_stddev` and `dec_stddev` are
    not parameters; they must be defined at module level.

    NOTE(review): `o_tokens` is only assigned when `output_tokens` is
    non-empty, yet `i_tokens = o_tokens` runs unconditionally, so an empty
    `output_tokens` looks like it would fail on the first step -- confirm the
    intended inference path.

    Returns:
        A tuple `(global_logits, gen_encoder, gen_decoder, loss)`, where
        `global_logits` is the per-step decoder output concatenated on axis -2.
    """
    loss = tf.constant(0.0)
    global_logits = list()
    enc_state_f = tf.zeros((batch_size, enc_units))
    enc_state_b = tf.zeros((batch_size, enc_units))
    # Truncating division: positions beyond the last full window are ignored.
    n_stateful_batches = int(input_tokens.shape[1] / float(s_stateful))
    # The first decoder input is token 0 for every sequence.
    i_tokens = tf.fill([batch_size, 1], 0)
    for stateful_index in range(n_stateful_batches):
        s_batch = input_tokens[:, stateful_index *
                               s_stateful:(stateful_index + 1) * s_stateful]
        # Encoder states are fed back in, carrying context across windows.
        enc_output, enc_state_f, enc_state_b = gen_encoder(
            [s_batch, enc_state_f, enc_state_b], training=True)
        dec_state = tf.concat([enc_state_f, enc_state_b], -1)
        # Perturb the initial decoder state with Gaussian noise.
        dec_state = tf.math.add(
            dec_state,
            tf.random.normal((dec_state.shape[0], dec_state.shape[1]),
                             stddev=enc_stddev))

        for t in range(s_batch.shape[1]):
            dec_result, dec_state = gen_decoder([i_tokens, dec_state],
                                                training=True)
            dec_state = tf.math.add(
                dec_state,
                tf.random.normal((dec_state.shape[0], dec_state.shape[1]),
                                 stddev=dec_stddev))
            # Absolute position of this step within the whole sequence.
            orig_t = stateful_index * s_stateful + t
            if len(output_tokens) > 0:
                o_tokens = output_tokens[:, orig_t:orig_t + 1]

                # collect different variations at each POS
                u_var_distribution = np.array(
                    list(pos_variations_count[str(orig_t)].values()))
                unique_cls = np.array(
                    list(pos_variations_count[str(orig_t)].keys()))

                # Expand the counts back into one label per occurrence so
                # that inverse-frequency class weights can be derived.
                all_cls = tf.repeat(unique_cls,
                                    repeats=u_var_distribution).numpy()
                random.shuffle(all_cls)
                y = all_cls
                classes = unique_cls
                le = LabelEncoder()
                y_ind = le.fit_transform(y)
                recip_freq = len(y) / (len(le.classes_) *
                                       np.bincount(y_ind).astype(np.float64))
                class_wt = recip_freq[le.transform(classes)]
                beta = 0.9999
                s_wts = np.sum(class_wt)

                # NOTE(review): only `exp_class_var_pos` feeds the loss below;
                # the other two dicts and `real_class_wts` are filled but not
                # read afterwards.
                class_var_pos = dict()
                norm_class_var_pos = dict()
                exp_class_var_pos = dict()
                real_class_wts = list()
                for k_i, key in enumerate(unique_cls):
                    # loss input taken from paper: https://arxiv.org/pdf/1901.05555.pdf
                    class_var_pos[key] = class_wt[k_i]  #/ float(s_wts)
                    norm_class_var_pos[key] = class_wt[k_i] / float(s_wts)
                    exp_class_var_pos[key] = (1 - beta) / (
                        1 - beta**pos_variations_count[str(orig_t)][key])
                    real_class_wts.append(exp_class_var_pos[key])
                '''for key in exp_class_var_pos:
                    exp_class_var_pos[key] = exp_class_var_pos[key] / np.sum(real_class_wts)'''

                # Per-example weight looked up by the target token, then
                # normalized to sum to 1 over the batch.
                # NOTE(review): the lookup key is a token taken from
                # `o_tokens`, while the dict keys come from
                # `pos_variations_count` -- confirm both use the same type.
                exp_norm_u_var_distribution = np.zeros((batch_size))
                uniform_wts = np.zeros((batch_size))
                for pos_idx, pos in enumerate(
                        np.reshape(o_tokens, (batch_size, ))):
                    exp_norm_u_var_distribution[pos_idx] = exp_class_var_pos[
                        pos]  #/ float(np.sum(real_class_wts))
                exp_norm_u_var_distribution = exp_norm_u_var_distribution / np.sum(
                    exp_norm_u_var_distribution)
                weighted_loss = tf.reduce_mean(
                    cross_entropy_loss(
                        o_tokens,
                        dec_result,
                        sample_weight=exp_norm_u_var_distribution))
                #step_loss = weighted_loss
                loss += weighted_loss
                global_logits.append(dec_result)
            # The target token becomes the next decoder input.
            i_tokens = o_tokens

    global_logits = tf.concat(global_logits, axis=-2)
    #loss = loss / seq_len
    return global_logits, gen_encoder, gen_decoder, loss
Beispiel #37
0
def loss_function(real, pred):
    """Return the batch-mean sparse categorical cross-entropy.

    Args:
        real: Ground-truth labels, forwarded as ``y_true``.
        pred: Model predictions, forwarded as ``y_pred``. They are treated as
            probabilities rather than logits (``from_logits=False``).

    Returns:
        A scalar tensor: the mean over all unreduced loss elements.
    """
    # reduction='none' yields one loss value per element; the averaging is
    # done explicitly afterwards.
    per_element = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=False, reduction='none')(y_true=real, y_pred=pred)
    return tf.reduce_mean(per_element)
# Backward-direction LSTM cell with n_hidden units (the forward cell,
# lstm_fw_cell, is created before this fragment).
lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(n_hidden)

# bidirectional_dynamic_rnn returns (outputs, states); `outputs` is a
# (forward, backward) pair and the states are discarded here.
# Each direction is presumably [batch_size, len_seq, n_hidden] — TODO confirm.
outputs, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell,
                                             lstm_bw_cell,
                                             X,
                                             dtype=tf.float32)

# Join both directions along the last axis (axis 2).
outputs = tf.concat([outputs[0], outputs[1]],
                    2)  # outputs[0] : lstm_fw, outputs[1] : lstm_bw
# Swap the first two axes so indexing [-1] below selects the last time step.
# NOTE(review): after the concat the last dimension is presumably
# 2 * n_hidden rather than n_hidden — confirm against the shape of W.
outputs = tf.transpose(outputs, [1, 0, 2])  # [n_step, batch_size, features]
outputs = outputs[-1]  # [batch_size, features]

# Linear projection of the last-step features to unnormalised class scores.
model = tf.matmul(outputs, W) + b

# Mean softmax cross-entropy between the scores and the labels Y, minimised
# with Adam at a fixed learning rate of 0.001.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)

# Predicted class index per example, cast to int32.
prediction = tf.cast(tf.argmax(model, 1), tf.int32)

# Training: initialise all variables in a fresh session.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# Build the full training batch from `sentence` (make_batch is defined
# elsewhere).
input_batch, target_batch = make_batch(sentence)
for epoch in range(10000):
    _, loss = sess.run([optimizer, cost],
                       feed_dict={
                           X: input_batch,
# Build graph
# Placeholders for a batch of real samples and for the generator's input
# noise; the batch dimension is left open.
real_data = tf.placeholder(tf.float32, shape=[None, OUTPUT_DIM])
input_noise = tf.placeholder(tf.float32, shape=[None, NOISE_DIM])
fake_data = Generator(BATCH_SIZE, input_noise)

# Discriminator returns two values per call: a score, and a second output
# that is later compared against the input noise.
# NOTE(review): the second output presumably comes from the 'Invertor'
# parameters collected below — confirm in Discriminator.
dis_real, real_noise = Discriminator(real_data)
dis_fake, invert_noise = Discriminator(fake_data)

# Parameter groups looked up by name so that each optimizer can be
# restricted to its own variables.
gen_params = lib.params_with_name('Generator')
dis_params = lib.params_with_name('Discriminator')
inv_params = lib.params_with_name('Invertor')

# Optimize cost function
if MODE == 'wgan-gp':
  inv_cost = tf.reduce_mean(tf.square(input_noise - invert_noise))
  gen_cost = -tf.reduce_mean(dis_fake)
  dis_cost = tf.reduce_mean(dis_fake) - tf.reduce_mean(dis_real)

  alpha = tf.random_uniform(shape=[BATCH_SIZE, 1], minval=0., maxval=1.)
  differences = fake_data - real_data
  interpolates = real_data + alpha * differences
  gradients = tf.gradients(Discriminator(interpolates)[0], [interpolates])[0]
  slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=1))
  gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)
  dis_cost_gp = dis_cost + LAMBDA * gradient_penalty

  inv_train_op = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5,
                                        beta2=0.9).minimize(inv_cost,
                                                            var_list=inv_params)
  gen_train_op = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5,
Beispiel #40
0
    def build_model(self):
        """Build the generator/discriminator graph, losses, summaries and optimizers.

        Reads ``self.videos`` (which must already be set by the caller, since
        it is used before being reshaped here) plus the size attributes
        ``batch_size``, ``crop_size``, ``frame_size``, ``channels``, the
        per-variable flag string ``wvars`` and the optimizer settings
        ``learning_rate`` / ``beta1``.

        Sets on ``self``: ``input_images``, ``videos_fake``, ``gen_reg``,
        the generator/discriminator variable lists, the critic outputs and
        costs (``d_real``, ``d_fake``, ``g_cost_pure``, ``d_cost``,
        ``d_penalty``, ``d_cost_final``), the RMSE metrics, the three train
        ops (``d_adam``, ``g_adam_gan``, ``g_adam_first``), ``sample`` and
        ``summary_op``.
        """
        print("Setting up model...")

        # input_images = First frame of video
        self.input_images = tf.placeholder(tf.float32, [self.batch_size, self.crop_size, self.crop_size, self.channels])
        self.videos_fake, self.gen_reg, self.generator_variables = self.generator(self.input_images)

        # Value range of the raw generator output.
        self.fake_min = tf.reduce_min(self.videos_fake)
        self.fake_max = tf.reduce_max(self.videos_fake)

        print('Shapes of videos:')
        print('Original:')
        print(self.videos.shape)
        print('Generated:')
        print(self.videos_fake.shape)

        # First discriminator call creates its variables (reuse=False).
        self.d_real, self.discriminator_variables = self.discriminator(self.videos, reuse=False)

        # merging initial frame and generated to create full forecast "video"
        # (stacked along a new axis 1)
        self.videos_fake = tf.stack([self.input_images, self.videos_fake], axis=1)

        self.d_fake, _ = self.discriminator(self.videos_fake, reuse=True)

        # Generator cost: negated mean discriminator score on generated videos.
        self.g_cost_pure = -tf.reduce_mean(self.d_fake)

        # self.g_cost = self.g_cost_pure + 1000 * self.gen_reg

        # Discriminator cost: mean score on fakes minus mean score on reals.
        self.d_cost = tf.reduce_mean(self.d_fake) - tf.reduce_mean(self.d_real)

        # Bring real and generated videos to a common 5-D layout for the metrics.
        self.videos = tf.reshape(self.videos, [self.batch_size, self.frame_size, self.crop_size, self.crop_size, self.channels])
        self.videos_fake = tf.reshape(self.videos_fake, [self.batch_size, self.frame_size, self.crop_size, self.crop_size, self.channels])

        # Per-variable RMSE: self.wvars supplies up to five '0'/'1' flags. For
        # each '1' the next channel index (par) is compared; disabled slots
        # get a constant 0.
        help_v = [0,0,0,0,0]
        par = 0
        for c,k in zip(self.wvars, range(5)):
            if c == '1':
                help_v[k] = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(self.videos[:,:,:,:,par], self.videos_fake[:,:,:,:,par]))))
                par += 1
            else:
                help_v[k] = tf.constant(0.0)

        self.rmse_temp = help_v[0]
        self.rmse_cc = help_v[1]
        self.rmse_sh = help_v[2]
        self.rmse_sp = help_v[3]
        self.rmse_geo = help_v[4]

        tf.summary.scalar('rmse_temp', self.rmse_temp)
        tf.summary.scalar('rmse_cc', self.rmse_cc)
        tf.summary.scalar('rmse_sh', self.rmse_sh)
        tf.summary.scalar('rmse_sp', self.rmse_sp)
        tf.summary.scalar('rmse_geo', self.rmse_geo)

        # RMSE over all elements of the real vs. generated videos.
        self.rmse = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(self.videos, self.videos_fake))))

        # self.mae = tf.metrics.mean_absolute_error(self.videos_fake, self.videos)

        # error of discriminator failing to evaluate generated sample as fake - good job generator
        tf.summary.scalar("g_cost_pure", self.g_cost_pure)
        # diff between original image and created image/sequence in generator
        tf.summary.scalar("g_cost_regularizer", self.gen_reg)
        # error of - saying fake is fake and original is original (when fake == orig and orig == fake)
        tf.summary.scalar("d_cost", self.d_cost)
        
        tf.summary.scalar("RMSE_overal", self.rmse)
        # tf.summary.tensor_summary("MAE", self.mae)

        # Gradient penalty: one random mixing coefficient per sample, used to
        # interpolate between the flattened real and generated videos.
        alpha = tf.random_uniform(
            shape=[self.batch_size, 1],
            minval=0.,
            maxval=1.
        )

        dim = self.frame_size * self.crop_size * self.crop_size * self.channels

        vid = tf.reshape(self.videos, [self.batch_size, dim])
        fake = tf.reshape(self.videos_fake, [self.batch_size, dim])
        differences = fake - vid
        interpolates = vid + (alpha * differences)
        d_hat, _ = self.discriminator(tf.reshape(interpolates, [self.batch_size, self.frame_size, self.crop_size,
                                                                self.crop_size, self.channels]), reuse=True)
        # Penalise deviation of the per-sample gradient norm from 1.
        gradients = tf.gradients(d_hat, [interpolates])[0]
        slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
        gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)

        # Penalty weight is fixed at 10.
        self.d_penalty = 10 * gradient_penalty

        tf.summary.scalar('d_penalty', self.d_penalty)

        self.d_cost_final = self.d_cost + self.d_penalty

        tf.summary.scalar("d_cost_penalized", self.d_cost_final)

        # NOTE(review): self.g_adam is initialised to None and never assigned
        # again in this method; the generator ops are g_adam_gan / g_adam_first.
        self.d_adam, self.g_adam = None, None
        # Run pending UPDATE_OPS before each training step.
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            # Discriminator step on the penalised cost.
            self.d_adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=self.beta1, beta2=0.999) \
                .minimize(self.d_cost_final, var_list=self.discriminator_variables)
            # Generator step on the adversarial cost only.
            self.g_adam_gan = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=self.beta1, beta2=0.999) \
                .minimize(self.g_cost_pure, var_list=self.generator_variables)
            # Generator step on the regulariser returned by the generator only.
            self.g_adam_first = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=self.beta1, beta2=0.999) \
                .minimize(self.gen_reg, var_list=self.generator_variables)

        self.sample = self.videos_fake
        self.summary_op = tf.summary.merge_all()
Beispiel #41
0
    # Pooling stage: 2x2 max-pool over conv2, flatten, then dropout.
    with tf.name_scope("pool3"):
        pool3 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
        # The flatten hard-codes a 14x14 spatial map per feature map —
        # TODO confirm against the convolution stack above this fragment.
        pool3_flat = tf.reshape(pool3, shape=[-1, pool3_fmaps * 14 * 14])
        pool3_flat_drop = tf.layers.dropout(pool3_flat, conv2_dropout_rate, training=training)
    
    # Fully connected hidden layer followed by its own dropout.
    with tf.name_scope("fc1"):
        fc1 = tf.layers.dense(pool3_flat_drop, n_fc1, activation=tf.nn.relu, name="fc1")
        fc1_drop = tf.layers.dropout(fc1, fc1_dropout_rate, training=training)
    
    with tf.name_scope("output"):
        # Build the logits from the dropout output. The original passed `fc1`,
        # which left fc1_drop unused and silently disabled the fc1 dropout.
        logits = tf.layers.dense(fc1_drop, n_outputs, name="output")
        Y_proba = tf.nn.softmax(logits, name="Y_proba")
    
    # Mean sparse softmax cross-entropy, minimised with plain SGD (lr 0.05).
    with tf.name_scope("train"):
        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
        loss = tf.reduce_mean(xentropy)
        optimizer = tf.train.GradientDescentOptimizer(0.05)
        training_op = optimizer.minimize(loss)
    
    init = tf.global_variables_initializer()

# Evaluation ops are pinned to the CPU.
with tf.device('/CPU:0'):
    with tf.name_scope("eval"):
        # True where the label y is the top-1 prediction of the logits.
        correct = tf.nn.in_top_k(logits, y, 1)
        # Fraction of correct predictions in the batch.
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

# Saver for checkpointing the variables.
saver = tf.train.Saver()

#graph = tf.get_default_graph()
#writer = tf.summary.FileWriter("./simple_graph_events2")
#writer.add_graph(graph=graph)
                         padding='SAME')

# Flatten the pooled feature maps to [batch, 6 * 1 * M] for the dense layer.
YY = tf.reshape(Y3_pool, shape=[-1, 6 * 1 * M])

# Dense layer 4: matmul -> batch norm -> ReLU -> dropout (keep prob pkeep).
Y4l = tf.matmul(YY, W4)
# batchnorm (defined elsewhere) also returns an op that updates its moving
# averages. NOTE(review): `iter` shadows the Python builtin of that name.
Y4bn, update_ema4 = batchnorm(Y4l, tst, iter, B4)
Y4r = tf.nn.relu(Y4bn)
Y4 = tf.nn.dropout(Y4r, pkeep)
# Output layer: raw scores and their softmax.
Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)

# Single op that runs all four moving-average updates.
update_ema = tf.group(update_ema1, update_ema2, update_ema3, update_ema4)

# Softmax cross-entropy against the labels Y_; the batch mean is scaled by
# 100.
cross_entropy_ = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits,
                                                         labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy_) * 100

# Accuracy: fraction of examples whose arg-max prediction matches the
# arg-max of the label.
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

print('Model built!')

# Load the test file and fetch it as one minibatch covering every example.
dataX, dataY = load_data(test_file)
dataset = Dataset(dataX, dataY)
__X, __Y = dataset.minibatch(len(dataY))

print("Data loaded!")

prediction = []

for k in range(k_fold):
Beispiel #43
0
    def __init__(
            self, sequence_length, num_classes, vocab_size,
            embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0):
        """Build a CNN text classifier graph.

        The graph is: embedding lookup -> one convolution + max-pool branch
        per filter size -> concatenation -> dropout -> linear output layer.

        Args:
            sequence_length: Number of token ids per input example.
            num_classes: Number of output classes (width of ``input_y``).
            vocab_size: Number of rows in the embedding matrix.
            embedding_size: Dimensionality of each embedding vector.
            filter_sizes: Iterable of convolution heights (in tokens).
            num_filters: Number of filters per filter size.
            l2_reg_lambda: Weight of the L2 penalty on the output layer.

        Sets on ``self``: the placeholders ``input_x``, ``input_y`` and
        ``dropout_keep_prob``, plus ``scores``, ``predictions``, ``loss``
        and ``accuracy``.
        """
        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        # Embedding layer
        # W: embedding matrix; the components of each word vector are
        # initialised uniformly at random in [-1, 1).
        #with tf.device('/gpu:0'), tf.name_scope("embedding"):
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            W = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                name="W")
            self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
            # Add a trailing channel axis so conv2d gets a 4-D input.
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for filter_size in filter_sizes:
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(
                    self.embedded_chars_expanded,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Maxpooling over the outputs: the window spans every valid
                # convolution position, leaving one value per filter.
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)

        # TF >= 1.0 signature is tf.concat(values, axis); the pre-1.0 order
        # (axis, values) used previously fails on those versions.
        self.h_pool = tf.concat(pooled_outputs, 3)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")    # xw_plus_b = matmul(x, W) + b
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate Mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)

            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
Beispiel #44
0
    def __init__(self,
                 is_training=True,
                 vocab_len=None,
                 tw_vocab_len=None,
                 vocab_overlap=None):
        """Build the hierarchical-attention encoder/decoder graph.

        The graph is created inside a fresh ``tf.Graph`` stored on
        ``self.graph``. Hyper-parameters are read from the module-level
        ``hp`` object.

        Args:
            is_training: When true, build the loss and training op; otherwise
                build the inference outputs (``prob``, ``preds``,
                ``ppl_step``).
            vocab_len: Vocabulary size used for all token embeddings and for
                the one-hot context targets.
            tw_vocab_len: Size of the topic-word output layer and of the
                one-hot topic targets.
            vocab_overlap: Value for the constant ``tw_vocab_overlap``; only
                used when ``is_training`` is false.
        """
        self.graph = tf.Graph()
        with self.graph.as_default():
            # Input placeholders. The two branches differ only in that
            # training adds y_twrp and inference adds tw_vocab_overlap.
            if is_training:
                self.x = tf.placeholder(tf.int32,
                                        shape=(hp.batch_size, hp.max_turn,
                                               hp.maxlen))
                self.x_length = tf.placeholder(tf.int32,
                                               shape=(hp.batch_size,
                                                      hp.max_turn))
                self.y = tf.placeholder(tf.int32,
                                        shape=(hp.batch_size, hp.maxlen))
                self.y_twrp = tf.placeholder(tf.int32,
                                             shape=(hp.batch_size, hp.maxlen))
                self.y_tw = tf.placeholder(tf.int32,
                                           shape=(hp.batch_size, hp.tw_maxlen))
                self.y_decoder_input = tf.placeholder(tf.int32,
                                                      shape=(hp.batch_size,
                                                             hp.maxlen))
            else:
                # inference
                self.x = tf.placeholder(tf.int32,
                                        shape=(hp.batch_size, hp.max_turn,
                                               hp.maxlen))
                self.x_length = tf.placeholder(tf.int32,
                                               shape=(hp.batch_size,
                                                      hp.max_turn))
                self.y = tf.placeholder(tf.int32,
                                        shape=(hp.batch_size, hp.maxlen))
                self.y_tw = tf.placeholder(tf.int32,
                                           shape=(hp.batch_size, hp.tw_maxlen))
                self.y_decoder_input = tf.placeholder(tf.int32,
                                                      shape=(hp.batch_size,
                                                             hp.maxlen))
                # Used below to project topic-word probabilities onto the
                # full vocabulary.
                self.tw_vocab_overlap = tf.constant(vocab_overlap,
                                                    name='Const',
                                                    dtype='float32')

            # define decoder inputs: prepend a column of 2s (start token) and
            # drop the last column of y_decoder_input
            self.decoder_inputs = tf.concat(
                (tf.ones_like(self.y_decoder_input[:, :1]) * 2,
                 self.y_decoder_input[:, :-1]), -1)  # 2:<S>

            ## Word Embedding (encoder input flattened to one row per utterance)
            self.enc_embed = get_token_embeddings(tf.reshape(
                self.x, [-1, hp.maxlen]),
                                                  vocab_size=vocab_len,
                                                  num_units=hp.hidden_units)

            ## Topic Word Embedding
            # Uses vocab_len (not tw_vocab_len) as the embedding vocabulary.
            self.tw_embed = get_token_embeddings(self.y_tw,
                                                 vocab_size=vocab_len,
                                                 num_units=hp.hidden_units)

            ## Word Embedding (decoder input)
            self.dec_embed = get_token_embeddings(self.decoder_inputs,
                                                  vocab_size=vocab_len,
                                                  num_units=hp.hidden_units)

            # Get Vocab Embedding (the embedding table itself, used for the
            # shared output projection below)
            self.embeddings = get_token_embeddings(inputs=None,
                                                   vocab_size=vocab_len,
                                                   num_units=hp.hidden_units,
                                                   get_embedtable=True)

            # Encoder
            with tf.variable_scope("encoder", reuse=tf.AUTO_REUSE):
                # Hierarchical Self-Attention reshape
                self.x_resha = tf.reshape(self.x,
                                          [-1, hp.maxlen])  # (N, S_maxlen)

                # Word src_masks: true where the token id is 0
                src_masks_w = tf.math.equal(self.x_resha, 0)  # (N, S_maxlen)

                ## Word Positional Encoding
                self.enc = self.enc_embed + positional_encoding(
                    self.enc_embed, hp.maxlen)
                self.enc = tf.layers.dropout(self.enc,
                                             hp.dropout_rate,
                                             training=is_training)

                ## Word Blocks
                for i in range(hp.num_blocks_w):
                    with tf.variable_scope("num_blocks_w{}".format(i),
                                           reuse=tf.AUTO_REUSE):
                        # self-attention
                        # NOTE(review): causality=True on the word-level
                        # encoder, unlike the utterance-level blocks below —
                        # confirm this is intended.
                        self.enc, self.att_w = multihead_attention(
                            queries=self.enc,
                            keys=self.enc,
                            values=self.enc,
                            key_masks=src_masks_w,
                            num_heads=hp.num_heads,
                            dropout_rate=hp.dropout_rate,
                            training=is_training,
                            causality=True)
                        # feed forward
                        self.enc = ff(
                            self.enc,
                            num_units=[4 * hp.hidden_units, hp.hidden_units])

                # Hierarchical Self-Attention reshape: regroup words by
                # utterance, then average over the word axis to get one
                # vector per utterance.
                self.enc = tf.reshape(
                    self.enc,
                    [hp.batch_size, hp.max_turn, hp.maxlen, hp.hidden_units])
                self.enc = tf.reduce_mean(self.enc, axis=2)  # (N,max_turn,C)

                # Utterances that are pure padding (length 0) get an all-zero
                # utterance vector regardless of the self-attention output.
                x_length_mat = tf.not_equal(self.x_length, 0)  # (N, max_turn)
                x_length_mat = tf.expand_dims(x_length_mat,
                                              -1)  # (N, max_turn, 1)
                x_length_mat = tf.tile(x_length_mat,
                                       multiples=[1, 1, hp.hidden_units
                                                  ])  # (N, max_turn, C)
                zeros_mat = tf.zeros(
                    [hp.batch_size, hp.max_turn, hp.hidden_units],
                    dtype=tf.float32)
                self.enc = tf.where(x_length_mat, self.enc, zeros_mat)

                self.enc = ff(self.enc,
                              num_units=[4 * hp.hidden_units, hp.hidden_units])

                # Utterance src_masks: true where the utterance length is 0
                src_masks_u = tf.math.equal(self.x_length, 0)  # (N, max_turn)

                ## Utterance Positional Encoding
                self.enc = self.enc + positional_encoding(
                    self.enc, hp.max_turn)
                self.enc = tf.layers.dropout(self.enc,
                                             hp.dropout_rate,
                                             training=is_training)

                ## Utterance Blocks
                for i in range(hp.num_blocks_u):
                    with tf.variable_scope("num_blocks_u{}".format(i),
                                           reuse=tf.AUTO_REUSE):
                        # self-attention
                        self.enc, self.att_u = multihead_attention(
                            queries=self.enc,
                            keys=self.enc,
                            values=self.enc,
                            key_masks=src_masks_u,
                            num_heads=hp.num_heads,
                            dropout_rate=hp.dropout_rate,
                            training=is_training,
                            causality=False)
                        # feed forward
                        self.enc = ff(
                            self.enc,
                            num_units=[4 * hp.hidden_units, hp.hidden_units])

            # Decoder
            with tf.variable_scope("decoder", reuse=tf.AUTO_REUSE):
                # tgt_masks: true where the decoder input id is 0
                tgt_masks = tf.math.equal(self.decoder_inputs, 0)  # (N, T2)

                ## Positional Encoding
                self.dec = self.dec_embed + positional_encoding(
                    self.dec_embed, hp.maxlen)
                self.dec = tf.layers.dropout(self.dec,
                                             hp.dropout_rate,
                                             training=is_training)

                # Blocks
                for i in range(hp.num_blocks):
                    with tf.variable_scope("num_blocks_{}".format(i),
                                           reuse=tf.AUTO_REUSE):
                        # Masked self-attention (Note that causality is True at this time)
                        self.dec, _ = multihead_attention(
                            queries=self.dec,
                            keys=self.dec,
                            values=self.dec,
                            key_masks=tgt_masks,
                            num_heads=hp.num_heads,
                            dropout_rate=hp.dropout_rate,
                            training=is_training,
                            causality=True,
                            scope="self_attention")

                        # Vanilla attention over the utterance-level encoder output
                        self.dec, self.att_v = multihead_attention(
                            queries=self.dec,
                            keys=self.enc,
                            values=self.enc,
                            key_masks=src_masks_u,
                            num_heads=hp.num_heads,
                            dropout_rate=hp.dropout_rate,
                            training=is_training,
                            causality=False,
                            scope="vanilla_attention")
                        ### Feed Forward
                        self.dec = ff(
                            self.dec,
                            num_units=[4 * hp.hidden_units, hp.hidden_units])

                        # In the last block only: an extra causal
                        # self-attention pass whose output feeds the topic
                        # word attention. It uses the same scope name as the
                        # first self-attention of this block, so under
                        # AUTO_REUSE it presumably shares those weights —
                        # TODO confirm in multihead_attention.
                        if i >= hp.num_blocks - 1:
                            self.future_blindness, _ = multihead_attention(
                                queries=self.dec,
                                keys=self.dec,
                                values=self.dec,
                                key_masks=tgt_masks,
                                num_heads=hp.num_heads,
                                dropout_rate=hp.dropout_rate,
                                training=is_training,
                                causality=True,
                                scope="self_attention")

                ## Topic Word Attention
                self.twdec = topic_word_attention(
                    queries_hidden=self.future_blindness,
                    queries_context=self.enc,
                    keys=self.tw_embed,
                    dropout_rate=hp.dropout_rate,
                    training=is_training,
                    scope="topic_word_attention")

                # Combine the decoder state with the topic-word attention output.
                self.ct_tw_dec = self.dec + self.twdec

                ### Feed Forward
                self.ct_tw_dec = ff(
                    self.ct_tw_dec,
                    num_units=[4 * hp.hidden_units, hp.hidden_units],
                    scope="tw_context_feedforward")

            # Final linear projection (embedding weights are shared)
            self.weights = tf.transpose(
                self.embeddings)  # (d_model, vocab_size)
            self.logits_c = tf.einsum('ntd,dk->ntk', self.dec,
                                      self.weights)  # (N, T_q, vocab_size)
            self.logits_t = tf.layers.dense(
                self.ct_tw_dec, tw_vocab_len)  # (N, T_q, tw_vocab_size)

            if is_training:
                # Loss_context: label-smoothed cross-entropy averaged over
                # non-padding target positions (1e-7 guards against a
                # zero denominator).
                self.y_smoothed_c = label_smoothing(
                    tf.one_hot(self.y,
                               depth=vocab_len))  # (N, T_q, vocab_size)
                self.ce_c = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=self.logits_c, labels=self.y_smoothed_c)  # (N, T_q)
                self.nonpadding_c = tf.to_float(tf.not_equal(
                    self.y, 0))  # 0: <pad> #(N,T_q)
                self.loss_c = tf.reduce_sum(self.ce_c * self.nonpadding_c) / (
                    tf.reduce_sum(self.nonpadding_c) + 1e-7)

                # Loss_topic: same form, but positions whose topic target is
                # <unk> or <pad> are excluded.
                self.y_smoothed_t = label_smoothing(
                    tf.one_hot(self.y_twrp,
                               depth=tw_vocab_len))  # (N, T_q, tw_vocab_size)
                self.ce_t = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=self.logits_t, labels=self.y_smoothed_t)  # (N, T_q)
                self.noncost_unk = tf.to_float(tf.not_equal(self.y_twrp,
                                                            1))  # 1: <unk>
                self.noncost_pad = tf.to_float(tf.not_equal(self.y_twrp,
                                                            0))  # 0: <pad>
                self.noncost_t = self.noncost_unk * self.noncost_pad
                self.loss_t = tf.reduce_sum(self.ce_t * self.noncost_t) / (
                    tf.reduce_sum(self.noncost_t) + 1e-7)

                # Loss: context loss plus topic loss weighted by hp.penalty,
                # optimised with Adam under the noam_scheme learning rate.
                self.loss = self.loss_c + self.loss_t * hp.penalty
                self.global_step = tf.train.get_or_create_global_step()
                self.lr = noam_scheme(hp.lr, self.global_step, hp.warmup_steps)
                self.optimizer = tf.train.AdamOptimizer(self.lr)
                self.train_op = self.optimizer.minimize(
                    self.loss, global_step=self.global_step)
            else:
                # inference
                self.prob_c = tf.nn.softmax(
                    self.logits_c)  # (N, T_q, vocab_size)
                self.prob_t = tf.nn.softmax(
                    self.logits_t)  # (N, T_q, tw_vocab_size)
                # Project topic-word probabilities onto the full vocabulary.
                self.prob_t = tf.einsum(
                    'nlt,tv->nlv', self.prob_t,
                    self.tw_vocab_overlap)  # (N, T_q, vocab_size)
                self.prob = self.prob_c + self.prob_t * hp.penalty  # (N, T_q, vocab_size)
                self.preds = tf.to_int32(tf.argmax(self.prob,
                                                   axis=-1))  # (N, T_q)

                self.y_smoothed = label_smoothing(
                    tf.one_hot(self.y,
                               depth=vocab_len))  # (N, T_q, vocab_size)
                # NOTE(review): self.prob is a weighted sum of probabilities
                # but is passed as `logits`, so a second softmax is applied
                # inside the op — verify the perplexity is meant to be
                # computed this way.
                self.ce = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=self.prob, labels=self.y_smoothed)  # (N, T_q)
                self.ppl_step = tf.exp(self.ce)  # (N, T_q)
Beispiel #45
0
def build_mnist_model(num_hidden, decay, activation):
    """Assemble the feed-forward regression graph and its training targets.

    Args:
        num_hidden: Hidden-layer specification forwarded to ``feed_forward``;
            its length selects the ``dense_{len - 1}`` scope used as the
            "last layer".
        decay: Forwarded to ``feed_forward``.
        activation: Forwarded to ``feed_forward``.

    Returns:
        A ``(ph, targets)`` pair. ``ph`` maps names to placeholders (``x``,
        ``y``, ``lr_decay``, ``is_training``, ``kernel_l0``). ``targets``
        groups the fetchable ops: ``layers``, the three training variants
        (``all``, ``all_noise``, ``lst``), ``eval``, ``assign_weights`` and
        ``reset``.
    """
    x = tf.placeholder(dtype=tf.float32, shape=[None, args.x_dim])
    y = tf.placeholder(dtype=tf.float32, shape=[None, 1])
    is_training = tf.placeholder(dtype=tf.bool, shape=[])
    with tf.variable_scope('network'):
        out, reg, layers = feed_forward(
            x, num_hidden, decay, activation, is_training)

    # Per-example sum of squared errors, averaged over the batch.
    squared_error = tf.reduce_sum(tf.square(y - out), 1)
    rmse_loss = tf.reduce_mean(squared_error)
    loss = rmse_loss + reg

    all_weights = tf.get_collection(
        tf.GraphKeys.GLOBAL_VARIABLES, scope='network')
    show_variables(all_weights)
    last_scope = 'network/dense_{}'.format(len(num_hidden) - 1)
    last_layer_weights = tf.get_collection(
        tf.GraphKeys.GLOBAL_VARIABLES, scope=last_scope)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope='network')
    for update_op in update_ops:
        print('Update {}'.format(update_op))

    # Plain SGD over every network variable.
    lr_decay = tf.placeholder(dtype=tf.float32, shape=[])
    all_op = tf.train.GradientDescentOptimizer(args.lr * lr_decay)
    all_grads = all_op.compute_gradients(loss=loss, var_list=all_weights)
    all_train_op = all_op.apply_gradients(grads_and_vars=all_grads)

    # Hand-written SGD step with added Gaussian noise scaled by
    # sqrt(lr) * TEMPERATURE; variables without a gradient are skipped.
    lr = args.lr * lr_decay
    TEMPERATURE = 1e-8
    noise_train_ops = [
        tf.assign(
            var,
            var - lr * grad - tf.sqrt(lr) * TEMPERATURE *
            tf.random_normal(var.shape, stddev=1))
        for grad, var in all_grads
        if grad is not None
    ]
    all_train_op_noise = tf.group(noise_train_ops)

    # SGD restricted to the last dense layer.
    lst_op = tf.train.GradientDescentOptimizer(args.lr * lr_decay)
    lst_grads = lst_op.compute_gradients(
        loss=loss, var_list=last_layer_weights)
    lst_train_op = lst_op.apply_gradients(grads_and_vars=lst_grads)
    reset_lst_op = tf.variables_initializer(lst_op.variables())
    reset_all_op = tf.variables_initializer(all_op.variables())

    # Only kernel variables are exposed for saving / evaluation.
    weight_dict = {
        var.name: var for var in all_weights if 'kernel' in var.name
    }
    print('weights to be saved')
    print(weight_dict)

    first_kernel = 'network/dense_0/kernel:0'
    ph = {'x': x, 'y': y, 'lr_decay': lr_decay, 'is_training': is_training}
    ph['kernel_l0'] = tf.placeholder(
        dtype=tf.float32, shape=weight_dict[first_kernel].get_shape())
    #ph['bias_l0'] = tf.placeholder(dtype=tf.float32, shape=weight_dict['network/dense_0/bias:0'].get_shape())

    def _training_target(train_op):
        # The three training variants share everything except the train op.
        return {
            'weights': all_weights,
            'train': train_op,
            'rmse_loss': rmse_loss,
            'update': update_ops,
            'reg_loss': reg,
        }

    targets = {
        'layers': layers,
        'all': _training_target(all_train_op),
        'all_noise': _training_target(all_train_op_noise),
        'lst': _training_target(lst_train_op),
        'eval': {
            'weights': weight_dict,
            'rmse_loss': rmse_loss,
            'out': out,
        },
        'assign_weights': {
            'weights_l0': tf.assign(weight_dict[first_kernel],
                                    ph['kernel_l0']),
            #'bias': tf.assign(weight_dict['network/dense_0/bias:0'], ph['bias_l0']),
        },
        'reset': {
            'lst': reset_lst_op,
            'all': reset_all_op,
        },
    }

    return ph, targets
Beispiel #46
0
# Start from an empty default graph so that re-running this cell does not
# accumulate duplicate ops.
tf.reset_default_graph()
# Inputs: one vocab_size-wide vector per document and one target value per
# document.
doc_vectors = tf.placeholder(dtype=tf.float32,shape=[None, vocab_size], name='doc_vectors')
y = tf.placeholder(tf.float32, [None, 1], name='y')

# <codecell>

learning_rate = 0.01

# <codecell>

# One hidden ReLU layer of 100 units, then a single linear output unit.
layer_one_output = fully_connected(doc_vectors, 100, activation_fn=tf.nn.relu)
logits = fully_connected(layer_one_output,1, activation_fn=None)
# Sigmoid of the logit, exposed under the name 'prob'.
prob = tf.nn.sigmoid(logits, name='prob')

# Sigmoid cross-entropy computed from the raw logits, averaged over the batch.
x_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits, name='x_entropy')
loss = tf.reduce_mean(x_entropy, name='loss')

with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_op = optimizer.minimize(loss, name='train_op')

# <codecell>

# Write the graph definition for TensorBoard.
file_writer = tf.summary.FileWriter('tf_logs/logistic_regression', tf.get_default_graph())

# <codecell>

# Initialise all variables inside an interactive session.
init = tf.global_variables_initializer()
saver = tf.train.Saver()
sess = tf.InteractiveSession()
init.run()
Beispiel #47
0
            tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)
        )
        
        embed = tf.nn.embedding_lookup(embeddings, train_inputs)
        
        nce_weights = tf.Variable(
            tf.truncated_normal([vocabulary_size, embedding_size],
                               stddev=1.0 / math.sqrt(embedding_size))
        )
        
        nce_biases = tf.Variable(tf.zeros([vocabulary_size]))
        
    # Noise-contrastive estimation loss, averaged over the batch; num_sampled
    # negative classes are drawn per batch.
    loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weights,
                                        biases=nce_biases,
                                         labels=train_labels,
                                         inputs=embed,
                                         num_sampled=num_sampled,
                                         num_classes=vocabulary_size
                                        ))
    
    # Plain SGD with a learning rate of 1.0.
    optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)

    # L2-normalise every embedding row so the matmul below yields cosine
    # similarities.
    # NOTE(review): `keep_dims` is the deprecated spelling of `keepdims` in
    # later TF 1.x releases.
    norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
    normalized_embeddings = embeddings / norm
    valid_embeddings = tf.nn.embedding_lookup(
        normalized_embeddings, valid_dataset
    )
    # Similarity of each validation word against every embedding row.
    similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b = True)

    init = tf.global_variables_initializer()
# Dense layer 4: 128 * 4 * 4 flattened features -> 625 units, ReLU, dropout.
W4 = tf.get_variable("W4",
                     shape=[128 * 4 * 4, 625],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([625]))
L4 = tf.nn.relu(tf.matmul(L3_flat, W4) + b4)
L4 = tf.nn.dropout(L4, keep_prob=keep_prob)

# Output layer: 625 units -> 10 class scores.
W5 = tf.get_variable("W5",
                     shape=[625, 10],
                     initializer=tf.contrib.layers.xavier_initializer())
b5 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L4, W5) + b5
# NOTE(review): L5 is not used by the cost or optimizer below, which read
# `hypothesis` directly — it looks like dead code unless a later part of the
# file uses it.
L5 = tf.nn.relu(hypothesis)
L5 = tf.nn.dropout(L5, keep_prob=keep_prob)

# Mean softmax cross-entropy on the raw scores, minimised with Adam.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

###
# Create the session and initialise all variables.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# train my model
print('Learning started. It takes sometime.')
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys, keep_prob: 0.7}
Beispiel #49
0
def train(train_data_set, val_data_set, load_model_path, save_model_path):
    """Build the classification graph and run the training loop.

    Every step runs one optimizer update on a training batch and writes the
    merged summaries. Every 100 steps a validation batch is evaluated and
    reported, and (except at step 0) a checkpoint is written.

    Args:
        train_data_set: Object whose ``get_next_batch(batch_size, distribution)``
            returns an ``(images, labels)`` pair for one training step.
        val_data_set: Object whose ``get_next_batch()`` returns an
            ``(images, labels)`` pair used for validation.
        load_model_path: Checkpoint path restored before training; a falsy
            value keeps the freshly initialized variables.
        save_model_path: Directory under which checkpoints are written, one
            sub-directory per global step; ``None`` disables checkpointing.
    """
    import os

    x = tf.placeholder(
        tf.float32,
        shape=[
            None,
            sub_Config.IMAGE_W,
            sub_Config.IMAGE_H,
            sub_Config.IMAGE_CHANNEL
        ],
        name='input_x'
    )
    y_ = tf.placeholder(
        tf.float32,
        shape=[
            None,
        ]
    )
    tf.summary.histogram(
        'label',
        y_
    )
    global_step = tf.Variable(0, trainable=False)
    is_training = tf.placeholder('bool', [], name='is_training')
    FLAGS = tf.app.flags.FLAGS
    # NOTE(review): the flags are defined on every call, so calling train()
    # twice in one process would try to define them twice -- confirm this
    # function is only invoked once per process.
    tf.app.flags.DEFINE_string('data_dir', '/tmp/cifar-data',
                               'where to store the dataset')
    tf.app.flags.DEFINE_boolean('use_bn', True, 'use batch normalization. otherwise use biases')
    # NOTE(review): use_bias receives the value of the use_bn flag as-is;
    # verify against inference_small whether it should be negated.
    y = inference_small(x, is_training=is_training,
                        num_classes=sub_Config.OUTPUT_NODE,
                        use_bias=FLAGS.use_bn,
                        num_blocks=3)
    # Histogram of the predicted class indices (the tag name is kept as-is).
    tf.summary.histogram(
        'logits',
        tf.argmax(y, 1)
    )
    loss_ = loss(
        logits=y,
        labels=tf.cast(y_, np.int32)
    )
    tf.summary.scalar(
        'loss',
        loss_
    )
    train_op = tf.train.GradientDescentOptimizer(
        learning_rate=sub_Config.LEARNING_RATE
    ).minimize(
        loss=loss_,
        global_step=global_step
    )

    with tf.variable_scope('accuracy'):
        accuracy_tensor = tf.reduce_mean(
            tf.cast(
                tf.equal(x=tf.argmax(y, 1), y=tf.cast(y_, tf.int64)),
                tf.float32
            )
        )
        tf.summary.scalar(
            'accuracy',
            accuracy_tensor
        )

    def batch_shape(batch):
        # Shape fed to `x`: must agree with the placeholder declared above,
        # for training and validation batches alike.
        return [
            len(batch),
            sub_Config.IMAGE_W,
            sub_Config.IMAGE_H,
            sub_Config.IMAGE_CHANNEL
        ]

    saver = tf.train.Saver()
    merge_op = tf.summary.merge_all()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        if load_model_path:
            saver.restore(sess, load_model_path)
        writer = tf.summary.FileWriter('./log/fine_tuning/train', tf.get_default_graph())
        val_writer = tf.summary.FileWriter('./log/fine_tuning/val', tf.get_default_graph())
        try:
            for i in range(sub_Config.ITERATOE_NUMBER):
                images, labels = train_data_set.get_next_batch(
                    sub_Config.BATCH_SIZE, sub_Config.BATCH_DISTRIBUTION)
                images = changed_shape(images, batch_shape(images))
                # A placeholder without a default has to be fed, so
                # is_training is supplied explicitly on every run.
                _, loss_value, accuracy_value, summary, global_step_value = sess.run(
                    [train_op, loss_, accuracy_tensor, merge_op, global_step],
                    feed_dict={
                        x: images,
                        y_: labels,
                        is_training: True
                    }
                )
                writer.add_summary(
                    summary=summary,
                    global_step=global_step_value
                )
                if i % 100 == 0 and i != 0 and save_model_path is not None:
                    # Save a checkpoint every 100 steps (step 0 is skipped),
                    # in a directory named after the current global step.
                    save_dir = os.path.join(save_model_path, str(global_step_value))
                    if not os.path.exists(save_dir):
                        os.makedirs(save_dir)
                    save_path = save_dir + '/model.ckpt'
                    print('mode saved path is  %s' % save_path)
                    saver.save(sess, save_path)
                if i % 100 == 0:
                    validation_images, validation_labels = val_data_set.get_next_batch()
                    validation_images = changed_shape(
                        validation_images,
                        batch_shape(validation_images)
                    )
                    validation_accuracy, validation_loss, summary, logits = sess.run(
                        [accuracy_tensor, loss_, merge_op, y],
                        feed_dict={
                            x: validation_images,
                            y_: validation_labels,
                            is_training: False
                        }
                    )
                    calculate_acc_error(
                        logits=np.argmax(logits, 1),
                        label=validation_labels,
                        show=True
                    )
                    binary_acc = acc_binary_acc(
                        logits=np.argmax(logits, 1),
                        label=validation_labels,
                    )
                    val_writer.add_summary(summary, global_step_value)
                    print('step is %d,training loss value is %g,  accuracy is %g '
                          'validation loss value is %g, accuracy is %g, binary_acc is %g' %
                          (global_step_value, loss_value, accuracy_value,
                           validation_loss, validation_accuracy, binary_acc))
        finally:
            # Flush and close the event files even if a step raises.
            writer.close()
            val_writer.close()
Beispiel #50
0
# Take the last element of `outputs` as the network's final output.
# NOTE(review): `outputs` is built outside this excerpt; presumably it is the
# per-time-step list of RNN outputs -- confirm.
last_output = outputs[-1]

# Apply softmax to the last output to obtain class probabilities.
output = tf.nn.softmax(last_output)

# Cross-entropy loss, summed over all entries of y.
# NOTE(review): tf.log(output) is -inf wherever a softmax probability
# underflows to 0.
cross_entropy = -tf.reduce_sum(y * tf.log(output))

# Train with the Adam optimizer using its default arguments.
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

# A prediction is correct when the arg-max of the output matches the arg-max
# of y; accuracy is the mean of those matches, expressed as a percentage.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(output, 1))
accuracy = (tf.reduce_mean(tf.cast(correct_prediction, tf.float32))) * 100

# # Dataset Preparation


# Build a one-hot label vector for a class index.
def get_on_hot(number, num_classes=10):
    """Return a one-hot list with a 1 at position *number*.

    Args:
        number: Integer class index in ``[0, num_classes)``.
        num_classes: Length of the returned list; defaults to 10 (digits 0-9).

    Returns:
        A list of ``num_classes`` ints, all 0 except a single 1 at *number*.

    Raises:
        IndexError: If *number* is outside ``[0, num_classes)``. Negative
            values are rejected instead of silently wrapping around to the
            end of the list.
    """
    if not 0 <= number < num_classes:
        raise IndexError(
            "class index %r out of range for %d classes" % (number, num_classes))
    on_hot = [0] * num_classes
    on_hot[number] = 1
    return on_hot


# Load the digits dataset through datasets.load_digits().
# NOTE(review): assuming `datasets` is sklearn.datasets, this is
# scikit-learn's small bundled handwritten-digits set rather than the full
# MNIST dataset.
digits = datasets.load_digits()
# X holds the images and Y_ the integer class labels.
X = digits.images
Y_ = digits.target
Beispiel #51
0
# Second 2x2 model variable.
# NOTE(review): `v1`, used below, is defined outside this excerpt.
v2 = tf.Variable([[-1., -2], [1., -21.]])


# In[17]:

# Hyperparameters created through far.get_hyperparameter.
# `lmbd` has the same shape as v2 and is initialized to ones.
lmbd = far.get_hyperparameter('lambda', 
                              initializer=tf.ones_initializer, shape=v2.get_shape())

reg2 = far.get_hyperparameter('reg2', 0.1)

eta = far.get_hyperparameter('eta', 0.1)
beta1 = far.get_hyperparameter('beta1', 1.)
beta2 = far.get_hyperparameter('beta2', 2.)

# Objective built from v1 and v2, weighted by the hyperparameters lmbd and reg2.
# noinspection PyTypeChecker
cost = tf.reduce_mean(v1**2) + tf.reduce_sum(lmbd*v2**2) + reg2*tf.nn.l2_loss(v1)

# Optimizer for `cost`; its step size and (sigmoid-squashed) beta coefficients
# are themselves hyperparameters.
io_optim = far.AdamOptimizer(eta, tf.nn.sigmoid(beta1), tf.nn.sigmoid(beta2), epsilon=1.e-4)

# Second objective, passed as the first argument to farho.minimize below.
# NOTE(review): presumably `oo` is the outer (hyperparameter) objective and
# `cost` the inner (training) objective -- confirm against the far API.
oo = tf.reduce_mean(v1*v2)

rhg = far.ReverseHG()

optim_oo = tf.train.AdamOptimizer()
# ts_hy = optim_oo.apply_gradients(rhg.hgrads_hvars())
farho = far.HyperOptimizer(rhg)
run = farho.minimize(oo, optim_oo, cost, io_optim)

# Print the graph's global variables and the registered hyperparameters.
print(tf.global_variables())

print(far.utils.hyperparameters())
Beispiel #52
0
    layer_4 = tf.nn.sigmoid(tf.add(tf.matmul(layer_3, weights['decoder_h4']),
                                biases['decoder_b4']))
    return layer_4
"""

# Construct the model: encode the input X, then decode the encoding.
# NOTE(review): encoder, decoder, X, learning_rate, mnist, batch_size and
# training_epochs are defined outside this excerpt.
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)

# Prediction: the decoder output (the reconstruction).
y_pred = decoder_op
# Targets (labels) are the input data itself.
y_true = X

# Define loss and optimizer: minimize the mean squared reconstruction error.
cost = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)


# Launch the graph
with tf.Session() as sess:
    # tf.initialize_all_variables() is no longer valid from 2017-03-02 when
    # using tensorflow >= 0.12, so the initializer is chosen from the
    # installed version: the old call only for 0.x releases with minor
    # version below 12, the new one otherwise.
    if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
        init = tf.initialize_all_variables()
    else:
        init = tf.global_variables_initializer()
    sess.run(init)
    # Number of mini-batches per epoch.
    total_batch = int(mnist.train.num_examples/batch_size)
    # Training cycle (the loop body is not part of this excerpt).
    for epoch in range(training_epochs):
Beispiel #53
0
import tensorflow as tf

tf.set_random_seed(777)  # for reproducibility

# Training data as plain Python lists; the targets equal the inputs.
X = [1, 2, 3]
Y = [1, 2, 3]

# Start from a poor initial weight (5.0).
W = tf.Variable(5.)

# Linear model without a bias term.
hypothesis = X * W

# Manually derived gradient of the cost below with respect to W:
# d/dW mean((W * X - Y)^2) = 2 * mean((W * X - Y) * X)
gradient = tf.reduce_mean((W * X - Y) * X) * 2

# Cost/loss function: mean squared error.
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Plain gradient-descent update op (uses the unclipped gradients).
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
train = optimizer.minimize(cost)

# Gradients computed by the optimizer, as (gradient, variable) pairs.
gvs = optimizer.compute_gradients(cost)
# Clip each gradient to [-1, 1] and build a second update op that applies the
# clipped gradients; this op is separate from `train` above.
capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs]
apply_gradients = optimizer.apply_gradients(capped_gvs)

# Launch the graph in a session.
Beispiel #54
0
import tensorflow as tf
import numpy as np

  
# Create data: 100 random float32 inputs and targets on the line
# y = 0.1 * x + 0.3.
x_data = np.random.rand(100).astype(np.float32)
y_data = x_data * 0.1 + 0.3

### Create TensorFlow structure: start ###
# Trainable weight, drawn uniformly from [-1, 1), and bias, starting at zero.
Weights = tf.Variable(tf.random_uniform([1],-1,1))
biases = tf.Variable((tf.zeros([1])))

y = Weights * x_data + biases

# Mean squared error, minimized by gradient descent with learning rate 0.5.
loss = tf.reduce_mean(tf.square(y - y_data))
optimizer = tf.train.GradientDescentOptimizer(0.5)
train = optimizer.minimize(loss)

init = tf.global_variables_initializer()
### Create TensorFlow structure: end ###

sess = tf.Session()
# Activate the network by running the variable initializer.
sess.run(init)

# Run 201 training steps, printing the current weight and bias every 20 steps.
for step in range(201):
    sess.run(train)
    if step % 20 == 0:
        print(step,sess.run(Weights),sess.run(biases))
# Number of features per (flattened) training image.
# NOTE(review): mnist, encoding_dim and plt are defined outside this excerpt.
image_size = mnist.train.images.shape[1]

inputs_ = tf.placeholder(tf.float32, (None, image_size), name='inputs')
targets_ = tf.placeholder(tf.float32, (None, image_size), name='targets')

# Output of hidden layer: the encoding, with ReLU activation.
encoded = tf.layers.dense(inputs_, encoding_dim, activation=tf.nn.relu)

# Output layer logits, one per input feature.
logits = tf.layers.dense(encoded, image_size, activation=None)
# Sigmoid output from the logits: the reconstructed image.
decoded = tf.nn.sigmoid(logits, name='output')

# Element-wise sigmoid cross-entropy against the targets, averaged into a
# scalar cost and minimized with Adam.
loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=targets_, logits=logits)
cost = tf.reduce_mean(loss)
opt = tf.train.AdamOptimizer(0.001).minimize(cost)
# Create the session
sess = tf.Session()
epochs = 20
batch_size = 200
sess.run(tf.global_variables_initializer())
for e in range(epochs):
    for ii in range(mnist.train.num_examples//batch_size):
        batch = mnist.train.next_batch(batch_size)
        # The images serve as both input and target; the labels in batch[1]
        # are not used.
        feed = {inputs_: batch[0], targets_: batch[0]}
        batch_cost, _ = sess.run([cost, opt], feed_dict=feed)

        print("Epoch: {}/{}...".format(e+1, epochs),
              "Training loss: {:.4f}".format(batch_cost))

# The figure is created once training has finished. This statement was
# indented under the completed print call above, which is an IndentationError.
fig, axes = plt.subplots(nrows=2, ncols=10, sharex=True, sharey=True, figsize=(20,4))
Beispiel #56
0
    def onSetup(self):
        """Initialize game state, build the online and target networks, and
        restore a saved model if one can be loaded.

        Both networks read ``self.input`` and share the same layout
        (INPUTS -> HIDDEN1S -> HIDDEN2S -> OUTPUTS, ReLU on the hidden
        layers). Only ``self.output`` (the online network) enters
        ``self.cost``. If restoring from ``self.model_path`` fails, all
        variables are freshly initialized instead.
        """
        #==================== Initialize ====================#
        # File Path
        self.scripts_path = ue.get_content_dir() + "Scripts"
        self.model_directory = self.scripts_path + "/model"
        self.model_path = self.model_directory + "/model.ckpt"

        # Game
        self.Sequence = 1
        self.PlayNumber = 1

        # Epsilon
        self.Epsilon = EPSILONMINVALUE

        # ReplayMemory
        self.Memory = ReplayMemory()
        self.LastAction = -1

        # State
        self.reset()

        #==================== Hypothesis ====================#
        self.input = tf.placeholder(tf.float32, shape=[None, INPUTS])

        def dense_params(fan_in, fan_out, tag):
            # Weight (stddev 1/sqrt(fan_in)) and bias (stddev 0.01) for one
            # dense layer; `tag` completes the variable names.
            weight = tf.Variable(
                tf.truncated_normal(shape=[fan_in, fan_out],
                                    stddev=1.0 / math.sqrt(float(fan_in))),
                dtype=tf.float32,
                name='w' + tag)
            bias = tf.Variable(
                tf.truncated_normal(shape=[fan_out], stddev=0.01),
                dtype=tf.float32,
                name='b' + tag)
            return weight, bias

        def build_network(suffix):
            # Build one three-layer network on self.input; `suffix` is
            # appended to every variable name ('' for the online network,
            # '_t' for the target network). The ReLU ops keep the names
            # 'hidden1' / 'hidden2' for both networks.
            w1, b1 = dense_params(INPUTS, HIDDEN1S, '1' + suffix)
            hidden1 = tf.nn.relu(tf.matmul(self.input, w1) + b1,
                                 name='hidden1')
            w2, b2 = dense_params(HIDDEN1S, HIDDEN2S, '2' + suffix)
            hidden2 = tf.nn.relu(tf.matmul(hidden1, w2) + b2,
                                 name='hidden2')
            wo, bo = dense_params(HIDDEN2S, OUTPUTS, 'o' + suffix)
            return tf.matmul(hidden2, wo) + bo

        # Model
        self.output = build_network('')

        # Target
        self.output_t = build_network('_t')

        # Cost & Optimizer
        self.target = tf.placeholder(tf.float32, shape=[None, OUTPUTS])
        self.cost = tf.reduce_mean(tf.square(self.output - self.target)) / 2
        self.optimizer = tf.train.AdamOptimizer(LEARNINGRATE).minimize(
            self.cost)

        #==================== Session & Saver ====================#
        self.sess = tf.Session()
        self.saver = tf.train.Saver()
        ue.log('######################################################')
        try:
            self.saver.restore(self.sess, self.model_path)
            ue.log('#################### loaded model ####################')
        except Exception:
            # Any restore failure falls back to fresh variables.
            # KeyboardInterrupt and SystemExit are deliberately not swallowed.
            self.sess.run(tf.global_variables_initializer())
            ue.log('################## no stored model ##################')
        ue.log('######################################################')
Beispiel #57
0
def kl_loss(mean, logvar):
    """Return the batch-averaged KL-style penalty for ``mean`` and ``logvar``.

    For every example, ``mean^2 + exp(logvar) - 1 - logvar`` is summed over
    the last axis and halved; the result is then averaged over the remaining
    axes.

    Args:
        mean: Tensor of means; per the original comment, shaped
            [batch_size, channel].
        logvar: Tensor of log-variances with the same shape as ``mean``.

    Returns:
        A scalar tensor.
    """
    divergence_terms = tf.square(mean) + tf.exp(logvar) - 1 - logvar
    per_example = 0.5 * tf.reduce_sum(divergence_terms, axis=-1)
    return tf.reduce_mean(per_example)
# NOTE(review): input_data and get_NN_layer are defined outside this excerpt.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Placeholder for the input data: 784 features per example.
x = tf.placeholder(tf.float32, [None, 784])
# Boolean placeholder that turns BN on during training and off during inference.
train_phase = tf.placeholder(tf.bool, name='phase_train')
# Hidden layer with 25 units, built by get_NN_layer.
hiden_units = 25
layer1 = get_NN_layer(x, input_dim=784, output_dim=hiden_units, scope='layer1', train_phase=train_phase)
# Output layer: softmax over 10 classes.
W_final = tf.Variable(tf.truncated_normal(shape=[hiden_units, 10], mean=0.0, stddev=0.1))
b_final = tf.Variable(tf.constant(0.1, shape=[10]))
y = tf.nn.softmax(tf.matmul(layer1, W_final) + b_final)

### training
# One-hot labels; the loss is the mean cross-entropy over the batch.
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean( -tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]) )
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    steps = 3000
    # range() works on both Python 2 and Python 3 (xrange is Python 2 only).
    for iter_step in range(steps):
        #feed_dict_batch = get_batch_feed(X_train, Y_train, M, phase_train)
        batch_xs, batch_ys = mnist.train.next_batch(100)
        # Collect model statistics every 1000 steps.
        if iter_step%1000 == 0:
            # Simulates train data. Inputs and labels are bound to separate
            # names; the original assigned batch_xstrain twice, so the inputs
            # were overwritten by the labels.
            batch_xstrain, batch_ystrain = batch_xs, batch_ys
            batch_xcv, batch_ycv = mnist.test.next_batch(5000) # simulates CV data
            batch_xtest, batch_ytest = mnist.test.next_batch(5000) # simulates test data
            # Run inference with train_phase fed as False.
            train_error = sess.run(fetches=cross_entropy, feed_dict={x: batch_xs, y_:batch_ys, train_phase: False})
            cv_error = sess.run(fetches=cross_entropy, feed_dict={x: batch_xcv, y_:batch_ycv, train_phase: False})
Beispiel #59
0
def generator_loss(Ra, loss_func, real, fake):
    """Return the generator loss for the selected GAN objective.

    Args:
        Ra: Truthy to use the relativistic formulation. It is switched off,
            with a printed notice, when ``loss_func`` contains 'wgan'.
        loss_func: Name of the objective. Handled values are names containing
            'wgan' (non-relativistic only), 'lsgan', 'gan', 'gan-gp',
            'dragan' and 'hinge'.
        real: Discriminator output for real samples.
        fake: Discriminator output for generated samples.

    Returns:
        ``fake_loss + real_loss``. Both terms stay at 0 when ``loss_func``
        matches none of the handled names, and ``real_loss`` stays at 0 in
        the non-relativistic case.
    """
    standard_gan_names = ('gan', 'gan-gp', 'dragan')
    is_wgan = 'wgan' in loss_func

    # Ra = Relativistic
    if Ra and is_wgan:
        print("No exist [Ra + WGAN], so use the {} loss function".format(loss_func))
        Ra = False

    fake_loss = 0
    real_loss = 0

    if not Ra:
        if is_wgan:
            fake_loss = -tf.reduce_mean(fake)
        elif loss_func == 'lsgan':
            fake_loss = tf.reduce_mean(tf.square(fake - 1.0))
        elif loss_func in standard_gan_names:
            fake_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(fake), logits=fake))
        elif loss_func == 'hinge':
            fake_loss = -tf.reduce_mean(fake)
        return fake_loss + real_loss

    # Relativistic logits: each side is measured against the mean of the other.
    fake_logit = fake - tf.reduce_mean(real)
    real_logit = real - tf.reduce_mean(fake)

    if loss_func == 'lsgan':
        fake_loss = tf.reduce_mean(tf.square(fake_logit - 1.0))
        real_loss = tf.reduce_mean(tf.square(real_logit + 1.0))
    elif loss_func in standard_gan_names:
        fake_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(fake), logits=fake_logit))
        real_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(real), logits=real_logit))
    elif loss_func == 'hinge':
        fake_loss = tf.reduce_mean(relu(1.0 - fake_logit))
        real_loss = tf.reduce_mean(relu(1.0 + real_logit))

    return fake_loss + real_loss
Beispiel #60
0
def kl_loss_2(mean, var):
    """Return the batch-averaged KL-style penalty for ``mean`` and ``var``.

    For every example, ``mean^2 + var^2 - log(1e-8 + var^2) - 1`` is summed
    over the last axis and halved; the result is then averaged over the
    remaining axes. The 1e-8 term keeps the logarithm finite when ``var`` is 0.

    Args:
        mean: Tensor of means; per the original comment, shaped
            [batch_size, channel].
        var: Tensor with the same shape as ``mean``. It is squared before
            use, so despite its name it presumably holds a standard
            deviation (confirm with the caller).

    Returns:
        A scalar tensor.
    """
    quadratic = tf.square(mean) + tf.square(var)
    log_term = tf.log(1e-8 + tf.square(var))
    per_example = 0.5 * tf.reduce_sum(quadratic - log_term - 1, axis=-1)
    return tf.reduce_mean(per_example)