def __init__(self, shape, lambda1 = 0.1, lambda2 = 0.1, mu = 0.1):
        """Initialize the ChanVese segmenter

        Arguments:
        shape (required) -- size of the image to segment

        lambda1 (default : 0.1) -- The cost of labeling a pixel as type 1 (see the class docstring). Together with lambda2, this can be used to bias the segmentation in one direction or the other. Which parts of the image end up with which label is not deterministic, so lambda1 and lambda2 are largely guess-and-check parameters.

        lambda2 (default : 0.1) -- The cost of labeling a pixel as type 2 (see the class docstring)

        mu (default : 0.1) -- The cost of having a boundary. A higher value means fewer boundaries.
        """
        xs = range(3)
        ys = range(3)
        Xs, Ys = numpy.meshgrid(xs, ys)
        Rs = numpy.sqrt((Xs - 1.0)**2 + (Ys - 1.0)**2)

        kernelBlurCpu = numpy.exp(-Rs / (2.0 * 0.75**2)).astype('float32')
        kernelBlurCpu /= numpy.linalg.norm(kernelBlurCpu.flatten())
        
        self.kernel = tf.constant(kernelBlurCpu.reshape([3, 3, 1, 1]))

        self.I = tf.Variable(tf.truncated_normal(shape = [1, shape[0], shape[1], 1], mean = 0.0, stddev = 0.1))
        
        self.u1 = tf.Variable(1.0)
        self.u2 = tf.Variable(-1.0)

        self.G = tf.placeholder(tf.float32, shape = shape)

        self.Gv = tf.Variable(numpy.zeros([1, shape[0], shape[1], 1]).astype('float32'))
        self.initialize = self.Gv.assign(tf.reshape(self.G, shape = [1, shape[0], shape[1], 1]))
        self.initialize2 = self.I.assign(tf.reshape(self.G, shape = [1, shape[0], shape[1], 1]))

        self.blur = tf.nn.conv2d(self.I, self.kernel, strides = [1, 1, 1, 1], padding = 'SAME')

        self.u1m = tf.abs(self.blur - self.u1)
        self.u2m = tf.abs(self.blur - self.u2)

        ones = numpy.ones((1, shape[0], shape[1], 1)).astype('float32')
        zeros = numpy.zeros((1, shape[0], shape[1], 1)).astype('float32')

        self.lambda1 = lambda1
        self.lambda2 = lambda2
        self.mu = mu

        eta = 0.1
        self.conv = eta / (numpy.pi * (eta**2 + self.blur**2))

        self.u1t = self.lambda1 * tf.reduce_sum(tf.select(self.u2m > self.u1m, (self.Gv - self.u1)**2, zeros))
        self.u2t = self.lambda2 * tf.reduce_sum(tf.select(self.u2m <= self.u1m, (self.Gv - self.u2)**2, zeros))

        self.edgeLoss = self.mu * tf.reduce_sum(tf.abs(self.conv))

        self.loss = self.u1t + self.u2t + self.edgeLoss

        self.shape = shape

        self.train_step = tf.train.AdamOptimizer(1.0e-1).minimize(self.loss, var_list = [self.I, self.u1, self.u2])
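A minimal, hypothetical driver for the graph built above (a sketch assuming a TF 0.x runtime where tf.select exists, and that this __init__ belongs to a class named ChanVese):

import numpy
import tensorflow as tf

# Synthetic test image: a bright square on a dark background.
image = numpy.zeros((64, 64), dtype='float32')
image[20:44, 20:44] = 1.0

seg = ChanVese(image.shape)
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    sess.run([seg.initialize, seg.initialize2], feed_dict={seg.G: image})
    for _ in range(200):
        _, loss = sess.run([seg.train_step, seg.loss])
    # Label each pixel by whichever level (u1 or u2) the smoothed field is closer to.
    blurred, u1, u2 = sess.run([seg.blur, seg.u1, seg.u2])
    mask = numpy.abs(blurred[0, :, :, 0] - u1) < numpy.abs(blurred[0, :, :, 0] - u2)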
Example #2
def loss_estimate(batch_size,old_state,data,total_data,model_params,base_mean,base_log_var):
    clipped_log_vals, nan_mask, reset_rows = data


    zeros = tf.zeros_like(clipped_log_vals)
    state_init = model_params.init_state(batch_size)
    data_count = tf.reduce_sum(tf.to_float(tf.logical_not(nan_mask)),name='data_count')
    
    model_input = tf.select(nan_mask,zeros,clipped_log_vals)
    target_outputs = model_input

    sample_params = model_params.sample_vals(batch_size)

    #TODO verify significance of old_state
    filtered_state = tf.select(reset_rows,old_state,state_init)

    new_state,delta_mean = sample_inference(filtered_state,model_input,sample_params)
    variance = tf.exp(base_log_var)
    mean = base_mean + delta_mean * variance

    raw_losses = gaussian_neg_log_likelyhood(target_outputs,mean,variance)
    clean_raw_losses = tf.select(nan_mask,zeros,raw_losses)
    raw_loss = tf.reduce_sum(clean_raw_losses)

    kl_divergence = model_params.get_divergence()

    loss_estimate = raw_loss * (total_data / data_count) + kl_divergence

    return loss_estimate,new_state,kl_divergence
Example #3
    def build_mh_update(self):
        with tf.name_scope("gold_model"):
            self.joint_density_gold = self.joint_density(**self.symbols_gold)

        with tf.name_scope("proposed_model"):
            self.joint_density_proposed = self.joint_density(**self.symbols_proposed)
        with tf.name_scope("mh_updates"):            
            self.mh_ratio = self.joint_density_proposed - self.joint_density_gold
            self.uniform = tf.placeholder(dtype=tf.float32, name="u")
            log_uniform = tf.log(self.uniform)
            self.accepted = log_uniform < self.mh_ratio 
            
            update_ops = []
            for name, latent in self.latents.items():
                next_val = tf.select(self.accepted, latent["proposed"], latent["gold"])
                update_ops.append(latent["gold"].assign(next_val))

            self.step_counter = tf.Variable(0)
            self.accept_counter = tf.Variable(0)
            self.accept_rate = tf.to_double(self.accept_counter) / tf.to_double(self.step_counter)
            update_ops.append(self.step_counter.assign_add(1))
            update_ops.append(self.accept_counter.assign_add(tf.select(self.accepted, 1, 0)))
            
            self.global_update = tf.group(*update_ops)
                
        return self.global_update
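The tf.select in the update loop applies the standard Metropolis-Hastings acceptance test built just above; in plain numpy the rule reduces to the following (an illustration, not part of the original class):

import numpy as np

# Accept the proposal when log(u) < log p(proposed) - log p(gold).
log_p_gold, log_p_proposed = -10.0, -9.0
u = np.random.uniform()
accepted = np.log(u) < (log_p_proposed - log_p_gold)
# A ratio of +1 nat means acceptance with probability min(1, e**1) = 1, so this is always True.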
def evaluate_precision_recall(
    input_layer, labels, threshold=0.5, per_example_weights=None, name=PROVIDED, phase=Phase.train
):
    """Computes the precision and recall of the prediction vs the labels.

  Args:
    input_layer: A Pretty Tensor object.
    labels: The target labels to learn as a float tensor.
    threshold: The threshold to use to decide if the prediction is true.
    per_example_weights: A Tensor with a weight per example.
    name: An optional name.
    phase: The phase of this model; non-training phases compute a total across
      all examples.
  Returns:
    Precision and Recall.
  """
    _ = name  # Eliminate warning, name used for namescoping by PT.
    selected, sum_retrieved, sum_relevant = _compute_precision_recall(
        input_layer, labels, threshold, per_example_weights
    )

    if phase != Phase.train:
        dtype = tf.float32
        # Create the variables in all cases so that the load logic is easier.
        relevant_count = tf.get_variable(
            "relevant_count",
            [],
            dtype,
            tf.zeros_initializer,
            collections=[bookkeeper.GraphKeys.TEST_VARIABLES],
            trainable=False,
        )
        retrieved_count = tf.get_variable(
            "retrieved_count",
            [],
            dtype,
            tf.zeros_initializer,
            collections=[bookkeeper.GraphKeys.TEST_VARIABLES],
            trainable=False,
        )
        selected_count = tf.get_variable(
            "selected_count",
            [],
            dtype,
            tf.zeros_initializer,
            collections=[bookkeeper.GraphKeys.TEST_VARIABLES],
            trainable=False,
        )

        with input_layer.g.device(selected_count.device):
            selected = tf.assign_add(selected_count, selected)
        with input_layer.g.device(retrieved_count.device):
            sum_retrieved = tf.assign_add(retrieved_count, sum_retrieved)
        with input_layer.g.device(relevant_count.device):
            sum_relevant = tf.assign_add(relevant_count, sum_relevant)

    return (
        tf.select(tf.equal(sum_retrieved, 0), tf.zeros_like(selected), selected / sum_retrieved),
        tf.select(tf.equal(sum_relevant, 0), tf.zeros_like(selected), selected / sum_relevant),
    )
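The two tf.select calls in the return statement are zero-guards; the intended ratios, sketched in plain Python:

# Precision = selected / retrieved and recall = selected / relevant,
# but 0/0 is reported as 0.0 rather than NaN (a quick illustration, not library code).
selected, sum_retrieved, sum_relevant = 3.0, 4.0, 6.0
precision = 0.0 if sum_retrieved == 0 else selected / sum_retrieved   # 0.75
recall = 0.0 if sum_relevant == 0 else selected / sum_relevant        # 0.5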
Example #5
    def _loss_x_entropy(self, x, z, noise=None):
        with tf.name_scope("xentropy_loss"):
            z_clipped = tf.clip_by_value(z, FLAGS.zero_bound, FLAGS.one_bound)
            z_minus_1_clipped = tf.clip_by_value((1.0 - z), FLAGS.zero_bound, FLAGS.one_bound)
            x_clipped = tf.clip_by_value(x, FLAGS.zero_bound, FLAGS.one_bound)
            x_minus_1_clipped = tf.clip_by_value((1.0 - x), FLAGS.zero_bound, FLAGS.one_bound)
            
            # cross_entropy = x * log(z) + (1 - x) * log(1 - z)
            
            cross_entropy = tf.add(tf.mul(tf.log(z_clipped), x_clipped),
                                   tf.mul(tf.log(z_minus_1_clipped), x_minus_1_clipped), name='X-Entr')

            if noise:
                with tf.name_scope("Given_Emphasis"):
                    a, b = self._get_emph_params
                    corrupted = tf.select(noise, cross_entropy, tf.zeros_like(cross_entropy), name='Corrupted_Emphasis')
                    
                    # OR -- tf.select(tf.logical_not(noisy_points), cross_entropy, tf.zeros_like(cross_entropy), name='Uncorrupted_Emphasis')
                    uncorrupted = tf.select(noise, tf.zeros_like(cross_entropy), cross_entropy, name='Uncorrupted_Emphasis')
                    
                    loss = a * (-1 * tf.reduce_sum(corrupted, 1)) + b * (-1 * tf.reduce_sum(uncorrupted, 1))
            else:
                # Sum the cost for each example
                loss = -1 * tf.reduce_sum(cross_entropy, 1)
        
            # Reduce mean to find the overall cost of the loss
            cross_entropy_mean = tf.reduce_mean(loss, name='xentropy_mean')
    
            return cross_entropy_mean
Example #6
  def UpdateProbs(self, inp):
    """Update probabilities of each particle based on 2D matrix inp which is a 2D perspectiuve projection of the scene"""

    projection, onscreen = self.project()
    filtered_projection = tf.to_int64(tf.select(onscreen, projection, tf.zeros_like(projection)))
    per_state_probabilities = tf.gather_nd(inp, filtered_projection)
    
    filtered_probabilities = tf.select(onscreen, per_state_probabilities, tf.zeros_like(per_state_probabilities))
    
    new_state_indicies = tf.squeeze(tf.multinomial(tf.expand_dims(tf.log(filtered_probabilities),0), self.particles/10*9))
    
    new_state = tf.gather(self.state, new_state_indicies)
    
    # Add momentum
    new_state = tf.concat(1, [new_state[:, 0:3] + new_state[:, 3:6], new_state[:, 3:10]])
    
    # Add in particles for the "just come onscreen" case.
    new_state = tf.concat(0, [new_state, tf.random_normal([self.particles/10, 10]) * self.initial_std + self.initial_bias])

    
    new_state = new_state + tf.random_normal([self.particles, 10]) * self.update_std
    # Todo:  permute state by adding noise.

    
    return self.state.assign(new_state)
Example #7
 def updatesome():
     if reverse:
         return tf.select(
             tf.greater_equal(time, max_sequence_length-lengths),
             new_state,
             old_state)
     else:
         return tf.select(tf.less(time, lengths), new_state, old_state)
 def testShapeMismatch(self):
   c = np.random.randint(0, 2, 6).astype(np.bool).reshape(1, 3, 2)
   x = np.random.rand(1, 3, 2) * 100
   y = np.random.rand(2, 5, 3) * 100
   for t in [np.float32, np.float64, np.int32, np.int64, np.complex64]:
     xt = x.astype(t)
     yt = y.astype(t)
     with self.assertRaises(ValueError):
       tf.select(c, xt, yt)
Example #9
    def _copy_some_through(new_output, new_alpha, new_attn_ids, new_lmbdas, new_state):
        # Use broadcasting select to determine which values should get
        # the previous state & zero output, and which values should get
        # a calculated state & output.

        # Alpha needs to be (batch, tasks, k)
        copy_cond = (time >= sequence_length)
        return ([tf.select(copy_cond, zero_output, new_output),
                 tf.select(copy_cond, zero_alpha, new_alpha), # (batch, tasks, k)
                 tf.select(copy_cond, zero_attn_ids, new_attn_ids),
                 tf.select(copy_cond, zero_lmbdas, new_lmbdas)] +
                [tf.select(copy_cond, old_s, new_s)
                 for (old_s, new_s) in zip(state, new_state)])
Example #10
def _lcod(x, w_e, w_s, thresh, T):
    """
    Learned Coordinate Descent (LCoD). LCoD is an approximately sparse encoder. It
    approximates (in an L2 sense) a sparse code of `x` according to dictionary `w_e`.
    Note that during backpropagation, `w_e` isn't strictly a dictionary (i.e.
    dictionary atoms are not strictly normalized).

    LCoD is a differentiable version of greedy coordinate descent.

    Args:
      x: [n, n_f] tensor
      w_e: [n_f, n_c] encoder tensor
      w_s: [n_c, n_f] mutual inhibition tensor
      thresh: soft threshold
      T: number of iterations
    Returns:
      z: LCoD output
    """
    with tf.name_scope('itr_00'):
        b = tf.matmul(x, w_e, name='b')
        z = tf.zeros_like(b, dtype=tf.float32, name='z')

    for t in range(1, T):
        with tf.name_scope('itr_%02d' % t):
            z_bar = _st(b, thresh, name='z_bar')
            with tf.name_scope('greedy_heuristic'):
                # no tf.tile needed because tf.select will broadcast?
                if t > 1:
                    z_diff = tf.sub(z_bar, z, name='z_diff')
                else:
                    z_diff = z_bar
                abs_z_diff = tf.abs(z_diff, name='abs_z_diff')

                tmp = tf.reduce_max(abs_z_diff, 1, True)
                tmp2 = tf.equal(abs_z_diff, tmp)
                e = tf.select(tmp2, z_diff, tf.zeros_like(z_bar, dtype=tf.float32),
                           name='e')
                ks = tf.argmax(abs_z_diff, 1, name='ks')
                
            with tf.name_scope('update_b'):
                s_slices = tf.gather(w_s, ks, name='s_slices')
                b = tf.add(b, tf.mul(e, s_slices), name='b')

            with tf.name_scope('update_z'):
                z = tf.select(tmp2, z_bar, z, name='z')

    with tf.name_scope('itr_%02d' % T):
        z = _st(b, thresh, name='z')

    return z
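_st is not defined in this snippet; a plausible soft-thresholding helper, consistent with how it is called above (an assumption, not the original code):

def _st(b, thresh, name=None):
    # Soft-thresholding (shrinkage) operator: shrink every coefficient toward
    # zero by `thresh` and clamp whatever falls below zero at zero.
    with tf.name_scope(name or 'soft_threshold'):
        return tf.sign(b) * tf.nn.relu(tf.abs(b) - thresh)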
Example #11
    def _corrupt(self, x, ratio, n_type='MN'):
        with tf.name_scope("Corruption"):
            """ Noise adding (or input corruption)
            This function adds noise to the given data.
            
            Args:
                x    : The input data for the noise to be applied
                ratio: The percentage of the data affected by the noise addition
                n_type: The type of noise to be applied.
                        Choices: MN (masking noise), SP (salt-and-pepper noise)
            """
            
            # Safety check. If unspecified noise type given, use Masking noise instead.
            if n_type != 'MN' and n_type != 'SP' and n_type != 'TFDO':
                n_type = 'MN'
                print("Unknown noise type. Masking noise will be used instead.")
            
            
            # if there is no noise to be added there is no need to proceed further
            if ratio == 0.0:
                return x, None
            
            if n_type == 'TFDO':
                x_tilde = tf.nn.dropout(x, keep_prob= 1 - ratio)
#                 points_to_alter = x_tilde == 0.
#                 print points_to_alter
#                 x_tilde = tf.select(points_to_alter, tf.add(tf.zeros_like(x_tilde, dtype=tf.float32),
#                                                                 FLAGS.zero_bound), x_tilde, name='X_tilde')
#                 x_tilde[x_tilde == 0.] = tf.constant(FLAGS.zero_bound)
            else:
                # It makes a copy of the data, otherwise 'target_feed' will also be affected
                x_tilde = tf.identity(x, name='X_tilde')
                shape = tf.Tensor.get_shape(x_tilde)
                # Creating and applying random noise to the data. (Masking noise)
                points_to_alter = tf.random_uniform(shape=shape, dtype=tf.float32) < ratio
                
                if n_type == 'MN':
                    x_tilde = tf.select(points_to_alter, tf.add(tf.zeros_like(x_tilde, dtype=tf.float32),
                                                                FLAGS.zero_bound), x_tilde, name='X_tilde')
                    
                elif n_type == 'SP':
                    coin_flip = np.asarray([np.random.choice([FLAGS.zero_bound, FLAGS.one_bound]) for _ in range(shape[0]) for _ in range(shape[1])]).reshape(shape)
                    x_tilde = tf.select(points_to_alter, tf.to_float(coin_flip), x_tilde, name='X_tilde')

                
            # Also returns the 'points_to_alter' in case of applied Emphasis
            if not FLAGS.emphasis or n_type == 'TFDO':
                points_to_alter = None
    
            return x_tilde, points_to_alter
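The masking-noise branch above has a simple numpy analogue (FLAGS.zero_bound is assumed here to be a small constant such as 1e-9):

import numpy as np

zero_bound = 1e-9                                          # stand-in for FLAGS.zero_bound
x = np.random.rand(4, 5).astype('float32')
points_to_alter = np.random.uniform(size=x.shape) < 0.3    # corrupt roughly 30% of entries
x_tilde = np.where(points_to_alter, zero_bound, x)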
Example #12
def huber_loss(y_true, y_pred, clip_value):
    # Huber loss, see https://en.wikipedia.org/wiki/Huber_loss and
    # https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b
    # for details.
    assert clip_value > 0.

    x = y_true - y_pred
    if np.isinf(clip_value):
        # Special case for infinity since TensorFlow has problems
        # if we compare `K.abs(x) < np.inf`.
        return .5 * K.square(x)

    condition = K.abs(x) < clip_value
    squared_loss = .5 * K.square(x)
    linear_loss = clip_value * (K.abs(x) - .5 * clip_value)
    if K.backend() == 'tensorflow':
        import tensorflow as tf
        if hasattr(tf, 'select'):
            return tf.select(condition, squared_loss, linear_loss)  # condition, true, false
        else:
            return tf.where(condition, squared_loss, linear_loss)  # condition, true, false
    elif K.backend() == 'theano':
        from theano import tensor as T
        return T.switch(condition, squared_loss, linear_loss)
    else:
        raise RuntimeError('Unknown backend "{}".'.format(K.backend()))
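For a quick sanity check of the piecewise rule above, the same loss in plain numpy:

import numpy as np

def huber_np(x, clip_value=1.0):
    # Quadratic inside the clip region, linear outside -- same rule as above.
    return np.where(np.abs(x) < clip_value,
                    0.5 * np.square(x),
                    clip_value * (np.abs(x) - 0.5 * clip_value))

print(huber_np(np.array([-3.0, -0.5, 0.0, 0.5, 3.0])))
# [2.5   0.125 0.    0.125 2.5  ]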
Example #13
def noisy_activation(x, generic, linearized, training, alpha=1.1, c=0.5):
    """
    Implements the noisy activation with Half-Normal Noise for Hard-Saturation
    functions. See http://arxiv.org/abs/1603.00391, Algorithm 1.

    Args:

        x: Tensor which is an input to the activation function

        generic: The generic formulation of the activation function. (denoted
            as h in the paper)

        linearized: Linearization of the activation based on the first-order
            Taylor expansion around zero. (denoted as u in the paper)

        training: A boolean tensor telling whether we are in the training stage
            (and the noise is sampled) or at runtime, when the expectation is
            used instead.

        alpha: Mixing hyper-parameter. The leakage rate from the linearized
            function to the nonlinear one.

        c: Standard deviation of the sampled noise.

    """

    delta = generic(x) - linearized(x)
    d = -tf.sign(x) * tf.sign(1 - alpha)
    p = tf.Variable(1.0)
    scale = c * (tf.sigmoid(p * delta) - 0.5)  ** 2
    noise = tf.select(training, tf.abs(tf.random_normal([])), math.sqrt(2 / math.pi))
    activation = alpha * generic(x) + (1 - alpha) * linearized(x) + d * scale * noise
    return activation
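A hypothetical call, assuming a hard-tanh nonlinearity whose first-order linearization around zero is the identity (both choices are assumptions, not taken from the snippet):

def hard_tanh(v):
    return tf.clip_by_value(v, -1.0, 1.0)

x = tf.placeholder(tf.float32, [None])
is_training = tf.placeholder(tf.bool, [])
y = noisy_activation(x, hard_tanh, lambda v: v, is_training)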
Example #14
 def _compare(self, c, x, y, use_gpu):
   np_ans = np.where(c, x, y)
   with self.test_session(use_gpu=use_gpu):
     out = tf.select(c, x, y)
     tf_ans = out.eval()
   self.assertAllEqual(np_ans, tf_ans)
   self.assertShapeEqual(np_ans, out)
Example #15
    def _build_graph(self, inputs, is_training):
        state, action, reward, next_state, isOver = inputs
        self.predict_value = self._get_DQN_prediction(state, is_training)
        action_onehot = tf.one_hot(action, NUM_ACTIONS)
        pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1)    #N,
        max_pred_reward = tf.reduce_mean(tf.reduce_max(
            self.predict_value, 1), name='predict_reward')
        add_moving_summary(max_pred_reward)
        self.greedy_choice = tf.argmax(self.predict_value, 1)   # N,

        with tf.variable_scope('target'):
            targetQ_predict_value = self._get_DQN_prediction(next_state, False)    # NxA

            # DQN
            #best_v = tf.reduce_max(targetQ_predict_value, 1)    # N,

            # Double-DQN
            predict_onehot = tf.one_hot(self.greedy_choice, NUM_ACTIONS, 1.0, 0.0)
            best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)

            target = reward + (1.0 - tf.cast(isOver, tf.float32)) * GAMMA * tf.stop_gradient(best_v)

        sqrcost = tf.square(target - pred_action_value)
        abscost = tf.abs(target - pred_action_value)    # robust error func
        cost = tf.select(abscost < 1, sqrcost, abscost)
        summary.add_param_summary([('conv.*/W', ['histogram', 'rms']),
                                   ('fc.*/W', ['histogram', 'rms']) ])   # monitor all W
        self.cost = tf.reduce_mean(cost, name='cost')
def proximal_step(train_op, lr):
  # Apply weight decay for the variables with l2 loss
  # If basenet weights are trained together, do not set a weight decay on the
  # conv layers of the basenet
  l2_op_list = []
  l1_op_list = []
  with tf.control_dependencies([train_op]):
    if L2_LOSS_WEIGHT > 0:
      for var in tf.get_collection(utils.WEIGHT_DECAY_KEY):
        assign_op = var.assign_add(- lr * tf.convert_to_tensor(L2_LOSS_WEIGHT) * var)
        l2_op_list.append(assign_op)
        print('\tL2 loss added: %s(strength: %f)' % (var.name, L2_LOSS_WEIGHT))

    # Apply proximal gradient for the variables with l1 lasso loss
    # Non-negative weights constraint
    if L1_LOSS_WEIGHT > 0:
      for var in tf.get_collection(utils.LASSO_KEY):
        th_t = tf.fill(tf.shape(var), tf.convert_to_tensor(L1_LOSS_WEIGHT) * lr)
        zero_t = tf.zeros(tf.shape(var))
        var_temp = var - th_t * tf.sign(var)
        assign_op = var.assign(tf.select(tf.less(var, th_t), zero_t, var_temp))
        l1_op_list.append(assign_op)
        print('\tL1 loss added: %s(strength: %f)' % (var.name, L1_LOSS_WEIGHT))

  with tf.control_dependencies(l2_op_list + l1_op_list):
    train_op = tf.no_op(name='proximal_step')

  return train_op
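The assign on the lasso variables is a soft-thresholding (proximal) step: values below the threshold snap to zero and the rest are shrunk toward zero. A numpy illustration of the same rule:

import numpy as np

lr, l1_weight = 0.1, 0.1
th = lr * l1_weight
var = np.array([0.003, 0.05, 0.5])
var_new = np.where(var < th, 0.0, var - th * np.sign(var))
print(var_new)   # [0.    0.04  0.49]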
Example #17
        def con1():

            values, top_nodes = top_k_in_2dim_tensor(value_mat, cur_beam_size)
            beam_new = self._concat_top_nodes(beams, top_nodes)
            beam_size_tmp = tf.mul(cur_beam_size, self.output_size)
            beam_size = tf.select(tf.less(beam_size_tmp, self.beam_size), beam_size_tmp, self.beam_size)
            return values, beam_new, beam_size
Example #18
def get_total_loss(input_sequence, ngram_predictions, outputs, expected_sequence):
  if args.bootstrap_out:
    outputs = tf.add(outputs, tf.log(ngram_predictions))
  # [batch_size, unrolled_iterations]
  losses = tf.nn.sparse_softmax_cross_entropy_with_logits(outputs, expected_sequence)
  losses = tf.select(tf.equal(input_sequence, data.EOS), tf.zeros_like(losses), losses)
  return tf.reduce_sum(losses)
Example #19
def set_logp_to_neg_inf(X, logp, bounds):
    """Set `logp` to negative infinity when `X` is outside the allowed bounds.

    # Arguments
        X: tensorflow.Tensor
            The variable to apply the bounds to
        logp: tensorflow.Tensor
            The log probability corresponding to `X`
        bounds: list of `Region` objects
            The regions corresponding to the allowed regions of `X`

    # Returns
        logp: tensorflow.Tensor
            The newly bounded log probability
    """
    conditions = []
    for l, u in bounds:
        lower_is_neg_inf = not isinstance(l, tf.Tensor) and np.isneginf(l)
        upper_is_pos_inf = not isinstance(u, tf.Tensor) and np.isposinf(u)

        if not lower_is_neg_inf and upper_is_pos_inf:
            conditions.append(tf.greater(X, l))
        elif lower_is_neg_inf and not upper_is_pos_inf:
            conditions.append(tf.less(X, u))
        elif not (lower_is_neg_inf or upper_is_pos_inf):
            conditions.append(tf.logical_and(tf.greater(X, l), tf.less(X, u)))

    if len(conditions) > 0:
        is_inside_bounds = conditions[0]
        for condition in conditions[1:]:
            is_inside_bounds = tf.logical_or(is_inside_bounds, condition)

        logp = tf.select(is_inside_bounds, logp, tf.fill(tf.shape(X), config.dtype(-np.inf)))

    return logp
    def _create_state(self, batch_size, dtype, cell_state=None):
        cand_symbols = tf.fill([batch_size, self.max_len],
                               tf.constant(self.start_token, dtype=tf.int32))
        cand_logprobs = tf.ones((batch_size,), dtype=tf.float32) * -float('inf')
        cand_symbols.set_shape([batch_size, self.max_len])

        if cell_state is None:
            cell_state = self.cell.zero_state(batch_size*self.beam_size, dtype=dtype)
        else:
            cell_state = BeamDecoder._tile_along_beam(self.beam_size, cell_state)
        full_size = batch_size * self.beam_size
        first_in_beam_mask = tf.equal(tf.range(full_size) % self.beam_size, 0)

        beam_symbols = tf.fill([full_size, self.max_len],
                               tf.constant(self.start_token, dtype=tf.int32))
        beam_logprobs = tf.select(
            first_in_beam_mask,
            tf.fill([full_size], 0.0),
            tf.fill([full_size], -1e18), # top_k does not play well with -inf
                                         # TODO: dtype-dependent value here
        )

        return (
            cand_symbols,
            cand_logprobs,
            beam_symbols,
            beam_logprobs,
            cell_state
        )
Example #21
 def compute_max_or_min(self, select, maxi=True):
   #computes the argmax and argmin of a column with probabilistic row selection
   answer = tf.zeros([
       self.batch_size, self.num_cols + self.num_word_cols, self.max_elements
   ], self.data_type)
   sum_prob = tf.zeros([self.batch_size, self.num_cols + self.num_word_cols],
                       self.data_type)
   for j in range(self.max_elements):
     if (maxi):
       curr_pos = j
     else:
       curr_pos = self.max_elements - 1 - j
     select_index = tf.slice(self.full_processed_sorted_index_column,
                             [0, 0, curr_pos], [self.batch_size, -1, 1])
     select_mask = tf.equal(
         tf.tile(
             tf.expand_dims(
                 tf.tile(
                     tf.expand_dims(tf.range(self.max_elements), 0),
                     [self.batch_size, 1]), 1),
             [1, self.num_cols + self.num_word_cols, 1]), select_index)
     curr_prob = tf.expand_dims(select, 1) * tf.cast(
         select_mask, self.data_type) * self.select_bad_number_mask
     curr_prob = curr_prob * tf.expand_dims((1 - sum_prob), 2)
     curr_prob = curr_prob * tf.expand_dims(
         tf.cast((1 - sum_prob) > 0.0, self.data_type), 2)
     answer = tf.select(select_mask, curr_prob, answer)
     sum_prob += tf.reduce_sum(curr_prob, 2)
   return answer
Example #22
def reduce_mean(seq_batch, allow_empty=False):
    """Compute the mean of each sequence in a SequenceBatch.

    Args:
        seq_batch (SequenceBatch): a SequenceBatch with the following attributes:
            values (Tensor): a Tensor of shape (batch_size, seq_length, :, ..., :)
            mask (Tensor): if the mask values are arbitrary floats (rather than binary), the mean will be
            a weighted average.
        allow_empty (bool): allow computing the average of an empty sequence. In this case, we assume 0/0 == 0, rather
            than NaN. Default is False, causing an error to be thrown.

    Returns:
        Tensor: of shape (batch_size, :, ..., :)
    """
    values, mask = seq_batch.values, seq_batch.mask
    # compute weights for the average
    sums = tf.reduce_sum(mask, 1, keep_dims=True)  # (batch_size, 1)

    if allow_empty:
        asserts = []  # no assertion
        sums = tf.select(tf.equal(sums, 0), tf.ones(tf.shape(sums)), sums)  # replace 0's with 1's
    else:
        asserts = [tf.assert_positive(sums)]  # throw error if 0's exist

    with tf.control_dependencies(asserts):
        weights = mask / sums  # (batch_size, seq_length)
    return weighted_sum(seq_batch, weights)
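A small numpy illustration of the masked mean computed above (assumed shapes: values (batch, seq, d), mask (batch, seq)):

import numpy as np

values = np.array([[[1.0], [3.0], [100.0]]])   # (1, 3, 1); last step is padding
mask = np.array([[1.0, 1.0, 0.0]])             # (1, 3)
sums = mask.sum(axis=1, keepdims=True)         # (1, 1)
weights = mask / sums                          # (1, 3)
print((values * weights[..., None]).sum(axis=1))   # [[2.]] -- mean of 1 and 3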
 def _compare(self, c, x, y, use_gpu):
     np_ans = np.dstack([x_i if c_i else y_i for c_i, x_i, y_i in zip(c, x, y)]).transpose([2, 0, 1])
     with self.test_session(use_gpu=use_gpu):
         out = tf.select(c, x, y)
         tf_ans = out.eval()
     self.assertAllEqual(np_ans, tf_ans)
     self.assertShapeEqual(np_ans, out)
Example #24
def step(x):
    """Theano step function"""
    if (_BACKEND == 'tensorflow'):
        import tensorflow as tf
        return tf.select(tf.python.math_ops.greater(x, 0), K.ones_like(x), K.zeros_like(x))
    else:
        return K.switch(x > 0, 1, 0)
Example #25
 def compute_ans(op_embedding, comparison):
   op_embedding = tf.expand_dims(op_embedding, 0)
   #dot product of operation embedding with hidden state to the left of the number occurence
   first = tf.transpose(
       tf.matmul(op_embedding,
                 tf.transpose(
                     tf.reduce_sum(hidden_vectors * tf.tile(
                         tf.expand_dims(
                             tf.transpose(self.batch_ordinal_question), 2),
                         [1, 1, self.utility.FLAGS.embedding_dims]), 0))))
   second = self.batch_question_number_one_mask + tf.transpose(
       tf.matmul(op_embedding,
                 tf.transpose(
                     tf.reduce_sum(hidden_vectors * tf.tile(
                         tf.expand_dims(
                             tf.transpose(self.batch_ordinal_question_one), 2
                         ), [1, 1, self.utility.FLAGS.embedding_dims]), 0))))
   question_number_softmax = tf.nn.softmax(tf.concat(1, [first, second]))
   if (self.mode == "test"):
     cond = tf.equal(question_number_softmax,
                     tf.reshape(
                         tf.reduce_max(question_number_softmax, 1),
                         [self.batch_size, 1]))
     question_number_softmax = tf.select(
         cond,
         tf.fill(tf.shape(question_number_softmax), 1.0),
         tf.fill(tf.shape(question_number_softmax), 0.0))
     question_number_softmax = tf.cast(question_number_softmax,
                                       self.data_type)
   ans = tf.reshape(
       tf.reduce_sum(question_number_softmax * tf.concat(
           1, [self.batch_question_number, self.batch_question_number_one]),
                     1), [self.batch_size, 1])
   return ans
Example #26
def huber_loss(x, delta=1.0):
    # https://en.wikipedia.org/wiki/Huber_loss
    return tf.select(
        tf.abs(x) < delta,
        tf.square(x) * 0.5,
        delta * (tf.abs(x) - 0.5 * delta)
    )
Example #27
    def custom_rnn_autodecoder(decoder_inputs, initial_input, initial_state, cell, scope=None):
      # customized rnn_decoder for the task of dealing with end of character
      with tf.variable_scope(scope or "rnn_decoder"):
        states = [initial_state]
        outputs = []
        prev = None

        for i in xrange(len(decoder_inputs)):
          inp = decoder_inputs[i]
          if i > 0:
            tf.get_variable_scope().reuse_variables()
          output, new_state = cell(inp, states[-1])

          num_batches = self.args.batch_size # new_state.get_shape()[0].value
          num_state = new_state.get_shape()[1].value

          # if the input has an end-of-character signal, have to zero out the state

          #to do:  test this code.

          eoc_detection = inp[:,3]
          eoc_detection = tf.reshape(eoc_detection, [num_batches, 1])

          eoc_detection_state = tfrepeat(eoc_detection, num_state)

          eoc_detection_state = tf.greater(eoc_detection_state, tf.zeros_like(eoc_detection_state, dtype=tf.float32))

          new_state = tf.select(eoc_detection_state, initial_state, new_state)

          outputs.append(output)
          states.append(new_state)
      return outputs, states
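tfrepeat is not shown in this snippet; a plausible definition, consistent with how it is used above (an assumption, not the original helper):

def tfrepeat(tensor, times):
    # Tile a (batch, 1) column `times` times along axis 1 so it matches the
    # width of the recurrent state.
    return tf.tile(tensor, [1, times])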
def slice_constant(data, batch_size=32, name='constant_data', global_step=None):
  """Provide a slice based on the global_step.

  This is useful when the entire data array can be stored in memory because it
  allows you to feed the data very efficiently.

  Args:
    data: A numpy array or tensor.
    batch_size: The batch size for the produced data.
    name: An optional name for this data.
    global_step: A global step variable that is used to read the data. If None
      then the default prettytensor global_step is used.
  Returns:
    A tensor that produces the given data.
  """
  with tf.name_scope(name):
    all_data = tf.convert_to_tensor(data)
    global_step = global_step or bookkeeper.global_step()

    count = len(data) / batch_size
    extra = len(data) - count * batch_size

    if extra:
      offset = tf.mod(global_step, count)
      return tf.slice(all_data, offset * batch_size, batch_size)
    else:
      offset = tf.mod(global_step, count + 1)
      return tf.slice(all_data, offset * batch_size,
                      tf.select(tf.equal(offset, count), extra, batch_size))
    def __init__(self, action1_bounds, action2_bounds, session):
        self.graph = session.graph
        with self.graph.as_default():
            self.sess = session

            self.action_bounds = [[action1_bounds[1], action2_bounds[1]],
                                  [action1_bounds[0], action2_bounds[0]]]

            self.action_size = len(self.action_bounds[0])
            self.action_input = tf.placeholder(tf.float32, [None, self.action_size])

            self.p_max = tf.constant(self.action_bounds[0], dtype=tf.float32)
            self.p_min = tf.constant(self.action_bounds[1], dtype=tf.float32)

            self.p_range = tf.constant([x - y for x, y in zip(self.action_bounds[0], self.action_bounds[1])],
                                       dtype=tf.float32)

            self.p_diff_max = tf.div(-self.action_input + self.p_max, self.p_range)
            self.p_diff_min = tf.div(self.action_input - self.p_min, self.p_range)

            self.zeros_act_grad_filter = tf.zeros([self.action_size])
            self.act_grad = tf.placeholder(tf.float32, [None, self.action_size])

            self.grad_inverter = tf.select(tf.greater(self.act_grad, self.zeros_act_grad_filter),
                                           tf.mul(self.act_grad, self.p_diff_max),
                                           tf.mul(self.act_grad, self.p_diff_min))
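The grad_inverter expression implements gradient inverting for bounded actions: upward gradients are scaled by the remaining headroom to the upper bound, downward ones by the headroom to the lower bound. A quick numpy check of the same rule:

import numpy as np

p, p_min, p_max = 0.9, -1.0, 1.0
p_range = p_max - p_min
grad = np.array([0.5, -0.5])
inverted = np.where(grad > 0,
                    grad * (p_max - p) / p_range,
                    grad * (p - p_min) / p_range)
print(inverted)   # [ 0.025 -0.475]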
Example #30
        def loop(step_, beams_, beam_value_, golden_value_, golden_inside_, step_valid_, g_id_, golden_record, beam_record):
            cur_feat_x_ = tf.gather(x, step_)
            cur_golden_path_ = tf.gather(golden_path, tf.range(step_))
            cur_golden_feat_ = self._add_tag_dynamic(cur_feat_x_, cur_golden_path_)
            # cur_golden_output_ = self._build_cnn(cur_golden_feat_)
            cur_golden_output_ = build(cur_golden_feat_)
            cur_golden_node_ = tf.gather(golden_path, tf.reshape(step_, [1]))
            golden_value_ = tf.add(golden_value_,
                                  tf.slice(cur_golden_output_, tf.concat(0, [[0], cur_golden_node_]), [1, 1]))

            cur_beam_ = tf.unpack(beams_, num=self.beam_size)
            cur_beam_feat_ = tf.concat(0, [self._add_tag_dynamic(cur_feat_x_, tf.reshape(e, [-1])) for e in cur_beam_])
            # cur_beam_output_ = self._build_cnn(cur_beam_feat_)
            cur_beam_output_ = build(cur_beam_feat_)

            golden_record = golden_record.write(step_, cur_golden_output_)
            beam_record = beam_record.write(step_, cur_beam_output_)

            beam_value_, beams_ = self._top_beams_new(cur_beam_output_, beam_value_, beams_)
            new_golden_path_ = tf.gather(golden_path, tf.range(step_ + 1))
            # golden_beam_id_ = index_of_tensor(new_golden_path_, beams_)
            g_id_ = index_of_tensor(new_golden_path_, beams_)
            golden_inside_ = tf.select(tf.less(tf.shape(g_id_)[0], 1),
                                       tf.constant(False, tf.bool), tf.constant(True, tf.bool))

            step_valid_ = tf.logical_and(tf.less(step_+1, length), tf.less(step_+1, self.max_step_tracked))
            return [step_ + 1, beams_, beam_value_, golden_value_, golden_inside_, step_valid_, g_id_, golden_record, beam_record]
Example #31
def ClippedError(x):
  # Huber loss
  return tf.select(tf.abs(x) < 1.0, 0.5 * tf.square(x), tf.abs(x) - 0.5)
Example #32
def random_binomial(shape, p=0.0, dtype=_FLOATX, seed=None):
    if seed is None:
        seed = np.random.randint(10e6)
    return tf.select(tf.random_uniform(shape, dtype=dtype, seed=seed) <= p,
                     tf.ones(shape), tf.zeros(shape))
 def _relu(self, x, leakiness=0.0):
     """Relu, with optional leaky support."""
     return tf.select(tf.less(x, 0.0), leakiness * x, x, name='leaky_relu')
    def beam_loop(self, time, cell_output, cell_state, loop_state):
        (
            past_cand_symbols,  # [batch_size, time-1]
            past_cand_logprobs,  # [batch_size]
            past_beam_symbols,  # [batch_size*beam_size, time-1], right-aligned
            past_beam_logprobs,  # [batch_size*beam_size]
        ) = loop_state

        # We don't actually use this, but emit_output is required to match the
        # cell output size specification. Otherwise we would leave this as None.
        emit_output = cell_output

        # 1. Get scores for all candidate sequences

        logprobs = self.outputs_to_score_fn(cell_output)

        try:
            num_classes = int(logprobs.get_shape()[-1])
        except:
            # Shape inference failed
            num_classes = tf.shape(logprobs)[-1]

        logprobs_batched = tf.reshape(
            logprobs + tf.expand_dims(
                tf.reshape(past_beam_logprobs,
                           [self.batch_size, self.beam_size]), 2),
            [self.batch_size, self.beam_size * num_classes])

        # 2. Determine which states to pass to next iteration

        # TODO(nikita): consider using slice+fill+concat instead of adding a mask
        nondone_mask = tf.reshape(
            tf.cast(tf.equal(tf.range(num_classes), self.stop_token),
                    tf.float32) * self.INVALID_SCORE, [1, 1, num_classes])

        nondone_mask = tf.reshape(
            tf.tile(nondone_mask, [1, self.beam_size, 1]),
            [-1, self.beam_size * num_classes])  # disable all finished paths

        beam_logprobs, indices = tf.nn.top_k(logprobs_batched + nondone_mask,
                                             self.beam_size)
        beam_logprobs = tf.reshape(beam_logprobs, [-1])

        # For continuing to the next symbols
        # TODO: add a condition so that only top-K entries that do not end with the stop token continue
        symbols = indices % num_classes  # [batch_size, self.beam_size]
        parent_refs = indices // num_classes  # [batch_size, self.beam_size]

        symbols_history = flat_batch_gather(past_beam_symbols,
                                            parent_refs,
                                            batch_size=self.batch_size,
                                            options_size=self.beam_size)
        beam_symbols = concat_op(
            [symbols_history, tf.reshape(symbols, [-1, 1])], 1)

        # Handle the output and the cell state shuffling
        next_cell_state = nest_map(
            lambda element: batch_gather(element,
                                         parent_refs,
                                         batch_size=self.batch_size,
                                         options_size=self.beam_size),
            cell_state)

        next_input = self.tokens_to_inputs_fn(
            tf.reshape(symbols, [-1, self.beam_size]))

        # 3. Update the candidate pool to include entries that just ended with a stop token
        # TODO: this does not check whether the stop token is actually in the top K, so it could include partial captions
        logprobs_done = tf.reshape(
            logprobs_batched,
            [-1, self.beam_size, num_classes])[:, :, self.stop_token]
        done_parent_refs = tf.argmax(logprobs_done, 1)
        done_symbols = flat_batch_gather(past_beam_symbols,
                                         done_parent_refs,
                                         batch_size=self.batch_size,
                                         options_size=self.beam_size)

        logprobs_done_max = tf.reduce_max(logprobs_done, 1)

        cand_symbols_unpadded = tf.select(
            logprobs_done_max > past_cand_logprobs, done_symbols,
            past_cand_symbols)
        cand_logprobs = tf.maximum(logprobs_done_max, past_cand_logprobs)

        cand_symbols = concat_op([
            cand_symbols_unpadded,
            tf.fill([self.batch_size, 1], self.stop_token)
        ], 1)

        # 4. Check the stopping criteria

        if self.max_len is not None:
            elements_finished_clip = (time >= self.max_len)

        if self.score_upper_bound is not None:
            elements_finished_bound = tf.reduce_max(
                tf.reshape(beam_logprobs, [-1, self.beam_size]),
                1) < (cand_logprobs - self.score_upper_bound)

        if self.max_len is not None and self.score_upper_bound is not None:
            elements_finished = elements_finished_clip | elements_finished_bound
        elif self.score_upper_bound is not None:
            elements_finished = elements_finished_bound
        elif self.max_len is not None:
            # this broadcasts elements_finished_clip to the correct shape
            elements_finished = tf.zeros(
                [self.batch_size], dtype=tf.bool) | elements_finished_clip
        else:
            assert False, "Lack of stopping criterion should have been caught in constructor"

        # 5. Prepare return values
        # While loops require strict shape invariants, so we manually set shapes
        # in case the automatic shape inference can't calculate these. Even when
        # this is redundant it has the benefit of helping catch shape bugs.

        for tensor in list(nest.flatten(next_input)) + list(
                nest.flatten(next_cell_state)):
            tensor.set_shape(
                tf.TensorShape(
                    (self.inferred_batch_size,
                     self.beam_size)).concatenate(tensor.get_shape()[2:]))

        for tensor in [cand_symbols, cand_logprobs, elements_finished]:
            tensor.set_shape(
                tf.TensorShape((self.inferred_batch_size, )).concatenate(
                    tensor.get_shape()[1:]))

        for tensor in [beam_symbols, beam_logprobs]:
            tensor.set_shape(
                tf.TensorShape(
                    (self.inferred_batch_size_times_beam_size, )).concatenate(
                        tensor.get_shape()[1:]))

        next_loop_state = (
            cand_symbols,
            cand_logprobs,
            beam_symbols,
            beam_logprobs,
        )

        return (elements_finished, next_input, next_cell_state, emit_output,
                next_loop_state)
Example #35
def create(model, config):
    dim_v, dim_i, dim_d, dim_t, dim_b, dim_n, dim_c = config.getint(
        'vocabsize'), config.getint('wvecsize'), config.getint(
            'depth'), config.getint('steps'), config.getint(
                'batch'), config.getint('deepness'), config.getint('classes')
    lrate_ms, dstep_ms, drate_ms, optim_ms = config.getfloat(
        'mslrate'), config.getint('msdstep'), config.getfloat(
            'msdrate'), getattr(tf.train, config.get('msoptim'))
    lrate_ce, dstep_ce, drate_ce, optim_ce = config.getfloat(
        'celrate'), config.getint('cedstep'), config.getfloat(
            'cedrate'), getattr(tf.train, config.get('ceoptim'))

    with tf.name_scope('embedding'):
        model['We'] = tf.Variable(tf.truncated_normal([dim_v, dim_i],
                                                      stddev=1.0 / dim_i),
                                  name='We')
        model['Be'] = tf.Variable(tf.truncated_normal([1, dim_i],
                                                      stddev=1.0 / dim_i),
                                  name='Be')

    with tf.name_scope('plstm'):
        with tf.name_scope('input'):
            for ii in xrange(dim_t):
                model['pxi_%i' % ii] = tf.placeholder(tf.int32, [dim_b],
                                                      name='pxi_%i' % ii)
                model['px_%i' % ii] = tf.add(tf.nn.embedding_lookup(
                    model['We'], model['pxi_%i' % ii]),
                                             model['Be'],
                                             name='px_%i' % ii)

        with tf.name_scope('label'):
            for ii in xrange(dim_t):
                model['pyi_%i' % ii] = tf.placeholder(tf.int32, [dim_b],
                                                      name='pyi_%i' % ii)
                model['py_%i' % ii] = tf.add(tf.nn.embedding_lookup(
                    model['We'], model['pyi_%i' % ii]),
                                             model['Be'],
                                             name='py_%i' % ii)

        for i in xrange(dim_d):
            with tf.name_scope('input_%i' % i):
                for ii in xrange(dim_t):
                    model['pFx_%i_%i' %
                          (i,
                           ii)] = model['px_%i' %
                                        ii] if i == 0 else model['pFh_%i_%i' %
                                                                 (i - 1, ii)]
                    model['pBx_%i_%i' %
                          (i,
                           ii)] = model['px_%i' %
                                        ii] if i == 0 else model['pBh_%i_%i' %
                                                                 (i - 1, ii)]

            with tf.name_scope('inputgate_%i' % i):
                model['pFWi_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='pFWi_%i' % i)
                model['pFBi_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='pFBi_%i' % i)
                model['pBWi_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='pBWi_%i' % i)
                model['pBBi_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='pBBi_%i' % i)
                for ii in xrange(dim_t):
                    model['pFi_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['pFx_%i_%i' % (i, ii)],
                                      model['pFWi_%i' % i]),
                            model['pFBi_%i' % i]),
                        name='pFi_%i_%i' % (i, ii))
                    model['pBi_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['pBx_%i_%i' % (i, ii)],
                                      model['pBWi_%i' % i]),
                            model['pBBi_%i' % i]),
                        name='pBi_%i_%i' % (i, ii))

            with tf.name_scope('forgetgate_%i' % i):
                model['pFWf_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='pFWf_%i' % i)
                model['pFBf_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='pFBf_%i' % i)
                model['pBWf_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='pBWf_%i' % i)
                model['pBBf_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='pBBf_%i' % i)
                for ii in xrange(dim_t):
                    model['pFf_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['pFx_%i_%i' % (i, ii)],
                                      model['pFWf_%i' % i]),
                            model['pFBf_%i' % i]),
                        name='pFf_%i_%i' % (i, ii))
                    model['pBf_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['pBx_%i_%i' % (i, ii)],
                                      model['pBWf_%i' % i]),
                            model['pBBf_%i' % i]),
                        name='pBf_%i_%i' % (i, ii))

            with tf.name_scope('outputgate_%i' % i):
                model['pFWo_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='pFWo_%i' % i)
                model['pFBo_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='pFBo_%i' % i)
                model['pBWo_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='pBWo_%i' % i)
                model['pBBo_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='pBBo_%i' % i)
                for ii in xrange(dim_t):
                    model['pFo_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['pFx_%i_%i' % (i, ii)],
                                      model['pFWo_%i' % i]),
                            model['pFBo_%i' % i]),
                        name='pFo_%i_%i' % (i, ii))
                    model['pBo_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['pBx_%i_%i' % (i, ii)],
                                      model['pBWo_%i' % i]),
                            model['pBBo_%i' % i]),
                        name='pBo_%i_%i' % (i, ii))

            with tf.name_scope('cellstate_%i' % i):
                model['pFWc_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='pFWc_' + str(i))
                model['pFBc_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='pFBc_' + str(i))
                model['pBWc_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='pBWc_' + str(i))
                model['pBBc_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='pBBc_' + str(i))
                for ii in xrange(dim_t):
                    model['pFcc_%i_%i' % (i, ii)] = tf.Variable(
                        tf.truncated_normal([dim_b, dim_i], stddev=1.0 /
                                            dim_i),
                        name='pFcc_%i_%i' % (i, ii)) if ii == 0 else model[
                            'pFc_%i_%i' %
                            (i, ii - 1)]  # consider starting with all zeros
                    model['pFc_%i_%i' % (i, ii)] = tf.select(
                        tf.equal(model['pxi_%i' % ii],
                                 tf.zeros([dim_b], tf.int32)),
                        model['pFcc_%i_%i' % (i, ii)],
                        tf.add(
                            tf.mul(model['pFf_%i_%i' % (i, ii)],
                                   model['pFcc_%i_%i' % (i, ii)]),
                            tf.mul(
                                model['pFi_%i_%i' % (i, ii)],
                                tf.nn.tanh(
                                    tf.add(
                                        tf.matmul(model['pFx_%i_%i' % (i, ii)],
                                                  model['pFWc_%i' % i]),
                                        model['pFBc_%i' % i])))),
                        name='pFc_%i_%i' % (i, ii))
                for ii in reversed(xrange(dim_t)):
                    model['pBcc_%i_%i' % (i, ii)] = tf.Variable(
                        tf.truncated_normal([dim_b, dim_i], stddev=1.0 /
                                            dim_i),
                        name='pBcc_%i_%i' %
                        (i, ii)) if ii == dim_t - 1 else model[
                            'pBc_%i_%i' %
                            (i, ii + 1)]  # consider starting with all zeros
                    model['pBc_%i_%i' % (i, ii)] = tf.select(
                        tf.equal(model['pxi_%i' % ii],
                                 tf.zeros([dim_b], tf.int32)),
                        model['pBcc_%i_%i' % (i, ii)],
                        tf.add(
                            tf.mul(model['pBf_%i_%i' % (i, ii)],
                                   model['pBcc_%i_%i' % (i, ii)]),
                            tf.mul(
                                model['pBi_%i_%i' % (i, ii)],
                                tf.nn.tanh(
                                    tf.add(
                                        tf.matmul(model['pBx_%i_%i' % (i, ii)],
                                                  model['pBWc_%i' % i]),
                                        model['pBBc_%i' % i])))),
                        name='pBc_%i_%i' % (i, ii))

            with tf.name_scope('hidden_%i' % i):
                model['pFWz_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='pFWz_%i' % i)
                model['pFBz_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='pFBz_%i' % i)
                model['pBWz_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='pBWz_%i' % i)
                model['pBBz_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='pBBz_%i' % i)
                for ii in xrange(dim_t):
                    model['pFz_%i_%i' % (i, ii)] = tf.add(
                        tf.matmul(model['pFc_%i_%i' % (i, ii)],
                                  model['pFWz_%i' % i]),
                        model['pFBz_%i' % i],
                        name='pFz_%i_%i' % (i, ii))
                    model['pBz_%i_%i' % (i, ii)] = tf.add(
                        tf.matmul(model['pBc_%i_%i' % (i, ii)],
                                  model['pBWz_%i' % i]),
                        model['pBBz_%i' % i],
                        name='pBz_%i_%i' % (i, ii))

            with tf.name_scope('output_%i' % i):
                for ii in xrange(dim_t):
                    model['pFh_%i_%i' % (i, ii)] = tf.mul(
                        model['pFo_%i_%i' % (i, ii)],
                        tf.nn.tanh(model['pFz_%i_%i' % (i, ii)]),
                        name='pFh_%i_%i' % (i, ii))
                    model['pBh_%i_%i' % (i, ii)] = tf.mul(
                        model['pBo_%i_%i' % (i, ii)],
                        tf.nn.tanh(model['pBz_%i_%i' % (i, ii)]),
                        name='pBh_%i_%i' % (i, ii))
                model['pFh_%i_%i' % (dim_d - 1, -1)] = tf.zeros([dim_b, dim_i],
                                                                tf.float32)
                model['pBh_%i_%i' % (dim_d - 1, dim_t)] = tf.zeros(
                    [dim_b, dim_i], tf.float32)

        with tf.name_scope('output'):
            for ii in xrange(dim_t):
                model['pFh_%i' %
                      ii] = tf.select(tf.equal(model['pxi_%i' % ii],
                                               tf.zeros([dim_b], tf.int32)),
                                      model['pFh_%i_%i' % (dim_d - 1, ii - 1)],
                                      model['pFh_%i_%i' % (dim_d - 1, ii)],
                                      name='pFh_%i' % ii)
                model['pBh_%i' %
                      ii] = tf.select(tf.equal(model['pxi_%i' % ii],
                                               tf.zeros([dim_b], tf.int32)),
                                      model['pBh_%i_%i' % (dim_d - 1, ii + 1)],
                                      model['pBh_%i_%i' % (dim_d - 1, ii)],
                                      name='pBh_%i' % ii)

        with tf.name_scope('meansquared'):
            for ii in xrange(dim_t):
                model['pFms_%i' %
                      ii] = tf.select(tf.equal(model['pxi_%i' % ii],
                                               tf.zeros([dim_b], tf.int32)),
                                      tf.zeros([dim_b], tf.float32),
                                      tf.reduce_sum(
                                          tf.square(
                                              tf.sub(model['py_%i' % ii],
                                                     model['pFh_%i' % ii])),
                                          [1]),
                                      name='pFms_%i' % ii)
            model['pFms'] = tf.reduce_sum(tf.add_n(
                [model['pFms_%i' % ii] for ii in xrange(dim_t)]),
                                          name='pFms')
            model['sp+ms'] = tf.scalar_summary(model['pFms'].name,
                                               model['pFms'])
            for ii in xrange(dim_t):
                model['pBms_%i' %
                      ii] = tf.select(tf.equal(model['pxi_%i' % ii],
                                               tf.zeros([dim_b], tf.int32)),
                                      tf.zeros([dim_b], tf.float32),
                                      tf.reduce_sum(
                                          tf.square(
                                              tf.sub(model['py_%i' % ii],
                                                     model['pBh_%i' % ii])),
                                          [1]),
                                      name='pBms_%i' % ii)
            model['pBms'] = tf.reduce_sum(tf.add_n(
                [model['pBms_%i' % ii] for ii in xrange(dim_t)]),
                                          name='pBms')
            model['sp-ms'] = tf.scalar_summary(model['pBms'].name,
                                               model['pBms'])

    with tf.name_scope('hlstm'):
        with tf.name_scope('input'):
            for ii in xrange(dim_t):
                model['hxi_%i' % ii] = tf.placeholder(tf.int32, [dim_b],
                                                      name='hxi_%i' % ii)
                model['hx_%i' % ii] = tf.add(tf.nn.embedding_lookup(
                    model['We'], model['hxi_%i' % ii]),
                                             model['Be'],
                                             name='hx_%i' % ii)

        with tf.name_scope('label'):
            for ii in xrange(dim_t):
                model['hyi_%i' % ii] = tf.placeholder(tf.int32, [dim_b],
                                                      name='hyi_%i' % ii)
                model['hy_%i' % ii] = tf.add(tf.nn.embedding_lookup(
                    model['We'], model['hyi_%i' % ii]),
                                             model['Be'],
                                             name='hy_%i' % ii)

        for i in xrange(dim_d):
            with tf.name_scope('input_%i' % i):
                for ii in xrange(dim_t):
                    model['hFx_%i_%i' %
                          (i,
                           ii)] = model['hx_%i' %
                                        ii] if i == 0 else model['hFh_%i_%i' %
                                                                 (i - 1, ii)]
                    model['hBx_%i_%i' %
                          (i,
                           ii)] = model['hx_%i' %
                                        ii] if i == 0 else model['hBh_%i_%i' %
                                                                 (i - 1, ii)]

            with tf.name_scope('inputgate_%i' % i):
                model['hFWi_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='hFWi_%i' % i)
                model['hFBi_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='hFBi_%i' % i)
                model['hBWi_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='hBWi_%i' % i)
                model['hBBi_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='hBBi_%i' % i)
                for ii in xrange(dim_t):
                    model['hFi_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['hFx_%i_%i' % (i, ii)],
                                      model['hFWi_%i' % i]),
                            model['hFBi_%i' % i]),
                        name='hFi_%i_%i' % (i, ii))
                    model['hBi_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['hBx_%i_%i' % (i, ii)],
                                      model['hBWi_%i' % i]),
                            model['hBBi_%i' % i]),
                        name='hBi_%i_%i' % (i, ii))

            with tf.name_scope('forgetgate_%i' % i):
                model['hFWf_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='hFWf_%i' % i)
                model['hFBf_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='hFBf_%i' % i)
                model['hBWf_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='hBWf_%i' % i)
                model['hBBf_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='hBBf_%i' % i)
                for ii in xrange(dim_t):
                    model['hFf_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['hFx_%i_%i' % (i, ii)],
                                      model['hFWf_%i' % i]),
                            model['hFBf_%i' % i]),
                        name='hFf_%i_%i' % (i, ii))
                    model['hBf_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['hBx_%i_%i' % (i, ii)],
                                      model['hBWf_%i' % i]),
                            model['hBBf_%i' % i]),
                        name='hBf_%i_%i' % (i, ii))

            with tf.name_scope('outputgate_%i' % i):
                model['hFWo_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='hFWo_%i' % i)
                model['hFBo_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='hFBo_%i' % i)
                model['hBWo_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='hBWo_%i' % i)
                model['hBBo_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='hBBo_%i' % i)
                for ii in xrange(dim_t):
                    model['hFo_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['hFx_%i_%i' % (i, ii)],
                                      model['hFWo_%i' % i]),
                            model['hFBo_%i' % i]),
                        name='hFo_%i_%i' % (i, ii))
                    model['hBo_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['hBx_%i_%i' % (i, ii)],
                                      model['hBWo_%i' % i]),
                            model['hBBo_%i' % i]),
                        name='hBo_%i_%i' % (i, ii))

            with tf.name_scope('cellstate_%i' % i):
                model['hFWc_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='hFWc_' + str(i))
                model['hFBc_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='hFBc_' + str(i))
                model['hBWc_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='hBWc_' + str(i))
                model['hBBc_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='hBBc_' + str(i))
                for ii in xrange(dim_t):
                    model['hFcc_%i_%i' % (i, ii)] = tf.Variable(
                        tf.truncated_normal([dim_b, dim_i], stddev=1.0 /
                                            dim_i),
                        name='hFcc_%i_%i' % (i, ii)) if ii == 0 else model[
                            'hFc_%i_%i' %
                            (i, ii - 1)]  # consider starting with all zeros
                    model['hFc_%i_%i' % (i, ii)] = tf.select(
                        tf.equal(model['hxi_%i' % ii],
                                 tf.zeros([dim_b], tf.int32)),
                        model['hFcc_%i_%i' % (i, ii)],
                        tf.add(
                            tf.mul(model['hFf_%i_%i' % (i, ii)],
                                   model['hFcc_%i_%i' % (i, ii)]),
                            tf.mul(
                                model['hFi_%i_%i' % (i, ii)],
                                tf.nn.tanh(
                                    tf.add(
                                        tf.matmul(model['hFx_%i_%i' % (i, ii)],
                                                  model['hFWc_%i' % i]),
                                        model['hFBc_%i' % i])))),
                        name='hFc_%i_%i' % (i, ii))
                for ii in reversed(xrange(dim_t)):
                    model['hBcc_%i_%i' % (i, ii)] = tf.Variable(
                        tf.truncated_normal([dim_b, dim_i], stddev=1.0 /
                                            dim_i),
                        name='hBcc_%i_%i' %
                        (i, ii)) if ii == dim_t - 1 else model[
                            'hBc_%i_%i' %
                            (i, ii + 1)]  # consider starting with all zeros
                    model['hBc_%i_%i' % (i, ii)] = tf.select(
                        tf.equal(model['hxi_%i' % ii],
                                 tf.zeros([dim_b], tf.int32)),
                        model['hBcc_%i_%i' % (i, ii)],
                        tf.add(
                            tf.mul(model['hBf_%i_%i' % (i, ii)],
                                   model['hBcc_%i_%i' % (i, ii)]),
                            tf.mul(
                                model['hBi_%i_%i' % (i, ii)],
                                tf.nn.tanh(
                                    tf.add(
                                        tf.matmul(model['hBx_%i_%i' % (i, ii)],
                                                  model['hBWc_%i' % i]),
                                        model['hBBc_%i' % i])))),
                        name='hBc_%i_%i' % (i, ii))

            with tf.name_scope('hidden_%i' % i):
                model['hFWz_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='hFWz_%i' % i)
                model['hFBz_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='hFBz_%i' % i)
                model['hBWz_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                   name='hBWz_%i' % i)
                model['hBBz_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                   name='hBBz_%i' % i)
                for ii in xrange(dim_t):
                    model['hFz_%i_%i' % (i, ii)] = tf.add(
                        tf.matmul(model['hFc_%i_%i' % (i, ii)],
                                  model['hFWz_%i' % i]),
                        model['hFBz_%i' % i],
                        name='hFz_%i_%i' % (i, ii))
                    model['hBz_%i_%i' % (i, ii)] = tf.add(
                        tf.matmul(model['hBc_%i_%i' % (i, ii)],
                                  model['hBWz_%i' % i]),
                        model['hBBz_%i' % i],
                        name='hBz_%i_%i' % (i, ii))

            with tf.name_scope('output_%i' % i):
                for ii in xrange(dim_t):
                    model['hFh_%i_%i' % (i, ii)] = tf.mul(
                        model['hFo_%i_%i' % (i, ii)],
                        tf.nn.tanh(model['hFz_%i_%i' % (i, ii)]),
                        name='hFh_%i_%i' % (i, ii))
                    model['hBh_%i_%i' % (i, ii)] = tf.mul(
                        model['hBo_%i_%i' % (i, ii)],
                        tf.nn.tanh(model['hBz_%i_%i' % (i, ii)]),
                        name='hBh_%i_%i' % (i, ii))
                model['hFh_%i_%i' % (dim_d - 1, -1)] = tf.zeros([dim_b, dim_i],
                                                                tf.float32)
                model['hBh_%i_%i' % (dim_d - 1, dim_t)] = tf.zeros(
                    [dim_b, dim_i], tf.float32)

        with tf.name_scope('output'):
            for ii in xrange(dim_t):
                model['hFh_%i' %
                      ii] = tf.select(tf.equal(model['hxi_%i' % ii],
                                               tf.zeros([dim_b], tf.int32)),
                                      model['hFh_%i_%i' % (dim_d - 1, ii - 1)],
                                      model['hFh_%i_%i' % (dim_d - 1, ii)],
                                      name='hFh_%i' % ii)
                model['hBh_%i' %
                      ii] = tf.select(tf.equal(model['hxi_%i' % ii],
                                               tf.zeros([dim_b], tf.int32)),
                                      model['hBh_%i_%i' % (dim_d - 1, ii + 1)],
                                      model['hBh_%i_%i' % (dim_d - 1, ii)],
                                      name='hBh_%i' % ii)

        with tf.name_scope('meansquared'):
            for ii in xrange(dim_t):
                model['hFms_%i' %
                      ii] = tf.select(tf.equal(model['hxi_%i' % ii],
                                               tf.zeros([dim_b], tf.int32)),
                                      tf.zeros([dim_b], tf.float32),
                                      tf.reduce_sum(
                                          tf.square(
                                              tf.sub(model['hy_%i' % ii],
                                                     model['hFh_%i' % ii])),
                                          [1]),
                                      name='hFms_%i' % ii)
            model['hFms'] = tf.reduce_sum(tf.add_n(
                [model['hFms_%i' % ii] for ii in xrange(dim_t)]),
                                          name='hFms')
            model['sh+ms'] = tf.scalar_summary(model['hFms'].name,
                                               model['hFms'])
            for ii in xrange(dim_t):
                model['hBms_%i' %
                      ii] = tf.select(tf.equal(model['hxi_%i' % ii],
                                               tf.zeros([dim_b], tf.int32)),
                                      tf.zeros([dim_b], tf.float32),
                                      tf.reduce_sum(
                                          tf.square(
                                              tf.sub(model['hy_%i' % ii],
                                                     model['hBh_%i' % ii])),
                                          [1]),
                                      name='hBms_%i' % ii)
            model['hBms'] = tf.reduce_sum(tf.add_n(
                [model['hBms_%i' % ii] for ii in xrange(dim_t)]),
                                          name='hBms')
            model['sh-ms'] = tf.scalar_summary(model['hBms'].name,
                                               model['hBms'])

    with tf.name_scope('classification'):
        with tf.name_scope('label'):
            model['clabel'] = tf.placeholder(tf.float32, [dim_b, dim_c],
                                             name='clabel')

        for i in xrange(dim_n):
            with tf.name_scope('layer_%i' % i):
                model['cW_%i' % i] = tf.Variable(
                    tf.truncated_normal([4 * dim_i, 4 * dim_i],
                                        stddev=0.25 / dim_i),
                    name='cW_%i' %
                    i) if i != dim_n - 1 else tf.Variable(tf.truncated_normal(
                        [4 * dim_i, dim_c], stddev=1.0 / dim_c),
                                                          name='cW_%i' % i)
                model['cB_%i' % i] = tf.Variable(
                    tf.truncated_normal([1, 4 * dim_i], stddev=0.25 / dim_i),
                    name='cB_%i' % i) if i != dim_n - 1 else tf.Variable(
                        tf.truncated_normal([1, dim_c], stddev=1.0 / dim_c),
                        name='cB_%i' % i)
                model['cx_%i' %
                      i] = tf.concat(1, [
                          model['pFh_%i' % (dim_t - 1)], model['pBh_%i' % (0)],
                          model['hFh_%i' % (dim_t - 1)], model['hBh_%i' % (0)]
                      ],
                                     name='cx_%i' %
                                     i) if i == 0 else model['cy_%i' % (i - 1)]
                model['cy_%i' % i] = tf.add(tf.matmul(model['cx_%i' % i],
                                                      model['cW_%i' % i]),
                                            model['cB_%i' % i],
                                            name='cy_%i' % i)

        with tf.name_scope('output'):
            model['output'] = tf.nn.softmax(model['cy_%i' % (dim_n - 1)],
                                            name='output')

        with tf.name_scope('crossentropy'):
            model['cce'] = tf.reduce_sum(
                -tf.mul(model['clabel'], tf.log(model['output'])), name='cce')
            model['scce'] = tf.scalar_summary(model['cce'].name, model['cce'])

    model['gsms'] = tf.Variable(0, trainable=False, name='gsms')
    model['lrms'] = tf.train.exponential_decay(lrate_ms,
                                               model['gsms'],
                                               dstep_ms,
                                               drate_ms,
                                               staircase=False,
                                               name='lrms')
    model['tms'] = optim_ms(model['lrms']).minimize(
        model['pFms'] + model['pBms'] + model['hFms'] + model['hBms'],
        global_step=model['gsms'],
        name='tms')

    model['gsce'] = tf.Variable(0, trainable=False, name='gsce')
    model['lrce'] = tf.train.exponential_decay(lrate_ce,
                                               model['gsce'],
                                               dstep_ce,
                                               drate_ce,
                                               staircase=False,
                                               name='lrce')
    model['tce'] = optim_ce(model['lrce']).minimize(model['cce'],
                                                    global_step=model['gsce'],
                                                    name='tce')

    return model
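The recurring pattern in this model is tf.select used as a padding mask: wherever the token index is 0, the carried-over value is kept, otherwise the freshly computed one is used. A standalone sketch of that idiom, with hypothetical names and sizes:

import tensorflow as tf

dim_b, dim_i = 4, 8                                      # hypothetical batch and hidden sizes
token_ids = tf.placeholder(tf.int32, [dim_b])            # 0 marks a padded position
prev_state = tf.placeholder(tf.float32, [dim_b, dim_i])
new_state = tf.placeholder(tf.float32, [dim_b, dim_i])

is_pad = tf.equal(token_ids, tf.zeros([dim_b], tf.int32))
# a rank-1 condition acts as a row mask: padded rows keep prev_state, the rest take new_state
masked_state = tf.select(is_pad, prev_state, new_state)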
Beispiel #36
0
for i in range(max_steps):

    x_step = x[:, i, :]
    xh_join = tf.concat(
        1,
        [x_step, h])  # Combine the features and hidden state into one tensor

    ig = tf.sigmoid(tf.matmul(xh_join, W_ig) + b_ig)
    fg = tf.sigmoid(tf.matmul(xh_join, W_fg) + b_fg)
    og = tf.sigmoid(tf.matmul(xh_join, W_og) + b_og)
    c_in = tf.tanh(tf.matmul(xh_join, W_c) + b_c)
    c_out = fg * c + ig * c_in
    h_out = og * tf.tanh(c_out)  # hidden output should come from the updated cell state

    c = tf.select(
        tf.greater(l, i), c_out,
        c)  # Take the new state only while step i is within the sequence length; otherwise keep the old state
    h = tf.select(tf.greater(l, i), h_out, h)

ly = tf.matmul(h, W_o) + b_o
ly_flat = tf.reshape(ly, [batch_size])

##########################################################################################
# Optimizer/Analyzer
##########################################################################################

# Cost function and optimizer
#
cost = tf.reduce_mean(tf.square(ly_flat - y))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
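The fragment above references placeholders, states, and gate weights defined elsewhere. A minimal sketch of the definitions it appears to assume, with all sizes and the learning rate guessed, might look like this:

import tensorflow as tf

batch_size, max_steps, n_input, n_hidden = 32, 20, 10, 64   # guessed sizes
learning_rate = 1e-3                                          # guessed value

x = tf.placeholder(tf.float32, [batch_size, max_steps, n_input])  # feature sequences
y = tf.placeholder(tf.float32, [batch_size])                      # scalar target per sequence
l = tf.placeholder(tf.int32, [batch_size])                        # true sequence lengths

h = tf.zeros([batch_size, n_hidden])   # initial hidden state
c = tf.zeros([batch_size, n_hidden])   # initial cell state

def gate_params(name):
    # one weight/bias pair acting on the concatenated [features, hidden] vector
    W = tf.Variable(tf.truncated_normal([n_input + n_hidden, n_hidden], stddev=0.1), name='W_' + name)
    b = tf.Variable(tf.zeros([n_hidden]), name='b_' + name)
    return W, b

W_ig, b_ig = gate_params('ig')   # input gate
W_fg, b_fg = gate_params('fg')   # forget gate
W_og, b_og = gate_params('og')   # output gate
W_c, b_c = gate_params('c')      # candidate cell state
W_o = tf.Variable(tf.truncated_normal([n_hidden, 1], stddev=0.1))  # readout weights
b_o = tf.Variable(tf.zeros([1]))                                   # readout bias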
Beispiel #37
0
    def __init__(self, params, infer=False):

        self.is_training = tf.placeholder(tf.bool)
        self.output_keep_prob = tf.placeholder(tf.float32)

        num_layers = params['nlayer']
        rnn_size = params['n_hidden']
        grad_clip = 10

        cell_lst = []
        for i in range(num_layers):
            cell = tf.nn.rnn_cell.LSTMCell(
                rnn_size,
                initializer=tf.contrib.layers.xavier_initializer(
                    uniform=False),
                forget_bias=1.0)
            # if i==0:
            #   cell_drop = tf.nn.rnn_cell.DropoutWrapper(cell,input_keep_prob= self.output_keep_prob)
            #   cell=cell_drop
            cell_drop = tf.nn.rnn_cell.DropoutWrapper(
                cell, output_keep_prob=self.output_keep_prob)
            cell = cell_drop
            cell_lst.append(cell)

        cell = tf.nn.rnn_cell.MultiRNNCell(cell_lst)

        # cell_drop = tf.nn.rnn_cell.DropoutWrapper(cell,output_keep_prob= self.output_keep_prob)
        # cell=cell_drop
        self.cell = cell

        NOUT = params['n_output']  # end_of_stroke + prob + 2*(mu + sig) + corr
        self.input_data = tf.placeholder(
            dtype=tf.float32,
            shape=[None, params['seq_length'], params['n_input']])
        self.input_zero = tf.placeholder(
            dtype=tf.float32,
            shape=[None, params['seq_length'], params['n_input']])
        self.repeat_data = tf.placeholder(dtype=tf.int32,
                                          shape=[None, params['seq_length']])
        self.target_data = tf.placeholder(
            tf.float32, [None, params["seq_length"], params["n_output"]])
        self.initial_state = cell.zero_state(batch_size=params['batch_size'],
                                             dtype=tf.float32)

        # Noise is applied only during the training phase, and only if the noise std is greater than 0
        if (params["noise_std"] > 0.0):
            ran_noise = tf.random_normal(shape=[
                params["batch_size"], params['seq_length'], params['n_input']
            ],
                                         mean=0,
                                         stddev=params['noise_std'])
            # ran_noise=tf.mul(ran_noise,self.input_zero)
            tmp_input = tf.nn.relu(self.input_data + ran_noise)
            self.input_data = tf.select(self.is_training, tmp_input,
                                        self.input_data)

        outputs = []
        state = self.initial_state
        with tf.variable_scope("rnnlm"):
            for time_step in range(params['seq_length']):
                if time_step > 0: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(self.input_data[:, time_step, :],
                                            state)
                outputs.append(cell_output)
        rnn_output = tf.reshape(tf.transpose(tf.pack(outputs), [1, 0, 2]),
                                [-1, params['n_hidden']])

        with tf.variable_scope('rnnlm'):
            output_w1 = tf.get_variable(
                "output_w1", [rnn_size, NOUT],
                initializer=tf.contrib.layers.xavier_initializer())
            output_b1 = tf.get_variable("output_b1", [NOUT])

        self.final_output = tf.add(tf.matmul(rnn_output, output_w1), output_b1)

        flt = tf.squeeze(tf.reshape(self.repeat_data, [-1, 1]), [1])
        where_flt = tf.not_equal(flt, 0)
        indices = tf.where(where_flt)
        tmp = self.final_output - tf.reshape(self.target_data,
                                             [-1, params["n_output"]])
        tmp = tf.gather(tmp, tf.squeeze(indices, [1]))
        loss = tf.nn.l2_loss(tmp)
        self.cost = tf.reduce_mean(loss)
        self.final_state = state
        tf.scalar_summary('losses/total_loss', loss)

        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        total_parameters = 0
        for variable in tvars:
            # shape is an array of tf.Dimension
            shape = variable.get_shape()
            variable_parameters = 1
            for dim in shape:
                variable_parameters *= dim.value
            total_parameters += variable_parameters
        self.total_parameters = total_parameters
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          grad_clip)
        for grad in grads:
            # if isinstance(grad, ops.grads):
            #   grad_values = grad.values
            # else:
            #   grad_values = grad
            grad_values = grad
            logging_ops.histogram_summary(grad.op.name + ':gradient',
                                          grad_values)
            logging_ops.histogram_summary(grad.op.name + ':gradient_norm',
                                          clip_ops.global_norm([grad_values]))
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
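self.lr above is declared with trainable=False and never assigned inside __init__, so the training driver is presumably expected to set it before running train_op. A standalone sketch of that assign pattern:

import tensorflow as tf

lr = tf.Variable(0.0, trainable=False)        # mirrors self.lr above
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    sess.run(tf.assign(lr, 1e-3))             # e.g. decayed once per epoch by the driver
    print(sess.run(lr))                       # 0.001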
Beispiel #38
0
def create(embedder, config, scope='bicoder'):
    dim_v, dim_i, dim_d, dim_t, dim_b = config.getint('vocab'), config.getint(
        'wvec'), config.getint('depth'), config.getint('steps'), config.getint(
            'batch')
    samp, lrate, dstep, drate, optim, rfact, reg = config.getint(
        'samples'), config.getfloat('lrate'), config.getint(
            'dstep'), config.getfloat('drate'), getattr(
                tf.train,
                config.get('optim')), config.getfloat('rfact'), getattr(
                    tf.contrib.layers, config.get('reg'))
    model = dict()

    with tf.name_scope(scope):
        with tf.name_scope('input'):
            for ii in xrange(dim_t):
                model['exi_%i' % ii] = tf.placeholder(tf.int32, [dim_b],
                                                      name='exi_%i' % ii)
                model['ex_%i' % ii] = tf.add(tf.nn.embedding_lookup(
                    embedder['We'], model['exi_%i' % ii]),
                                             embedder['Be'],
                                             name='ex_%i' % ii)

        with tf.name_scope('label'):
            for ii in xrange(dim_t):
                model['eyi_%i' % ii] = tf.placeholder(tf.int32, [dim_b],
                                                      name='eyi_%i' % ii)
                model['ey_%i' % ii] = tf.add(tf.nn.embedding_lookup(
                    embedder['We'], model['eyi_%i' % ii]),
                                             embedder['Be'],
                                             name='ey_%i' % ii)

        for i in xrange(dim_d):
            with tf.name_scope('input_%i' % i):
                for ii in xrange(dim_t):
                    model['ex_%i_%i' %
                          (i,
                           ii)] = model['ex_%i' %
                                        ii] if i == 0 else model['eh_%i_%i' %
                                                                 (i - 1, ii)]

            with tf.name_scope('inputgate_%i' % i):
                model['eFWi_%i' % i] = tf.Variable(
                    tf.random_uniform([dim_i, dim_i], -np.sqrt(6. / dim_i),
                                      np.sqrt(6. / dim_i)),
                    collections=[
                        tf.GraphKeys.VARIABLES,
                        tf.GraphKeys.REGULARIZATION_LOSSES
                    ],
                    name='eFWi_%i' % i)
                model['eFBi_%i' % i] = tf.Variable(tf.random_uniform(
                    [1, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)),
                                                   name='eFBi_%i' % i)
                model['eBWi_%i' % i] = tf.Variable(
                    tf.random_uniform([dim_i, dim_i], -np.sqrt(6. / dim_i),
                                      np.sqrt(6. / dim_i)),
                    collections=[
                        tf.GraphKeys.VARIABLES,
                        tf.GraphKeys.REGULARIZATION_LOSSES
                    ],
                    name='eBWi_%i' % i)
                model['eBBi_%i' % i] = tf.Variable(tf.random_uniform(
                    [1, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)),
                                                   name='eBBi_%i' % i)
                for ii in xrange(dim_t):
                    model['eFi_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['ex_%i_%i' % (i, ii)],
                                      model['eFWi_%i' % i]),
                            model['eFBi_%i' % i]),
                        name='eFi_%i_%i' % (i, ii))
                    model['eBi_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['ex_%i_%i' % (i, ii)],
                                      model['eBWi_%i' % i]),
                            model['eBBi_%i' % i]),
                        name='eBi_%i_%i' % (i, ii))

            with tf.name_scope('forgetgate_%i' % i):
                model['eFWf_%i' % i] = tf.Variable(
                    tf.random_uniform([dim_i, dim_i], -np.sqrt(6. / dim_i),
                                      np.sqrt(6. / dim_i)),
                    collections=[
                        tf.GraphKeys.VARIABLES,
                        tf.GraphKeys.REGULARIZATION_LOSSES
                    ],
                    name='eFWf_%i' % i)
                model['eFBf_%i' % i] = tf.Variable(tf.random_uniform(
                    [1, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)),
                                                   name='eFBf_%i' % i)
                model['eBWf_%i' % i] = tf.Variable(
                    tf.random_uniform([dim_i, dim_i], -np.sqrt(6. / dim_i),
                                      np.sqrt(6. / dim_i)),
                    collections=[
                        tf.GraphKeys.VARIABLES,
                        tf.GraphKeys.REGULARIZATION_LOSSES
                    ],
                    name='eBWf_%i' % i)
                model['eBBf_%i' % i] = tf.Variable(tf.random_uniform(
                    [1, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)),
                                                   name='eBBf_%i' % i)
                for ii in xrange(dim_t):
                    model['eFf_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['ex_%i_%i' % (i, ii)],
                                      model['eFWf_%i' % i]),
                            model['eFBf_%i' % i]),
                        name='eFf_%i_%i' % (i, ii))
                    model['eBf_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['ex_%i_%i' % (i, ii)],
                                      model['eBWf_%i' % i]),
                            model['eBBf_%i' % i]),
                        name='eBf_%i_%i' % (i, ii))

            with tf.name_scope('outputgate_%i' % i):
                model['eWo_%i' % i] = tf.Variable(
                    tf.random_uniform([dim_i, dim_i], -np.sqrt(6. / dim_i),
                                      np.sqrt(6. / dim_i)),
                    collections=[
                        tf.GraphKeys.VARIABLES,
                        tf.GraphKeys.REGULARIZATION_LOSSES
                    ],
                    name='eWo_%i' % i)
                model['eBo_%i' % i] = tf.Variable(tf.random_uniform(
                    [1, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)),
                                                  name='eBo_%i' % i)
                for ii in xrange(dim_t):
                    model['eo_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['ex_%i_%i' % (i, ii)],
                                      model['eWo_%i' % i]),
                            model['eBo_%i' % i]),
                        name='eo_%i_%i' % (i, ii))

            with tf.name_scope('cellstate_%i' % i):
                model['eFWc_%i' % i] = tf.Variable(
                    tf.random_uniform([dim_i, dim_i], -np.sqrt(6. / dim_i),
                                      np.sqrt(6. / dim_i)),
                    collections=[
                        tf.GraphKeys.VARIABLES,
                        tf.GraphKeys.REGULARIZATION_LOSSES
                    ],
                    name='eFWc_' + str(i))
                model['eFBc_%i' % i] = tf.Variable(tf.random_uniform(
                    [1, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)),
                                                   name='eFBc_' + str(i))
                model['eBWc_%i' % i] = tf.Variable(
                    tf.random_uniform([dim_i, dim_i], -np.sqrt(6. / dim_i),
                                      np.sqrt(6. / dim_i)),
                    collections=[
                        tf.GraphKeys.VARIABLES,
                        tf.GraphKeys.REGULARIZATION_LOSSES
                    ],
                    name='eBWc_' + str(i))
                model['eBBc_%i' % i] = tf.Variable(tf.random_uniform(
                    [1, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)),
                                                   name='eBBc_' + str(i))
                for ii in xrange(dim_t):
                    model['eFcc_%i_%i' % (i, ii)] = tf.Variable(
                        tf.random_uniform([dim_b, dim_i], -np.sqrt(6. / dim_i),
                                          np.sqrt(6. / dim_i)),
                        name='eFcc_%i_%i' % (i, ii)) if ii == 0 else model[
                            'eFc_%i_%i' %
                            (i, ii - 1)]  # consider starting with all zeros
                    model['eFc_%i_%i' % (i, ii)] = tf.select(
                        tf.equal(model['exi_%i' % ii],
                                 tf.zeros([dim_b], tf.int32)),
                        model['eFcc_%i_%i' % (i, ii)],
                        tf.add(
                            tf.mul(model['eFf_%i_%i' % (i, ii)],
                                   model['eFcc_%i_%i' % (i, ii)]),
                            tf.mul(
                                model['eFi_%i_%i' % (i, ii)],
                                tf.nn.tanh(
                                    tf.add(
                                        tf.matmul(model['ex_%i_%i' % (i, ii)],
                                                  model['eFWc_%i' % i]),
                                        model['eFBc_%i' % i])))),
                        name='eFc_%i_%i' % (i, ii))
                for ii in reversed(xrange(dim_t)):
                    model['eBcc_%i_%i' % (i, ii)] = tf.Variable(
                        tf.random_uniform([dim_b, dim_i], -np.sqrt(6. / dim_i),
                                          np.sqrt(6. / dim_i)),
                        name='eBcc_%i_%i' %
                        (i, ii)) if ii == dim_t - 1 else model[
                            'eBc_%i_%i' %
                            (i, ii + 1)]  # consider starting with all zeros
                    model['eBc_%i_%i' % (i, ii)] = tf.select(
                        tf.equal(model['exi_%i' % ii],
                                 tf.zeros([dim_b], tf.int32)),
                        model['eBcc_%i_%i' % (i, ii)],
                        tf.add(
                            tf.mul(model['eBf_%i_%i' % (i, ii)],
                                   model['eBcc_%i_%i' % (i, ii)]),
                            tf.mul(
                                model['eBi_%i_%i' % (i, ii)],
                                tf.nn.tanh(
                                    tf.add(
                                        tf.matmul(model['ex_%i_%i' % (i, ii)],
                                                  model['eBWc_%i' % i]),
                                        model['eBBc_%i' % i])))),
                        name='eBc_%i_%i' % (i, ii))
                for ii in xrange(dim_t):
                    model['ec_%i_%i' % (i, ii)] = tf.concat(
                        1, [
                            model['eFc_%i_%i' % (i, ii)], model['eBc_%i_%i' %
                                                                (i, ii)]
                        ], 'ec_%i_%i' % (i, ii))

            with tf.name_scope('hidden_%i' % i):
                model['eWz_%i' % i] = tf.Variable(
                    tf.random_uniform([2 * dim_i, dim_i], -np.sqrt(6. / dim_i),
                                      np.sqrt(6. / dim_i)),
                    collections=[
                        tf.GraphKeys.VARIABLES,
                        tf.GraphKeys.REGULARIZATION_LOSSES
                    ],
                    name='eFWz_%i' % i)
                model['eBz_%i' % i] = tf.Variable(tf.random_uniform(
                    [1, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)),
                                                  name='eFBz_%i' % i)
                for ii in xrange(dim_t):
                    model['ez_%i_%i' % (i, ii)] = tf.add(
                        tf.matmul(model['ec_%i_%i' % (i, ii)],
                                  model['eWz_%i' % i]),
                        model['eBz_%i' % i],
                        name='ez_%i_%i' % (i, ii))

            with tf.name_scope('output_%i' % i):
                for ii in xrange(dim_t):
                    model['eh_%i_%i' % (i, ii)] = tf.mul(
                        model['eo_%i_%i' % (i, ii)],
                        tf.nn.tanh(model['ez_%i_%i' % (i, ii)]),
                        name='eh_%i_%i' % (i, ii))
                model['eh_%i_%i' % (dim_d - 1, -1)] = tf.zeros([dim_b, dim_i],
                                                               tf.float32)

        with tf.name_scope('output'):
            for ii in xrange(dim_t):
                model['eh_%i' %
                      ii] = tf.select(tf.equal(model['exi_%i' % ii],
                                               tf.zeros([dim_b], tf.int32)),
                                      model['eh_%i_%i' % (dim_d - 1, ii - 1)],
                                      model['eh_%i_%i' % (dim_d - 1, ii)],
                                      name='eh_%i' % ii)

        with tf.name_scope('meansquared'):
            for ii in xrange(dim_t):
                model['emse_%i' %
                      ii] = tf.select(tf.equal(model['exi_%i' % ii],
                                               tf.zeros([dim_b], tf.int32)),
                                      tf.zeros([dim_b], tf.float32),
                                      tf.reduce_sum(
                                          tf.square(
                                              tf.sub(model['ey_%i' % ii],
                                                     model['eh_%i' % ii])),
                                          [1]),
                                      name='emse_%i' % ii)
            model['emse'] = tf.reduce_sum(tf.add_n(
                [model['emse_%i' % ii] for ii in xrange(dim_t)]),
                                          name='emse')
            model['semse'] = tf.scalar_summary(model['emse'].name,
                                               model['emse'])

        with tf.name_scope('negativeloglikelihood'):
            for ii in xrange(dim_t):
                model['enll_%i' %
                      ii] = tf.select(tf.equal(model['exi_%i' % ii],
                                               tf.zeros([dim_b], tf.int32)),
                                      tf.zeros([dim_b], tf.float32),
                                      tf.nn.sampled_softmax_loss(
                                          embedder['We'],
                                          tf.zeros([dim_v], tf.float32),
                                          model['eh_%i' % ii],
                                          tf.reshape(model['eyi_%i' % ii],
                                                     [dim_b, 1]), samp, dim_v),
                                      name='enll_%i' % ii)
            model['enll'] = tf.reduce_sum(tf.add_n(
                [model['enll_%i' % ii] for ii in xrange(dim_t)]),
                                          name='enll')
            model['senll'] = tf.scalar_summary(model['enll'].name,
                                               model['enll'])

    model['gse'] = tf.Variable(0, trainable=False, name='gse')
    model['lre'] = tf.train.exponential_decay(lrate,
                                              model['gse'],
                                              dstep,
                                              drate,
                                              staircase=False,
                                              name='lre')
    model['reg'] = tf.contrib.layers.apply_regularization(
        reg(rfact), tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
    model['temse'] = optim(model['lre']).minimize(model['emse'] + model['reg'],
                                                  global_step=model['gse'],
                                                  name='temse')
    model['tenll'] = optim(model['lre']).minimize(model['enll'] + model['reg'],
                                                  global_step=model['gse'],
                                                  name='tenll')

    return model
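create() expects an embedder dict exposing a shared embedding matrix 'We' and bias 'Be', plus a config object answering getint/getfloat/get for the keys read at the top of the function. A hedged sketch of such inputs, with every size and option value guessed:

import tensorflow as tf

class _Cfg(dict):
    # hypothetical stand-in for the real config object: answers getint/getfloat/get by key lookup
    getint = getfloat = get = dict.__getitem__

config = _Cfg(vocab=10000, wvec=256, depth=2, steps=20, batch=32,
              samples=25, lrate=1e-3, dstep=1000, drate=0.95,
              optim='GradientDescentOptimizer', rfact=1e-4, reg='l2_regularizer')

dim_v, dim_i = config.getint('vocab'), config.getint('wvec')
embedder = {
    'We': tf.Variable(tf.random_uniform([dim_v, dim_i], -0.1, 0.1), name='We'),
    'Be': tf.Variable(tf.zeros([1, dim_i]), name='Be'),
}
model = create(embedder, config)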
Beispiel #39
0
def salt_and_pepper(X, rate=0.3):
    a = binomial_draw(shape=tf.shape(X), p=1 - rate)
    b = binomial_draw(shape=tf.shape(X), p=0.5)
    z = tf.zeros(tf.shape(X), dtype='float32')
    c = tf.select(tf.equal(a, z), b, z)
    return tf.add(tf.mul(X, a), c)
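A usage sketch for salt_and_pepper, assuming binomial_draw from the next example is defined in the same module; the batch contents and sizes are made up:

import numpy as np
import tensorflow as tf

X = tf.placeholder(tf.float32, [None, 784])    # e.g. flattened images in [0, 1]
X_noisy = salt_and_pepper(X, rate=0.3)         # ~30% of entries forced to 0 or 1

with tf.Session() as sess:
    batch = np.random.rand(8, 784).astype('float32')
    noisy = sess.run(X_noisy, feed_dict={X: batch})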
Beispiel #40
0
def binomial_draw(shape=[1], p=0.5, dtype='float32'):
    return tf.select(
        tf.less(
            tf.random_uniform(shape=shape, minval=0, maxval=1,
                              dtype='float32'), tf.fill(shape, p)),
        tf.ones(shape, dtype=dtype), tf.zeros(shape, dtype=dtype))
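In TensorFlow 1.0 and later, tf.select was replaced by tf.where with the same three-argument form, so the same draw could be sketched against the newer API like this:

import tensorflow as tf

def binomial_draw_where(shape=[1], p=0.5, dtype='float32'):
    # identical logic to binomial_draw above, written with tf.where instead of tf.select
    u = tf.random_uniform(shape=shape, minval=0, maxval=1, dtype='float32')
    return tf.where(tf.less(u, tf.fill(shape, p)),
                    tf.ones(shape, dtype=dtype), tf.zeros(shape, dtype=dtype))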
Beispiel #41
0
    def buildModel(self, inputShape):

        #Running on GPU
        with tf.device(self.device):
            with tf.name_scope("inputOps"):
                self.imageShape = (self.batchSize, inputShape[0],
                                   inputShape[1], inputShape[2])
                #Get convolution variables as placeholders
                self.inputImage = node_variable(self.imageShape, "inputImage")

            self.V1_W = []
            self.normalize_W = []
            self.V1_A = []
            self.recon = []
            self.error = []
            self.reconError = []
            self.sparseError = []
            self.scaledInput = []

            self.t_V1_A = []
            self.t_recon = []
            self.t_error = []
            self.t_reconError = []
            self.t_sparseError = []

            self.underThresh = []
            self.errorStd = []
            self.l1_mean = []
            self.t_errorStd = []
            self.t_l1_mean = []
            self.log_V1_A = []

            self.WShape = []
            self.VShape = []
            self.inShape = []

            for l in range(self.numLayers):
                if l == 0:
                    numInF = inputShape[2]
                else:
                    numInF = self.numV[l - 1]

                V_Y = float(inputShape[0])
                V_X = float(inputShape[1])

                for i in range(l + 1):
                    V_Y_Prev = V_Y
                    V_X_Prev = V_X
                    assert (int(V_Y) % self.VStrideY[i] == 0)
                    assert (int(V_X) % self.VStrideX[i] == 0)
                    V_Y = V_Y / self.VStrideY[i]
                    V_X = V_X / self.VStrideX[i]

                V_Y = int(V_Y)
                V_Y_Prev = int(V_Y_Prev)
                V_X = int(V_X)
                V_X_Prev = int(V_X_Prev)

                self.WShape.append((self.patchSizeY[l], self.patchSizeX[l],
                                    numInF, self.numV[l]))
                self.VShape.append((self.batchSize, V_Y, V_X, self.numV[l]))
                self.inShape.append(
                    (self.batchSize, V_Y_Prev, V_X_Prev, numInF))

                with tf.name_scope("Dictionary"):
                    self.V1_W.append(
                        sparse_weight_variable(self.WShape[l],
                                               "V1_W" + str(l)))

                with tf.name_scope("weightNorm"):
                    self.normVals = tf.sqrt(
                        tf.reduce_sum(tf.square(self.V1_W[l]),
                                      reduction_indices=[0, 1, 2],
                                      keep_dims=True))
                    self.normalize_W.append(self.V1_W[l].assign(
                        self.V1_W[l] / (self.normVals + 1e-8)))

                with tf.name_scope("Encoding"):
                    #Soft threshold
                    self.V1_A.append(
                        weight_variable(self.VShape[l], "V1_A" + str(l), 1e-3))
                    zeroConst = tf.zeros(self.VShape[l])
                    boolUnderThresh = tf.greater(self.zeroThresh[l],
                                                 tf.abs(self.V1_A[l]))
                    self.t_V1_A.append(
                        tf.select(boolUnderThresh, zeroConst, self.V1_A[l]))

                with tf.name_scope("Recon"):
                    assert (self.VStrideY[l] >= 1)
                    assert (self.VStrideX[l] >= 1)
                    # We build an index tensor in numpy for the gather
                    self.recon.append(
                        conv2d_oneToMany(self.V1_A[l], self.V1_W[l],
                                         self.inShape[l], "recon",
                                         self.VStrideY[l], self.VStrideX[l]))
                    self.t_recon.append(
                        conv2d_oneToMany(self.t_V1_A[l], self.V1_W[l],
                                         self.inShape[l], "t_recon",
                                         self.VStrideY[l], self.VStrideX[l]))

                with tf.name_scope("Error"):
                    #Scale inputImage
                    if (l == 0):
                        #self.scaledInput.append(self.inputImage/np.sqrt(self.patchSizeX[0]*self.patchSizeY[0]*inputShape[2]))
                        self.scaledInput.append(self.inputImage)
                    else:
                        #self.scaledInput.append(self.V1_A[l-1]/np.sqrt(self.patchSizeX[l]*self.patchSizeY[l]*self.numV[l-1]))
                        self.scaledInput.append(self.V1_A[l - 1])
                    self.error.append(self.scaledInput[l] - self.recon[l])
                    self.t_error.append(self.scaledInput[l] - self.t_recon[l])

                with tf.name_scope("Loss"):
                    # Sum across all axes except the batch axis
                    self.reconError.append(
                        tf.reduce_mean(
                            tf.reduce_sum(tf.square(self.error[l]),
                                          reduction_indices=[1, 2, 3])))
                    self.sparseError.append(
                        tf.reduce_mean(
                            tf.reduce_sum(tf.abs(self.V1_A[l]),
                                          reduction_indices=[1, 2, 3])))

                    self.t_reconError.append(
                        tf.reduce_mean(
                            tf.reduce_sum(tf.square(self.t_error[l]),
                                          reduction_indices=[1, 2, 3])))
                    self.t_sparseError.append(
                        tf.reduce_mean(
                            tf.reduce_sum(tf.abs(self.t_V1_A[l]),
                                          reduction_indices=[1, 2, 3])))

                with tf.name_scope("stats"):
                    self.underThresh.append(
                        tf.reduce_mean(
                            tf.cast(
                                tf.abs(self.V1_A[l]) > self.zeroThresh[l],
                                tf.float32)))

                    eStd = tf.sqrt(
                        tf.reduce_mean(
                            tf.square(self.error[l] -
                                      tf.reduce_mean(self.error[l]))))
                    t_eStd = tf.sqrt(
                        tf.reduce_mean(
                            tf.square(self.t_error[l] -
                                      tf.reduce_mean(self.t_error[l]))))
                    inStd = tf.sqrt(
                        tf.reduce_mean(
                            tf.square(self.scaledInput[l] -
                                      tf.reduce_mean(self.scaledInput[l]))))

                    self.errorStd.append(eStd / inStd)
                    self.t_errorStd.append(t_eStd / inStd)

                    self.l1_mean.append(tf.reduce_mean(tf.abs(self.V1_A[l])))
                    self.t_l1_mean.append(
                        tf.reduce_mean(tf.abs(self.t_V1_A[l])))

                    #For log of activities
                    self.log_V1_A.append(tf.log(tf.abs(self.V1_A[l]) + 1e-15))

            with tf.name_scope("Loss"):
                #Define loss
                self.loss = self.reconError[0] / 2 + self.thresh[
                    0] * self.sparseError[0]
                self.t_loss = self.t_reconError[0] / 2 + self.thresh[
                    0] * self.t_sparseError[0]
                for l in range(1, self.numLayers):
                    self.loss += self.reconError[l] / 2 + self.thresh[
                        l] * self.sparseError[l]
                    self.t_loss += self.t_reconError[l] / 2 + self.thresh[
                        l] * self.t_sparseError[l]

            with tf.name_scope("Opt"):
                #Define optimizer
                #self.optimizerA = tf.train.GradientDescentOptimizer(self.learningRateA).minimize(self.loss,
                self.optimizerA = tf.train.AdamOptimizer(
                    self.learningRateA
                ).minimize(
                    self.loss,
                    #self.optimizerA = tf.train.AdadeltaOptimizer(self.learningRateA).minimize(self.loss,
                    var_list=self.V1_A)
                #self.optimizerW = tf.train.AdamOptimizer(self.learningRateW).minimize(self.loss,
                #Minimizing weights with respect to the cutoff weights
                #self.optimizerW = tf.train.AdamOptimizer(self.learningRateW).minimize(self.t_loss,
                self.optimizerW = tf.train.AdadeltaOptimizer(
                    self.learningRateW,
                    epsilon=1e-6).minimize(self.loss, var_list=self.V1_W)

        with tf.name_scope("ReconVis"):
            self.visRecon = []
            self.t_visRecon = []
            for l in range(self.numLayers):
                outRecon = self.recon[l]
                t_outRecon = self.t_recon[l]
                for ll in range(l)[::-1]:
                    # We propagate the reconstructions down through the layers
                    outRecon = conv2d_oneToMany(
                        outRecon, self.V1_W[ll], self.inShape[ll],
                        "recon_" + str(l) + "_" + str(ll), self.VStrideY[ll],
                        self.VStrideX[ll])
                    t_outRecon = conv2d_oneToMany(
                        t_outRecon, self.V1_W[ll], self.inShape[ll],
                        "recon_" + str(l) + "_" + str(ll), self.VStrideY[ll],
                        self.VStrideX[ll])
                self.visRecon.append(outRecon)
                self.t_visRecon.append(t_outRecon)

        with tf.name_scope("WeightVis"):
            self.visWeight = []

            for l in range(self.numLayers):
                outWeight = tf.transpose(self.V1_W[l], [3, 0, 1, 2])
                numN = self.WShape[l][3]
                numY = self.WShape[l][0]
                numX = self.WShape[l][1]
                numF = self.WShape[l][2]

                for ll in range(l)[::-1]:
                    numY = self.WShape[ll][0] + (numY - 1) * self.VStrideY[ll]
                    numX = self.WShape[ll][1] + (numX - 1) * self.VStrideX[ll]
                    numF = self.WShape[ll][2]
                    inShape = (numN, numY, numX, numF)
                    outWeight = conv2d_oneToMany(outWeight,
                                                 self.V1_W[ll],
                                                 inShape,
                                                 "weight_" + str(l) + "_" +
                                                 str(ll),
                                                 self.VStrideY[ll],
                                                 self.VStrideX[ll],
                                                 padding="VALID")

                self.visWeight.append(outWeight)

        #Summaries
        self.s_loss = tf.scalar_summary('loss', self.loss, name="lossSum")
        self.s_t_loss = tf.scalar_summary('t loss' + str(l),
                                          self.t_loss,
                                          name="t_lossSum")
        self.h_input = tf.histogram_summary('inputImage',
                                            self.inputImage,
                                            name="input")

        for l in range(self.numLayers):
            self.s_recon = tf.scalar_summary('recon error' + str(l),
                                             self.reconError[l],
                                             name="reconError")
            self.s_errorStd = tf.scalar_summary('errorStd' + str(l),
                                                self.errorStd[l],
                                                name="errorStd")
            self.s_l1 = tf.scalar_summary('l1 sparsity' + str(l),
                                          self.sparseError[l],
                                          name="sparseError")
            self.s_l1_mean = tf.scalar_summary('l1 mean' + str(l),
                                               self.l1_mean[l],
                                               name="l1Mean")
            self.s_s_nnz = tf.scalar_summary('nnz' + str(l),
                                             self.underThresh[l],
                                             name="nnz")

            self.s_t_recon = tf.scalar_summary('t recon error' + str(l),
                                               self.t_reconError[l],
                                               name="t_reconError")
            self.s_t_errorStd = tf.scalar_summary('t errorStd' + str(l),
                                                  self.t_errorStd[l],
                                                  name="t_errorStd")
            self.s_t_l1 = tf.scalar_summary('t l1 sparsity' + str(l),
                                            self.t_sparseError[l],
                                            name="t_sparseError")
            self.s_t_l1_mean = tf.scalar_summary('t l1 mean' + str(l),
                                                 self.t_l1_mean[l],
                                                 name="t_l1Mean")

            self.h_input = tf.histogram_summary('scaledInput' + str(l),
                                                self.scaledInput[l],
                                                name="input")
            self.h_recon = tf.histogram_summary('recon' + str(l),
                                                self.recon[l],
                                                name="recon")
            self.h_v1_w = tf.histogram_summary('V1_W' + str(l),
                                               self.V1_W[l],
                                               name="V1_W")
            self.h_v1_a = tf.histogram_summary('V1_A' + str(l),
                                               self.V1_A[l],
                                               name="V1_A")
            self.h_log_v1_a = tf.histogram_summary('Log_V1_A' + str(l),
                                                   self.log_V1_A[l],
                                                   name="Log_V1_A")
    def add_prediction_op(self):

        # get relevent embedding data
        x = self.add_embedding()
        currBatch = tf.shape(x)[0]
        xDrop = tf.nn.dropout(x, self.dropoutPH)
        xRev = tf.reverse(xDrop, dims = [False, True, False])
        # embeds = tf.concat(concat_dim=1, values = [xDrop, xRev])

        # Extract sizes
        hidden_size = self.config.hidden_size
        n_class = self.config.n_class
        batch_size = self.config.batch_size
        max_sentence = self.config.max_sentence
        embedding_size = self.config.embedding_size

        # Define internal RNN Cells
        genCell1Layer1 = tf.nn.rnn_cell.LSTMCell(num_units = hidden_size,
                                                 activation = tf.tanh)
        genCell2Layer1 = tf.nn.rnn_cell.LSTMCell(num_units = hidden_size,
                                                 activation = tf.tanh)
        genCell1Layer2 = tf.nn.rnn_cell.LSTMCell(num_units = hidden_size,
                                                 activation = tf.tanh)
        genCell2Layer2 = tf.nn.rnn_cell.LSTMCell(num_units = hidden_size,
                                                 activation = tf.tanh)

        # Apply dropout to each cell
        genC1L1Drop = tf.nn.rnn_cell.DropoutWrapper(genCell1Layer1,
                                                    output_keep_prob=self.dropoutPH)
        genC2L1Drop = tf.nn.rnn_cell.DropoutWrapper(genCell2Layer1,
                                                    output_keep_prob=self.dropoutPH)
        genC1L2Drop = tf.nn.rnn_cell.DropoutWrapper(genCell1Layer2,
                                                    output_keep_prob=self.dropoutPH)
        genC2L2Drop = tf.nn.rnn_cell.DropoutWrapper(genCell2Layer2,
                                                    output_keep_prob=self.dropoutPH)

        # Stack each for multi Cell
        multiFwd = tf.nn.rnn_cell.MultiRNNCell([genC1L1Drop, genC1L2Drop])
        multiBwd = tf.nn.rnn_cell.MultiRNNCell([genC2L1Drop, genC2L2Drop])

        # Set initial states
        # fwdInitState = genC1L1Drop.zero_state(batch_size = currBatch,
        #                                    dtype = tf.float32)
        # bwdInitState = genC2L1Drop.zero_state(batch_size = currBatch,
        #                                    dtype = tf.float32)

        fwdInitState = multiFwd.zero_state(batch_size = currBatch,
                                           dtype = tf.float32)
        bwdInitState = multiBwd.zero_state(batch_size = currBatch,
                                           dtype = tf.float32)

        _, states = tf.nn.bidirectional_dynamic_rnn(cell_fw = multiFwd,
                                                    cell_bw = multiBwd,
                                                    inputs = x,
                                                    initial_state_fw = fwdInitState,
                                                    initial_state_bw = bwdInitState,
                                                    dtype = tf.float32,
                                                    sequence_length = self.seqPH
                                                    )

        # Return is a 2 x 2 x 2 x batchsize x hiddensize tensor;
        # repeatedly unpack and concat to a batchsize x (hiddensize * 8) tensor
        unpackedStates = tf.unpack(states, axis = 0)
        concatStates = tf.concat(concat_dim=3, values = unpackedStates)
        unpackedStates = tf.unpack(concatStates, axis = 0)
        concatStates = tf.concat(concat_dim=2, values=unpackedStates)
        unpackedStates = tf.unpack(concatStates, axis = 0)
        finalStates = tf.concat(concat_dim=1, values=unpackedStates)

        # Define our prediction layer variables
        U = tf.get_variable(name='W_gen',
                            shape=((8 * hidden_size), self.config.max_sentence),
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())

        c = tf.get_variable(name='b_gen',
                            shape=(self.config.max_sentence,),
                            dtype=tf.float32,
                            initializer=tf.constant_initializer(0.0))

        # zLayer probabilities - each prob is prob of keeping word in review
        zProbs = tf.sigmoid(tf.matmul(finalStates, U) + c)
        zProbs = tf.select(self.maskPH,
                           zProbs,
                           tf.zeros(shape = tf.shape(zProbs), dtype = tf.float32))
        # zProbs = tf.stop_gradient(zProbs)

        # Sample zProbs to pick which review words to keep; mask unselected words
        uniform = tf.random_uniform(shape = tf.shape(zProbs), minval=0, maxval=1) < zProbs
        # uniform = tf.stop_gradient(
        #     tf.random_uniform(shape=tf.shape(zProbs), minval=0,
        #                       maxval=1) < zProbs, 'uniform')
        self.zPreds = tf.select(uniform,
                                tf.ones(shape = tf.shape(uniform), dtype = tf.float32),
                                tf.zeros(shape = tf.shape(uniform), dtype = tf.float32))
        masks = tf.zeros(shape = tf.shape(zProbs), dtype = tf.int32) + self.maskId
        maskedInputs = tf.select(uniform, self.inputPH, masks)

        # Return masked embeddings to pass to encoder
        embedding_shape = (-1,
                           self.config.max_sentence,
                           self.config.embedding_size)

        maskedEmbeddings = tf.nn.embedding_lookup(self.pretrained_embeddings,
                                                  maskedInputs)
        maskedEmbeddings = tf.cast(maskedEmbeddings, tf.float32)
        maskedEmbeddings = tf.reshape(maskedEmbeddings, shape=embedding_shape)

        # Use encoder to make predictions
        # encoderPreds = self.encoder.add_prediction_op2(maskedEmbeddings)

        # Define our prediction layer variables
        W = tf.get_variable(name='W',
                            shape=((4 * hidden_size), n_class),
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())

        b = tf.get_variable(name='b',
                            shape=(n_class,),
                            dtype=tf.float32,
                            initializer=tf.constant_initializer(0.0))

        cell1 = tf.nn.rnn_cell.LSTMCell(embedding_size, activation=tf.tanh)
        cell2 = tf.nn.rnn_cell.LSTMCell(hidden_size, activation=tf.tanh)

        cell1_drop = tf.nn.rnn_cell.DropoutWrapper(cell1,
                                                   output_keep_prob=self.dropoutPH)
        cell2_drop = tf.nn.rnn_cell.DropoutWrapper(cell2,
                                                   output_keep_prob=self.dropoutPH)
        cell_multi = tf.nn.rnn_cell.MultiRNNCell([cell1_drop, cell2_drop])
        _, result = tf.nn.dynamic_rnn(cell_multi,
                                      maskedEmbeddings,
                                      dtype=tf.float32,
                                      sequence_length=self.seqPH)

        # Return state is a 2 x 2 x batchsize x hiddensize tensor;
        # repeatedly unpack and concat to a batchsize x (4 * hiddensize) tensor
        unpackedStates = tf.unpack(result, axis = 0)
        packedStates = tf.concat(concat_dim=2, values = unpackedStates)
        unpackedStates = tf.unpack(packedStates, axis=0)
        finalStates = tf.concat(concat_dim=1, values = unpackedStates)

        y_t = tf.tanh(tf.matmul(finalStates, W) + b)

        return y_t
Beispiel #43
0
def huber_loss(x, delta=1.0):
    # https://en.wikipedia.org/wiki/Huber_loss
    return tf.select(
        tf.abs(x) < delta,
        tf.square(x) * 0.5, delta * (tf.abs(x) - 0.5 * delta))
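
# Hedged usage note (not from the original source): huber_loss is applied
# element-wise and then reduced, e.g. for a hypothetical TD-error tensor:
#   td_error = q_target - q_predicted
#   loss = tf.reduce_mean(huber_loss(td_error, delta=1.0))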
Beispiel #44
0
def rnn(step_function, inputs, initial_states, go_backwards=False, mask=None):
    '''Iterates over the time dimension of a tensor.

    Parameters
    ----------
    inputs: tensor of temporal data of shape (samples, time, ...)
        (at least 3D).
    step_function:
        Parameters:
            input: tensor with shape (samples, ...) (no time dimension),
                representing input for the batch of samples at a certain
                time step.
            states: list of tensors.
        Returns:
            output: tensor with shape (samples, ...) (no time dimension),
            new_states: list of tensors, same length and shapes
                as 'states'.
    initial_states: list of tensors, each with shape (samples, ...)
        (no time dimension), containing the initial values for the
        states used in the step function.
    go_backwards: boolean. If True, do the iteration over
        the time dimension in reverse order.
    mask: binary tensor with shape (samples, time, 1),
        with a zero for every element that is masked.

    Returns
    -------
    A tuple (last_output, outputs, new_states).
        last_output: the latest output of the rnn, of shape (samples, ...)
        outputs: tensor with shape (samples, time, ...) where each
            entry outputs[s, t] is the output of the step function
            at time t for sample s.
        new_states: list of tensors, latest states returned by
            the step function, of shape (samples, ...).
    '''
    ndim = len(inputs.get_shape())
    assert ndim >= 3, "Input should be at least 3D."
    axes = [1, 0] + list(range(2, ndim))
    inputs = tf.transpose(inputs, (axes))
    input_list = tf.unpack(inputs)

    states = initial_states
    successive_states = []
    successive_outputs = []
    if go_backwards:
        input_list.reverse()

    if mask is not None:
        # Transpose not supported by bool tensor types, hence round-trip to uint8.
        mask = tf.cast(mask, tf.uint8)
        if len(mask.get_shape()) == ndim - 1:
            mask = expand_dims(mask)
        mask = tf.cast(tf.transpose(mask, axes), tf.bool)
        mask_list = tf.unpack(mask)

        for input, mask_t in zip(input_list, mask_list):
            output, new_states = step_function(input, states)

            # tf.select needs its condition tensor to be the same shape as its two
            # result tensors, but in our case the condition (mask) tensor is
            # (nsamples, 1), and A and B are (nsamples, ndimensions). So we need to
            # broadcast the mask to match the shape of A and B. That's what the
            # tile call does: it simply repeats the mask along its second
            # dimension ndimensions times.
            tiled_mask_t = tf.tile(mask_t, tf.pack([1, tf.shape(output)[1]]))

            if len(successive_outputs) == 0:
                prev_output = zeros_like(output)
            else:
                prev_output = successive_outputs[-1]

            output = tf.select(tiled_mask_t, output, prev_output)

            return_states = []
            for state, new_state in zip(states, new_states):
                # (see earlier comment for tile explanation)
                tiled_mask_t = tf.tile(mask_t,
                                       tf.pack([1, tf.shape(new_state)[1]]))
                return_states.append(tf.select(tiled_mask_t, new_state, state))

            states = return_states
            successive_outputs.append(output)
            successive_states.append(states)
    else:
        for input in input_list:
            output, states = step_function(input, states)
            successive_outputs.append(output)
            successive_states.append(states)

    last_output = successive_outputs[-1]
    outputs = tf.pack(successive_outputs)
    new_states = successive_states[-1]

    axes = [1, 0] + list(range(2, len(outputs.get_shape())))
    outputs = tf.transpose(outputs, axes)
    return last_output, outputs, new_states
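
# Hedged usage sketch for rnn() above, under the same TF 0.x-era API used in the
# rest of these examples. The step function is a toy recurrent update; the names
# (step, x, W, h0) are illustrative assumptions, not part of the original code.
def _rnn_usage_example():
    batch, time_steps, dim = 4, 5, 3
    x = tf.placeholder(tf.float32, [batch, time_steps, dim])
    W = tf.Variable(tf.random_normal([dim, dim]))

    def step(inp, states):
        # inp: (samples, dim); states: list of tensors carried across time steps
        h = tf.tanh(tf.matmul(inp, W) + states[0])
        return h, [h]

    h0 = [tf.zeros([batch, dim])]
    last_output, outputs, new_states = rnn(step, x, h0)
    return last_output, outputs, new_states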
Beispiel #45
0
def clipped_error(x):
    # Huber loss; tf.select was renamed to tf.where in TF 1.0, so fall back to
    # tf.where when tf.select is unavailable.
    try:
        return tf.select(tf.abs(x) < 1.0, 0.5 * tf.square(x), tf.abs(x) - 0.5)
    except AttributeError:
        return tf.where(tf.abs(x) < 1.0, 0.5 * tf.square(x), tf.abs(x) - 0.5)
Beispiel #46
0
def discretized_mix_logistic_loss(x, l, sum_all=True):
    """ log-likelihood for mixture of discretized logistics, assumes the data has been rescaled to [-1,1] interval """
    xs = int_shape(
        x)  # true image (i.e. labels) to regress to, e.g. (B,32,32,3)
    ls = int_shape(l)  # predicted distribution, e.g. (B,32,32,100)
    nr_mix = int(
        ls[-1] /
        10)  # here and below: unpacking the params of the mixture of logistics
    logit_probs = l[:, :, :, :nr_mix]
    l = tf.reshape(l[:, :, :, nr_mix:], xs + [nr_mix * 3])
    means = l[:, :, :, :, :nr_mix]
    log_scales = tf.maximum(l[:, :, :, :, nr_mix:2 * nr_mix], -7.)
    coeffs = tf.nn.tanh(l[:, :, :, :, 2 * nr_mix:3 * nr_mix])
    x = tf.reshape(x, xs + [1]) + tf.zeros(
        xs + [nr_mix]
    )  # here and below: getting the means and adjusting them based on preceding sub-pixels
    m2 = tf.reshape(
        means[:, :, :, 1, :] + coeffs[:, :, :, 0, :] * x[:, :, :, 0, :],
        [xs[0], xs[1], xs[2], 1, nr_mix])
    m3 = tf.reshape(
        means[:, :, :, 2, :] + coeffs[:, :, :, 1, :] * x[:, :, :, 0, :] +
        coeffs[:, :, :, 2, :] * x[:, :, :, 1, :],
        [xs[0], xs[1], xs[2], 1, nr_mix])
    means = tf.concat(3, [
        tf.reshape(means[:, :, :, 0, :], [xs[0], xs[1], xs[2], 1, nr_mix]), m2,
        m3
    ])
    centered_x = x - means
    inv_stdv = tf.exp(-log_scales)
    plus_in = inv_stdv * (centered_x + 1. / 255.)
    cdf_plus = tf.nn.sigmoid(plus_in)
    min_in = inv_stdv * (centered_x - 1. / 255.)
    cdf_min = tf.nn.sigmoid(min_in)
    log_cdf_plus = plus_in - tf.nn.softplus(
        plus_in)  # log probability for edge case of 0 (before scaling)
    log_one_minus_cdf_min = -tf.nn.softplus(
        min_in)  # log probability for edge case of 255 (before scaling)
    cdf_delta = cdf_plus - cdf_min  # probability for all other cases
    mid_in = inv_stdv * centered_x
    log_pdf_mid = mid_in - log_scales - 2. * tf.nn.softplus(
        mid_in
    )  # log probability in the center of the bin, to be used in extreme cases (not actually used in our code)

    # now select the right output: left edge case, right edge case, normal case, extremely low prob case (doesn't actually happen for us)

    # this is what we are really doing, but using the robust version below for extreme cases in other applications and to avoid NaN issue with tf.select()
    # log_probs = tf.select(x < -0.999, log_cdf_plus, tf.select(x > 0.999, log_one_minus_cdf_min, tf.log(cdf_delta)))

    # robust version, that still works if probabilities are below 1e-5 (which never happens in our code)
    # tensorflow backpropagates through tf.select() by multiplying with zero instead of selecting: this requires us to use some ugly tricks to avoid potential NaNs
    # the 1e-12 in tf.maximum(cdf_delta, 1e-12) is never actually used as output, it's purely there to get around the tf.select() gradient issue
    # if the probability on a sub-pixel is below 1e-5, we use an approximation based on the assumption that the log-density is constant in the bin of the observed sub-pixel value
    log_probs = tf.select(
        x < -0.999, log_cdf_plus,
        tf.select(
            x > 0.999, log_one_minus_cdf_min,
            tf.select(cdf_delta > 1e-5, tf.log(tf.maximum(cdf_delta, 1e-12)),
                      log_pdf_mid - np.log(127.5))))

    log_probs = tf.reduce_sum(log_probs, 3) + log_prob_from_logits(logit_probs)
    if sum_all:
        return -tf.reduce_sum(log_sum_exp(log_probs))
    else:
        return -tf.reduce_sum(log_sum_exp(log_probs), [1, 2])
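
# discretized_mix_logistic_loss relies on int_shape, log_sum_exp and
# log_prob_from_logits, which are defined elsewhere in its source project.
# The definitions below are hedged sketches written to match how the helpers
# are used above, not verbatim copies of the originals.
def int_shape(x):
    # static shape of a tensor as a plain Python list of ints
    return list(map(int, x.get_shape()))

def log_sum_exp(x):
    # numerically stable log-sum-exp over the last axis
    axis = len(x.get_shape()) - 1
    m = tf.reduce_max(x, axis)
    m2 = tf.reduce_max(x, axis, keep_dims=True)
    return m + tf.log(tf.reduce_sum(tf.exp(x - m2), axis))

def log_prob_from_logits(x):
    # numerically stable log-softmax over the last axis
    axis = len(x.get_shape()) - 1
    m = tf.reduce_max(x, axis, keep_dims=True)
    return x - m - tf.log(tf.reduce_sum(tf.exp(x - m), axis, keep_dims=True))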
Beispiel #47
0
import tensorflow as tf
from numpy.random import RandomState

batch_size = 8

#None is used to match the batch
x = tf.placeholder(tf.float32, shape=(None, 2), name='x-input')
#regression problems usually have a single output value
y_ = tf.placeholder(tf.float32, shape=(None, 1), name='y-input')

#define the parameters of the neural network
w1 = tf.Variable(tf.random_normal([2, 1], stddev=1, seed=1))
y = tf.matmul(x, w1)

#define the costs of over- and under-prediction (asymmetric loss)
loss_less = 10
loss_more = 1
loss = tf.reduce_sum(
    tf.select(tf.greater(y, y_), (y - y_) * loss_more, (y_ - y) * loss_less))
#cross_entropy=-tf.reduce_mean(y_*tf.log(tf.clip_by_value(y,1e-10,1.0)))
train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

#randomly generate a simulated dataset
rdm = RandomState(1)
dataset_size = 128
X = rdm.rand(dataset_size, 2)

Y = [[x1 + x2 + rdm.rand() / 10.0 - 0.05] for (x1, x2) in X]

#create a session to run the program
with tf.Session() as sess:
    init_op = tf.initialize_all_variables()
    sess.run(init_op)
    print sess.run(w1)
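
    # Hedged sketch of the presumably truncated training loop: feed mini-batches
    # of (X, Y) and run train_step. STEPS is an illustrative assumption.
    STEPS = 5000
    for i in range(STEPS):
        start = (i * batch_size) % dataset_size
        end = min(start + batch_size, dataset_size)
        sess.run(train_step, feed_dict={x: X[start:end], y_: Y[start:end]})
    print sess.run(w1)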
Beispiel #48
0
def gen_whitenoise_samps(shp):
    return tf.select(
        tf.random_uniform([shp, 784], dtype=tf.float32) > 0.5,
        tf.ones([shp, 784]), tf.zeros([shp, 784]))
    def __init__(self, sess, abstraction_scope, visual_scope, num_actions, num_abstract_actions, num_abstract_states,
                 gamma=0.99, learning_rate=0.00025, replay_start_size=5000,
                 epsilon_start=1.0, epsilon_end=0.1, epsilon_steps=1000000,
                 update_freq=4, target_copy_freq=10000, replay_memory_size=1000000,
                 frame_history=1, batch_size=32, error_clip=1, abstraction_function=None,
                 max_episode_steps=-1, base_network_file=None):
        self.sess = sess
        self.num_abstract_actions = num_abstract_actions
        self.num_abstract_states = num_abstract_states
        self.num_actions = num_actions
        self.batch_size = batch_size
        self.gamma = gamma
        self.frame_history = frame_history
        self.replay_buffer = ReplayMemory((84, 84), 'uint8', replay_memory_size,
                                          frame_history)
        self.abstraction_scope = abstraction_scope
        self.abstraction_function = abstraction_function

        self.inp_frames = tf.placeholder(tf.uint8, [None, 84, 84, self.frame_history])
        self.inp_sp_frames = tf.placeholder(tf.uint8, [None, 84, 84, self.frame_history])
        self.inp_terminated = tf.placeholder(tf.bool, [None])
        self.inp_reward = tf.placeholder(tf.float32, [None])
        self.inp_mask = tf.placeholder(tf.uint8, [None, frame_history])
        self.inp_sp_mask = tf.placeholder(tf.uint8, [None, frame_history])
        self.inp_actions = tf.placeholder(tf.float32, [None, num_actions])
        # onehot vector
        #self.inp_sigma = tf.placeholder(tf.float32, [None, self.num_abstract_states])

        self.reward_matrix = -np.ones((num_abstract_states, num_abstract_states, num_abstract_actions), dtype=np.float32)
        # make self transitions 0
        for i in range(num_abstract_states):
            self.reward_matrix[i, i, :] = 0
        # make goal transitions have reward 1
        for a in range(num_abstract_actions):
            i, j = flat_actions_to_state_pairs(a, num_abstract_states)
            self.reward_matrix[i, j, a] = 1

        self.actions_for_sigma = np.zeros((num_abstract_states, num_abstract_actions), dtype=np.float32)
        for a in range(num_abstract_actions):
            i, j = flat_actions_to_state_pairs(a, num_abstract_states)
            self.actions_for_sigma[i, a] = 1


        # mask stuff here
        mask = tf.reshape(self.inp_mask, [-1, 1, 1, 1])
        masked_input = self.inp_frames * mask

        l0_vis_scope = 'l0_vis'
        with tf.variable_scope(l0_vis_scope):
            self.visual_output_base = hook_visual(masked_input, self.frame_history)
            self.visual_output = tf.stop_gradient(self.visual_output_base)

        with tf.variable_scope('online_base'):
            self.q_online_base = hook_base(self.visual_output_base, self.num_actions)
        with tf.variable_scope('online_1'):
            self.q_online_1 = hook_l0(self.visual_output, 1, self.num_actions)
        with tf.variable_scope('online_2'):
            self.q_online_2 = hook_l0(self.visual_output, 1, self.num_actions)

        self.q_online = tf.concat(1, [self.q_online_1, self.q_online_2])

        mask_sp = tf.reshape(self.inp_sp_mask, [-1, 1, 1, 1])
        masked_input_sp = self.inp_sp_frames * mask_sp

        l0_target_vis_scope = 'l0_target_vis'
        with tf.variable_scope(l0_target_vis_scope):
            self.visual_output_sp = hook_visual(masked_input_sp, self.frame_history)
        with tf.variable_scope('target_base'):
            self.q_target_base = hook_base(self.visual_output_sp, self.num_actions)
        with tf.variable_scope('target_1'):
            self.q_target_1 = hook_l0(self.visual_output_sp, 1, self.num_actions)
        with tf.variable_scope('target_2'):
            self.q_target_2 = hook_l0(self.visual_output_sp, 1, self.num_actions)

        self.q_target = tf.concat(1, [self.q_target_1, self.q_target_2])


        # with tf.variable_scope(visual_scope, reuse=True):
        #     # mask stuff here
        #     mask = tf.reshape(self.inp_mask, [-1, 1, 1, 1])
        #     masked_input = self.inp_frames * mask
        #     self.visual_output = hook_visual(masked_input, self.frame_history)
        #
        #     mask_sp = tf.reshape(self.inp_sp_mask, [-1, 1, 1, 1])
        #     masked_input_sp = self.inp_sp_frames * mask_sp
        #     self.visual_output_sp = hook_visual(masked_input_sp, self.frame_history)
        #
        # with tf.variable_scope('online'):
        #     self.q_online = hook_l0(self.visual_output, self.num_abstract_actions, self.num_actions)
        # with tf.variable_scope('target'):
        #     self.q_target = hook_l0(self.visual_output_sp, self.num_abstract_actions, self.num_actions)

        # TODO set up double dqn for later experiments.

        # Q matrix is (num_abstract_actions, num_actions), results in vector with max-q for each abstract action.
        self.maxQ = tf.reduce_max(self.q_target, reduction_indices=2)

        with tf.variable_scope(visual_scope, reuse=True):
            self.l1_visual_output = hook_visual(masked_input, self.frame_history)
            self.l1_visual_output_sp = hook_visual(masked_input_sp, self.frame_history)
        with tf.variable_scope(self.abstraction_scope, reuse=True):
            self.sigma = tf.stop_gradient(hook_abstraction(self.l1_visual_output, num_abstract_states, batch_size)[0])
            self.sigma_p = tf.stop_gradient(hook_abstraction(self.l1_visual_output_sp, num_abstract_states, batch_size)[0])
            self.sigma_query, self.sigma_query_probs = hook_abstraction(self.l1_visual_output, self.num_abstract_states, 1)

        self.r = tf.reduce_sum(
            tf.reshape(self.sigma_p, [-1, 1, num_abstract_states, 1]) * \
            tf.reshape(self.sigma, [-1, num_abstract_states, 1, 1]) * \
            tf.reshape(self.reward_matrix, [1, num_abstract_states, num_abstract_states, num_abstract_actions]),
            reduction_indices=[1, 2])
        # Give a reward of -1 if reached a terminal state
        self.r = (self.r * tf.reshape(tf.cast(tf.logical_not(self.inp_terminated), dtype=tf.float32), [-1, 1])) +\
                 tf.reshape(tf.cast(self.inp_terminated, dtype=tf.float32) * -1, [-1, 1])

        self.use_backup = tf.cast(tf.logical_not(self.inp_terminated), dtype=tf.float32) * tf.reduce_sum(self.sigma_p * self.sigma, reduction_indices=1)
        self.y = tf.stop_gradient(self.r + tf.reshape(self.use_backup, [-1, 1]) * gamma * self.maxQ)
        self.delta = tf.reduce_sum(tf.reshape(self.inp_actions, [-1, 1, num_actions]) * self.q_online, reduction_indices=2) - self.y
        valid_actions_mask = valid_actions_for_sigma(self.actions_for_sigma, self.sigma, self.num_abstract_actions)
        self.masked_delta = self.delta * valid_actions_mask
        self.error = tf.select(tf.abs(self.masked_delta) < error_clip, 0.5 * tf.square(self.masked_delta),
                               error_clip * tf.abs(self.masked_delta))

        # base dqn
        self.maxQ_base = tf.reduce_max(self.q_target_base, reduction_indices=1)
        self.r_base = tf.sign(self.inp_reward)
        use_backup_base = tf.cast(tf.logical_not(self.inp_terminated), dtype=tf.float32)
        self.y_base = tf.stop_gradient(self.r_base + use_backup_base * gamma * self.maxQ_base)
        self.delta_base = tf.reduce_sum(self.inp_actions * self.q_online_base, reduction_indices=1) - self.y_base
        self.error_base = tf.select(tf.abs(self.delta_base) < error_clip, 0.5 * tf.square(self.delta_base),
                               error_clip * tf.abs(self.delta_base))

        self.loss = tf.reduce_sum(self.error) + tf.reduce_sum(self.error_base)
        self.g = tf.gradients(self.loss, self.q_online)
        optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate, decay=0.95, centered=True, epsilon=0.01)
        self.train_op = optimizer.minimize(self.loss, var_list=th.get_vars('online_1', 'online_2', 'online_base', l0_vis_scope))
        self.copy_op = [th.make_copy_op('online_1', 'target_1'), th.make_copy_op('online_2', 'target_2'), th.make_copy_op(l0_vis_scope, l0_target_vis_scope), th.make_copy_op('online_base', 'target_base')]

        self.replay_buffer = L1ReplayMemory((84, 84), 'uint8', replay_memory_size, frame_history)
        self.frame_history = frame_history
        self.replay_start_size = replay_start_size
        self.epsilon = epsilon_start
        self.epsilon_min = epsilon_end
        self.epsilon_steps = epsilon_steps
        self.epsilon_delta = (self.epsilon - self.epsilon_min) / self.epsilon_steps
        self.update_freq = update_freq
        self.target_copy_freq = target_copy_freq
        self.action_ticker = 1
        self.max_episode_steps = max_episode_steps

        self.num_actions = num_actions
        self.batch_size = batch_size

        self.base_network_saver = tf.train.Saver(var_list=th.get_vars('online_base', l0_vis_scope))
Beispiel #50
0
def bernoulli(p, y):
    return tf.log(tf.select(tf.equal(y, 1), p, 1 - p))
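# Hedged usage note: for a batch of Bernoulli parameters p and binary targets y
# of the same shape, tf.reduce_sum(bernoulli(p, y), 1) would give per-sample
# log-likelihoods.
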
sequence_length_lst = [1, 1, 1, 1, 1]
sequence_length = tf.constant(sequence_length_lst)
done_mask = tf.cast(tf.zeros(batch_size), tf.bool)

for time in range(0, 5):
    print(time)
    current_date = char_prob[:, time, :]
    max_vals = tf.argmax(current_date, 1)
    mask = tf.equal(max_vals, tf.constant(0, tf.int64))

    current_mask = tf.logical_and(mask, tf.logical_not(done_mask))
    done_mask = tf.logical_or(mask, done_mask)

    time_vec = tf.ones(batch_size, tf.int32) * (time + 2)
    sequence_length = tf.select(done_mask,
                                sequence_length,
                                time_vec,
                                name=None)

    not_done_no = tf.reduce_sum(tf.cast(tf.logical_not(done_mask), tf.int32))
    all_eos = tf.equal(not_done_no, tf.constant(0))
    stop_loop = tf.logical_or(all_eos, tf.greater(time, max_it))
    keep_working = tf.logical_not(stop_loop)

sess = tf.Session()
with sess.as_default():
    tf.initialize_all_variables().run()
    #print(char_prob.eval())
    print(max_vals.eval())
    print(mask.eval())
    print(done_mask.eval())
    print(sequence_length.eval())
Beispiel #52
0
    z = tf.mul(u, tf.nn.tanh(g))

    a_newmax = tf.maximum(a_max, a)
    exp_diff = tf.exp(a_max - a_newmax)
    exp_scaled = tf.exp(a - a_newmax)

    n = tf.mul(n, exp_diff) + tf.mul(
        z, exp_scaled)  # Numerically stable update of numerator
    d = tf.mul(
        d, exp_diff) + exp_scaled  # Numerically stable update of denominator
    h_new = activation(tf.div(n, d))
    a_max = a_newmax

    h = tf.select(
        tf.greater(l, i), h_new, h
    )  # Use new hidden state only if the sequence length has not been exceeded

ly = tf.matmul(h, W_o) + b_o
py = tf.nn.softmax(ly)

##########################################################################################
# Optimizer/Analyzer
##########################################################################################

# Cost function and optimizer
#
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    ly, y))  # Cross-entropy cost function
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
sess.as_default()

# L = .5[r + discount * max a' Q(s', a') - Q(s, a)]^2
#	     |------target-------|  |prediction|

# Do a feedforward pass for the current state s to get predicted Q-values for all actions.
action_array_1 = network(state_input_1)
# Do a feedforward pass for the next state s' and calculate maximum overall network outputs max a' Q(s', a').
# Set Q-value target for action to r + discount * max a' Q(s', a') (use the max calculated in step 2). 
# For all other actions, set the Q-value target to the same as originally returned from step 1, making the error 0 for those outputs.

# tt = rr + discount * max(a') Q(ss',aa') or rr if terminal state
tt = reward_input + terminal_input * (GAMMA * max_val_input) 
tt = tf.reshape(tt,(BATCH,1))
target_prep = tf.tile(tt,[1,4])
target = tf.select(action_input, target_prep, action_array_1)

# loss is .5(tt - Q(ss,aa))^2
Qerror = tf.sub(target, action_array_1)
loss = .5*tf.reduce_sum(tf.mul(Qerror, Qerror))

# Update the weights using backpropagation.
optimizer = tf.train.GradientDescentOptimizer(1e-3).minimize(loss)

# saving and loading networks
saver = tf.train.Saver()
tf.initialize_all_variables().run()

checkpoint = tf.train.get_checkpoint_state("saved_networks")
if checkpoint and checkpoint.model_checkpoint_path:
	saver.restore(sess, checkpoint.model_checkpoint_path)
Beispiel #54
0
    def build_networks(self):
        self.nA = self.action_space.n
        self.actor_input = tf.placeholder(tf.float32, name='actor_input')
        self.actions_taken = tf.placeholder(tf.float32, name='actions_taken')
        self.critic_feedback = tf.placeholder(tf.float32,
                                              name='critic_feedback')
        self.critic_rewards = tf.placeholder(tf.float32, name='critic_rewards')

        # Actor network
        W0 = tf.Variable(tf.random_normal(
            [self.nO, self.config['actor_n_hidden']]),
                         name='W0')
        b0 = tf.Variable(tf.zeros([self.config['actor_n_hidden']]), name='b0')
        L1 = tf.tanh(tf.matmul(self.actor_input, W0) + b0[None, :], name='L1')

        W1 = tf.Variable(tf.random_normal(
            [self.config['actor_n_hidden'], self.nA]),
                         name='W1')
        b1 = tf.Variable(tf.zeros([self.nA]), name='b1')
        self.prob_na = tf.nn.softmax(tf.matmul(L1, W1) + b1[None, :],
                                     name='prob_na')

        good_probabilities = tf.reduce_sum(tf.mul(self.prob_na,
                                                  self.actions_taken),
                                           reduction_indices=[1])
        eligibility = tf.log(tf.select(tf.equal(good_probabilities, tf.fill(tf.shape(good_probabilities), 0.0)), tf.fill(tf.shape(good_probabilities), 1e-30), good_probabilities)) \
            * (self.critic_rewards - self.critic_feedback)
        loss = -tf.reduce_mean(eligibility)
        loss = tf.Print(loss, [loss], message='Actor loss=')
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=self.config['actor_learning_rate'],
            decay=0.9,
            epsilon=1e-9)
        self.actor_train = optimizer.minimize(loss)

        self.critic_state_in = tf.placeholder("float", [None, self.nO],
                                              name='critic_state_in')
        self.critic_returns = tf.placeholder("float", name="critic_returns")

        # Critic network
        critic_W0 = tf.Variable(tf.random_normal(
            [self.nO, self.config['critic_n_hidden']]),
                                name='W0')
        critic_b0 = tf.Variable(tf.zeros([self.config['critic_n_hidden']]),
                                name='b0')
        critic_L1 = tf.tanh(tf.matmul(self.critic_state_in, critic_W0) +
                            critic_b0[None, :],
                            name='L1')

        critic_W1 = tf.Variable(tf.random_normal(
            [self.config['critic_n_hidden'], 1]),
                                name='W1')
        critic_b1 = tf.Variable(tf.zeros([1]), name='b1')
        self.critic_value = tf.matmul(critic_L1,
                                      critic_W1) + critic_b1[None, :]
        critic_loss = tf.reduce_mean(
            tf.square(self.critic_returns - self.critic_value))
        critic_loss = tf.Print(critic_loss, [critic_loss],
                               message='Critic loss=')
        critic_optimizer = tf.train.RMSPropOptimizer(
            learning_rate=self.config['critic_learning_rate'],
            decay=0.9,
            epsilon=1e-9)
        self.critic_train = critic_optimizer.minimize(critic_loss)

        init = tf.initialize_all_variables()

        # Launch the graph.
        self.sess = tf.Session()
        self.sess.run(init)
    def __init__(self, num_abstract_states, num_actions, gamma=0.9, learning_rate=0.00025, replay_start_size=32,
                 epsilon_start=1.0, epsilon_end=0.1, epsilon_steps=10000, replay_memory_size=100,
                 frame_history=1, batch_size=32, error_clip=1, abstraction_function=None, base_network_file=None):
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.num_abstract_states = num_abstract_states
        self.num_abstract_actions = num_abstract_states * (num_abstract_states - 1)
        self.frame_history = frame_history

        self.abstraction_function = abstraction_function

        self.sess = tf.Session(config=config)
        self.inp_actions = tf.placeholder(tf.float32, [None, self.num_abstract_actions])
        inp_shape = [None, 84, 84, self.frame_history]
        inp_dtype = 'uint8'
        assert type(inp_dtype) is str
        self.inp_frames = tf.placeholder(inp_dtype, inp_shape)
        self.inp_sp_frames = tf.placeholder(inp_dtype, inp_shape)
        self.inp_terminated = tf.placeholder(tf.bool, [None])
        self.inp_reward = tf.placeholder(tf.float32, [None])
        self.inp_mask = tf.placeholder(inp_dtype, [None, frame_history])
        # convert t
        self.inp_sigma = tf.placeholder(tf.uint8, [None])
        self.inp_sigma_onehot = tf.cast(tf.sparse_to_dense(tf.concat(1, [tf.expand_dims(tf.range(0, batch_size), -1), tf.expand_dims(tf.cast(self.inp_sigma, tf.int32), -1)]), [batch_size, self.num_abstract_states], 1), tf.float32)
        self.inp_sigma_p = tf.placeholder(tf.uint8, [None])
        self.inp_sigma_p_onehot = tf.cast(tf.sparse_to_dense(tf.concat(1, [tf.expand_dims(tf.range(0, batch_size), -1), tf.expand_dims(tf.cast(self.inp_sigma_p, tf.int32), -1)]), [batch_size, self.num_abstract_states], 1), tf.float32)
        self.inp_sp_mask = tf.placeholder(inp_dtype, [None, frame_history])
        self.gamma = gamma

        self.actions_for_sigma = np.zeros((self.num_abstract_states, self.num_abstract_actions), dtype=np.float32)
        for a in range(self.num_abstract_actions):
            i, j = flat_actions_to_state_pairs(a, num_abstract_states)
            self.actions_for_sigma[i, a] = 1

        self.visual_scope = 'visual'
        self.abstraction_scope = 'abstraction'
        with tf.variable_scope(self.visual_scope):
            # mask stuff here
            mask = tf.reshape(self.inp_mask, [-1, 1, 1, 1])
            masked_input = self.inp_frames * mask
            self.visual_output = hook_visual(masked_input, self.frame_history)
        with tf.variable_scope(self.abstraction_scope):
            self.sigma, self.sigma_probs = hook_abstraction(self.visual_output, self.num_abstract_states, batch_size, I=self.inp_sigma_onehot)
        with tf.variable_scope(self.abstraction_scope, reuse=True):
            # the one that samples
            self.sigma_query, self.sigma_query_probs = hook_abstraction(self.visual_output, self.num_abstract_states, 1)

        with tf.variable_scope(self.visual_scope, reuse=True):
            mask_sp = tf.reshape(self.inp_sp_mask, [-1, 1, 1, 1])
            masked_input_sp = self.inp_sp_frames * mask_sp
            self.visual_output_sp = hook_visual(masked_input_sp, self.frame_history)
        with tf.variable_scope(self.abstraction_scope, reuse=True):
            self.sigma_p, self.sigma_p_probs = hook_abstraction(self.visual_output_sp, self.num_abstract_states, batch_size, I=self.inp_sigma_p_onehot)

        self.possible_action_vector = tf.stop_gradient(valid_actions_for_sigma(self.actions_for_sigma, self.sigma, self.num_abstract_actions))
        with tf.variable_scope('l1_online'):
            self.q_online = hook_l1(self.sigma, self.num_abstract_actions)
        with tf.variable_scope('l1_online', reuse=True):
            self.possible_action_vector_query = -np.inf * (1 - valid_actions_for_sigma(self.actions_for_sigma, self.sigma_query, self.num_abstract_actions))
            self.possible_action_vector_query = tf.select(tf.is_nan(self.possible_action_vector_query),
                                                          tf.zeros_like(self.possible_action_vector_query),
                                                          self.possible_action_vector_query)
            self.q_online_query = self.possible_action_vector_query + hook_l1(self.sigma_query, self.num_abstract_actions)
        with tf.variable_scope('l1_online', reuse=True):
            self.possible_action_vector_prime = -np.inf * (1 - valid_actions_for_sigma(self.actions_for_sigma, self.sigma_p, self.num_abstract_actions))
            self.possible_action_vector_prime = tf.select(tf.is_nan(self.possible_action_vector_prime), tf.zeros_like(self.possible_action_vector_prime), self.possible_action_vector_prime)
            self.q_target = self.possible_action_vector_prime + hook_l1(self.sigma_p, self.num_abstract_actions)

        self.maxQ = tf.reduce_max(self.q_target, reduction_indices=1)

        self.r = tf.sign(self.inp_reward)
        use_backup = tf.cast(tf.logical_not(self.inp_terminated), dtype=tf.float32)
        self.y = tf.stop_gradient(self.r + use_backup * gamma * self.maxQ)
        self.delta = tf.reduce_sum(self.inp_actions * self.q_online, reduction_indices=1) - self.y
        self.error = tf.select(tf.abs(self.delta) < error_clip, 0.5 * tf.square(self.delta),
                               error_clip * tf.abs(self.delta))
        self.loss = tf.reduce_sum(self.error)
        optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate, decay=0.95, centered=True, epsilon=0.01)
        # TODO: add th.get_vars(self.visual_scope)+th.get_vars(self.abstraction_scope)
        if self.abstraction_function is None:
            self.train_op = optimizer.minimize(self.loss, var_list=th.get_vars('l1_online', self.abstraction_scope, self.visual_scope))
        else:
            self.train_op = optimizer.minimize(self.loss, var_list=th.get_vars('l1_online'))

        self.saver = tf.train.Saver(var_list=th.get_vars(self.visual_scope)+th.get_vars(self.abstraction_scope)+th.get_vars('l1_online')+th.get_vars('online'))

        self.replay_buffer = L1ReplayMemory((84, 84), np.uint8, replay_memory_size, 1)
        self.frame_history = frame_history
        self.replay_start_size = replay_start_size
        self.epsilon = epsilon_start
        self.epsilon_min = epsilon_end
        self.epsilon_steps = epsilon_steps
        self.epsilon_delta = (self.epsilon - self.epsilon_min) / self.epsilon_steps
        self.action_ticker = 1

        self.num_actions = num_actions
        self.batch_size = batch_size

        self.l0_learner = L0_Learner(self.sess, self.abstraction_scope, self.visual_scope, num_actions, #self.visual_scope, num_actions,
                                     self.num_abstract_actions, self.num_abstract_states,
                                     abstraction_function=self.abstraction_function, max_episode_steps=20, base_network_file=base_network_file)

        self.sess.run(tf.initialize_all_variables())

        if base_network_file is not None:
            self.l0_learner.base_network_saver.restore(self.sess, base_network_file)
            print 'Restored network from file'
Beispiel #56
0
    def __init__(self,
                 config,
                 use_lstm=False,
                 num_samples=512,
                 forward=False,
                 scope_name='gen_seq2seq',
                 dtype=tf.float32):

        self.scope_name = scope_name
        with tf.variable_scope(self.scope_name):
            self.source_vocab_size = config.vocab_size
            self.target_vocab_size = config.vocab_size
            self.buckets = config.buckets
            self.learning_rate = tf.Variable(float(config.learning_rate),
                                             trainable=False,
                                             dtype=dtype)
            self.learning_rate_decay_op = self.learning_rate.assign(
                self.learning_rate * config.learning_rate_decay_factor)
            self.global_step = tf.Variable(0, trainable=False)
            self.batch_size = config.batch_size
            self.emb_dim = config.emb_dim
            self.num_layers = config.num_layers
            self.max_gradient_norm = config.max_gradient_norm

            #self.up_reward = tf.placeholder(tf.bool, name="up_reward")
            self.mc_search = tf.placeholder(tf.bool, name="mc_search")
            self.forward_only = tf.placeholder(tf.bool, name="forward_only")

            # If we use sampled softmax, we need an output projection.
            output_projection = None
            softmax_loss_function = None

            # Create the internal multi-layer cell for our RNN.
            single_cell = tf.nn.rnn_cell.GRUCell(self.emb_dim)
            if use_lstm:
                single_cell = tf.nn.rnn_cell.BasicLSTMCell(self.emb_dim)
            cell = single_cell
            if self.num_layers > 1:
                cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] *
                                                   self.num_layers)

            # The seq2seq function: we use embedding for the input and attention.
            def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
                return rl_seq2seq.embedding_attention_seq2seq(
                    encoder_inputs,
                    decoder_inputs,
                    cell,
                    num_encoder_symbols=self.source_vocab_size,
                    num_decoder_symbols=self.target_vocab_size,
                    embedding_size=self.emb_dim,
                    output_projection=output_projection,
                    feed_previous=do_decode,
                    mc_search=self.mc_search,
                    dtype=dtype)

            # Feeds for inputs.
            self.encoder_inputs = []
            self.decoder_inputs = []
            self.target_weights = []
            for i in xrange(
                    self.buckets[-1][0]):  # Last bucket is the biggest one.
                self.encoder_inputs.append(
                    tf.placeholder(tf.int32,
                                   shape=[None],
                                   name="encoder{0}".format(i)))
            for i in xrange(self.buckets[-1][1] + 1):
                self.decoder_inputs.append(
                    tf.placeholder(tf.int32,
                                   shape=[None],
                                   name="decoder{0}".format(i)))
                self.target_weights.append(
                    tf.placeholder(dtype,
                                   shape=[None],
                                   name="weight{0}".format(i)))
            self.reward = [
                tf.placeholder(tf.float32, name="reward_%i" % i)
                for i in range(len(self.buckets))
            ]

            # Our targets are decoder inputs shifted by one.
            targets = [
                self.decoder_inputs[i + 1]
                for i in xrange(len(self.decoder_inputs) - 1)
            ]

            self.outputs, self.losses, self.encoder_state = rl_seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                targets,
                self.target_weights,
                self.buckets,
                self.emb_dim,
                self.batch_size,
                lambda x, y: seq2seq_f(
                    x, y, tf.select(self.forward_only, True, False)),
                output_projection=output_projection,
                softmax_loss_function=softmax_loss_function)

            with tf.name_scope("gradient_descent"):
                self.gradient_norms = []
                self.updates = []
                self.gen_params = [
                    p for p in tf.trainable_variables()
                    if self.scope_name in p.name
                ]
                opt = tf.train.GradientDescentOptimizer(self.learning_rate)
                for b in xrange(len(self.buckets)):
                    adjusted_losses = tf.mul(self.losses[b], self.reward[b])
                    gradients = tf.gradients(adjusted_losses, self.gen_params)
                    clipped_gradients, norm = tf.clip_by_global_norm(
                        gradients, self.max_gradient_norm)
                    self.gradient_norms.append(norm)
                    self.updates.append(
                        opt.apply_gradients(zip(clipped_gradients,
                                                self.gen_params),
                                            global_step=self.global_step))

            self.gen_variables = [
                k for k in tf.global_variables() if self.scope_name in k.name
            ]
            self.saver = tf.train.Saver(self.gen_variables)
Beispiel #57
0
def ModelHelper(y_pred_conf, y_pred_loc):
    """
	Define loss function, optimizer, predictions, and accuracy metric
	Loss includes confidence loss and localization loss

	conf_loss_mask is created at batch generation time, to mask the confidence losses
	It has 1 at locations w/ positives, and 1 at select negative locations
	such that negative-to-positive ratio of NEG_POS_RATIO is satisfied

	Arguments:
		* y_pred_conf: Class predictions from model,
			a tensor of shape [batch_size, num_feature_map_cells * num_default_boxes * num_classes]
		* y_pred_loc: Localization predictions from model,
			a tensor of shape [batch_size, num_feature_map_cells * num_default_boxes * 4]

	Returns relevant tensor references
	"""
    num_total_preds = 0
    for fm_size in FM_SIZES:
        num_total_preds += fm_size[0] * fm_size[1] * NUM_DEFAULT_BOXES
    num_total_preds_conf = num_total_preds * NUM_CLASSES
    num_total_preds_loc = num_total_preds * 4

    # Input tensors
    y_true_conf = tf.placeholder(
        tf.int32, [None, num_total_preds],
        name='y_true_conf')  # classification ground-truth labels
    y_true_loc = tf.placeholder(
        tf.float32, [None, num_total_preds_loc],
        name='y_true_loc')  # localization ground-truth labels
    conf_loss_mask = tf.placeholder(
        tf.float32, [None, num_total_preds],
        name='conf_loss_mask')  # 1 mask "bit" per def. box

    # Confidence loss
    logits = tf.reshape(y_pred_conf, [-1, num_total_preds, NUM_CLASSES])
    conf_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits, y_true_conf)
    conf_loss = conf_loss_mask * conf_loss  # "zero-out" the loss for don't-care negatives
    conf_loss = tf.reduce_sum(conf_loss)

    # Localization loss (smooth L1 loss)
    # loc_loss_mask is analogous to conf_loss_mask, except 4 times the size
    diff = y_true_loc - y_pred_loc

    loc_loss_l2 = 0.5 * (diff**2.0)
    loc_loss_l1 = tf.abs(diff) - 0.5
    smooth_l1_condition = tf.less(tf.abs(diff), 1.0)
    loc_loss = tf.select(smooth_l1_condition, loc_loss_l2, loc_loss_l1)

    loc_loss_mask = tf.minimum(
        y_true_conf, 1
    )  # have non-zero localization loss only where we have matching ground-truth box
    loc_loss_mask = tf.to_float(loc_loss_mask)
    loc_loss_mask = tf.stack(
        [loc_loss_mask] * 4, axis=2
    )  # [0, 1, 1] -> [[[0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]], ...]
    loc_loss_mask = tf.reshape(
        loc_loss_mask, [-1, num_total_preds_loc
                        ])  # removing the inner-most dimension of above
    loc_loss = loc_loss_mask * loc_loss
    loc_loss = tf.reduce_sum(loc_loss)

    # Weighted average of confidence loss and localization loss
    # Also add regularization loss
    loss = conf_loss + LOC_LOSS_WEIGHT * loc_loss + tf.reduce_sum(
        slim.losses.get_regularization_losses())
    optimizer = OPT.minimize(loss)

    #reported_loss = loss #tf.reduce_sum(loss, 1)  # DEBUG

    # Class probabilities and predictions
    probs_all = tf.nn.softmax(logits)
    probs, preds_conf = tf.nn.top_k(
        probs_all
    )  # take top-1 probability, and the index is the predicted class
    probs = tf.reshape(probs, [-1, num_total_preds])
    preds_conf = tf.reshape(preds_conf, [-1, num_total_preds])

    # Return a dictionary of {tensor_name: tensor_reference}
    ret_dict = {
        'y_true_conf': y_true_conf,
        'y_true_loc': y_true_loc,
        'conf_loss_mask': conf_loss_mask,
        'optimizer': optimizer,
        'conf_loss': conf_loss,
        'loc_loss': loc_loss,
        'loss': loss,
        'probs': probs,
        'preds_conf': preds_conf,
        'preds_loc': y_pred_loc,
    }
    return ret_dict
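
# The docstring above says conf_loss_mask is built at batch generation time,
# with 1s at all positive boxes plus enough negatives to satisfy NEG_POS_RATIO.
# A hedged numpy sketch of one such construction; the function name and the
# random negative sampling are assumptions (hard-negative mining is the more
# common choice).
import numpy as np

def make_conf_loss_mask(y_true_conf_batch, neg_pos_ratio=3):
    # y_true_conf_batch: (batch_size, num_total_preds) int labels, 0 = background
    mask = (y_true_conf_batch > 0).astype(np.float32)  # keep every positive box
    for labels, row in zip(y_true_conf_batch, mask):
        num_pos = int(row.sum())
        neg_idx = np.where(labels == 0)[0]
        np.random.shuffle(neg_idx)
        row[neg_idx[:neg_pos_ratio * num_pos]] = 1.0    # keep sampled negatives
    return mask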
Beispiel #58
0
    def attention(self):
        self.middle = 200
        self.max_candidate = 20
        self.disamb_in = tf.placeholder(tf.int32, [None, self.max_candidate],
                                        name='disamb_in')
        self.embedding = tf.placeholder(tf.float32, [14951, self.transe_size],
                                        name='embedding')

        left_query_lstm = tf.nn.rnn_cell.LSTMCell(self.lstm_size)
        right_query_lstm = tf.nn.rnn_cell.LSTMCell(self.lstm_size)

        with tf.variable_scope(self.scope):
            with tf.variable_scope('query'):
                left_query_out, _ = tf.nn.rnn(left_query_lstm,
                                              self.left_in_rev,
                                              dtype=tf.float32)
            with tf.variable_scope('query', reuse=True):
                right_query_out, _ = tf.nn.rnn(right_query_lstm,
                                               self.right_in_rev,
                                               dtype=tf.float32)

        query_in = tf.concat(
            1, [self.entity_in, left_query_out[-1], right_query_out[-1]])
        Wq1 = tf.Variable(tf.random_normal([self.word_size+2*self.lstm_size, self.middle], \
            stddev=self.dev))
        Wq2 = tf.Variable(
            tf.random_normal([self.middle, self.transe_size], stddev=self.dev))
        self.query = tf.tanh(tf.matmul(tf.tanh(tf.matmul(query_in, Wq1)), Wq2))
        self.query_ = tf.placeholder(tf.float32, [None, self.transe_size])

        #choose the most likely embedding
        expand = tf.gather(self.embedding, self.disamb_in)
        multi = tf.transpose(tf.pack([self.query] * self.max_candidate),
                             perm=[1, 0, 2])
        diff = tf.reduce_sum(tf.pow(expand - multi, 2), 2)

        ladder = tf.expand_dims(tf.to_int64(tf.range(self.batch_size)), 1)
        DIFF = tf.expand_dims(tf.argmin(diff, 1), 1)

        choice = tf.gather_nd(self.disamb_in, tf.concat(1, [ladder, DIFF]))

        self.sh = tf.placeholder(tf.float32)
        miss = tf.logical_not(tf.logical_or(\
            tf.equal(self.disamb_in[:,1], 0), \
            tf.less(tf.reduce_min(diff, 1), self.sh))) # should be false for training

        temp_query = tf.gather(self.embedding, choice)
        real_query = tf.select(tf.logical_or(miss, \
            tf.equal(choice, tf.zeros([self.batch_size], dtype=tf.int32))),
            self.query, temp_query)

        self.A = tf.Variable(tf.random_normal([self.lstm_size*2, self.transe_size], \
            mean=0, stddev=self.dev))
        self.test = tf.placeholder(tf.bool, [None])
        Q = tf.select(self.test, real_query, self.query_)

        left_att = [tf.pow(tf.reduce_sum(tf.matmul(self.left_att_in[i], self.A) * Q, \
            [1], keep_dims=True),2)\
            for i in range(self.window)]
        right_att = [tf.pow(tf.reduce_sum(tf.matmul(self.right_att_in[i], self.A) * Q, \
            [1], keep_dims=True),2)\
            for i in range(self.window)]

        return (left_att, right_att)
Beispiel #59
0
	z = tf.mul(u, tf.nn.tanh(g))

	a_newmax = tf.maximum(a_max, a)
	exp_diff = tf.exp(a_max-a_newmax)
	exp_scaled = tf.exp(a-a_newmax)

	n = tf.mul(n, exp_diff)+tf.mul(z, exp_scaled)	# Numerically stable update of numerator
	d = tf.mul(d, exp_diff)+exp_scaled	# Numerically stable update of denominator
	h = activation(tf.div(n, d))
	a_max = a_newmax

	ly = tf.matmul(h, W_o)+b_o

	error_step = tf.nn.softmax_cross_entropy_with_logits(ly, y[:,i,:])	# Cross-entropy cost function
	error += tf.select(tf.greater(l, i), error_step, tf.zeros([batch_size]))	# Include cost from this step only if the sequence length has not been exceeded

##########################################################################################
# Optimizer
##########################################################################################

# Optimizer
#
cost = tf.reduce_mean(tf.div(error, l))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

##########################################################################################
# Train
##########################################################################################

# Operation to initialize session
Beispiel #60
0
def k_m_tf(defect_tensor,
           clusters,
           max_iters,
           summaries_dir,
           stage_str,
           name_str,
           go_to_max=False):
    length = len(defect_tensor[:, 0])
    num_clus = clusters
    MAX_ITERS = max_iters
    tiles = len(defect_tensor[0, :])
    start = time.time()

    sess = tf.InteractiveSession()
    with tf.name_scope('input'):
        points = tf.Variable(tf.random_uniform([length, tiles]),
                             dtype=tf.float32)
    with tf.name_scope('cluster_assigns'):
        cluster_assignments = tf.Variable(tf.zeros([length], dtype=tf.float32))

    with tf.name_scope('cents'):
        centroids = tf.Variable(tf.random_crop(points.initialized_value(),
                                               [num_clus, tiles]),
                                dtype=tf.float32)
    # centroids = tf.Print(centroids,[centroids], summarize = 16, message = 'centroids')

    # Replicate to N copies of each centroid and K copies of each
    # point, then subtract and compute the sum of squared distances.
    with tf.name_scope('Replicate'):
        rep_centroids = tf.reshape(tf.tile(centroids, [length, 1]),
                                   [length, num_clus, tiles])
        # rep_centroids = tf.Print(rep_centroids,[tf.shape(rep_centroids)],message='shape_rep_centroids')
        rep_points = tf.reshape(tf.tile(points, [1, num_clus]),
                                [length, num_clus, tiles])

    with tf.name_scope('Sum_squares'):
        squares = tf.square(rep_points - rep_centroids)
        sum_squares = tf.reduce_sum(tf.square(squares), reduction_indices=2)
        squares_1d = tf.scalar_summary('sum_squares',
                                       tf.reduce_mean(sum_squares))
        # sum_squares = tf.Print(sum_squares,[sum_squares], summarize = 40, message = 'sum_squares')
        # sum_squares = tf.Print(sum_squares,[tf.shape(sum_squares)], summarize = 16, message = 'sum_squares_shape')

        # Use argmin to select the lowest-distance point
    with tf.name_scope('argmin'):
        best_centroids = tf.argmin(sum_squares, 1)
        # best_centroids = tf.Print(best_centroids,[best_centroids], summarize = 40,  message = ' best_cents')
    did_assignments_change = tf.reduce_any(
        tf.not_equal(tf.cast(best_centroids, tf.float32), cluster_assignments))

    ## This part exists for counting purposes, since I can't simply access the count in the means part
    with tf.name_scope('counting'):
        const_1d = {}
        num_1d = {}
        found_1d = {}
        scalar_1d = {}

        for i in range(0, num_clus):
            const_1d[i] = tf.constant(i, shape=[320, 1], dtype=tf.int64)
        # string_1d[i] = tf.constant(str[i], shape =[320,1], dtype = tf.string)

        for i in range(0, num_clus):
            num_1d[i] = tf.equal(tf.reshape(best_centroids, [320, 1]),
                                 const_1d[i])
            found_1d[i] = tf.reduce_sum(tf.cast(num_1d[i], tf.int32))
            found_1d[i] = tf.expand_dims(found_1d[i], -1)
            scalar_1d[i] = tf.scalar_summary(str(i), tf.squeeze(found_1d[i]))
            # found_1d[i] = tf.Print(found_1d[i], [found_1d[i]], summarize=40, message=str(i))
            # found_1d[i] = tf.Print(found_1d[i], [tf.shape(found_1d[i])], summarize=40, message=str(i))
            # found_1d[i] = tf.Print(found_1d[i],[tf.expand_dims(found_1d[i],0)], summarize = 40, message =str(i))
            # found_1d[i] = tf.Print(found_1d[i],[tf.shape(tf.expand_dims(found_1d[i],0))], summarize = 40, message =str(i))
            # found_1d[i] = tf.Print(found_1d[i], [tf.shape(tf.reshape(found_1d[i],[1,1]))], summarize=40, message=str(i))

        found_tensor = tf.concat(0, [found_1d[i] for i in range(0, num_clus)])
        distro = tf.histogram_summary('Distribution', found_tensor)


    ## Calculate the per-cluster means at the indices given by best_centroids.
    with tf.name_scope('means'):
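        # unsorted_segment_sum groups the rows of 'points' by their assigned
        # cluster id; dividing by the per-cluster counts gives the new means.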
        total = tf.unsorted_segment_sum(points, best_centroids, num_clus)
        count = tf.unsorted_segment_sum(tf.ones_like(points), best_centroids,
                                        num_clus)
        # count = tf.Print(count, [tf.shape(count)])
        means = total / count
        # Guard against empty clusters: replace NaN means (0 / 0) with zeros
        means = tf.select(tf.is_nan(means), tf.zeros_like(means), means)
        means_1d = tf.scalar_summary('means', tf.reduce_mean(means))
        # means = tf.Print(means,[means],summarize = 16, message = 'MEANS')
        # means = tf.Print(means,[tf.shape(means)], message = 'm_shape')
    # Do not write to the assigned clusters variable until after
    # computing whether the assignments have changed - hence with_dependencies
    with tf.name_scope('Do_updates'):
        with tf.control_dependencies([did_assignments_change]):
            do_updates = tf.group(
                centroids.assign(means),
                cluster_assignments.assign(tf.cast(best_centroids,
                                                   tf.float32)))

    changed = True
    iters = 0
    found_numerical = {}
    # found_1d = tf.Print(found_1d,[found_1d])

    # Merge summaries
    scalar_summary = tf.merge_summary(
        [scalar_1d[i] for i in range(0, num_clus)])
    other_summary = tf.merge_summary([means_1d, squares_1d])
    histogram_summary = tf.merge_summary([distro])

    writer = tf.train.SummaryWriter(
        summaries_dir + '/' + stage_str + '/kmeans/' + name_str, sess.graph)
    init = tf.initialize_all_variables()

    sess.run(init)
    # Main loop: reassign points to the nearest centroid and update the means.
    # Stop when the assignments no longer change or MAX_ITERS is reached.
    while changed and iters < MAX_ITERS:
        iters += 1
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        # if iters%10 == 1:
        [changed, _, histogram_sum_run, scalar_sum_run,
         other_sum_run] = sess.run(
             [did_assignments_change, do_updates, histogram_summary,
              scalar_summary, other_summary],
             feed_dict={points: defect_tensor},
             options=run_options,
             run_metadata=run_metadata)
        writer.add_run_metadata(run_metadata, 'step%03d' % iters)
        writer.add_summary(histogram_sum_run, iters)
        writer.add_summary(scalar_sum_run, iters)
        writer.add_summary(other_sum_run, iters)
        # else:
        #     [changed, _, scalar_sum_run] = sess.run([did_assignments_change, do_updates, scalar_summary], feed_dict={points: defect_tensor})
        #     writer.add_run_metadata(run_metadata, 'step%03d' % iters)
        #     writer.add_summary(scalar_sum_run, iters)

        ## Note: due to the interconnectivity of found_1d, it seems as if you need to run it alongside the session run a couple of lines above in order to get numerical results.
        ## It can't be done in a separate run. Oddly enough, this works for found_tensor, which is simply a concat of found_1d. I don't know why.
        # found_numerical[0] = sess.run([found_1d[0]], feed_dict={points:defect_tensor})
        found_numerical[1] = sess.run([found_1d[1]],
                                      feed_dict={points: defect_tensor})
        found_numerical[3] = sess.run([found_1d[3]],
                                      feed_dict={points: defect_tensor})
        found_numerical[4] = sess.run([found_1d[4]],
                                      feed_dict={points: defect_tensor})
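        # A possible alternative (untested sketch): fetch every per-cluster
        # count in a single run so the values stay consistent with each other,
        # instead of hard-coding individual indices:
        # found_numerical = dict(enumerate(sess.run(
        #     [found_1d[i] for i in range(num_clus)],
        #     feed_dict={points: defect_tensor})))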

        # When go_to_max is set, ignore convergence and keep iterating until MAX_ITERS.
        if go_to_max:
            changed = True
    writer.close()
    [centers, assignments] = sess.run([centroids, cluster_assignments])

    end = time.time()

    print("Found in %.2f seconds" % (end - start), iters, "iterations")
    print('Distribution:',
          sess.run(found_tensor, feed_dict={points: defect_tensor}))

    sess.close()
    tf.reset_default_graph()
    return centers, assignments