Code Example #1
File: helper.py Project: pastelmind/ast-codez
 def sample(self, time, outputs, state, name=None):
     with ops.name_scope(name, "ScheduledOutputTrainingHelperSample",
                         [time, outputs, state]):
         sampler = Bernoulli(probs=self._sampling_probability)
         return math_ops.cast(
             sampler.sample(sample_shape=self.batch_size, seed=self._seed),
             dtypes.bool)
Code Example #2
    def __init__(self,
                 n_in,
                 n_out,
                 model_prob=0.9,
                 model_lam=1e-2,
                 activation=None,
                 name="hidden"):
        self.model_prob = model_prob  # probability to keep units
        self.model_lam = model_lam  # l^2 / 2*tau
        self.model_bern = Bernoulli(probs=self.model_prob, dtype=tf.float32)
        self.dropout_mask = self.model_bern.sample((n_in, ))

        if activation is None:
            self.activation = tf.identity
        else:
            self.activation = activation

        kernel_initializer = tf.initializers.truncated_normal(mean=0.0,
                                                              stddev=0.01)
        self.model_M = tf.get_variable("{}_M".format(name),
                                       initializer=kernel_initializer(
                                           [n_in,
                                            n_out]))  # variational parameters
        self.model_m = tf.get_variable("{}_b".format(name),
                                       initializer=tf.zeros([n_out]))

        self.model_W = tf.matmul(tf.diag(self.dropout_mask), self.model_M)
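The diagonal mask construction above is the core of this MC-dropout layer: multiplying tf.diag(dropout_mask) into model_M zeroes entire rows of the weight matrix, i.e. it drops whole input units. A minimal NumPy sketch of that equivalence (illustrative only, with made-up sizes):

import numpy as np

rng = np.random.default_rng(0)
n_in, n_out, keep_prob = 4, 3, 0.9

mask = rng.binomial(1, keep_prob, size=n_in).astype(float)  # Bernoulli keep/drop per input unit
M = rng.normal(0.0, 0.01, size=(n_in, n_out))               # weight matrix

W_via_diag = np.diag(mask) @ M    # what tf.matmul(tf.diag(mask), M) computes
W_via_rows = mask[:, None] * M    # identical: rows of M are kept or zeroed

assert np.allclose(W_via_diag, W_via_rows)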
Code Example #3
File: trnn_imply.py Project: zdcuob/tensor_train_RNN
def tensor_rnn_with_feed_prev(cell, inputs, is_training, config, initial_states=None):
    """High Order Recurrent Neural Network Layer
    """
    #tuple of 2-d tensor (batch_size, s)
    outputs = []
    prev = None
    is_sample = is_training and initial_states is not None

    with tf.variable_scope("trnn") as varscope:
        if varscope.caching_device is None:
            varscope.set_caching_device(lambda op: op.device)

        inputs_shape = inputs.get_shape().with_rank_at_least(3)
        batch_size = tf.shape(inputs)[0] 
        num_steps = inputs_shape[1]
        input_size = int(inputs_shape[2])
        output_size = cell.output_size
        inp_steps =  config.inp_steps
        
        # Scheduled sampling
        dist = Bernoulli(probs=config.sample_prob)
        samples = dist.sample(sample_shape=num_steps)
        
        if initial_states is None:
            initial_states = []
            for lag in range(config.num_lags):
                initial_state = cell.zero_state(batch_size, dtype=tf.float32)
                initial_states.append(initial_state)

        states_list = initial_states #list of high order states
    
        for time_step in range(num_steps):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()

            inp = inputs[:, time_step, :]

            if is_sample and time_step > 0: 
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                    inp = tf.cond(tf.cast(samples[time_step], tf.bool),
                                  lambda: tf.identity(inp),
                                  lambda: fully_connected(cell_output, input_size, activation_fn=tf.sigmoid))
                    
            if not is_training and prev is not None and time_step >= inp_steps:
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                    inp = fully_connected(cell_output, input_size, activation_fn=tf.sigmoid)
                    #print("t", time_step, ">=", burn_in_steps, "--> feeding back output into input.")

            states = _list_to_states(states_list)
            """input tensor is [batch_size, num_steps, input_size]"""
            (cell_output, state)=cell(inp, states)

            states_list = _shift(states_list, state)

            prev = cell_output
            with tf.variable_scope(tf.get_variable_scope(), reuse=False):
                output = fully_connected(cell_output, input_size, activation_fn=tf.sigmoid)
                outputs.append(output)

    outputs = tf.stack(outputs,1)
    return outputs, states_list
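The scheduled-sampling branch inside the loop is easier to see outside the graph: a Bernoulli draw per time step chooses between the ground-truth input and the model's previous prediction. A minimal NumPy sketch of that decision rule (hypothetical model and names, not the project's code):

import numpy as np

rng = np.random.default_rng(0)
num_steps, sample_prob = 6, 0.7
coin = rng.binomial(1, sample_prob, size=num_steps)  # 1 = keep ground truth, 0 = feed back the prediction

def step(x):
    # Stand-in for one RNN step followed by the output projection.
    return 0.5 * x

ground_truth = np.linspace(1.0, 6.0, num_steps)
prev_output = None
for t in range(num_steps):
    inp = ground_truth[t]
    if t > 0 and coin[t] == 0:   # Bernoulli says: use the model's own output instead
        inp = prev_output
    prev_output = step(inp)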
Code Example #4
    def sample_v_given_h(self, h0_sample):
        ''' This function infers state of visible units given hidden units '''
        pre_sigmoid_v1, v1_mean = self.propdown(h0_sample)
        dist = Bernoulli(probs=v1_mean, dtype=tf.float32)
        v1_sample = dist.sample()

        return [pre_sigmoid_v1, v1_mean, v1_sample]
Code Example #5
def tensor_rnn_with_feed_prev(cell,
                              inputs,
                              is_training,
                              config,
                              initial_states=None):
    outputs = []
    cell_output = None
    is_sample = is_training and initial_states is not None

    with tf.variable_scope("trnn") as varscope:
        if varscope.caching_device is None:
            varscope.set_caching_device(lambda op: op.device)

        inputs_shape = inputs.get_shape().with_rank_at_least(3)
        batch_size = tf.shape(inputs)[0]
        num_steps = inputs_shape[1]
        input_size = int(inputs_shape[2])
        output_size = cell.output_size
        inp_steps = config.inp_steps
        acv_func = tf.sigmoid

        dist = Bernoulli(probs=config.sample_prob)
        samples = dist.sample(sample_shape=num_steps)

        if initial_states is None:
            initial_states = []
            for lag in range(config.num_lags):
                initial_state = cell.zero_state(batch_size, dtype=tf.float32)
                initial_states.append(initial_state)
        states_list = initial_states  #list of high order states

        #        for time_step in range(num_steps):
        for time_step in range(1):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()

            inp = inputs[:, time_step, :]

            if is_sample and time_step > 0:
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                    inp = tf.cond(tf.cast(samples[time_step], tf.bool),
                                  lambda: tf.identity(inp), lambda: output)

            if not is_training and cell_output is not None and time_step >= inp_steps:
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                    inp = output

            states = _list_to_states(states_list)
            (cell_output, state) = cell(inp, states)
            states_list = _shift(states_list, state)

            with tf.variable_scope(tf.get_variable_scope(), reuse=False):
                output = fully_connected(cell_output,
                                         input_size,
                                         activation_fn=acv_func)
                outputs.append(output)

    outputs = tf.stack(outputs, 1)
    return outputs, states_list
Code Example #6
    def sample(self, n=None):
        if self._bernoulli is None:
            self._bernoulli = Bernoulli(self._steps_probs)

        sample = self._bernoulli.sample(n)
        sample = tf.cumprod(sample, tf.rank(sample) - 1)
        sample = tf.reduce_sum(sample, -1)
        return sample
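The cumprod/reduce_sum pair converts independent Bernoulli draws into a step count: the cumulative product along the last axis stays 1 only up to the first 0, so its sum is the number of leading successes. A small NumPy check of that identity (not part of the original code):

import numpy as np

draws = np.array([1, 1, 0, 1])          # Bernoulli samples along the last axis
running = np.cumprod(draws, axis=-1)    # [1, 1, 0, 0] -- zeros persist after the first failure
n_steps = running.sum(axis=-1)          # two leading ones -> 2 steps
assert n_steps == 2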
Code Example #7
    def sample_h_given_v(self, v0_sample):
        ''' This function infers state of hidden units given visible units '''
        # compute the activation of the hidden units given visible samples
        pre_sigmoid_h1, h1_mean = self.propup(v0_sample)
        dist = Bernoulli(probs=h1_mean, dtype=tf.float32)
        h1_sample = dist.sample()

        return [pre_sigmoid_h1, h1_mean, h1_sample]
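Code Examples #4 and #7 are the two halves of one Gibbs step in an RBM: a sigmoid gives the conditional Bernoulli means, which are then sampled. A minimal NumPy sketch of such a step with hypothetical weights (the original propup/propdown are not shown in these snippets):

import numpy as np

rng = np.random.default_rng(0)

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

n_visible, n_hidden = 6, 4
W = rng.normal(0.0, 0.01, size=(n_visible, n_hidden))
b_h = np.zeros(n_hidden)
b_v = np.zeros(n_visible)

v0 = rng.binomial(1, 0.5, size=n_visible).astype(float)

h1_mean = sigmoid(v0 @ W + b_h)                    # propup: p(h = 1 | v)
h1_sample = rng.binomial(1, h1_mean).astype(float)

v1_mean = sigmoid(h1_sample @ W.T + b_v)           # propdown: p(v = 1 | h)
v1_sample = rng.binomial(1, v1_mean).astype(float)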
Code Example #8
File: trnn_imply.py Project: zdcuob/tensor_train_RNN
def rnn_with_feed_prev(cell, inputs, is_training, config, initial_state=None):
    prev = None
    outputs = []
    sample_prob = config.sample_prob # scheduled sampling probability

    is_sample = is_training and initial_state is not None # whether to use scheduled sampling  
 
    with tf.variable_scope("rnn") as varscope:
        if varscope.caching_device is None:
            varscope.set_caching_device(lambda op: op.device)

        inputs_shape = inputs.get_shape().with_rank_at_least(3)
        batch_size = tf.shape(inputs)[0] 
        num_steps = inputs_shape[1]
        input_size = int(inputs_shape[2])
        inp_steps = config.inp_steps
        output_size = cell.output_size

        # phased lstm input
        inp_t = tf.expand_dims(tf.range(1,batch_size+1), 1)

        dist = Bernoulli(probs=config.sample_prob)
        samples = dist.sample(sample_shape=num_steps)
        # with tf.Session() as sess:
        #     print('bernoulli',samples.eval())
        if initial_state is None:
            initial_state = cell.zero_state(batch_size, dtype= tf.float32)
        state = initial_state

        for time_step in range(num_steps):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()

            inp = inputs[:, time_step, :]
            
            if is_sample and time_step > 0: 
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                    inp = tf.cond(tf.cast(samples[time_step], tf.bool),
                                  lambda: tf.identity(inp),
                                  lambda: fully_connected(cell_output, input_size, activation_fn=tf.sigmoid))

            if not is_training and prev is not None and time_step >= inp_steps:
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                    inp = fully_connected(prev, input_size,  activation_fn=tf.sigmoid)
                    #print("t", time_step, ">=", inp_steps, "--> feeding back output into input.")

            if isinstance(cell._cells[0], tf.contrib.rnn.PhasedLSTMCell):
                (cell_output, state) = cell((inp_t, inp), state)
            else:
                (cell_output, state) = cell(inp, state)

            prev = cell_output
            with tf.variable_scope(tf.get_variable_scope(), reuse=False):
                output = fully_connected(cell_output, input_size, activation_fn=tf.sigmoid)
                outputs.append(output)

    outputs = tf.stack(outputs, 1)
    return outputs, state
Code Example #9
    def _make_particles_update(self, n_steps=None, sample=True, G_fed=False):
        """Update negative particles by running Gibbs sampler
        for specified number of steps.
        """
        if n_steps is None:
            n_steps = self._n_gibbs_steps

        # self._n_particles = 1
        # self.sample_h_states = True

        with tf.name_scope('gibbs_chain'):

            logits = tf.zeros([self._n_runs, self._n_hidden])
            T = Bernoulli(logits=logits).sample(seed=self.make_random_seed())
            self._H = tf.cast(T, dtype=self._tf_dtype)
            self._H_new = tf.cast(T, dtype=self._tf_dtype)
            logits = tf.zeros([self._n_runs, self._n_visible])
            T = Bernoulli(logits=logits).sample(seed=self.make_random_seed())
            self._v = tf.cast(T, dtype=self._tf_dtype)
            self._v_new = tf.cast(T, dtype=self._tf_dtype)

            def cond(step, max_step, v, H, v_new, H_new):
                return step < max_step

            def body(step, max_step, v, H, v_new, H_new):
                # v, H, v_new, H_new = self._make_gibbs_step(v, H, v_new, H_new,
                #                                            update_v=True, sample=sample)
                # v, H, v_new, H_new = self._make_gibbs_step(H)
                v_new, _, H_new, _ = self._make_gibbs_step(H)
                return step + 1, max_step, v_new, H_new, v, H  # swap particles

            _, _, v, H, v_new, H_new = \
                tf.while_loop(cond=cond, body=body,
                              loop_vars=[tf.constant(0),
                                         n_steps,
                                         self._v, self._H,
                                         self._v_new, self._H_new],
                              parallel_iterations=10,
                              back_prop=False)
            # _, _, v, H, v_new, H_new = \
            #     tf.while_loop(cond=cond, body=body,
            #                   loop_vars=[tf.constant(0),
            #                              n_steps,
            #                              self._v, self._H,
            #                              self._v_new, self._H_new],
            #                   parallel_iterations=1,
            #                   back_prop=False)

            # v_update = self._v.assign(v)
            # v_new_update = self._v_new.assign(v_new)
            # H_updates = self._H.assign(H)
            # H_new_updates = self._H_new.assign(H_new)
            v_update = v  #self._v.assign(v)
            v_new_update = v_new  #self._v_new.assign(v_new)
            H_updates = H  #self._H.assign(H)
            H_new_updates = H_new  #self._H_new.assign(H_new)
        return v_update, H_updates, v_new_update, H_new_updates
Code Example #10
File: customlayers.py Project: Shaoli-Huang/DropMax
 def ret(y_true, y_pred):
     bernoulli = Bernoulli(probs=retain)
     b = tf.cast(bernoulli.sample(sample_shape=tf.shape(y_true)), dtype=tf.float32)
     output = y_pred
     mask = tf.maximum(b, y_true)
     output = output * mask
     output = tf.nn.softmax(output)
     loss = keras.losses.categorical_crossentropy(y_true,output)
     return loss
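The key line here is mask = tf.maximum(b, y_true): classes are retained at random by the Bernoulli draw, but positions where y_true is 1 are always kept, so the softmax runs over a random subset that necessarily contains the true class. A tiny NumPy illustration (assumed one-hot labels, made-up values):

import numpy as np

rng = np.random.default_rng(0)
retain = 0.5
y_true = np.array([0.0, 1.0, 0.0, 0.0])           # one-hot label
b = rng.binomial(1, retain, size=y_true.shape)    # random retain mask
mask = np.maximum(b, y_true)                      # the true class can never be dropped
assert mask[1] == 1.0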
Code Example #11
 def __init__(self, n_in, n_out, model_prob, model_lam):
     self.model_prob = model_prob
     self.model_lam = model_lam
     self.model_bern = Bernoulli(probs=self.model_prob, dtype=tf.float32)
     self.model_M = tf.Variable(
         tf.truncated_normal([n_in, n_out], stddev=0.01))
     self.model_m = tf.Variable(tf.zeros([n_out]))
     self.model_W = tf.matmul(tf.diag(self.model_bern.sample((n_in, ))),
                              self.model_M)
Code Example #12
 def __init__(self, input_data, output_data, model_prob, model_lam):
     self.model_prob = model_prob
     self.model_lam = model_lam
     self.model_bern = Bernoulli(probs=self.model_prob, dtype=tf.float32)
     self.model_M = tf.Variable(
         tf.truncated_normal((input_data, output_data), stddev=0.01))
     self.model_m = tf.Variable(tf.zeros((output_data)))
     self.model_W = tf.matmul(
         tf.diag(self.model_bern.sample((input_data, ))), self.model_M)
Code Example #13
 def __init__(self, n_in, n_out, model_prob=0.9, model_lam=1e-2, name="hidden"):
     self.model_prob = model_prob    # probability to keep units
     self.model_lam = model_lam      # l^2 / 2*tau
     self.model_bern = Bernoulli(probs=self.model_prob, dtype=tf.float32)
     # with tf.variable_scope("variational_dense"):
     self.model_M = tf.get_variable("{}_M".format(name), initializer=tf.truncated_normal([n_in, n_out], stddev=0.01))
     self.model_m = tf.get_variable("{}_b".format(name), initializer=tf.zeros([n_out]))
     self.model_W = tf.matmul(
         tf.diag(self.model_bern.sample((n_in, ))), self.model_M
     )
Code Example #14
File: customlayers.py Project: Shaoli-Huang/DropMax
 def ret(y_true, y_pred):
     bernoulli = Bernoulli(probs=retain)
     b = tf.cast(bernoulli.sample(sample_shape=tf.shape(y_true)), dtype=tf.float32)
     output = y_pred
     mask = tf.maximum(b, y_true)
     exp_output = tf.exp(output - tf.reduce_max(output, reduction_indices=[1], keep_dims=True))
     exp_output = exp_output * mask + 1e-4
     sum_output = tf.reduce_sum(exp_output, axis=1, keep_dims=True)  # normalize by the masked exponentials, not the raw logits
     output = exp_output / sum_output
     loss = keras.losses.categorical_crossentropy(y_true,output)
     return loss
Code Example #15
 def first():
     first_token = self.inputs[:, 0]  # (batch_size, 1)
     select_sampler = Bernoulli(probs=1.0, dtype=tf.bool)
     select_sample = select_sampler.sample(
         sample_shape=self.batch_size)
     token_rhyme = tf.cast(tf.gather(self.table, first_token),
                           tf.float32)
     return tf.where(
         select_sample,
         tf.log(tf.multiply(token_rhyme, tf.nn.softmax(o_t))),
         tf.log(tf.nn.softmax(o_t)))
Code Example #16
class VariationalDense:
    """Variational Dense Layer Class"""
    def __init__(self, n_in, n_out, model_prob=0.9, model_lam=1e-2, name="hidden"):
        self.model_prob = model_prob    # probability to keep units
        self.model_lam = model_lam      # l^2 / 2*tau
        self.model_bern = Bernoulli(probs=self.model_prob, dtype=tf.float32)
        # with tf.variable_scope("variational_dense"):
        self.model_M = tf.get_variable("{}_M".format(name), initializer=tf.truncated_normal([n_in, n_out], stddev=0.01))
        self.model_m = tf.get_variable("{}_b".format(name), initializer=tf.zeros([n_out]))
        self.model_W = tf.matmul(
            tf.diag(self.model_bern.sample((n_in, ))), self.model_M
        )

    def __call__(self, X, activation=tf.identity):
        if activation is None:
            activation = tf.identity
        output = activation(tf.matmul(X, self.model_W) + self.model_m)
        # if self.model_M.shape[1] == 1:
        #     output = tf.squeeze(output)
        return output

    @property
    def regularization(self):
        return self.model_lam * (
            self.model_prob * tf.reduce_sum(tf.square(self.model_M)) +
            tf.reduce_sum(tf.square(self.model_m))
        )
Code Example #17
class NumStepsDistribution(object):
    """Probability distribution used for the number of steps

    Transforms Bernoulli probabilities of an event = 1 into p(n) where n is the number of steps
    as described in the AIR paper."""

    def __init__(self, steps_probs):
        """

        :param steps_probs: tensor; Bernoulli success probabilities
        """
        self._steps_probs = steps_probs
        self._joint = bernoulli_to_modified_geometric(steps_probs)
        self._bernoulli = None

    def sample(self, n=None):
        if self._bernoulli is None:
            self._bernoulli = Bernoulli(self._steps_probs)

        sample = self._bernoulli.sample(n)
        sample = tf.cumprod(sample, tf.rank(sample) - 1)
        sample = tf.reduce_sum(sample, -1)
        return sample

    def prob(self, samples=None):
        if samples is None:
            return self._joint
        return sample_from_tensor(self._joint, samples)

    def log_prob(self, samples):
        prob = self.prob(samples)
        prob = clip_preserve(prob, 1e-32, prob)
        return tf.log(prob)
Code Example #18
    def make_distribs(self, xxx_todo_changeme):
        """Converts parameters return by `_build` into probability distributions.
        """
        (prior_where_loc, prior_where_scale, prior_what_loc, prior_what_scale, prop_prob_logit) = xxx_todo_changeme
        what_prior = Normal(prior_what_loc, prior_what_scale)
        where_prior = Normal(prior_where_loc, prior_where_scale)
        prop_prior = Bernoulli(logits=tf.squeeze(prop_prob_logit, -1))

        return what_prior, where_prior, prop_prior
Code Example #19
def Dropout(X, prob=0.7, train=tf.constant(False), name='Dropout'):
    from tensorflow.contrib.distributions import Bernoulli
    if not isinstance(prob, float) or prob > 1.0 or prob < 0.0:
        raise ValueError(
            'Encountered illegal value for param (prob), expecting float between 0 and 1'
        )
    with tf.name_scope(name):
        Dropout_Mask = tf.diag(
            Bernoulli(probs=prob, dtype=tf.float32).sample(
                (tf.shape(X)[-1], )), 'Dropout_Mask')
        X_dropped = tf.matmul(X, Dropout_Mask)
    return tf.cond(tf.equal(train, tf.constant(True)), lambda: X_dropped,
                   lambda: X)
Code Example #20
    def init_eval_model(self):
        with tf.name_scope('eval_model'):
            self.eval_alpha_state = tf.placeholder(tf.float32)
            self.eval_rho_state = tf.placeholder(tf.float32)
            self.eval_n_test = tf.placeholder(tf.int32)
            eval_n_minibatch = self.eval_n_test - self.cs

            # Data Placeholder
            with tf.name_scope('input'):
                self.eval_ph = tf.placeholder(tf.int32)
                words = self.eval_ph

            # Index Masks
            with tf.name_scope('context_mask'):
                p_mask = tf.cast(
                    tf.range(int(self.cs / 2), eval_n_minibatch + int(self.cs / 2)),
                    tf.int32)
                rows = tf.cast(
                    tf.tile(tf.expand_dims(tf.range(0, int(self.cs / 2)), [0]),
                            [eval_n_minibatch, 1]), tf.int32)
                columns = tf.cast(
                    tf.tile(tf.expand_dims(tf.range(0, eval_n_minibatch), [1]),
                            [1, int(self.cs / 2)]), tf.int32)
                ctx_mask = tf.concat(
                    [rows + columns, rows + columns + int(self.cs / 2) + 1], 1)

            with tf.name_scope('natural_param'):
                with tf.name_scope('target_word'):
                    p_idx = tf.gather(words, p_mask)
                    p_rho = tf.squeeze(tf.gather(self.eval_rho_state, p_idx))

                # Negative samples
                with tf.name_scope('negative_samples'):
                    self.eval_n_idx = tf.placeholder(tf.int32)
                    n_rho = tf.gather(self.eval_rho_state, self.eval_n_idx)

                with tf.name_scope('context'):
                    ctx_idx = tf.squeeze(tf.gather(words, ctx_mask))
                    ctx_alphas = tf.gather(self.eval_alpha_state, ctx_idx)

                # Natural parameter
                ctx_sum = tf.reduce_sum(ctx_alphas, [1])
                p_eta = tf.expand_dims(
                    tf.reduce_sum(tf.multiply(p_rho, ctx_sum), -1), 1)
                n_eta = tf.reduce_sum(
                    tf.multiply(
                        n_rho,
                        tf.tile(tf.expand_dims(ctx_sum, 1), [1, self.ns, 1])),
                    -1)

            # Conditional likelihood
            y_pos = Bernoulli(logits=p_eta)
            y_neg = Bernoulli(logits=n_eta)

            ll_pos = y_pos.log_prob(1.0)
            ll_neg = tf.reduce_mean(y_neg.log_prob(0.0), axis=1)

            self.eval_ll = tf.nn.moments(ll_pos + ll_neg, axes=[0, 1])
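For the conditional likelihood above, Bernoulli(logits=eta).log_prob(1.0) and .log_prob(0.0) reduce to -softplus(-eta) and -softplus(eta), which is what keeps the positive/negative-sample objective numerically stable. A quick standalone NumPy check of that identity (not from the project):

import numpy as np

def softplus(x):
    return np.log1p(np.exp(-np.abs(x))) + np.maximum(x, 0.0)  # numerically stable softplus

eta = np.array([-2.0, 0.0, 3.0])
p = 1.0 / (1.0 + np.exp(-eta))   # sigmoid(eta)

log_prob_1 = np.log(p)           # Bernoulli(logits=eta).log_prob(1)
log_prob_0 = np.log(1.0 - p)     # Bernoulli(logits=eta).log_prob(0)

assert np.allclose(log_prob_1, -softplus(-eta))
assert np.allclose(log_prob_0, -softplus(eta))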
Code Example #21
    def _build_graph(self):

        with tf.variable_scope('vae'):
            self.x = tf.placeholder(tf.float32, shape=[None, self._observation_dim])

            with tf.variable_scope('encoder'):
                encoded = self._encode(self.x, self._latent_dim)

            with tf.variable_scope('latent'):
                self.mean = encoded[:, :self._latent_dim]
                logvar = encoded[:, self._latent_dim:]
                stddev = tf.sqrt(tf.exp(logvar))
                epsilon = tf.random_normal([self._batch_size, self._latent_dim])
                self.z = self.mean + stddev * epsilon

            with tf.variable_scope('decoder'):
                decoded = self._decode(self.z, self._observation_dim)
                self.obs_mean = decoded
                if self._observation_distribution == 'Gaussian':
                    obs_epsilon = tf.random_normal([self._batch_size, self._observation_dim])
                    self.sample = self.obs_mean + self._observation_std * obs_epsilon
                else:
                    self.sample = Bernoulli(probs=self.obs_mean).sample()


            with tf.variable_scope('loss'):
                with tf.variable_scope('kl-divergence'):
                    kl = self._kl_diagnormal_stdnormal(self.mean, logvar)

                if self._observation_distribution == 'Gaussian':
                    with tf.variable_scope('gaussian'):
                        obj = self._gaussian_log_likelihood(self.x, self.obs_mean, self._observation_std)
                else:
                    with tf.variable_scope('bernoulli'):
                        obj = self._bernoulli_log_likelihood(self.x, self.obs_mean)

                self._loss = (kl + obj) / self._batch_size

            with tf.variable_scope('optimizer'):
                optimizer = tf.train.RMSPropOptimizer(learning_rate=self._learning_rate)
            with tf.variable_scope('training-step'):
                self._train = optimizer.minimize(self._loss)

            self._sesh = tf.Session()
            init = tf.global_variables_initializer()
            self._sesh.run(init)
Code Example #22
    def _make_ais(self):
        with tf.name_scope('annealed_importance_sampling'):

            # x_0 ~ Ber(0.5) of size (M, H_1)
            logits = tf.zeros([self._n_ais_runs, self._n_hiddens[0]])
            T = Bernoulli(logits=logits).sample(seed=self.make_random_seed())
            x_0 = tf.cast(T, dtype=self._tf_dtype)

            # x_1 ~ T_1(x_1 | x_0)
            x_1 = self._make_ais_next_sample(x_0, self._delta_beta)

            # -log p_0(x_1)
            log_Z = -self._unnormalized_log_prob_H0(x_1, 0.)

            def cond(log_Z, x, beta, delta_beta):
                return beta < 1. - delta_beta + 1e-5

            def body(log_Z, x, beta, delta_beta):
                # with tf.control_dependencies([tf.Print('beta', [beta])]):
                # + log p_i(x_i)
                log_Z += self._unnormalized_log_prob_H0(x, beta)
                # x_{i + 1} ~ T_{i + 1}(x_{i + 1} | x_i)
                x_new = self._make_ais_next_sample(x, beta + delta_beta)
                # -log p_i(x_{i + 1})
                log_Z -= self._unnormalized_log_prob_H0(x_new, beta)
                return log_Z, x_new, beta + delta_beta, delta_beta

            log_Z, x_M, _, _ = tf.while_loop(cond=cond, body=body,
                                             loop_vars=[log_Z, x_1, self._delta_beta,
                                                                    self._delta_beta],
                                             back_prop=False,
                                             parallel_iterations=1)
            # + log p_M(x_M)
            log_Z += self._unnormalized_log_prob_H0(x_M, 1.)

            # + log(Z_0) = (V + H_1 + H_2) * log(2)
            log_Z0 = self._n_visible + self._n_hiddens[0] + self._n_hiddens[1]
            log_Z0 = tf.cast(log_Z0, dtype=self._tf_dtype)
            log_Z0 *= tf.cast(tf.log(2.), dtype=self._tf_dtype)
            log_Z += log_Z0

        tf.add_to_collection('log_Z', log_Z)
Code Example #23
class VariationalDense:
    """Variational Dense Layer Class"""
    def __init__(self,
                 n_in,
                 n_out,
                 model_prob=0.9,
                 model_lam=1e-2,
                 activation=None,
                 name="hidden"):
        self.model_prob = model_prob  # probability to keep units
        self.model_lam = model_lam  # l^2 / 2*tau
        self.model_bern = Bernoulli(probs=self.model_prob, dtype=tf.float32)
        self.dropout_mask = self.model_bern.sample((n_in, ))

        if activation is None:
            self.activation = tf.identity
        else:
            self.activation = activation

        kernel_initializer = tf.initializers.truncated_normal(mean=0.0,
                                                              stddev=0.01)
        self.model_M = tf.get_variable("{}_M".format(name),
                                       initializer=kernel_initializer(
                                           [n_in,
                                            n_out]))  # variational parameters
        self.model_m = tf.get_variable("{}_b".format(name),
                                       initializer=tf.zeros([n_out]))

        self.model_W = tf.matmul(tf.diag(self.dropout_mask), self.model_M)
        # self.model_W = self.model_M

    def __call__(self, X):
        output = self.activation(tf.matmul(X, self.model_W) + self.model_m)
        if self.model_M.shape[1] == 1:
            output = tf.squeeze(output)
        return output

    @property
    def regularization(self):
        return self.model_lam * (
            self.model_prob * tf.reduce_sum(tf.square(self.model_M)) +
            tf.reduce_sum(tf.square(self.model_m)))
Code Example #24
    def set_input_shape(self, input_shape, reuse):

        batch_size, rows, cols, input_channels = input_shape
        kernel_shape = tuple(
            self.kernel_shape) + (input_channels, self.output_channels)
        assert len(kernel_shape) == 4
        assert all(isinstance(e, int) for e in kernel_shape), kernel_shape

        with tf.variable_scope(self.scope_name + '_init', reuse):

            init = tf.truncated_normal(kernel_shape,
                                       stddev=0.2,
                                       dtype=tf.float32)
            self.kernels = tf.get_variable("k", initializer=init)
            k_summ = tf.summary.histogram(name="k", values=self.kernels)

            if self.binary:
                from tensorflow.contrib.distributions import Bernoulli
                with self.G.gradient_override_map(
                    {"Bernoulli": "QuantizeGrad"}):
                    self.kernels = 2. * Bernoulli(
                        probs=hard_sigmoid(
                            self.kernels), dtype=tf.float32).sample() - 1.
            else:
                from tensorflow.contrib.distributions import MultivariateNormalDiag
                with self.G.gradient_override_map(
                    {"MultivariateNormalDiag": "QuantizeGrad"}):
                    self.kernels = MultivariateNormalDiag(
                        loc=self.kernels).sample()

            k_rand_summ = tf.summary.histogram(name="k_rand",
                                               values=self.kernels)

            orig_input_batch_size = input_shape[0]
            input_shape = list(input_shape)
            input_shape[0] = 1
            dummy_batch = tf.zeros(input_shape)
            dummy_output = self.fprop(dummy_batch, False)
            output_shape = [int(e) for e in dummy_output.get_shape()]
            output_shape[0] = 1
            self.output_shape = tuple(output_shape)
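The gradient_override_map call above assumes a gradient named "QuantizeGrad" has been registered elsewhere in the project; the usual pattern for stochastic binarization like this is a straight-through estimator that passes the incoming gradient through unchanged. A hedged TF 1.x-style sketch of how such a registration typically looks (an assumption, not the project's actual definition):

import tensorflow as tf  # TF 1.x API assumed, matching the example above

@tf.RegisterGradient("QuantizeGrad")
def _quantize_grad(op, grad):
    # Straight-through estimator: treat the stochastic binarization as the
    # identity when backpropagating, so gradients reach the real-valued kernels.
    return grad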
Code Example #25
class VariationalDense:
    """Variational Dense Layer Class"""
    def __init__(self, n_in, n_out, model_prob, model_lam):
        self.model_prob = model_prob
        self.model_lam = model_lam
        self.model_bern = Bernoulli(probs=self.model_prob, dtype=tf.float32)
        self.model_M = tf.Variable(
            tf.truncated_normal([n_in, n_out], stddev=0.01))
        self.model_m = tf.Variable(tf.zeros([n_out]))
        self.model_W = tf.matmul(tf.diag(self.model_bern.sample((n_in, ))),
                                 self.model_M)

    def __call__(self, X, activation=tf.identity):
        output = activation(tf.matmul(X, self.model_W) + self.model_m)
        if self.model_M.shape[1] == 1:
            output = tf.squeeze(output)
        return output

    @property
    def regularization(self):
        return self.model_lam * (
            self.model_prob * tf.reduce_sum(tf.square(self.model_M)) +
            tf.reduce_sum(tf.square(self.model_m)))
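A hedged usage sketch for the class above (TF 1.x assumed, hypothetical sizes and placeholders): stack two VariationalDense layers and add their regularization properties to the data loss, which is how this kind of MC-dropout layer is typically trained.

# Hypothetical usage of the VariationalDense class defined above (TF 1.x assumed).
x = tf.placeholder(tf.float32, [None, 10])
y = tf.placeholder(tf.float32, [None])

layer_1 = VariationalDense(10, 32, model_prob=0.9, model_lam=1e-2)
layer_2 = VariationalDense(32, 1, model_prob=0.9, model_lam=1e-2)

hidden = layer_1(x, activation=tf.nn.relu)
pred = layer_2(hidden)  # squeezed to shape (batch,) because n_out == 1

loss = tf.reduce_mean(tf.square(pred - y))
loss += layer_1.regularization + layer_2.regularization
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)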
Code Example #26
File: func.py Project: nke001/R-Net-1
 def __call__(self, inputs, seq_len, keep_prob=1.0, is_train=None, concat_layers=True):
     
     outputs = [tf.transpose(inputs, [1, 0, 2])]
     import ipdb; ipdb.set_trace()
     
     # only 2 layers, first layer is bidirectional
     # second layer gets output from first layer 
     
     for layer in range(self.num_layers):
         gru_fw, gru_bw = self.grus[layer]
         param_fw, param_bw = self.params[layer]
         init_fw, init_bw = self.inits[layer]
         mask_fw, mask_bw = self.dropout_mask[layer]
         with tf.variable_scope("fw"):
             out_fw, _ = gru_fw(outputs[-1] * mask_fw, init_fw, param_fw)
             if layer == 0:                    
                 import ipdb;ipdb.set_trace()
                 b1 = tf.nn.relu(tf.matmul(out_fw, self.b1_w))
                 bnd = tf.nn.sigmoid(tf.matmul(b1, self.b2_w))
                 gates = Bernoulli(bnd)
                 
                 # TODO: initially just take the hidden state from the last layer and try to predict the boundary
                 #bnd_input = tf.concat([out_fw, ])
                 #h1_out = tf.
         with tf.variable_scope("bw"):
             inputs_bw = tf.reverse_sequence(
                 outputs[-1] * mask_bw, seq_lengths=seq_len, seq_dim=0, batch_dim=1)
             out_bw, _ = gru_bw(inputs_bw, init_bw, param_bw)
             out_bw = tf.reverse_sequence(
                 out_bw, seq_lengths=seq_len, seq_dim=0, batch_dim=1)
         outputs.append(tf.concat([out_fw, out_bw], axis=2))
     if concat_layers:
         res = tf.concat(outputs[1:], axis=2)
     else:
         res = outputs[-1]
     res = tf.transpose(res, [1, 0, 2])
     return res
Code Example #27
    def __init__(self, args, d, logdir):
        super(dynamic_bern_emb_model, self).__init__(args, d, logdir)

        with tf.name_scope('model'):
            with tf.name_scope('embeddings'):
                self.alpha = tf.Variable(self.alpha_init,
                                         name='alpha',
                                         trainable=self.alpha_trainable)

                self.rho_t = {}
                for t in range(-1, self.T):
                    self.rho_t[t] = tf.Variable(
                        self.rho_init +
                        0.001 * tf.random_normal([self.L, self.K]) / self.K,
                        name='rho_' + str(t))

                with tf.name_scope('priors'):
                    global_prior = Normal(loc=0.0, scale=self.sig)
                    local_prior = Normal(loc=0.0, scale=self.sig / 100.0)

                    self.log_prior = tf.reduce_sum(
                        global_prior.log_prob(self.alpha))
                    self.log_prior = tf.reduce_sum(
                        global_prior.log_prob(self.rho_t[-1]))
                    for t in range(self.T):
                        self.log_prior += tf.reduce_sum(
                            local_prior.log_prob(self.rho_t[t] -
                                                 self.rho_t[t - 1]))

            with tf.name_scope('likelihood'):
                self.placeholders = {}
                self.y_pos = {}
                self.y_neg = {}
                self.ll_pos = 0.0
                self.ll_neg = 0.0
                for t in range(self.T):
                    # Index Masks
                    p_mask = tf.range(int(self.cs / 2),
                                      self.n_minibatch[t] + int(self.cs / 2))
                    rows = tf.tile(
                        tf.expand_dims(tf.range(0, int(self.cs / 2)), [0]),
                        [self.n_minibatch[t], 1])
                    columns = tf.tile(
                        tf.expand_dims(tf.range(0, self.n_minibatch[t]), [1]),
                        [1, int(self.cs / 2)])

                    ctx_mask = tf.concat([
                        rows + columns, rows + columns + int(self.cs / 2) + 1
                    ], 1)

                    # Data Placeholder
                    self.placeholders[t] = tf.placeholder(
                        tf.int32, shape=(self.n_minibatch[t] + self.cs))

                    # Target and Context Indices
                    p_idx = tf.gather(self.placeholders[t], p_mask)
                    ctx_idx = tf.squeeze(
                        tf.gather(self.placeholders[t], ctx_mask))

                    # Negative samples
                    unigram_logits = tf.tile(
                        tf.expand_dims(tf.log(tf.constant(self.unigram)), [0]),
                        [self.n_minibatch[t], 1])
                    n_idx = tf.multinomial(unigram_logits, self.ns)

                    # Context vectors
                    ctx_alphas = tf.gather(self.alpha, ctx_idx)

                    p_rho = tf.squeeze(tf.gather(self.rho_t[t], p_idx))
                    n_rho = tf.gather(self.rho_t[t], n_idx)

                    # Natural parameter
                    ctx_sum = tf.reduce_sum(ctx_alphas, [1])
                    p_eta = tf.expand_dims(
                        tf.reduce_sum(tf.multiply(p_rho, ctx_sum), -1), 1)
                    n_eta = tf.reduce_sum(
                        tf.multiply(
                            n_rho,
                            tf.tile(tf.expand_dims(ctx_sum, 1),
                                    [1, self.ns, 1])), -1)

                    # Conditional likelihood
                    self.y_pos[t] = Bernoulli(logits=p_eta)
                    self.y_neg[t] = Bernoulli(logits=n_eta)

                    self.ll_pos += tf.reduce_sum(self.y_pos[t].log_prob(1.0))
                    self.ll_neg += tf.reduce_sum(self.y_neg[t].log_prob(0.0))

            self.loss = -(self.n_epochs *
                          (self.ll_pos + self.ll_neg) + self.log_prior)
Code Example #28
    def __init__(self, args, d, logdir):
        super(bern_emb_model, self).__init__(args, d, logdir)
        self.n_minibatch = self.n_minibatch.sum()

        with tf.name_scope('model'):
            # Data Placeholder
            with tf.name_scope('input'):
                self.placeholders = tf.placeholder(tf.int32)
                self.words = self.placeholders

            # Index Masks
            with tf.name_scope('context_mask'):
                self.p_mask = tf.cast(
                    tf.range(int(self.cs / 2),
                             self.n_minibatch + int(self.cs / 2)), tf.int32)
                rows = tf.cast(
                    tf.tile(tf.expand_dims(tf.range(0, int(self.cs / 2)), [0]),
                            [self.n_minibatch, 1]), tf.int32)
                columns = tf.cast(
                    tf.tile(tf.expand_dims(tf.range(0, self.n_minibatch), [1]),
                            [1, int(self.cs / 2)]), tf.int32)
                self.ctx_mask = tf.concat(
                    [rows + columns, rows + columns + int(self.cs / 2) + 1], 1)

            with tf.name_scope('embeddings'):
                self.rho = tf.Variable(self.rho_init, name='rho')
                self.alpha = tf.Variable(self.alpha_init,
                                         name='alpha',
                                         trainable=self.alpha_trainable)

                with tf.name_scope('priors'):
                    prior = Normal(loc=0.0, scale=self.sig)
                    if self.alpha_trainable:
                        self.log_prior = tf.reduce_sum(
                            prior.log_prob(self.rho) +
                            prior.log_prob(self.alpha))
                    else:
                        self.log_prior = tf.reduce_sum(prior.log_prob(
                            self.rho))

            with tf.name_scope('natural_param'):
                # Target and Context Indices
                with tf.name_scope('target_word'):
                    self.p_idx = tf.gather(self.words, self.p_mask)
                    self.p_rho = tf.squeeze(tf.gather(self.rho, self.p_idx))

                # Negative samples
                with tf.name_scope('negative_samples'):
                    unigram_logits = tf.tile(
                        tf.expand_dims(tf.log(tf.constant(self.unigram)), [0]),
                        [self.n_minibatch, 1])
                    self.n_idx = tf.multinomial(unigram_logits, self.ns)
                    self.n_rho = tf.gather(self.rho, self.n_idx)

                with tf.name_scope('context'):
                    self.ctx_idx = tf.squeeze(
                        tf.gather(self.words, self.ctx_mask))
                    self.ctx_alphas = tf.gather(self.alpha, self.ctx_idx)

                # Natural parameter
                ctx_sum = tf.reduce_sum(self.ctx_alphas, [1])
                self.p_eta = tf.expand_dims(
                    tf.reduce_sum(tf.multiply(self.p_rho, ctx_sum), -1), 1)
                self.n_eta = tf.reduce_sum(
                    tf.multiply(
                        self.n_rho,
                        tf.tile(tf.expand_dims(ctx_sum, 1), [1, self.ns, 1])),
                    -1)

            # Conditional likelihood
            self.y_pos = Bernoulli(logits=self.p_eta)
            self.y_neg = Bernoulli(logits=self.n_eta)

            self.ll_pos = tf.reduce_sum(self.y_pos.log_prob(1.0))
            self.ll_neg = tf.reduce_sum(self.y_neg.log_prob(0.0))

            self.log_likelihood = self.ll_pos + self.ll_neg

            scale = 1.0 * self.N / self.n_minibatch
            self.loss = -(self.n_epochs * self.log_likelihood + self.log_prior)
Code Example #29
 def bernoulli_log_probs(args):
     from tensorflow.contrib.distributions import Bernoulli
     mu, x = args
     log_px = Bernoulli(probs=mu, name='dec_bernoulli').log_prob(x)
     return log_px
Code Example #30
    def minimize(self, loss, var_list=None, global_step=None):
        orig_graph_view = None
        trainable_vars = var_list if var_list is not None else tf.trainable_variables()
        if self.inputs is not None:
            seed_ops = [t.op for t in self.inputs]
            result = list(seed_ops)
            wave = set(seed_ops)
            while wave:  # stolen from graph_editor.select
                new_wave = set()
                for op in wave:
                    for new_t in op.outputs:
                        if new_t == loss:
                            continue
                        for new_op in new_t.consumers():
                            #if new_op not in result and is_within(new_op):
                            if new_op not in result:
                                new_wave.add(new_op)
                for op in new_wave:
                    if op not in result:
                        result.append(op)
                wave = new_wave
            orig_graph_view = ge.sgv(result)
        else:
            orig_graph_view = ge.sgv(self.work_graph)

        self.global_step_tensor = tf.Variable(
            0, name='global_step',
            trainable=False) if global_step is None else global_step

        # Perturbations
        deltas = {}
        n_perturbations = {}
        p_perturbations = {}
        with tf.name_scope("Perturbator"):
            self.c_t = tf.div(
                self.c,
                tf.pow(
                    tf.add(tf.cast(self.global_step_tensor, tf.float32),
                           tf.constant(1, dtype=tf.float32)), self.gamma),
                name="SPSA_ct")
            # self.c_t = 0.00 #MOD
            for var in trainable_vars:
                self.num_params += self._mul_dims(var.get_shape())
                var_name = var.name.split(':')[0]
                random = Bernoulli(tf.fill(var.get_shape(), 0.5),
                                   dtype=tf.float32)
                deltas[var] = tf.subtract(tf.constant(1, dtype=tf.float32),
                                          tf.scalar_mul(
                                              tf.constant(2, dtype=tf.float32),
                                              random.sample(1)[0]),
                                          name="SPSA_delta")
                c_t_delta = tf.scalar_mul(tf.reshape(self.c_t, []),
                                          deltas[var])
                n_perturbations[var_name + '/read:0'] = tf.subtract(
                    var, c_t_delta, name="perturb_n")
                p_perturbations[var_name + '/read:0'] = tf.add(
                    var, c_t_delta, name="perturb_p")
        # print("{} parameters".format(self.num_params))

        # Evaluator
        with tf.name_scope("Evaluator"):
            _, self.ninfo = self._clone_model(orig_graph_view, n_perturbations,
                                              'N_Eval')
            _, self.pinfo = self._clone_model(orig_graph_view, p_perturbations,
                                              'P_Eval')

        # Weight Updater
        optimizer_ops = []
        with tf.control_dependencies([loss]):
            with tf.name_scope('Updater'):
                a_t = self.a / (tf.pow(
                    tf.add(tf.cast(self.global_step_tensor, tf.float32),
                           tf.constant(1, dtype=tf.float32)), self.alpha))
                # a_t = 0.00 #MOD
                for var in trainable_vars:
                    l_pos = self.pinfo.transformed(loss)
                    l_neg = self.ninfo.transformed(loss)
                    # print( "l_pos: ", l_pos)
                    # print( "l_neg: ", l_neg)
                    ghat = (l_pos - l_neg) / (tf.constant(2, dtype=tf.float32)
                                              * self.c_t * deltas[var])
                    optimizer_ops.append(tf.assign_sub(var, a_t * ghat))
        grp = control_flow_ops.group(*optimizer_ops)
        with tf.control_dependencies([grp]):
            tf.assign_add(self.global_step_tensor,
                          tf.constant(1, dtype=self.global_step_tensor.dtype))

        return grp
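The update assembled above is simultaneous perturbation stochastic approximation (SPSA): every parameter is perturbed at once along a random +/-1 (Bernoulli) direction delta, and the gradient estimate is ghat = (L(theta + c_t*delta) - L(theta - c_t*delta)) / (2 * c_t * delta). A minimal NumPy sketch of one such step on a toy quadratic (illustrative only, not the optimizer class above):

import numpy as np

rng = np.random.default_rng(0)

def loss_fn(theta):
    return np.sum((theta - 1.0) ** 2)  # toy objective with its minimum at 1

theta = np.zeros(5)
a_t, c_t = 0.1, 0.1

delta = 1.0 - 2.0 * rng.binomial(1, 0.5, size=theta.shape)  # random +/-1 directions
ghat = (loss_fn(theta + c_t * delta) - loss_fn(theta - c_t * delta)) / (2.0 * c_t * delta)
theta -= a_t * ghat  # one SPSA update, analogous to tf.assign_sub(var, a_t * ghat)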