Example #1
    def init_eval_model(self):
        with tf.name_scope('eval_model'):
            self.eval_alpha_state = tf.placeholder(tf.float32)
            self.eval_rho_state = tf.placeholder(tf.float32)
            self.eval_n_test = tf.placeholder(tf.int32)
            eval_n_minibatch = self.eval_n_test - self.cs

            # Data Placeholder
            with tf.name_scope('input'):
                self.eval_ph = tf.placeholder(tf.int32)
                words = self.eval_ph

            # Index Masks
            with tf.name_scope('context_mask'):
                p_mask = tf.cast(
                    tf.range(int(self.cs / 2),
                             eval_n_minibatch + int(self.cs / 2)), tf.int32)
                rows = tf.cast(
                    tf.tile(tf.expand_dims(tf.range(0, int(self.cs / 2)), [0]),
                            [eval_n_minibatch, 1]), tf.int32)
                columns = tf.cast(
                    tf.tile(tf.expand_dims(tf.range(0, eval_n_minibatch), [1]),
                            [1, int(self.cs / 2)]), tf.int32)
                ctx_mask = tf.concat(
                    [rows + columns, rows + columns + int(self.cs / 2) + 1], 1)

            with tf.name_scope('natural_param'):
                with tf.name_scope('target_word'):
                    p_idx = tf.gather(words, p_mask)
                    p_rho = tf.squeeze(tf.gather(self.eval_rho_state, p_idx))

                # Negative samples
                with tf.name_scope('negative_samples'):
                    self.eval_n_idx = tf.placeholder(tf.int32)
                    n_rho = tf.gather(self.eval_rho_state, self.eval_n_idx)

                with tf.name_scope('context'):
                    ctx_idx = tf.squeeze(tf.gather(words, ctx_mask))
                    ctx_alphas = tf.gather(self.eval_alpha_state, ctx_idx)

                # Natural parameter
                ctx_sum = tf.reduce_sum(ctx_alphas, [1])
                p_eta = tf.expand_dims(
                    tf.reduce_sum(tf.multiply(p_rho, ctx_sum), -1), 1)
                n_eta = tf.reduce_sum(
                    tf.multiply(
                        n_rho,
                        tf.tile(tf.expand_dims(ctx_sum, 1), [1, self.ns, 1])),
                    -1)

            # Conditional likelihood
            y_pos = Bernoulli(logits=p_eta)
            y_neg = Bernoulli(logits=n_eta)

            ll_pos = y_pos.log_prob(1.0)
            ll_neg = tf.reduce_mean(y_neg.log_prob(0.0), axis=1)

            self.eval_ll = tf.nn.moments(ll_pos + ll_neg, axes=[0, 1])
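
    # Illustrative only (not part of the original class): a hypothetical helper
    # showing how the evaluation graph built above could be queried. rho_np and
    # alpha_np are trained embedding matrices, tokens is a 1-D array of held-out
    # token ids, and neg_idx holds pre-drawn negative-sample indices.
    def eval_log_likelihood(self, sess, rho_np, alpha_np, tokens, neg_idx):
        # tf.nn.moments returns (mean, variance) of the held-out log-likelihood.
        mean_ll, var_ll = sess.run(self.eval_ll,
                                   feed_dict={
                                       self.eval_rho_state: rho_np,
                                       self.eval_alpha_state: alpha_np,
                                       self.eval_ph: tokens,
                                       self.eval_n_test: len(tokens),
                                       self.eval_n_idx: neg_idx
                                   })
        return mean_ll, var_ll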
class bern_emb_model(emb_model):
    def __init__(self, args, d, logdir):
        super(bern_emb_model, self).__init__(args, d, logdir)
        self.n_minibatch = self.n_minibatch.sum()

        with tf.name_scope('model'):
            # Data Placeholder
            with tf.name_scope('input'):
                self.placeholders = tf.placeholder(tf.int32)
                self.words = self.placeholders

            # Index Masks
            with tf.name_scope('context_mask'):
                self.p_mask = tf.cast(
                    tf.range(int(self.cs / 2),
                             self.n_minibatch + int(self.cs / 2)), tf.int32)
                rows = tf.cast(
                    tf.tile(tf.expand_dims(tf.range(0, int(self.cs / 2)), [0]),
                            [self.n_minibatch, 1]), tf.int32)
                columns = tf.cast(
                    tf.tile(tf.expand_dims(tf.range(0, self.n_minibatch), [1]),
                            [1, int(self.cs / 2)]), tf.int32)
                self.ctx_mask = tf.concat(
                    [rows + columns, rows + columns + int(self.cs / 2) + 1], 1)

            with tf.name_scope('embeddings'):
                self.rho = tf.Variable(self.rho_init, name='rho')
                self.alpha = tf.Variable(self.alpha_init,
                                         name='alpha',
                                         trainable=self.alpha_trainable)

                with tf.name_scope('priors'):
                    prior = Normal(loc=0.0, scale=self.sig)
                    if self.alpha_trainable:
                        self.log_prior = tf.reduce_sum(
                            prior.log_prob(self.rho) +
                            prior.log_prob(self.alpha))
                    else:
                        self.log_prior = tf.reduce_sum(prior.log_prob(
                            self.rho))

            with tf.name_scope('natural_param'):
                # Target and Context Indices
                with tf.name_scope('target_word'):
                    self.p_idx = tf.gather(self.words, self.p_mask)
                    self.p_rho = tf.squeeze(tf.gather(self.rho, self.p_idx))

                # Negative samples
                with tf.name_scope('negative_samples'):
                    unigram_logits = tf.tile(
                        tf.expand_dims(tf.log(tf.constant(self.unigram)), [0]),
                        [self.n_minibatch, 1])
                    self.n_idx = tf.multinomial(unigram_logits, self.ns)
                    self.n_rho = tf.gather(self.rho, self.n_idx)

                with tf.name_scope('context'):
                    self.ctx_idx = tf.squeeze(
                        tf.gather(self.words, self.ctx_mask))
                    self.ctx_alphas = tf.gather(self.alpha, self.ctx_idx)

                # Natural parameter
                ctx_sum = tf.reduce_sum(self.ctx_alphas, [1])
                self.p_eta = tf.expand_dims(
                    tf.reduce_sum(tf.multiply(self.p_rho, ctx_sum), -1), 1)
                self.n_eta = tf.reduce_sum(
                    tf.multiply(
                        self.n_rho,
                        tf.tile(tf.expand_dims(ctx_sum, 1), [1, self.ns, 1])),
                    -1)

            # Conditional likelihood
            self.y_pos = Bernoulli(logits=self.p_eta)
            self.y_neg = Bernoulli(logits=self.n_eta)

            self.ll_pos = tf.reduce_sum(self.y_pos.log_prob(1.0))
            self.ll_neg = tf.reduce_sum(self.y_neg.log_prob(0.0))

            self.log_likelihood = self.ll_pos + self.ll_neg

            # Ratio of corpus size to minibatch size (computed but unused here;
            # the loss instead scales the minibatch log-likelihood by n_epochs).
            scale = 1.0 * self.N / self.n_minibatch
            self.loss = -(self.n_epochs * self.log_likelihood + self.log_prior)

    def dump(self, fname):
        with self.sess.as_default():
            dat = {'rho': self.rho.eval(), 'alpha': self.alpha.eval()}
        pickle.dump(dat, open(fname, "ab+"))
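
# Illustrative aside (not part of the original example): per window, the natural
# parameters built above are just dot products between one target embedding and
# the sum of its context vectors. A minimal NumPy sketch with hypothetical sizes
# (L = vocabulary, K = embedding dimension, cs = context size, ns = negatives):
import numpy as np

L, K, cs, ns = 1000, 50, 4, 8
rho = np.random.randn(L, K) / K               # target embeddings
alpha = np.random.randn(L, K) / K             # context embeddings

target = 42                                   # token id at the window centre
context = [7, 12, 99, 105]                    # cs surrounding token ids
negatives = np.random.randint(L, size=ns)     # negative-sample token ids

ctx_sum = alpha[context].sum(axis=0)          # summed context vectors, shape (K,)
p_eta = rho[target] @ ctx_sum                 # positive natural parameter (scalar)
n_eta = rho[negatives] @ ctx_sum              # negative natural parameters, shape (ns,)

# Matches y_pos.log_prob(1.0) and y_neg.log_prob(0.0) for a Bernoulli with logits eta.
ll_pos = -np.log1p(np.exp(-p_eta))
ll_neg = -np.log1p(np.exp(n_eta))
log_likelihood = ll_pos + ll_neg.sum()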
Example #3
    return tf.matmul(hidden, w_2)

### LOGISTIC REGRESSION MODEL
left_ex = tf.placeholder(tf.float32, shape=(mb, K))
right_ex = tf.placeholder(tf.float32, shape=(mb, K))
g_ex = tf.placeholder(tf.float32, shape=(mb, K))

left_eta = neural_network(left_ex, w_1, w_2)
right_eta = neural_network(right_ex, w_1, w_2)
g_eta = neural_network(g_ex, w_1, w_2)

left_y = Bernoulli(logits=left_eta)
right_y = Bernoulli(logits=right_eta)
g_y = Bernoulli(logits=g_eta)

left_bias = tf.reduce_mean(left_y.log_prob(1.0))
right_bias = tf.reduce_mean(right_y.log_prob(0.0))
neutral = tf.reduce_mean(g_y.log_prob(0.5))

loss = -(log_prior + 1000.0 * (left_bias + right_bias + neutral))

### TRAINING
optimizer = tf.train.AdamOptimizer()
train = optimizer.minimize(loss)
sess = tf.Session()
with sess.as_default():
    tf.global_variables_initializer().run()

saver = tf.train.Saver()
with tf.name_scope('objective'):
    tf.summary.scalar('loss', loss)
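
# Illustrative continuation (not part of the original snippet): a minimal training
# loop for the graph above. left_batch, right_batch and g_batch are hypothetical
# (mb, K)-shaped feature matrices, and 'logs' is a hypothetical output directory.
merged = tf.summary.merge_all()
writer = tf.summary.FileWriter('logs', sess.graph)
for step in range(1000):
    _, cur_loss, summary = sess.run(
        [train, loss, merged],
        feed_dict={left_ex: left_batch,
                   right_ex: right_batch,
                   g_ex: g_batch})
    if step % 100 == 0:
        writer.add_summary(summary, step)
        print('step', step, 'loss', cur_loss)
saver.save(sess, 'logs/model.ckpt')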
Example #4
    def fit(self,
            data,
            epochs=1000,
            max_seconds=600,
            activation=tf.nn.elu,
            batch_norm_decay=0.9,
            learning_rate=1e-5,
            batch_sz=1024,
            adapt_lr=False,
            print_progress=True,
            show_fig=True):

        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

        # static features
        X = data['X_train_static_mins']
        N, D = X.shape
        self.X = tf.placeholder(tf.float32, shape=(None, D), name='X')

        # timeseries features
        X_time = data['X_train_time_0']
        T1, N1, D1 = X_time.shape
        assert N == N1
        self.X_time = tf.placeholder(tf.float32,
                                     shape=(T1, None, D1),
                                     name='X_time')
        self.train = tf.placeholder(tf.bool, shape=(), name='train')
        self.rnn_keep_p_encode = tf.placeholder(tf.float32,
                                                shape=(),
                                                name='rnn_keep_p_encode')
        self.rnn_keep_p_decode = tf.placeholder(tf.float32,
                                                shape=(),
                                                name='rnn_keep_p_decode')
        adp_learning_rate = tf.placeholder(tf.float32,
                                           shape=(),
                                           name='adp_learning_rate')

        he_init = variance_scaling_initializer()
        bn_params = {
            'is_training': self.train,
            'decay': batch_norm_decay,
            'updates_collections': None
        }
        latent_size = self.encoder_layer_sizes[-1]

        inputs = self.X
        with tf.variable_scope('static_encoder'):
            for layer_size, keep_p in zip(self.encoder_layer_sizes[:-1],
                                          self.encoder_dropout[:-1]):
                inputs = dropout(inputs, keep_p, is_training=self.train)
                inputs = fully_connected(inputs,
                                         layer_size,
                                         weights_initializer=he_init,
                                         activation_fn=activation,
                                         normalizer_fn=batch_norm,
                                         normalizer_params=bn_params)

        if self.rnn_encoder_layer_sizes:
            with tf.variable_scope('rnn_encoder'):
                rnn_cell = MultiRNNCell([
                    LayerNormBasicLSTMCell(
                        s,
                        activation=tf.tanh,
                        dropout_keep_prob=self.rnn_encoder_dropout)
                    for s in self.rnn_encoder_layer_sizes
                ])
                time_inputs, states = tf.nn.dynamic_rnn(rnn_cell,
                                                        self.X_time,
                                                        swap_memory=True,
                                                        time_major=True,
                                                        dtype=tf.float32)
                time_inputs = tf.transpose(time_inputs, perm=(1, 0, 2))
                time_inputs = tf.reshape(
                    time_inputs,
                    shape=(-1, self.rnn_encoder_layer_sizes[-1] * T1))

            inputs = tf.concat([inputs, time_inputs], axis=1)

        with tf.variable_scope('latent_space'):
            inputs = dropout(inputs,
                             self.encoder_dropout[-1],
                             is_training=self.train)
            loc = fully_connected(inputs,
                                  latent_size,
                                  weights_initializer=he_init,
                                  activation_fn=None,
                                  normalizer_fn=batch_norm,
                                  normalizer_params=bn_params)
            scale = fully_connected(inputs,
                                    latent_size,
                                    weights_initializer=he_init,
                                    activation_fn=tf.nn.softplus,
                                    normalizer_fn=batch_norm,
                                    normalizer_params=bn_params)

            standard_normal = Normal(loc=np.zeros(latent_size,
                                                  dtype=np.float32),
                                     scale=np.ones(latent_size,
                                                   dtype=np.float32))
            e = standard_normal.sample(tf.shape(loc)[0])
            outputs = e * scale + loc

            static_output_size = self.decoder_layer_sizes[0]
            if self.rnn_decoder_layer_sizes:
                time_output_size = self.rnn_decoder_layer_sizes[0] * T1
                output_size = static_output_size + time_output_size
            else:
                output_size = static_output_size
            outputs = fully_connected(outputs,
                                      output_size,
                                      weights_initializer=he_init,
                                      activation_fn=activation,
                                      normalizer_fn=batch_norm,
                                      normalizer_params=bn_params)
            if self.rnn_decoder_layer_sizes:
                outputs, time_outputs = tf.split(
                    outputs, [static_output_size, time_output_size], axis=1)

        with tf.variable_scope('static_decoder'):
            for layer_size, keep_p in zip(self.decoder_layer_sizes,
                                          self.decoder_dropout[:-1]):
                outputs = dropout(outputs, keep_p, is_training=self.train)
                outputs = fully_connected(outputs,
                                          layer_size,
                                          weights_initializer=he_init,
                                          activation_fn=activation,
                                          normalizer_fn=batch_norm,
                                          normalizer_params=bn_params)
            outputs = dropout(outputs,
                              self.decoder_dropout[-1],
                              is_training=self.train)
            outputs = fully_connected(outputs,
                                      D,
                                      weights_initializer=he_init,
                                      activation_fn=None,
                                      normalizer_fn=batch_norm,
                                      normalizer_params=bn_params)

            X_hat = Bernoulli(logits=outputs)
            self.posterior_predictive = X_hat.sample()
            self.posterior_predictive_probs = tf.nn.sigmoid(outputs)

        if self.rnn_decoder_layer_sizes:
            with tf.variable_scope('rnn_decoder'):
                self.rnn_decoder_layer_sizes.append(D1)
                time_output_size = self.rnn_decoder_layer_sizes[0]
                time_outputs = tf.reshape(time_outputs,
                                          shape=(-1, T1, time_output_size))
                time_outputs = tf.transpose(time_outputs, perm=(1, 0, 2))
                rnn_cell = MultiRNNCell([
                    LayerNormBasicLSTMCell(
                        s,
                        activation=tf.tanh,
                        dropout_keep_prob=self.rnn_decoder_dropout)
                    for s in self.rnn_decoder_layer_sizes
                ])
                time_outputs, states = tf.nn.dynamic_rnn(rnn_cell,
                                                         time_outputs,
                                                         swap_memory=True,
                                                         time_major=True,
                                                         dtype=tf.float32)
                time_outputs = tf.transpose(time_outputs, perm=(1, 0, 2))
                time_outputs = tf.reshape(time_outputs, shape=(-1, T1 * D1))
                X_hat_time = Bernoulli(logits=time_outputs)
                posterior_predictive_time = X_hat_time.sample()
                posterior_predictive_time = tf.reshape(
                    posterior_predictive_time, shape=(-1, T1, D1))
                self.posterior_predictive_time = tf.transpose(
                    posterior_predictive_time, perm=(1, 0, 2))
                self.posterior_predictive_probs_time = tf.nn.sigmoid(
                    time_outputs)

        kl_div = -tf.log(scale) + 0.5 * (scale**2 + loc**2) - 0.5
        kl_div = tf.reduce_sum(kl_div, axis=1)

        expected_log_likelihood = tf.reduce_sum(X_hat.log_prob(self.X), axis=1)
        X_time_trans = tf.transpose(self.X_time, perm=(1, 0, 2))
        X_time_reshape = tf.reshape(X_time_trans, shape=(-1, T1 * D1))
        # Note: despite its name, `elbo` below holds the negative ELBO, i.e. the
        # loss that the optimizer minimizes.
        if self.rnn_encoder_layer_sizes:
            expected_log_likelihood_time = tf.reduce_sum(
                X_hat_time.log_prob(X_time_reshape), axis=1)
            elbo = -tf.reduce_sum(expected_log_likelihood +
                                  expected_log_likelihood_time - kl_div)
        else:
            elbo = -tf.reduce_sum(expected_log_likelihood - kl_div)
        train_op = tf.train.AdamOptimizer(
            learning_rate=adp_learning_rate).minimize(elbo)

        tf.summary.scalar('elbo', elbo)
        if self.save_file:
            saver = tf.train.Saver()

        if self.tensorboard:
            for v in tf.trainable_variables():
                tf.summary.histogram(v.name, v)
            train_merge = tf.summary.merge_all()
            writer = tf.summary.FileWriter(self.tensorboard)

        self.init_op = tf.global_variables_initializer()
        n = 0
        n_batches = N // batch_sz
        costs = list()
        min_cost = np.inf

        t0 = dt.now()
        with tf.Session() as sess:
            sess.run(self.init_op)
            for epoch in range(epochs):
                idxs = shuffle(range(N))
                X_train = X[idxs]
                X_train_time = X_time[:, idxs]

                for batch in range(n_batches):
                    n += 1
                    X_batch = X_train[batch * batch_sz:(batch + 1) * batch_sz]
                    X_batch_time = X_train_time[:,
                                                batch * batch_sz:(batch + 1) *
                                                batch_sz]

                    sess.run(train_op,
                             feed_dict={
                                 self.X: X_batch,
                                 self.X_time: X_batch_time,
                                 self.rnn_keep_p_encode:
                                 self.rnn_encoder_dropout,
                                 self.rnn_keep_p_decode:
                                 self.rnn_decoder_dropout,
                                 self.train: True,
                                 adp_learning_rate: learning_rate
                             })
                    if n % 100 == 0 and print_progress:
                        cost = sess.run(elbo,
                                        feed_dict={
                                            self.X: X,
                                            self.X_time: X_time,
                                            self.rnn_keep_p_encode: 1.0,
                                            self.rnn_keep_p_decode: 1.0,
                                            self.train: False
                                        })
                        cost /= N
                        costs.append(cost)

                        if adapt_lr and epoch > 0:
                            if cost < min_cost:
                                min_cost = cost
                            elif cost > min_cost * 1.01:
                                learning_rate *= 0.75
                                if print_progress:
                                    print('Updating Learning Rate',
                                          learning_rate)

                        print('Epoch:', epoch, 'Batch:', batch, 'Cost:', cost)

                        if self.tensorboard:
                            train_sum = sess.run(train_merge,
                                                 feed_dict={
                                                     self.X: X,
                                                     self.X_time: X_time,
                                                     self.rnn_keep_p_encode:
                                                     1.0,
                                                     self.rnn_keep_p_decode:
                                                     1.0,
                                                     self.train: False
                                                 })
                            writer.add_summary(train_sum, n)

                seconds = (dt.now() - t0).seconds
                if seconds > max_seconds:
                    if print_progress:
                        print('Breaking after', seconds, 'seconds')
                    break

            if self.save_file:
                saver.save(sess, self.save_file)

            if self.tensorboard:
                writer.add_graph(sess.graph)

        if show_fig:
            plt.plot(costs)
            plt.title('Costs and Scores')
            plt.show()
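
# Illustrative only (not part of the original example): a sketch of calling fit().
# TimeSeriesAutoencoder is a hypothetical name for the class that owns fit() (the
# real class name is not shown in this snippet), and X_static / X_series are
# hypothetical NumPy arrays of shape (N, D) and (T, N, D_time) respectively.
model = TimeSeriesAutoencoder()
data = {
    'X_train_static_mins': X_static,   # static per-sample features
    'X_train_time_0': X_series,        # time-major sequence features
}
model.fit(data, epochs=50, batch_sz=512, adapt_lr=True, show_fig=False)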
class bern_emb_model():
    def __init__(self, d, K, sig, sess, logdir):
        self.K = K
        self.sig = sig
        self.sess = sess
        self.logdir = logdir

        with tf.name_scope('model'):
            # Data Placeholder
            with tf.name_scope('input'):
                self.placeholders = tf.placeholder(tf.int32)
                self.words = self.placeholders

            # Index Masks
            with tf.name_scope('context_mask'):
                self.p_mask = tf.cast(
                    tf.range(int(d.cs / 2), d.n_minibatch + int(d.cs / 2)),
                    tf.int32)
                rows = tf.cast(
                    tf.tile(tf.expand_dims(tf.range(0, int(d.cs / 2)), [0]),
                            [d.n_minibatch, 1]), tf.int32)
                columns = tf.cast(
                    tf.tile(tf.expand_dims(tf.range(0, d.n_minibatch), [1]),
                            [1, int(d.cs / 2)]), tf.int32)
                self.ctx_mask = tf.concat(
                    [rows + columns, rows + columns + int(d.cs / 2) + 1], 1)

            with tf.name_scope('embeddings'):
                # Embedding vectors
                self.rho = tf.Variable(tf.random_normal([d.L, self.K]) /
                                       self.K,
                                       name='rho')

                # Context vectors
                self.alpha = tf.Variable(tf.random_normal([d.L, self.K]) /
                                         self.K,
                                         name='alpha')

                with tf.name_scope('priors'):
                    prior = Normal(loc=0.0, scale=self.sig)
                    self.log_prior = tf.reduce_sum(
                        prior.log_prob(self.rho) + prior.log_prob(self.alpha))

            with tf.name_scope('natural_param'):
                # Target and Context Indices
                with tf.name_scope('target_word'):
                    self.p_idx = tf.gather(self.words, self.p_mask)
                    self.p_rho = tf.squeeze(tf.gather(self.rho, self.p_idx))

                # Negative samples
                with tf.name_scope('negative_samples'):
                    unigram_logits = tf.tile(
                        tf.expand_dims(tf.log(tf.constant(d.unigram)), [0]),
                        [d.n_minibatch, 1])
                    self.n_idx = tf.multinomial(unigram_logits, d.ns)
                    self.n_rho = tf.gather(self.rho, self.n_idx)

                with tf.name_scope('context'):
                    self.ctx_idx = tf.squeeze(
                        tf.gather(self.words, self.ctx_mask))
                    self.ctx_alphas = tf.gather(self.alpha, self.ctx_idx)

                # Natural parameter
                ctx_sum = tf.reduce_sum(self.ctx_alphas, [1])
                self.p_eta = tf.expand_dims(
                    tf.reduce_sum(tf.multiply(self.p_rho, ctx_sum), -1), 1)
                self.n_eta = tf.reduce_sum(
                    tf.multiply(
                        self.n_rho,
                        tf.tile(tf.expand_dims(ctx_sum, 1), [1, d.ns, 1])), -1)

            # Conditional likelihood
            self.y_pos = Bernoulli(logits=self.p_eta)
            self.y_neg = Bernoulli(logits=self.n_eta)

            self.ll_pos = tf.reduce_sum(self.y_pos.log_prob(1.0))
            self.ll_neg = tf.reduce_sum(self.y_neg.log_prob(0.0))

            self.log_likelihood = self.ll_pos + self.ll_neg

            scale = 1.0 * d.N / d.n_minibatch
            self.loss = -(scale * self.log_likelihood + self.log_prior)

            # Training
            optimizer = tf.train.AdamOptimizer()
            self.train = optimizer.minimize(self.loss)
            with self.sess.as_default():
                tf.global_variables_initializer().run()
            variable_summaries('rho', self.rho)
            variable_summaries('alpha', self.alpha)
            with tf.name_scope('objective'):
                tf.summary.scalar('loss', self.loss)
                tf.summary.scalar('priors', self.log_prior)
                tf.summary.scalar('ll_pos', self.ll_pos)
                tf.summary.scalar('ll_neg', self.ll_neg)
            self.summaries = tf.summary.merge_all()
            self.train_writer = tf.summary.FileWriter(self.logdir,
                                                      self.sess.graph)
            self.saver = tf.train.Saver()
            config = projector.ProjectorConfig()

            alpha = config.embeddings.add()
            alpha.tensor_name = 'model/embeddings/alpha'
            alpha.metadata_path = '../vocab.tsv'
            rho = config.embeddings.add()
            rho.tensor_name = 'model/embeddings/rho'
            rho.metadata_path = '../vocab.tsv'
            projector.visualize_embeddings(self.train_writer, config)

    def dump(self, fname):
        with self.sess.as_default():
            dat = {'rho': self.rho.eval(), 'alpha': self.alpha.eval()}
        pickle.dump(dat, open(fname, "ab+"))

    def plot_params(self, dir_name, labels):
        plot_only = len(labels)

        with self.sess.as_default():
            tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
            low_dim_embs_alpha2 = tsne.fit_transform(
                self.alpha.eval()[:plot_only])
            plot_with_labels(low_dim_embs_alpha2[:plot_only],
                             labels[:plot_only], dir_name + '/alpha.eps')

            tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
            low_dim_embs_rho2 = tsne.fit_transform(self.rho.eval()[:plot_only])
            plot_with_labels(low_dim_embs_rho2[:plot_only], labels[:plot_only],
                             dir_name + '/rho.eps')
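
# Illustrative only (not part of the original code): one way the class above could
# be constructed and trained for a single step. The namespace d is a hypothetical
# stand-in for the data object the constructor expects; in the real project it
# would come from the corpus loader.
import types

import numpy as np
import tensorflow as tf

vocab_size = 5000
d = types.SimpleNamespace(
    L=vocab_size,                  # vocabulary size
    N=1000000,                     # total number of tokens in the corpus
    cs=4,                          # context window size
    ns=10,                         # negative samples per target
    n_minibatch=1024,              # target positions per minibatch
    unigram=np.full(vocab_size, 1.0 / vocab_size, dtype=np.float32))

sess = tf.Session()
model = bern_emb_model(d, K=100, sig=1.0, sess=sess, logdir='log')

# One gradient step on a window of token ids (hypothetical random batch); the
# batch must contain n_minibatch + cs tokens so the context mask stays in range.
batch = np.random.randint(d.L, size=d.n_minibatch + d.cs)
sess.run(model.train, feed_dict={model.placeholders: batch})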