Code example #1
File: base_model.py Project: Perf-Org-5KRepos/vi-hds
    def __init__(self,
                 nspecies,
                 n_hidden_precisions,
                 inputs=None,
                 hidden_activation=tf.nn.tanh):
        '''Initialize neural precisions layers'''
        self.nspecies = nspecies
        if inputs is None:
            inputs = self.nspecies + 1
        inp = Dense(n_hidden_precisions,
                    activation=hidden_activation,
                    use_bias=True,
                    name="prec_hidden",
                    input_shape=(inputs, ))
        act_layer = Dense(4,
                          activation=tf.nn.sigmoid,
                          name="prec_act",
                          bias_constraint=NonNeg())
        deg_layer = Dense(4,
                          activation=tf.nn.sigmoid,
                          name="prec_deg",
                          bias_constraint=NonNeg())
        self.act = Sequential([inp, act_layer])
        self.deg = Sequential([inp, deg_layer])

        for layer in [inp, act_layer, deg_layer]:
            weights, bias = layer.weights
            variable_summaries(weights, layer.name + "_kernel", False)
            variable_summaries(bias, layer.name + "_bias", False)
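
Note: every example on this page calls some project-specific variant of variable_summaries, and the signature differs from project to project (some variants take a name, a collections list, a summary family, or a histogram flag). Most of them follow the helper pattern from the TensorFlow summaries tutorial; the sketch below shows that general pattern for orientation only and is not taken from any of the projects listed here.

import tensorflow as tf

def variable_summaries(var, name='summaries'):
    """Attach mean/stddev/min/max scalar summaries and a histogram to a tensor (TF 1.x)."""
    with tf.name_scope(name):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        # standard deviation of the tensor around its mean
        stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)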
Code example #2
    def _linear_layer(self, input, dim_0, dim_1, name='', out_layer=False, lap=0):
        '''
        Builds a linear layer
        :param input: (tensor)
        :param dim_0: (int)
        :param dim_1: (int)
        :param name: (str) name of the layer
        :param out_layer: (bool) if True, no activation is applied
        :param lap: (int) index used to group the summaries under the 'Lap_{lap}' family
        :return: (tensor)
        '''

        with tf.name_scope(name):
            with tf.name_scope('Weights'):
                W = tf.Variable(tf.truncated_normal([dim_0, dim_1], stddev=0.1), name='W')
                variable_summaries(W, ['train'], family='Lap_{}'.format(lap))
                b = tf.Variable(tf.zeros([1, dim_1]), name='Bias_hidden')
                variable_summaries(b, ['train'], family='Lap_{}'.format(lap))

            out_matmul = tf.matmul(input, W, name='Matmul')
            out = tf.add(out_matmul, b, name='Add')
            tf.summary.histogram('pre_activations', out, collections=['train'], family='Lap_{}'.format(lap))
            if not out_layer:
                out = tf.nn.relu(out, name='Relu')
                tf.summary.histogram('post_activations', out, collections=['train'], family='Lap_{}'.format(lap))

        return out
Code example #3
 def build_train_step(self, dreg, encoder, objective, opt_func):
     '''Returns a computation that is run in the tensorflow session.'''
     # This path is for b_use_correct_iwae_gradients = True. For False, we would just
     # want to return opt_func.minimize(objective.vae_cost)
     trainable_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
     if dreg:
         grads = self.create_dreg_gradients(encoder, objective,
                                            trainable_params)
         print("Set up Doubly Reparameterized Gradient (dreg)")
     else:
         # ... so, get the list of params we will change with the gradient, and ...
         # ... compute the VAE ELBO gradient, using a special stop-gradient function to prevent
         # gradients from propagating through ws (i.e. ws is just a copy)
         grads = tf.gradients(objective.vae_cost, trainable_params)
         print("Set up non-dreg gradient")
         # grads = [tf.clip_by_value(g, -0.1, 0.1) for g in iwae_grads]
     if self.tb_gradients:
         with tf.name_scope('Gradients'):
             for p, g in zip(trainable_params, grads):
                 variable_summaries(g,
                                    p.name.split(':')[0],
                                    self.plot_histograms)
     # TODO(dacart): check if this should go above "optimizer =" or be deleted.
     #clipped_grads = [tf.clip_by_norm(g, 1.0) for g in grads]
     # This depends on update rule implemented in AdamOptimizer:
     optimizer = opt_func.apply_gradients(zip(grads, trainable_params))
     return optimizer
Code example #4
File: metrics.py Project: zwytop/GraphEmbedding
def masked_softmax_cross_entropy(preds, labels, mask):
    """Softmax cross-entropy loss with masking."""
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=labels)
    mask = tf.cast(mask, dtype=tf.float32)
    mask /= tf.reduce_mean(mask)
    loss *= mask
    fin_loss = tf.reduce_mean(loss)
    variable_summaries(fin_loss)
    return fin_loss
Code example #5
File: metrics.py Project: zwytop/GraphEmbedding
def masked_accuracy(preds, labels, mask):
    """Accuracy with masking."""
    correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(labels, 1))
    accuracy_all = tf.cast(correct_prediction, tf.float32)
    mask = tf.cast(mask, dtype=tf.float32)
    mask /= tf.reduce_mean(mask)
    accuracy_all *= mask
    accuracy = tf.reduce_mean(accuracy_all)
    variable_summaries(accuracy)
    return accuracy
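
Note: code examples #4 and #5 rely on the same masking trick: dividing the 0/1 mask by its mean rescales it so that tf.reduce_mean over the full batch equals the mean over only the masked-in entries. A small self-contained NumPy illustration of this (the values are made up, not from the project):

import numpy as np

loss = np.array([0.2, 0.6, 1.0, 3.0], dtype=np.float32)  # per-example losses
mask = np.array([1.0, 1.0, 0.0, 0.0], dtype=np.float32)  # only the first two examples count

mask = mask / mask.mean()       # becomes [2., 2., 0., 0.]
print((loss * mask).mean())     # 0.4, identical to loss[:2].mean()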
Code example #6
File: one2one.py Project: lavanova/deep_weather
 def one2oneLayer(self, input, namespace):
     with tf.name_scope(namespace):
         shape = X_SHAPE
         weights = tf.Variable(tf.ones(shape), name='weights')
         bias = tf.Variable(tf.zeros(shape), name='bias')
         out = tf.multiply(input, weights) + bias
         # out = tf.identity(out, name="out")
         with tf.name_scope('summary'):
             variable_summaries(weights, name='weights')
             variable_summaries(bias, name='bias')
     return out
Code example #7
 def build_ff_neural_net(nn_input,
                         n_inputs,
                         hidden_layers,
                         nonlinearity,
                         scope_name,
                         variable_name,
                         collect_summary,
                         logit_weights=None,
                         initializer=layers.xavier_initializer(),
                         dropout=False):
     assert len(hidden_layers) == len(nonlinearity)
     name_scope = '%s/%s' % (scope_name, variable_name)
     h = nn_input
     n_hiddens = n_inputs
     n_hiddens_next = hidden_layers[0]
     for i in range(len(hidden_layers)):
         w = get_scope_variable(scope_name,
                                "%s/layer%d/weights" %
                                (variable_name, i),
                                shape=(n_hiddens, n_hiddens_next),
                                initializer=initializer)
         b = get_scope_variable(scope_name,
                                "%s/layer%d/biases" %
                                (variable_name, i),
                                shape=(n_hiddens_next,),
                                initializer=initializer)
         if collect_summary:
             with tf.name_scope(name_scope + '/layer%d' % i):
                 with tf.name_scope('weights'):
                     variable_summaries(w)
                 with tf.name_scope('biases'):
                     variable_summaries(b)
                 with tf.name_scope('Wx_plus_b'):
                     pre_h = tf.matmul(h, w) + b
                     # Yunfei: dropout option is useless now
                     if dropout:
                         # if i == 0:
                         #     pre_h = tf.nn.dropout(tf.matmul(h,w), keep_prob=0.8) + b
                         # else:
                         pre_h = tf.nn.dropout(tf.matmul(h, w),
                                               keep_prob=dropout) + b
                     tf.summary.histogram('pre_activations', pre_h)
                 h = nonlinearity[i](pre_h, name='activation')
                 tf.summary.histogram('activations', h)
         else:
             pre_h = tf.matmul(h, w) + b
             h = nonlinearity[i](pre_h, name='activation')
         n_hiddens = hidden_layers[i]
         if i + 1 < len(hidden_layers):
             n_hiddens_next = hidden_layers[i + 1]
         if logit_weights is not None and i == len(hidden_layers) - 2:
             h *= logit_weights
     return h
Code example #8
    def initialize_training(self):
        #optimizer = tf.train.GradientDescentOptimizer(self.eta)
        optimizer = tf.train.AdamOptimizer()
        self.train = optimizer.minimize(self.loss)
        self.sess = tf.Session()
        with self.sess.as_default():
            tf.global_variables_initializer().run()

        #if self.hierarchical:
        #    for t in range(4):
        #        variable_summaries(self.states[t]+'_diff', tf.abs(self.rho
        #                  -  self.geo_rho[self.states[t]]))
        if self.amortized:
            variable_summaries('phi', self.phi)
            variable_summaries('rho', self.rho)
            variable_summaries('alpha', self.alpha)
            for t in range(self.n_states):
                variable_summaries(
                    self.states[t] + '_rho',
                    neural_network(self.rho, self.phi, self.K, t, self.H0,
                                   self.resnet))
                variable_summaries(
                    self.states[t] + '_diff',
                    tf.abs(self.rho - neural_network(
                        self.rho, self.phi, self.K, t, self.H0, self.resnet)))
        with tf.name_scope('objective'):
            tf.summary.scalar('loss', self.loss)
            tf.summary.scalar('priors', self.log_prior)
            tf.summary.scalar('ll_pos', self.ll_pos)
            tf.summary.scalar('ll_neg', self.ll_neg)
        self.summaries = tf.summary.merge_all()
        self.train_writer = tf.summary.FileWriter(self.logdir, self.sess.graph)
        self.saver = tf.train.Saver()
        config = projector.ProjectorConfig()
        alpha = config.embeddings.add()
        alpha.tensor_name = 'model/embeddings/alpha'
        alpha.metadata_path = '../vocab.tsv'
        if self.amortized:
            phi = config.embeddings.add()
            phi.tensor_name = 'model/embeddings/phi'
            phi.metadata_path = '../states.tsv'
        rho = config.embeddings.add()
        rho.tensor_name = 'model/embeddings/rho'
        rho.metadata_path = '../vocab.tsv'
        for state in self.states:
            rho = config.embeddings.add()
            rho.tensor_name = 'model/embeddings/' + state + '_rho'
            rho.metadata_path = '../vocab.tsv'
        projector.visualize_embeddings(self.train_writer, config)
Code example #9
    def model(self, X, Y):
        feature = int(np.prod(X.get_shape()[1:]))
        classes = int(np.prod(Y.get_shape()[1:]))
        keep_prob = tf.placeholder(tf.float32)

        # linear layer
        W = weight_variable([feature, classes])
        b = bias_variable([classes])

        # loss
        logits = tf.matmul(X, W) + b
        entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                          labels=Y,
                                                          name='loss')
        loss = tf.reduce_mean(entropy)
        variable_summaries(loss, 'loss')

        return logits, loss, keep_prob, "linear"
Code example #10
File: models.py Project: fhahaha/fgsm-attack
def ConvNet(x,
            input_shape,
            filters_out=64,
            n_classes=10,
            non_linearity='relu'):
    # Basic CNN from Cleverhans MNIST tutorial:
    # https://github.com/mmarius/cleverhans/blob/master/cleverhans_tutorials/tutorial_models.py#L155
    h = x
    input_shape = list(input_shape)
    h, output_shape = l.conv2d(h,
                               kernel_size=8,
                               stride=2,
                               filters_in=input_shape[-1],
                               filters_out=filters_out,
                               padding='SAME',
                               name='conv1')
    h = l.non_linearity(h, name=non_linearity)
    h, output_shape = l.conv2d(h,
                               kernel_size=6,
                               stride=2,
                               filters_in=output_shape[-1],
                               filters_out=filters_out * 2,
                               padding='VALID',
                               name='conv2')
    h = l.non_linearity(h, name=non_linearity)
    h, output_shape = l.conv2d(h,
                               kernel_size=5,
                               stride=1,
                               filters_in=output_shape[-1],
                               filters_out=filters_out * 2,
                               padding='VALID',
                               name='conv3')
    h = l.non_linearity(h, name=non_linearity)

    h, output_shape = l.flatten(input_shape=output_shape, x=h)
    logits, output_shape = l.linear(input_shape=output_shape,
                                    n_hidden=n_classes,
                                    x=h,
                                    name='output-layer')
    utils.variable_summaries(logits, name='unscaled-logits-output-layer')

    return logits
Code example #11
    def model(self, X, Y):
        feature = int(np.prod(X.get_shape()[1:]))
        classes = int(np.prod(Y.get_shape()[1:]))
        x_image = tf.reshape(X, [-1, feature, 1, 1])

        # 1st conv layer
        with tf.name_scope('conv1'):
            W = weight_variable([5, 1, 1, 32])
            b = bias_variable([32])
            h = tf.nn.relu(conv2d(x_image, W) + b)
            conv1 = max_pool_2x2(h)

        # 2nd conv layer
        with tf.name_scope('conv2'):
            W = weight_variable([5, 1, 32, 64])
            b = bias_variable([64])
            conv2 = tf.nn.relu(conv2d(conv1, W) + b)

        keep_prob = tf.placeholder(tf.float32)

        # 1st fc layer
        with tf.name_scope('fc1'):
            shape = int(np.prod(conv2.get_shape()[1:]))
            W = weight_variable([shape, 1024])
            b = bias_variable([1024])
            conv2_flat = tf.reshape(conv2, [-1, shape])
            h = tf.nn.relu(tf.matmul(conv2_flat, W) + b)
            fc1 = tf.nn.dropout(h, keep_prob)

        # 2nd fc layer
        with tf.name_scope('fc2'):
            W = weight_variable([1024, classes])
            b = bias_variable([classes])
            logits = tf.matmul(fc1, W) + b
            entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=Y,
                                                              name='loss')
            loss = tf.reduce_mean(entropy)
            variable_summaries(loss, 'loss')

        return logits, loss, keep_prob, 'cnn'
Code example #12
    def add_linear_output_layer(self, last_hidden_layer, ground_truth, corpus_tag, task_tag, loss_weight=1):
        # returns loss op
        with tf.variable_scope("output_layer_%s" % task_tag) as layer_scope:
            last_out = fully_connected(last_hidden_layer, 1, activation_fn=tf.identity,
                                       weights_regularizer=l1_l2_regularizer(self.l1_reg, self.l2_reg),
                                       scope=layer_scope)
            self.predictions = last_out

        with tf.name_scope("%s_loss_%s" % (corpus_tag, task_tag)):
            loss = loss_weight * tf.reduce_mean(tf.squared_difference(last_out, ground_truth))
            utils.variable_summaries(loss, "loss", corpus_tag)
            tf.add_to_collection(tf.GraphKeys.LOSSES, loss)

        with tf.name_scope('%s_accuracy_%s' % (corpus_tag, task_tag)):
            accuracy, _ = streaming_mean_relative_error(last_out, ground_truth, ground_truth,
                                                        name="acc_%s" % corpus_tag,
                                                        updates_collections=tf.GraphKeys.UPDATE_OPS)
            accuracy = 1 - accuracy
            utils.variable_summaries(accuracy, "accuracy", corpus_tag)

            updates_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            self.calculate_accuracy_op = control_flow_ops.with_dependencies(updates_op, accuracy)
Code example #13
    def encode(self):

        encoded_output, encoded_state = utils.encode_seq(
            input_seq=self.q1,
            seq_len=self.len1,
            word_embeddings=self.word_embeddings,
            num_neurons=self.num_neurons)  # [batch_size, 2*num_neurons]

        with tf.variable_scope(
                "variational_inference"):  # Variational inference
            mean = utils.linear(encoded_state, self.hidden_size,
                                scope='mean')  # [batch_size, n_hidden]
            logsigm = utils.linear(encoded_state,
                                   self.hidden_size,
                                   scope='logsigm')  # [batch_size, n_hidden]
            self.mean, self.logsigm = mean, logsigm

            # Gaussian Multivariate kld(z,N(0,1)) = -0.5 * [ sum_d(logsigma) + d - sum_d(sigma) - mu_T*mu]
            klds = -0.5 * (tf.reduce_sum(logsigm, 1) +
                           tf.cast(tf.shape(mean)[1], tf.float32) -
                           tf.reduce_sum(tf.exp(logsigm), 1) -
                           tf.reduce_sum(tf.square(mean), 1)
                           )  # KLD(q(z|x), N(0,1))     tensor [batch_size]
            utils.variable_summaries(
                'klds', klds)  # posterior distribution close to prior N(0,1)
            self.kld = tf.reduce_mean(klds, 0)  # mean over batches: scalar

            h_ = tf.get_variable("GO", [1, self.hidden_size],
                                 initializer=self.initializer)
            h_ = tf.tile(h_, [self.batch_size, 1
                              ])  # trainable tensor: decoder init_state[1]

            eps = tf.random_normal((self.batch_size, self.hidden_size), 0, 1)
            self.doc_vec = tf.multiply(
                tf.exp(logsigm), eps
            ) + mean  # sample from latent intent space: decoder init_state[0]
            self.doc_vec = self.doc_vec, h_  # tuple state Z, h
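
Note: the klds tensor above is a closed-form KL divergence between the diagonal Gaussian posterior and the standard normal prior. If logsigm is read as the log variance, the expression matches KL(N(mu, sigma^2) || N(0, I)) = -0.5 * sum(1 + log sigma^2 - sigma^2 - mu^2). A self-contained NumPy check under that reading (the numbers are made up, not from the project):

import numpy as np

mu = np.array([[0.3, -0.1]])      # [batch_size=1, d=2] posterior means
logvar = np.array([[-0.5, 0.2]])  # log variances

d = mu.shape[1]
kld = -0.5 * (logvar.sum(axis=1) + d
              - np.exp(logvar).sum(axis=1)
              - (mu ** 2).sum(axis=1))
print(kld)  # ~0.114: small and positive, since the posterior is close to N(0, I)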
Code example #14
    def add_classification_output_layer(self, last_hidden_layer, gt_labels, num_classes, corpus_tag, task_tag,
                                        loss_weight=1):
        # returns loss op
        with tf.variable_scope("output_layer_%s" % task_tag) as layer_scope:
            last_out = fully_connected(last_hidden_layer, num_classes, activation_fn=tf.identity,
                                       weights_regularizer=l1_l2_regularizer(self.l1_reg, self.l2_reg),
                                       scope=layer_scope)
            self.predictions = tf.nn.softmax(last_out)

        with tf.name_scope("%s_loss_%s" % (corpus_tag, task_tag)):
            loss = loss_weight * tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(last_out, gt_labels))
            utils.variable_summaries(loss, "loss", corpus_tag)
            tf.add_to_collection(tf.GraphKeys.LOSSES, loss)

        with tf.name_scope('%s_accuracy_%s' % (corpus_tag, task_tag)):
            # correct_prediction = tf.equal(tf.argmax(last_out, 1), gt_labels)
            # accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) * 100
            accuracy, _ = streaming_accuracy(tf.argmax(last_out, 1), gt_labels, name="acc_%s" % corpus_tag,
                                             updates_collections=tf.GraphKeys.UPDATE_OPS)

            utils.variable_summaries(accuracy, "accuracy", corpus_tag)

            updates_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            self.calculate_accuracy_op = control_flow_ops.with_dependencies(updates_op, accuracy)
Code example #15
File: training.py Project: wwxFromTju/me-trpo
 def policy_model(x, stochastic=0.0, collect_summary=False):
     assert (training_layers[0].shape[1] == x.shape[1])
     h = x
     for i, layer in enumerate(training_layers[1:]):
         w = layer.W
         b = layer.b
         pre_h = tf.matmul(h, w) + b
         h = layer.nonlinearity(pre_h, name='policy_out')
         if collect_summary:
             with tf.name_scope(scope_name + '/observation'):
                 variable_summaries(x)
             with tf.name_scope(scope_name + '/layer%d' % i):
                 with tf.name_scope('weights'):
                     variable_summaries(w)
                 with tf.name_scope('biases'):
                     variable_summaries(b)
                 with tf.name_scope('Wx_plus_b'):
                     tf.summary.histogram('pre_activations', pre_h)
                 tf.summary.histogram('activations', h)
     std = training_policy._l_std_param.param
     h += stochastic * tf.random_normal(
         shape=(tf.shape(x)[0], n_actions)) * tf.exp(std)
     return h
Code example #16
File: models.py Project: fhahaha/fgsm-attack
def SimpleNet1(x,
               input_shape,
               neurons=1024,
               n_classes=10,
               non_linearity='relu',
               create_summaries=True):
    h = x
    h, output_shape = l.flatten(input_shape, h)
    h, output_shape = l.linear(output_shape, neurons, h, name='linear1')
    if create_summaries:
        utils.variable_summaries(h, name='linear-comb-hidden-layer')

    h = l.non_linearity(h, name=non_linearity)
    if create_summaries:
        utils.variable_summaries(h, name='activation-hidden-layer')
        sparsity = tf.nn.zero_fraction(h,
                                       name='activation-hidden-layer-sparsity')
        tf.summary.scalar(sparsity.op.name, sparsity)

    logits, output_shape = l.linear(output_shape, n_classes, h, name='output')
    if create_summaries:
        utils.variable_summaries(logits, name='unscaled-logits-output-layer')

    return logits
Code example #17
    def build(self):
        self.init_variables()

        ## batchsize x 5 x labelemb
        self.yemb = tf.nn.embedding_lookup(self.labelemb,
                                           self.ys_,
                                           name='yemb')

        ## batchsize x 10 x labelemb
        self.negemb = tf.nn.embedding_lookup(self.labelemb,
                                             self.negsamples,
                                             name='negemb')
        # rnnin = [tf.zeros(shape=(tf.shape(yemb)[0], 1)) for i in range(5)]
        log.info('input label embedding-{}'.format(self.yemb.get_shape()))
        log.info('negative sample embedding-{}'.format(
            self.negemb.get_shape()))

        rnnin = [self.inputs for i in range(self.numfuncs)]
        rnnout, rnn_final_states = tf.nn.static_rnn(self.lstmcell,
                                                    rnnin,
                                                    dtype=tf.float32)
        #initial_state=self.inputs
        #)
        # log.info('rnnout shape {}'.format(rnnout.get_shape()))
        rflat = tf.reshape(rnnout, shape=[-1, self.lstm_statesize])

        # batchsize*5 x labeldim
        self.output = tf.nn.l2_normalize(tf.nn.softplus(
            tf.matmul(rflat, self.output_weights) + self.output_bias,
            name='yhat'),
                                         axis=1)

        log.info('final decoder out shape {}'.format(self.output.get_shape()))
        # ipdb.set_trace()
        self.transformed_y = tf.nn.l2_normalize(tf.matmul(
            tf.reshape(self.yemb, shape=[-1, self.label_dimensions]),
            self.ytransform),
                                                axis=1)

        variable_summaries(self.transformed_y)
        # batch size*10 x labeldim
        self.transformed_negsamples = tf.nn.l2_normalize(tf.matmul(
            tf.reshape(self.negemb, shape=[-1, self.label_dimensions]),
            self.ytransform),
                                                         axis=1)

        variable_summaries(self.ytransform)
        # batchsize *5 x 1
        self.cosinesim_pos = tf.reduce_sum(tf.multiply(self.output,
                                                       self.transformed_y),
                                           axis=1)

        # batchsize *5 x batchsize*10
        self.cosinesim_neg = tf.matmul(
            self.output, tf.transpose(self.transformed_negsamples))

        # batchsize *5 x 1
        self.min_neg_dist = tf.reduce_min(self.cosinesim_neg, axis=1)

        self.loss = tf.reduce_mean(
            tf.exp(self.cosinesim_pos, name='posdist') /
            (tf.exp(self.min_neg_dist, name='negdist') + tf.constant(1e-3)),
            name='loss')

        tf.summary.scalar('loss', self.loss)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate)
        self.train = self.optimizer.minimize(self.loss)

        self.summary = tf.summary.merge_all()
        # self.predictions, self.precision, self.recall, self.f1 = self.make_prediction()
        self.predictions = self.make_prediction()
        return self
Code example #18
    def negQ(self, x, y, reuse=False):
        """Architecture of the neural network"""
        print('x shape', x.get_shape())
        print('y shape', y.get_shape())
        szs = self.layers_dim
        assert (len(szs) >= 1)
        fc = tflearn.fully_connected
        bn = tflearn.batch_normalization
        lrelu = tflearn.activations.leaky_relu

        if reuse:
            tf.get_variable_scope().reuse_variables()

        nLayers = len(szs)
        us = []
        zs = []
        z_zs = []
        z_ys = []
        z_us = []

        reg = 'L2'

        prevU = x
        for i in range(nLayers):
            with tf.variable_scope('u' + str(i), reuse=reuse) as s:
                u = fc(prevU, szs[i], reuse=reuse, scope=s, regularizer=reg)
                if i < nLayers - 1:
                    u = tf.nn.relu(u)
                    if FLAGS.icnn_bn:
                        u = bn(u, reuse=reuse, scope=s, name='bn')
            variable_summaries(u, suffix='u{}'.format(i))
            us.append(u)
            prevU = u

        prevU, prevZ = x, y
        for i in range(nLayers + 1):
            sz = szs[i] if i < nLayers else 1
            z_add = []
            if i > 0:
                with tf.variable_scope('z{}_zu_u'.format(i), reuse=reuse) as s:
                    zu_u = fc(prevU, szs[i - 1], reuse=reuse, scope=s,
                              activation='relu', bias=True,
                              regularizer=reg, bias_init=tf.constant_initializer(1.))
                    variable_summaries(zu_u, suffix='zu_u{}'.format(i))
                with tf.variable_scope('z{}_zu_proj'.format(i), reuse=reuse) as s:
                    z_zu = fc(tf.multiply(prevZ, zu_u), sz, reuse=reuse, scope=s,
                              bias=False, regularizer=reg)
                    variable_summaries(z_zu, suffix='z_zu{}'.format(i))
                z_zs.append(z_zu)
                z_add.append(z_zu)

            with tf.variable_scope('z{}_yu_u'.format(i), reuse=reuse) as s:
                yu_u = fc(prevU, self.dimA, reuse=reuse, scope=s, bias=True,
                          regularizer=reg, bias_init=tf.constant_initializer(1.))
                variable_summaries(yu_u, suffix='yu_u{}'.format(i))
            with tf.variable_scope('z{}_yu'.format(i), reuse=reuse) as s:
                z_yu = fc(tf.multiply(y, yu_u), sz, reuse=reuse, scope=s, bias=False,
                          regularizer=reg)
                z_ys.append(z_yu)
                variable_summaries(z_yu, suffix='z_yu{}'.format(i))
            z_add.append(z_yu)

            with tf.variable_scope('z{}_u'.format(i), reuse=reuse) as s:
                z_u = fc(prevU, sz, reuse=reuse, scope=s,
                         bias=True, regularizer=reg,
                         bias_init=tf.constant_initializer(0.))
                variable_summaries(z_u, suffix='z_u{}'.format(i))
            z_us.append(z_u)
            z_add.append(z_u)

            z = tf.add_n(z_add)
            variable_summaries(z, suffix='z{}_preact'.format(i))
            if i < nLayers:
                # z = tf.nn.relu(z)
                z = lrelu(z, alpha=FLAGS.lrelu)
                variable_summaries(z, suffix='z{}_act'.format(i))

            zs.append(z)
            prevU = us[i] if i < nLayers else None
            prevZ = z

        print('z shape', z.get_shape())
        z = tf.reshape(z, [-1], name='energies')
        return z
Code example #19
def train(ARGS):
    # Define helper function for evaluating on test data during training
    def eval(epoch):
        from train_utils import clean_eval
        test_accuracy, test_loss, _ = clean_eval(sess, x, y, is_training,
                                                 testloader, n_classes, logits,
                                                 preds)
        # Write tensorboard summary
        acc_summary = tf.Summary()
        acc_summary.value.add(tag='Evaluation/accuracy/test',
                              simple_value=test_accuracy)
        writer_test.add_summary(acc_summary, epoch)

        # Write tensorboard summary
        err_summary = tf.Summary()
        err_summary.value.add(tag='Evaluation/error/test',
                              simple_value=1.0 - test_accuracy)
        writer_test.add_summary(err_summary, epoch)

        # Write tensorboard summary
        loss_summary = tf.Summary()
        loss_summary.value.add(tag='Evaluation/loss/test',
                               simple_value=test_loss)
        writer_test.add_summary(loss_summary, epoch)

    # Define helper function for evaluating on adversarial test data during training
    def adv_eval(epoch):
        from train_utils import adversarial_eval
        adv_accuracy, adv_loss = adversarial_eval(sess,
                                                  x,
                                                  y,
                                                  is_training,
                                                  adv_testloader,
                                                  n_classes,
                                                  preds,
                                                  adv_preds,
                                                  eval_all=True)

        # Write tensorboard summary
        acc_summary = tf.Summary()
        acc_summary.value.add(tag='Evaluation/adversarial-accuracy/test',
                              simple_value=adv_accuracy)
        writer_test.add_summary(acc_summary, epoch)

        # Write tensorboard summary
        err_summary = tf.Summary()
        err_summary.value.add(tag='Evaluation/adversarial-error/test',
                              simple_value=1.0 - adv_accuracy)
        writer_test.add_summary(err_summary, epoch)

        # Write tensorboard summary
        loss_summary = tf.Summary()
        loss_summary.value.add(tag='Evaluation/adversarial-loss/test',
                               simple_value=adv_loss)
        writer_test.add_summary(loss_summary, epoch)

    # Define computational graph
    with tf.Graph().as_default() as g:
        # Define placeholders
        with tf.device('/gpu:0'):
            with tf.name_scope('Placeholders'):
                x = tf.placeholder(dtype=tf.float32,
                                   shape=input_shape,
                                   name='inputs')
                x_pair1 = tf.placeholder(dtype=tf.float32,
                                         shape=input_shape,
                                         name='x-pair1')
                x_pair2 = tf.placeholder(dtype=tf.float32,
                                         shape=input_shape,
                                         name='x-pair2')
                y = tf.placeholder(dtype=tf.float32,
                                   shape=(None, n_classes),
                                   name='labels')
                is_training = tf.placeholder_with_default(True,
                                                          shape=(),
                                                          name='is-training')

        # Define TF session
        config = tf.ConfigProto(log_device_placement=False,
                                allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        sess = tf.Session(graph=g, config=config)

        # Define model
        with tf.name_scope('Model'):
            with tf.device('/gpu:0'):
                model = Model(nb_classes=n_classes,
                              input_shape=input_shape,
                              is_training=is_training)

                # Define forward-pass
                with tf.name_scope('Logits'):
                    logits = model.get_logits(x)
                with tf.name_scope('Probs'):
                    preds = tf.nn.softmax(logits)

                with tf.name_scope('Accuracy'):
                    ground_truth = tf.argmax(y, axis=1)
                    predicted_label = tf.argmax(preds, axis=1)
                    correct_prediction = tf.equal(predicted_label,
                                                  ground_truth)
                    acc = tf.reduce_mean(tf.to_float(correct_prediction),
                                         name='accuracy')
                    tf.add_to_collection('accuracies', acc)

                    err = tf.identity(1.0 - acc, name='error')
                    tf.add_to_collection('accuracies', err)

                # Define losses
                with tf.name_scope('Losses'):
                    ce_loss, wd_loss, clp_loss, lsq_loss, at_loss, alp_loss = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
                    adv_logits = None

                    if ARGS.ct:
                        with tf.name_scope('Cross-Entropy-Loss'):
                            ce_loss = tf.reduce_mean(
                                tf.nn.softmax_cross_entropy_with_logits(
                                    logits=logits, labels=y),
                                name='cross-entropy-loss')

                            tf.add_to_collection('losses', ce_loss)

                    if ARGS.at:
                        with tf.name_scope('Adversarial-Cross-Entropy-Loss'):
                            at_loss, adv_logits = get_at_loss(
                                sess, x, y, model, ARGS.eps, ARGS.eps_iter,
                                ARGS.nb_iter)
                            at_loss = tf.identity(at_loss, name='at-loss')
                            tf.add_to_collection('losses', at_loss)

                    with tf.name_scope('Regularizers'):
                        if ARGS.wd:
                            with tf.name_scope('Weight-Decay'):
                                for var in tf.trainable_variables():
                                    if 'beta' in var.op.name:
                                        # Do not regularize bias of batch normalization
                                        continue
                                    # print('regularizing: ', var.op.name)
                                    wd_loss += tf.nn.l2_loss(var)

                                reg_loss = tf.identity(wd_loss, name='wd-loss')
                                tf.add_to_collection('losses', reg_loss)

                        if ARGS.alp:
                            with tf.name_scope('Adversarial-Logit-Pairing'):
                                alp_loss = get_alp_loss(
                                    sess, x, y, logits, adv_logits, model,
                                    ARGS.eps, ARGS.eps_iter, ARGS.nb_iter)

                                alp_loss = tf.identity(alp_loss,
                                                       name='alp-loss')
                                tf.add_to_collection('losses', alp_loss)

                        if ARGS.clp:
                            with tf.name_scope('Clean-Logit-Pairing'):
                                clp_loss = get_clp_loss(
                                    x_pair1, x_pair2, model)
                                clp_loss = tf.identity(clp_loss,
                                                       name='clp-loss')
                                tf.add_to_collection('losses', clp_loss)

                        if ARGS.lsq:
                            with tf.name_scope('Logit-Squeezing'):
                                lsq_loss = get_lsq_loss(x, model)
                                lsq_loss = tf.identity(lsq_loss,
                                                       name='lsq-loss')
                                tf.add_to_collection('losses', lsq_loss)

                    with tf.name_scope('Total-Loss'):
                        # Define objective function
                        total_loss = (ARGS.ct_lambda * ce_loss) + (
                            ARGS.at_lambda *
                            at_loss) + (ARGS.wd_lambda * wd_loss) + (
                                ARGS.clp_lambda *
                                clp_loss) + (ARGS.lsq_lambda * lsq_loss) + (
                                    ARGS.alp_lambda * alp_loss)

                        total_loss = tf.identity(total_loss, name='total-loss')
                        tf.add_to_collection('losses', total_loss)

                # Define PGD adversary
                with tf.name_scope('PGD-Attacker'):
                    pgd_params = {
                        'ord': np.inf,
                        'y': y,
                        'eps': ARGS.eps / 255,
                        'eps_iter': ARGS.eps_iter / 255,
                        'nb_iter': ARGS.nb_iter,
                        'rand_init': True,
                        'rand_minmax': ARGS.eps / 255,
                        'clip_min': 0.,
                        'clip_max': 1.,
                        'sanity_checks': True
                    }

                    pgd = ProjectedGradientDescent(model, sess=sess)
                    adv_x = pgd.generate(x, **pgd_params)

                    with tf.name_scope('Logits'):
                        adv_logits = model.get_logits(adv_x)
                    with tf.name_scope('Probs'):
                        adv_preds = tf.nn.softmax(adv_logits)

        # Define optimizer
        with tf.device('/gpu:0'):
            with tf.name_scope('Optimizer'):
                # Define global step variable
                global_step = tf.get_variable(
                    name='global_step',
                    shape=[],  # scalar
                    dtype=tf.float32,
                    initializer=tf.zeros_initializer(),
                    trainable=False)

                optimizer = tf.train.AdamOptimizer(learning_rate=ARGS.lr,
                                                   beta1=0.9,
                                                   beta2=0.999,
                                                   epsilon=1e-6,
                                                   use_locking=False,
                                                   name='Adam')
                trainable_vars = tf.trainable_variables()

                update_bn_ops = tf.get_collection(
                    tf.GraphKeys.UPDATE_OPS
                )  # this collection stores the moving_mean and moving_variance ops
                #  for batch normalization
                with tf.control_dependencies(update_bn_ops):
                    grads_and_vars = optimizer.compute_gradients(
                        total_loss, trainable_vars)
                    train_step = optimizer.apply_gradients(
                        grads_and_vars, global_step=global_step)

        # Add Tensorboard summaries
        with tf.device('/gpu:0'):
            # Create file writers
            writer_train = tf.summary.FileWriter(ARGS.log_dir + '/train',
                                                 graph=g)
            writer_test = tf.summary.FileWriter(ARGS.log_dir + '/test')

            # Add summary for input images
            with tf.name_scope('Image-Summaries'):
                # Create image summary ops
                tf.summary.image('input',
                                 x,
                                 max_outputs=2,
                                 collections=['training'])

            # Add summaries for the training losses
            losses = tf.get_collection('losses')
            for entry in losses:
                tf.summary.scalar(entry.name, entry, collections=['training'])

            # Add summaries for the training accuracies
            accs = tf.get_collection('accuracies')
            for entry in accs:
                tf.summary.scalar(entry.name, entry, collections=['training'])

            # Add summaries for all trainable vars
            for var in trainable_vars:
                tf.summary.histogram(var.op.name,
                                     var,
                                     collections=['training'])
                var_norm = tf.norm(var, ord='euclidean')
                tf.summary.scalar(var.op.name + '/l2norm',
                                  var_norm,
                                  collections=['training'])

            # Add summaries for variable gradients
            for grad, var in grads_and_vars:
                if grad is not None:
                    tf.summary.histogram(var.op.name + '/gradients',
                                         grad,
                                         collections=['training'])
                    grad_norm = tf.norm(grad, ord='euclidean')
                    tf.summary.scalar(var.op.name + '/gradients/l2norm',
                                      grad_norm,
                                      collections=['training'])

            # Add summaries for the logits and model predictions
            with tf.name_scope('Logits-Summaries'):
                variable_summaries(tf.identity(logits, name='logits'),
                                   name='logits',
                                   collections=['training', 'test'],
                                   histo=True)
            with tf.name_scope('Predictions-Summaries'):
                variable_summaries(tf.identity(preds, name='predictions'),
                                   name='predictions',
                                   collections=['training', 'test'],
                                   histo=True)

        # Initialize all variables
        with sess.as_default():
            tf.global_variables_initializer().run()

        # Collect training params
        train_params = {
            'epochs': ARGS.epochs,
            'eval_step': ARGS.eval_step,
            'adv_eval_step': ARGS.adv_eval_step,
            'n_classes': n_classes,
            'clp': ARGS.clp
        }

        # Start training loop
        model_train(sess,
                    x,
                    y,
                    x_pair1,
                    x_pair2,
                    is_training,
                    trainloader,
                    train_step,
                    args=train_params,
                    evaluate=eval,
                    adv_evaluate=adv_eval,
                    writer_train=writer_train)

        # Save the trained model
        if ARGS.save:
            save_path = os.path.join(ARGS.save_dir, ARGS.filename)
            saver = tf.train.Saver(var_list=tf.global_variables())
            saver.save(sess, save_path)
            print("Saved model at {:s}".format(str(ARGS.save_dir)))
Code example #20
File: main.py Project: fhahaha/fgsm-attack
def main(_):
    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)
    np.random.seed(1234)

    with tf.device('/cpu:0'):
        # Get time stamp
        experiment_ts = strftime("%H-%M-%S", localtime())

        # Create log and checkpoint dir for the current experiment
        FLAGS.log_dir += '/{:s}/{:s}'.format(FLAGS.optimizer, experiment_ts)
        utils.create_dir(FLAGS.log_dir)

        # Get training data
        X_train, Y_train, X_test, Y_test, X_val, Y_val = load_mnist(FLAGS.data_dir)

        # Add the validation data to the training data (60,000 training examples in total)
        X_train = np.append(X_train, X_val, axis=0)
        Y_train = np.append(Y_train, Y_val, axis=0)

        print('X_train shape: ', X_train.shape)
        print('X_test shape: ', X_test.shape)

        # Repeat training for all specified models
        for method in FLAGS.methods:

            # Create log dir
            log_dir = FLAGS.log_dir + '/{:s}'.format(method)
            utils.create_dir(log_dir)

            with tf.Graph().as_default() as g:

                with tf.device(FLAGS.device):
                    # Define placeholders for inputs
                    with tf.name_scope('Inputs'):
                        # Inputs
                        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1), name='X')
                        y = tf.placeholder(tf.float32, shape=(None, 10), name='y')

                    with tf.variable_scope('model') as scope:
                        with tf.name_scope('Training-Graph'):
                            # Define boundaries for learning rate decay
                            boundaries = None
                            # boundaries = [500.0, 800.0]
                            # Build the model
                            loss, train_op, global_step, grads_and_vars, optimizer, perturbation, x_adv = build_training_graph(
                                x,
                                y,
                                FLAGS.learning_rate,
                                method,
                                FLAGS.optimizer,
                                boundaries)

                            with tf.device('/cpu:0'):
                                # Add summaries for the training losses
                                with tf.name_scope('Loss-Summaries'):
                                    losses = tf.get_collection('losses')
                                    for entry in losses:
                                        tf.summary.scalar(entry.op.name, entry)

                                # Add histograms for all trainable variables and gradients
                                with tf.name_scope('Trainable-Variable-Summaries'):
                                    for var in tf.trainable_variables():
                                        utils.variable_summaries(var)
                                    for grad, var in grads_and_vars:
                                        if grad is not None:
                                            tf.summary.histogram(var.op.name + '/gradients', grad)
                                            grad_norm = tf.norm(grad, ord='euclidean')
                                            tf.summary.scalar(var.op.name + '/gradients/l2norm', grad_norm)

                        with tf.name_scope('Eval-Graph'):
                            # Create ops used for evaluating the model on test data
                            losses_eval, fgsm_perturbation, fgsm_x_adv = build_eval_graph(x, y, scope)

                        with tf.name_scope('Image-Summary'):
                            # Create image summary op for clean images
                            tf.summary.image('training-images', x, max_outputs=2)
                            if perturbation is not None and x_adv is not None:
                                # Create image summary op for FGSM adversarial images
                                tf.summary.image('fgsm-adversarial-training-perturbations', perturbation, max_outputs=2)
                                # Create image summary op for FGSM adversarial images
                                tf.summary.image('fgsm-adversarial-training-images', x_adv, max_outputs=2)

                    # Create init op
                    with tf.name_scope('Initializer'):
                        init_op = tf.global_variables_initializer()

                    # Create file writer for TensorBoard
                    with tf.device('/cpu:0'):
                        writer_train = tf.summary.FileWriter(log_dir + '/train', graph=g)
                        writer_test = tf.summary.FileWriter(log_dir + '/test')

                    # Merge all the summaries
                    merged = tf.summary.merge_all()

            # Create tf session
            config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
            config.gpu_options.allow_growth = True

            with tf.Session(graph=g, config=config) as sess:

                # Initialize all variables
                sess.run(init_op)

                print('\nStart training for {:d} epochs.'.format(FLAGS.epochs))

                # Compute number of batches
                batches = int(math.ceil(float(len(X_train)) / FLAGS.batch_size))
                print('Performing {:d} updates per epoch.'.format(batches))

                print('\nEvaluate on test data...')
                evaluate(sess, x, y, fgsm_perturbation, fgsm_x_adv, X_test, Y_test, -1, FLAGS.batch_size, losses_eval,
                         writer_test)

                # Training loop
                for epoch in range(FLAGS.epochs):

                    print('\nStart of epoch {:d}...'.format(epoch + 1))

                    # Write summaries for optimizer parameters
                    # TODO: For adaptive learning rate methods this does not log the adapted learning rate
                    if FLAGS.optimizer == 'vanilla':
                        learning_rate_val = optimizer._learning_rate
                    elif FLAGS.optimizer == 'momentum':
                        learning_rate_val = optimizer._learning_rate_tensor
                    elif FLAGS.optimizer == 'adagrad':
                        learning_rate_val = optimizer._learning_rate_tensor
                    elif FLAGS.optimizer == 'adam':
                        learning_rate_val = optimizer._lr_t
                    else:
                        raise NotImplementedError

                    learning_rate_val = sess.run(learning_rate_val)
                    summary = tf.Summary()
                    summary.value.add(tag='Optimizer/{:s}'.format('learning-rate'), simple_value=learning_rate_val)
                    writer_train.add_summary(summary, epoch)

                    # Shuffle training data
                    indices = list(range(len(X_train)))
                    np.random.shuffle(indices)

                    # Iterate through the training data in batches
                    sum_batch_loss = 0.0
                    for batch in range(batches):
                        # Compute batch start and end indices
                        start, end = batch_indices(
                            batch, len(X_train), FLAGS.batch_size)

                        feed_dict = {x: X_train[indices[start:end]],
                                     y: Y_train[indices[start:end]]}

                        # Perform a single step of stochastic gradient descent
                        batch_summaries, _, batch_loss, step = sess.run([merged, train_op, loss, global_step],
                                                                        feed_dict=feed_dict)

                        # Accumulate the loss
                        sum_batch_loss += batch_loss

                        # Write tensorboard summaries
                        writer_train.add_summary(batch_summaries, step)

                    print('Epoch: {:d}, Cross-Entropy-Loss (training data): {:.4f}'.format(epoch + 1,
                                                                                           sum_batch_loss / batches))

                    # Evaluate on test data
                    if epoch % FLAGS.eval_step == 0 or epoch + 1 == FLAGS.epochs:
                        print('\nEvaluate on test data...')
                        evaluate(sess, x, y, fgsm_perturbation, fgsm_x_adv, X_test, Y_test, epoch, FLAGS.batch_size,
                                 losses_eval, writer_test)

                print('\nPerformed {:.2f} training iterations.'.format(sess.run(global_step)))
Code example #21
File: main.py Project: fhahaha/fgsm-attack
def build_training_graph(x, y, learning_rate, method, optimizer, boundaries):
    print('\nBuilding training graph for method {:s}'.format(method))
    print('Using optimizer {:s}'.format(optimizer))

    # Define global step variable
    global_step = tf.get_variable(
        name='global_step',
        shape=[],  # scalar
        dtype=tf.float32,
        initializer=tf.zeros_initializer(),
        trainable=False
    )
    # Build the network
    with tf.name_scope('Logits'):
        logits = train_utils.forward(x, create_summaries=True)

    # Build the network
    with tf.name_scope('Predictions'):
        predictions = layers.softmax(logits)
        utils.variable_summaries(predictions, name='softmax-predictions')

    # Create an op for the loss
    with tf.name_scope('Cross-Entropy-Loss'):
        ce_loss = layers.cross_entropy_loss(logits, y)
        tf.add_to_collection('losses', ce_loss)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):

        if method == 'random':
            with tf.name_scope('RND-Adversarial-Training'):
                rnd_loss, perturbation, x_adv = train_utils.random_loss(x, y, ord='l2', epsilon=3.0)
                additional_loss = rnd_loss
                tf.add_to_collection('losses', rnd_loss)
        elif method == 'advt':
            with tf.name_scope('FGSM-Adversarial-Training'):
                advt_loss, perturbation, x_adv = train_utils.adversarial_loss(x, y, ord='l2', epsilon=3.0)
                additional_loss = advt_loss
                tf.add_to_collection('losses', advt_loss)
        else:
            perturbation = None
            x_adv = None

    # Create an op for the total loss
    with tf.name_scope('Loss'):
        if method == 'advt' or method == 'random':
            loss = (ce_loss + additional_loss) / 2
            loss = tf.identity(loss, name='total-loss')
            tf.add_to_collection('losses', loss)
        else:
            loss = ce_loss
            loss = tf.identity(loss, name='total-loss')
            tf.add_to_collection('losses', loss)

    # Create the optimizer
    with tf.name_scope('Optimizer'):

        # Implement additional learning rate decay
        if boundaries is not None:
            print('Using piecewise constant learning rate decay with boundaries {0}'.format(boundaries))
            values = [0.1, 0.05, 0.025]
            learning_rate = tf.train.piecewise_constant(global_step, boundaries, values)
        else:
            learning_rate = tf.constant(learning_rate)

        if optimizer == 'vanilla':
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        elif optimizer == 'momentum':
            optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=.5)
        elif optimizer == 'adagrad':
            optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate, initial_accumulator_value=.1)
        elif optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        else:
            raise NotImplementedError

        trainable_vars = tf.trainable_variables()
        grads_and_vars = optimizer.compute_gradients(loss, trainable_vars)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

    return loss, train_op, global_step, grads_and_vars, optimizer, perturbation, x_adv
Code example #22
def train_and_test(X_train,
                   Y_train,
                   X_test,
                   Y_test,
                   batch_size=1000,
                   learning_rate=0.5,
                   n_epochs=1000):
    batch_size = min(min(len(X_train), len(X_test)), batch_size)
    D = len(X_train[0])
    num_class = len(np.unique(np.append(Y_train, Y_test)))

    x = tf.placeholder(tf.float32, [batch_size, D])
    y = tf.placeholder(tf.float32, [batch_size, num_class])

    W = tf.Variable(tf.zeros([D, num_class]))
    b = tf.Variable(tf.zeros([num_class]))

    pred_y = tf.matmul(x, W) + b
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred_y))

    train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
    print('Optimization starting!')

    tf.summary.histogram('y', y)
    variable_summaries(W)
    variable_summaries(b)
    tf.summary.histogram('pred_y', pred_y)
    tf.summary.scalar('cross_entropy', cross_entropy)
    merged = tf.summary.merge_all()

    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter(summaries_dir + '/train',
                                             sess.graph)
        test_writer = tf.summary.FileWriter(summaries_dir + '/test')
        tf.global_variables_initializer().run()
        n_batches = int(len(X_train) / batch_size)
        for iter in range(n_epochs):  # train the model n_epochs times
            # print iter
            total_loss = 0
            for j in range(n_batches):
                # print j
                X_batch, Y_batch = _get_batch_data(X_train, Y_train,
                                                   batch_size, j)
                Y_batch = generate_one_hot_num_array(Y_batch, num_class)
                curr_step, curr_entropy, summary = sess.run(
                    [train_step, cross_entropy, merged],
                    feed_dict={
                        x: X_batch,
                        y: Y_batch
                    })
                train_writer.add_summary(summary, iter * n_batches + j)
                total_loss += curr_entropy

                if j % 100 == 0:
                    print('Average loss epoch {0} {1}: {2}'.format(
                        iter, j, total_loss / (j + 1)))

            print('Average loss epoch {0}: {1}'.format(
                iter, total_loss / n_batches))

        print('Optimization Finished!')  # should be around 0.35 after 25 epochs

        # test the model
        preds = tf.nn.softmax(pred_y)
        correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_sum(tf.cast(
            correct_preds, tf.float32))  # need numpy.count_nonzero(boolarr) :(

        n_batches = int(len(X_test) / batch_size)
        total_correct_preds = 0

        for iter in range(n_batches):
            X_batch, Y_batch = _get_batch_data(X_test, Y_test, batch_size,
                                               iter)
            Y_batch = generate_one_hot_num_array(Y_batch, num_class)
            accuracy_batch, summary = sess.run([accuracy, merged],
                                               feed_dict={
                                                   x: X_batch,
                                                   y: Y_batch
                                               })
            test_writer.add_summary(summary, iter)
            total_correct_preds += accuracy_batch

        print('Accuracy {0}'.format(total_correct_preds / len(X_test)))
        train_writer.close()
        test_writer.close()
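
The training loop above relies on _get_batch_data and generate_one_hot_num_array, which are not shown; a plausible sketch of both helpers, inferred only from how they are called (an assumption, not the project's actual code):

import numpy as np

def _get_batch_data(X, Y, batch_size, batch_index):
    # Assumed behaviour: slice out the batch_index-th mini-batch.
    start = batch_index * batch_size
    return X[start:start + batch_size], Y[start:start + batch_size]

def generate_one_hot_num_array(labels, num_class):
    # Assumed behaviour: convert integer class labels to one-hot float rows.
    labels = np.asarray(labels, dtype=np.int64)
    one_hot = np.zeros((len(labels), num_class), dtype=np.float32)
    one_hot[np.arange(len(labels)), labels] = 1.0
    return one_hot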
コード例 #23
0
ファイル: distributions.py プロジェクト: stjordanis/vi-hds
def build_q_global_cond(PARAMETERS,
                        devs,
                        conds,
                        verbose,
                        kernel_regularizer=None,
                        use_bias=False,
                        stop_grad=False):
    # make a distribution that has "log_prob(theta)" and "sample()"
    q_global_cond = ChainedDistribution(name="q_global_cond")

    if not hasattr(PARAMETERS, "g_c"):
        print("- Found no global conditional params")
        return q_global_cond

    distribution_descriptions = PARAMETERS.g_c

    for distribution_name in distribution_descriptions.list_of_params:
        description = getattr(distribution_descriptions, distribution_name)

        conditioning = description.defaults['c']  # <-- not a tensor

        if verbose:
            print("build_q_global_cond::%s" % distribution_name)
        params = OrderedDict()
        for free_name, constrained_name, free_to_constrained in zip(
                description.free_params, description.params,
                description.free_to_constrained):
            to_concat = []
            if conditioning is not None:  # collect tensors to concat
                if verbose:
                    print("- Conditioning parameter %s.%s" %
                          (distribution_name, free_name))
                if conditioning['treatments']:
                    to_concat.append(conds)
                if conditioning['devices']:
                    to_concat.append(devs)

            mlp_inp = tf.concat(to_concat, axis=1)
            # map the sample from the prior, together with the conditioning information, through a 1-layer NN
            tf_free_param = tf.layers.dense(
                mlp_inp,
                units=1,
                use_bias=use_bias,
                name='%s_%s' % (distribution_name, free_name),
                kernel_regularizer=kernel_regularizer)
            if stop_grad:
                tf_free_param = tf.stop_gradient(tf_free_param)
            name = os.path.split(tf_free_param.name)[0]
            variable_summaries(
                tf.get_default_graph().get_tensor_by_name(name + '/kernel:0'),
                'nn_weights_%s' % name)
            tf_constrained_param = constrain_parameter(tf_free_param,
                                                       free_to_constrained,
                                                       distribution_name,
                                                       constrained_name)

            params[free_name] = tf_free_param
            params[constrained_name] = tf_constrained_param

        for other_param_name, other_param_value in description.other_params.items():
            params[other_param_name] = other_param_value

        new_distribution = description.class_type(wait_for_assigned=True,
                                                  variable=True)
        new_distribution.assign_free_and_constrained(**params)

        q_global_cond.add_distribution(distribution_name, new_distribution)

    return q_global_cond
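
constrain_parameter is defined elsewhere in vi-hds and is not reproduced here; purely as an illustration of the free-to-constrained idea (an assumption, not the project's actual implementation), such a mapping might dispatch on the free_to_constrained tag:

import tensorflow as tf

def constrain_parameter(free_param, free_to_constrained, distribution_name, constrained_name):
    # Hypothetical sketch: map an unconstrained tensor into its constrained space.
    name = '%s_%s' % (distribution_name, constrained_name)
    if free_to_constrained == 'identity':
        return tf.identity(free_param, name=name)
    if free_to_constrained == 'exp':  # e.g. strictly positive scale parameters
        return tf.exp(free_param, name=name)
    raise NotImplementedError(free_to_constrained)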
コード例 #24
0
    def set_up(self):
        spec = load_config_file(self.args.yaml)  # spec is a dict of dicts of dicts

        # Import the correct model
        self.params_dict = apply_defaults(spec["params"])

        # time some things, like epoch time
        start_time = time.time()

        # ---------------------------------------- #
        #     DEFINE XVAL DATASETS                 #
        # ---------------------------------------- #

        # Create self.dataset_pair: DatasetPair containing train and val Datasets.
        self._prepare_data(spec["data"])
        # Number of instances to put in a training batch.
        self.n_batch = min(self.params_dict['n_batch'], self.dataset_pair.n_train)

        # This is already a model object because of the "!!python/object:..." tag in the YAML file.
        model = self.params_dict["model"]
        # Set various attributes of the model
        model.init_with_params(self.params_dict, self.procdata.relevance_vectors)
        
        # Import priors from YAML
        parameters = Parameters()
        parameters.load(self.params_dict)

        print("----------------------------------------------")
        if self.args.verbose:
            print("parameters:")
            parameters.pretty_print()
        n_vals = LocalAndGlobal.from_list(parameters.get_parameter_counts())
        self.n_theta = n_vals.sum()

        #     TENSORFLOW PARTS        #
        self.placeholders = Placeholders(self.dataset_pair, n_vals)

        # feed_dicts supply the placeholders; these cover the entire train/val dataset (a per-batch version is created below).
        self._create_feed_dicts()

        # time-series of species differences: x_delta_obs is BATCH x (nTimes-1) x nSpecies
        x_delta_obs = self.placeholders.x_obs[:, 1:, :] - self.placeholders.x_obs[:, :-1, :]

        # DEFINE THE ENCODER NN: for LOCAL PARAMETERS
        print("Set up encoder")
        self.encoder = Encoder(self.args.verbose, parameters, self.placeholders, x_delta_obs)

        # DEFINE THE DECODER NN
        print("Set up decoder")
        self.decoder = Decoder(self.args.verbose, self.params_dict, self.placeholders, self.dataset_pair.times, self.encoder)

        # DEFINE THE OBJECTIVE and GRADIENTS
        # likelihood p (x | theta)
        print("Set up objective")
        self.objective = Objective(self.encoder, self.decoder, model, self.placeholders)

        # SET-UP tensorflow LEARNING/OPTIMIZER
        self.training_stepper = TrainingStepper(self.args.dreg, self.encoder, self.objective, self.params_dict)
        time_interval = time.time() - start_time
        print("Time before sess: %g" % time_interval)

        # TENSORBOARD VISUALIZATION            #
        ts_to_vis = 1
        self.encoder.q.attach_summaries()  # global and local parameters of q distribution
        unnormed_iw = self.objective.log_unnormalized_iws[ts_to_vis, :]
        self_normed_iw = self.objective.normalized_iws[ts_to_vis, :]   # not in log space
        with tf.name_scope('IWS'):
            variable_summaries(unnormed_iw, 'iws_unn_log')
            variable_summaries(self_normed_iw, 'iws_normed')
            tf.summary.scalar('nonzeros', tf.count_nonzero(self_normed_iw))

        #print(tf.shape(log_p_observations))
        with tf.name_scope('ELBO'):
            tf.summary.scalar('log_p', tf.reduce_mean(self.training_stepper.logsumexp_log_p))  # [batch, 1]
            tf.summary.scalar('log_prior', tf.reduce_mean(self.training_stepper.logsumexp_log_p_theta))
            tf.summary.scalar('log_q', tf.reduce_mean(self.training_stepper.logsumexp_log_q_theta))
            tf.summary.scalar('elbo', self.objective.elbo)
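
The scalars and histograms attached above only reach TensorBoard once they are merged and written during training; a minimal sketch of that step, assuming a session sess, a feed dict batch_feed_dict, a log directory log_dir and an epoch counter (all of these names are assumptions):

merged_summaries = tf.summary.merge_all()
writer = tf.summary.FileWriter(log_dir, sess.graph)
summary = sess.run(merged_summaries, feed_dict=batch_feed_dict)
writer.add_summary(summary, global_step=epoch)
writer.flush()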
コード例 #25
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = segmentModel(config, is_training, features, embedding)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = model_utils.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            utils.variable_summaries(var)
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:
            (total_loss, per_example_loss, label_ids, prediction,
             seq_length) = model.get_all_results()

            weight = tf.sequence_mask(seq_length, dtype=tf.int64)
            accuracy = tf.metrics.accuracy(label_ids,
                                           prediction,
                                           weights=weight)

            tf.summary.scalar('accuracy', accuracy[1])
            l2_reg_lamda = config.l2_reg_lamda
            clip = 5

            with tf.variable_scope('train_op'):
                optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
                l2_loss = tf.add_n([
                    tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1
                ])
                total_loss = total_loss + l2_reg_lamda * l2_loss
                grads, _ = tf.clip_by_global_norm(
                    tf.gradients(total_loss, tvars), clip)
                global_step = tf.train.get_or_create_global_step()
                train_op = optimizer.apply_gradients(zip(grads, tvars),
                                                     global_step=global_step)

            logging_hook = tf.train.LoggingTensorHook(
                {"accuracy": accuracy[1]}, every_n_iter=100)

            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                training_hooks=[logging_hook])
        elif mode == tf.estimator.ModeKeys.EVAL:
            (total_loss, per_example_loss, label_ids, prediction,
             seq_length) = model.get_all_results()
            loss = tf.metrics.mean(per_example_loss)

            weight = tf.sequence_mask(seq_length, dtype=tf.int64)
            accuracy = tf.metrics.accuracy(label_ids,
                                           prediction,
                                           weights=weight)
            metrics = {"eval_loss": loss, "eval_accuracy": accuracy}

            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     loss=total_loss,
                                                     eval_metric_ops=metrics)
        else:
            input_ids = features["input_ids"]
            label_ids = features["label_ids"]
            (_, _, _, prediction, seq_length) = model.get_all_results()
            predictions = {
                "input_ids": input_ids,
                "prediction": prediction,
                "ground_truths": label_ids,
                "length": seq_length
            }
            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     predictions=predictions)
        return output_spec
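
A model_fn like the one above is normally handed to an Estimator and driven through input functions; a hedged wiring sketch, where model_dir, train_input_fn, eval_input_fn and num_train_steps are assumed names rather than anything defined in this snippet:

estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir=model_dir,
    config=tf.estimator.RunConfig(save_summary_steps=100))
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
eval_metrics = estimator.evaluate(input_fn=eval_input_fn)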
コード例 #26
0
    def __init__(self, hyperparams, train_config):

        self.train_config = train_config

        # create placeholder
        self.u = tf.placeholder(tf.int32, [None])  # [B]
        self.i = tf.placeholder(tf.int32, [None])  # [B]
        self.y = tf.placeholder(tf.float32, [None])  # [B]
        self.w = tf.placeholder(tf.float32, [None])  # [B]
        self.lr = tf.placeholder(tf.float32, [], name='learning_rate')

        # -- create embed begin ----
        user_emb_w = tf.get_variable(
            "user_emb_w",
            [hyperparams['num_users'], hyperparams['user_embed_dim']])
        item_emb_w = tf.get_variable(
            "item_emb_w",
            [hyperparams['num_items'], hyperparams['item_embed_dim']])
        user_b = tf.get_variable("user_b", [hyperparams['num_users']],
                                 initializer=tf.constant_initializer(0.0))
        item_b = tf.get_variable("item_b", [hyperparams['num_items']],
                                 initializer=tf.constant_initializer(0.0))
        # -- create embed end ----

        # -- embed begin -------
        u_emb = tf.nn.embedding_lookup(user_emb_w, self.u)
        i_emb = tf.nn.embedding_lookup(item_emb_w, self.i)
        u_b = tf.gather(user_b, self.u)  # [B]
        i_b = tf.gather(item_b, self.i)  # [B]
        # -- embed end -------

        interaction = tf.reduce_sum(u_emb * i_emb, axis=-1)  # [B]
        self.logits = interaction + u_b + i_b  # [B]
        self.scores = tf.nn.sigmoid(
            self.logits)  # sigmoid of the logits, used at inference time

        variable_summaries(self.logits, 'logits')
        variable_summaries(self.scores, 'scores')

        # element-wise loss with the same shape as the inputs; with x = logits, z = labels: z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
        self.losses = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=self.logits, labels=self.y)
        variable_summaries(self.losses, 'loss')

        self.loss = tf.reduce_mean(self.losses * self.w)  # for training loss

        # global update step variable
        self.global_step = tf.Variable(0, trainable=False, name='global_step')

        # optimizer
        if train_config['optimizer'] == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
        elif train_config['optimizer'] == 'rmsprop':
            optimizer = tf.train.RMSPropOptimizer(learning_rate=self.lr)
        else:
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=self.lr)

        # compute gradients and different update step
        trainable_params = tf.trainable_variables()
        grads = tf.gradients(
            self.loss, trainable_params
        )  # return a list of gradients (A list of `sum(dy/dx)` for each x in `xs`)
        clip_grads, _ = tf.clip_by_global_norm(grads, 5)
        clip_grads_tuples = zip(clip_grads, trainable_params)
        self.train_op = optimizer.apply_gradients(clip_grads_tuples,
                                                  global_step=self.global_step)

        self.merged = tf.summary.merge_all()
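
One training step for this model would feed the placeholders and log the merged summaries roughly as follows; sess, writer, the *_batch arrays and lr_value are assumed names:

feed = {model.u: u_batch, model.i: i_batch, model.y: y_batch,
        model.w: w_batch, model.lr: lr_value}
_, loss_value, summary, step = sess.run(
    [model.train_op, model.loss, model.merged, model.global_step], feed_dict=feed)
writer.add_summary(summary, global_step=step)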
コード例 #27
0
def create_network(img_size, num_channels, num_classes, shape1, shape2, num_fc_layer1_output, num_fc_layer2_output, learning_rate, bn=False):

    # PLACEHOLDER VARIABLES
    x = tf.placeholder(tf.float32, shape=[None, img_size * img_size * num_channels], name='x')
    x_image = tf.reshape(x, [-1, img_size, img_size, num_channels])

    y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true')
    y_true_cls = tf.argmax(y_true, axis=1)

    fc_layer1_keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    phase_train = tf.placeholder(tf.bool, name='phase_train')

    placeholders = {
        'x': x,
        'x_image': x_image,
        'y_true': y_true,
        'y_true_cls': y_true_cls,
        'fc_layer1_keep_prob': fc_layer1_keep_prob,
        'phase_train': phase_train
    }

    # CONVOLUTIONAL LAYER 1
    with tf.variable_scope('conv_1'):
        conv_weights1 = tf.Variable(tf.truncated_normal(shape1, stddev=0.05))
        conv_biases1 = tf.Variable(tf.constant(0.05, shape=[shape1[3]]))

        conv_layer1 = tf.nn.conv2d(input=x_image, filter=conv_weights1, strides=[1, 1, 1, 1], padding='SAME') + conv_biases1

    # POOLING LAYER 1
    with tf.variable_scope('pool_1'):
        conv_layer1 = tf.nn.max_pool(value=conv_layer1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # BATCH NORMALIZATION
    if bn:
        with tf.variable_scope('bn_1'):
            conv_layer1 = batch_norm(conv_layer1, phase_train)

    # RELU
    with tf.variable_scope('relu_1'):
        conv_layer1 = tf.nn.relu(conv_layer1)

    # CONVOLUTIONAL LAYER 2
    with tf.variable_scope('conv_2'):
        conv_weights2 = tf.Variable(tf.truncated_normal(shape2, stddev=0.05))
        conv_biases2 = tf.Variable(tf.constant(0.05, shape=[shape2[3]]))

        conv_layer2 = tf.nn.conv2d(input=conv_layer1, filter=conv_weights2, strides=[1, 1, 1, 1], padding='SAME') + conv_biases2

    # POOLING LAYER 2
    with tf.variable_scope('pool_2'):
        conv_layer2 = tf.nn.max_pool(value=conv_layer2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # BATCH NORMALIZATION
    if bn:
        with tf.variable_scope('bn_2'):
            conv_layer2 = batch_norm(conv_layer2, phase_train)

    # RELU
    with tf.variable_scope('relu_2'):
        conv_layer2 = tf.nn.relu(conv_layer2)

    # FLATTEN LAYER
    with tf.variable_scope('flatten'):
        layer_shape = conv_layer2.get_shape()
        num_features = layer_shape[1:4].num_elements()  # [num_images, img_height * img_width * num_channels]

        layer_flat = tf.reshape(conv_layer2, [-1, num_features])

    # FULLY CONNECTED LAYER 1
    with tf.variable_scope('fc_1'):
        fc_weights1 = tf.Variable(tf.truncated_normal(shape=[num_features, num_fc_layer1_output], stddev=0.05))
        variable_summaries(fc_weights1)
        fc_biases1 = tf.Variable(tf.constant(0.05, shape=[num_fc_layer1_output]))
        variable_summaries(fc_biases1)

        fc_layer1 = tf.matmul(layer_flat, fc_weights1) + fc_biases1
        tf.summary.histogram('fc_layer1', fc_layer1)

    # BATCH NORMALIZATION
    if bn:
        with tf.variable_scope('fc_bn_1'):
            fc_layer1 = batch_norm(fc_layer1, phase_train)

    # RELU
    with tf.variable_scope('fc_relu_1'):
        fc_layer1 = tf.nn.relu(fc_layer1)

    # DROPOUT LAYER 1
    with tf.variable_scope('dropout_1'):
        fc_layer1_dropout = tf.nn.dropout(fc_layer1, fc_layer1_keep_prob)

    # FULLY CONNECTED LAYER 2
    with tf.variable_scope('fc_2'):
        fc_weights2 = tf.Variable(tf.truncated_normal(shape=[num_fc_layer1_output, num_fc_layer2_output], stddev=0.05))
        variable_summaries(fc_weights2)

        fc_biases2 = tf.Variable(tf.constant(0.05, shape=[num_fc_layer2_output]))
        variable_summaries(fc_biases2)

        fc_layer2 = tf.matmul(fc_layer1_dropout, fc_weights2) + fc_biases2
        tf.summary.histogram('fc_layer2', fc_layer2)

    # BATCH NORMALIZATION
    if bn:
        with tf.variable_scope('fc_bn_2'):
            fc_layer2 = batch_norm(fc_layer2, phase_train)

    # SOFTMAX
    with tf.variable_scope('softmax'):
        y_pred = tf.nn.softmax(fc_layer2)
        y_pred_cls = tf.argmax(y_pred, axis=1)

        tf.summary.histogram('y_pred', y_pred)

    # COST FUNCTION
    with tf.variable_scope('cost'):
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=fc_layer2, labels=y_true))
        # cost = tf.reduce_mean(-tf.reduce_sum(y_true * tf.log(y_pred), reduction_indices=[1]))
        tf.summary.histogram('cost', cost)

    # GRADIENT DESCENT METHOD - ADAM OPTIMIZER
    train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # PERFORMANCE MEASURES
    correct_prediction = tf.equal(y_pred_cls, y_true_cls)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.histogram('accuracy', accuracy)

    return train_step, cost, accuracy, y_pred, y_pred_cls, y_true_cls, placeholders
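
batch_norm(x, phase_train) is not defined in this snippet; one common TF 1.x realisation (an assumption, not necessarily what this project uses) simply wraps tf.layers.batch_normalization:

import tensorflow as tf

def batch_norm(x, phase_train):
    # Assumed helper: batch statistics during training, moving averages at inference.
    # In practice the ops in tf.GraphKeys.UPDATE_OPS must also be run with the train step.
    return tf.layers.batch_normalization(x, training=phase_train)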
コード例 #28
0
ファイル: ModelAdapter.py プロジェクト: Jacob-Zhou/CWSwD-PL
    def __init__(self,
                 segment_model,
                 dim_info,
                 config,
                 init_checkpoint,
                 tokenizer,
                 learning_rate,
                 init_embedding=None):
        uni_embedding = None
        bi_embedding = None
        if init_embedding is not None:
            uni_embedding = utils.get_embedding(init_embedding,
                                                tokenizer.vocab,
                                                config.embedding_size)
            if "bigram_vocab" in tokenizer.__dict__:
                bi_embedding = utils.get_embedding(init_embedding,
                                                   tokenizer.bigram_vocab,
                                                   config.embedding_size)

        self.input_ids = tf.placeholder(
            dtype=tf.int64,
            shape=[None, None, dim_info.feature_dims['input_ids']],
            name='input_ids')
        self.input_dicts = tf.placeholder(
            dtype=tf.int64,
            shape=[None, None, dim_info.feature_dims['input_dicts']],
            name='input_dicts')
        if dim_info.label_dim == 1:
            self.label_ids = tf.placeholder(dtype=tf.int64,
                                            shape=[None, None],
                                            name='label_ids')
        else:
            self.label_ids = tf.placeholder(
                dtype=tf.int64,
                shape=[None, None, dim_info.label_dim],
                name='label_ids')
        self.seq_length = tf.placeholder(dtype=tf.int64,
                                         shape=[None],
                                         name='seq_length')

        self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                                name='dropout_keep_prob')
        self.learning_rate = tf.Variable(learning_rate, trainable=False)
        self.new_learning_rate = tf.placeholder(tf.float32,
                                                shape=[],
                                                name="new_learning_rate")

        features = {
            "input_ids": self.input_ids,
            "input_dicts": self.input_dicts,
            "label_ids": self.label_ids,
            "seq_length": self.seq_length
        }

        self.model = segment_model(config,
                                   features,
                                   self.dropout_keep_prob,
                                   init_embeddings={
                                       "uni_embedding": uni_embedding,
                                       "bi_embedding": bi_embedding
                                   })

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = model_utils.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            utils.variable_summaries(var)
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        (loss, label_ids, prediction,
         seq_length) = self.model.get_all_results()

        l2_reg_lamda = config.l2_reg_lamda
        clip = 5

        with tf.variable_scope('train_op'):
            self.lr_update = tf.assign(self.learning_rate,
                                       self.new_learning_rate)
            global_step = tf.train.get_or_create_global_step()
            optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)  # use the updatable variable so lr_update takes effect

            if l2_reg_lamda > 0:
                l2_loss = tf.add_n([
                    tf.nn.l2_loss(v) for v in tvars
                    if (v.get_shape().ndims > 1 and "rate" not in v.name)
                ])
                tf.logging.info("**** L2 Loss Variables ****")
                for var in tvars:
                    if var.get_shape().ndims > 1 and "rate" not in var.name:
                        tf.logging.info("  name = %s, shape = %s", var.name,
                                        var.shape)
                total_loss = loss + l2_reg_lamda * l2_loss
            else:
                total_loss = loss

            if config.clip_grad:
                grads, _ = tf.clip_by_global_norm(
                    tf.gradients(total_loss, tvars), clip)
                train_op = optimizer.apply_gradients(zip(grads, tvars),
                                                     global_step=global_step)
            else:
                train_op = optimizer.minimize(total_loss,
                                              global_step=global_step)

        self.loss = loss
        self.total_loss = total_loss
        self.seq_length = seq_length
        self.prediction = prediction
        self.train_op = train_op
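
The lr_update op above only changes the learning rate when it is explicitly run with a value for new_learning_rate; a usage sketch, where sess, adapter and decay are assumed names:

current_lr = sess.run(adapter.learning_rate)
sess.run(adapter.lr_update,
         feed_dict={adapter.new_learning_rate: current_lr * decay})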
コード例 #29
0
def coattention_encoder(D, Q, documents_lengths, questions_lengths,
                        hyperparameters):
    # D[i] = document i in the batch, Q[i] = question i in the batch
    with tf.name_scope("sentinels"):
        with tf.variable_scope("sentinel_d"):
            sentinel_d = bias_variable([hyperparameters.hidden_size])
            variable_summaries(sentinel_d)
        with tf.variable_scope("sentinel_q"):
            sentinel_q = bias_variable([hyperparameters.hidden_size])
            variable_summaries(sentinel_q)
        # append sentinels at the end of documents
        expanded_sentinel_d = tf.expand_dims(tf.expand_dims(sentinel_d, 0), 0)
        tiled_sentinel_d = tf.tile(expanded_sentinel_d,
                                   [hyperparameters.batch_size, 1, 1])
        D = tf.concat([D, tiled_sentinel_d], axis=1)
        # append sentinels at the end of questions
        expanded_sentinel_q = tf.expand_dims(tf.expand_dims(sentinel_q, 0), 0)
        tiled_sentinel_q = tf.tile(expanded_sentinel_q,
                                   [hyperparameters.batch_size, 1, 1])
        Q = tf.concat([Q, tiled_sentinel_q], axis=1)

    L = tf.matmul(D, tf.transpose(Q, perm=[0, 2, 1]))
    if hyperparameters.padding_mask:
        document_end_indices = tf.subtract(documents_lengths, 1)
        question_end_indices = tf.subtract(questions_lengths, 1)
        doc_words_mask = tf.math.cumsum(tf.one_hot(
            document_end_indices, hyperparameters.max_doc_len),
                                        axis=1,
                                        reverse=True)
        que_words_mask = tf.math.cumsum(tf.one_hot(
            question_end_indices, hyperparameters.max_que_len),
                                        axis=1,
                                        reverse=True)
        # add sentinels
        sentinel_mask = tf.ones([hyperparameters.batch_size, 1])
        doc_words_mask = tf.concat([doc_words_mask, sentinel_mask], axis=1)
        que_words_mask = tf.concat([que_words_mask, sentinel_mask], axis=1)
        words_mask = tf.matmul(tf.expand_dims(doc_words_mask, axis=2),
                               tf.expand_dims(que_words_mask, axis=1))
        negative_padding_mask = tf.subtract(words_mask, 1)
        min_float_at_padding = tf.multiply(
            negative_padding_mask, tf.cast(-0.5 * tf.float32.min, tf.float32))
        L = tf.add(L, min_float_at_padding)

    A_Q = tf.nn.softmax(L,
                        axis=int(hyperparameters.softmax_axis),
                        name="softmaxed_L")
    A_D = tf.nn.softmax(tf.transpose(L, perm=[0, 2, 1]),
                        axis=int(hyperparameters.softmax_axis),
                        name="softmaxed_L_transpose")
    C_Q = tf.matmul(tf.transpose(D, perm=[0, 2, 1]), A_Q)

    C_D_2 = tf.matmul(C_Q, A_D)
    C_Q_2 = tf.matmul(C_D_2, A_Q)
    #print('C_D_2', C_D_2.shape)
    #print('C_Q_2', C_Q_2.shape)
    concat_1 = tf.concat([tf.transpose(Q, perm=[0, 2, 1]), C_Q], 1)
    concat_1_1 = tf.concat([tf.transpose(Q, perm=[0, 2, 1]), C_Q, C_Q_2], 1)
    if int(hyperparameters.coattention) == 0:
        C_D = tf.matmul(tf.transpose(Q, perm=[0, 2, 1]), A_D)
    elif int(hyperparameters.coattention) == 1:
        C_D = tf.matmul(concat_1, A_D)
    elif int(hyperparameters.coattention) == 2:
        C_D = tf.matmul(concat_1_1, A_D)
    concat_2 = tf.concat([tf.transpose(D, perm=[0, 2, 1]), C_D], 1)
    concat_2 = tf.transpose(concat_2, perm=[0, 2, 1])
    concat_2 = concat_2[:, :-1, :]  # remove sentinels

    BiLSTM_outputs, BiLSTM_final_fw_state, BiLSTM_final_bw_state = dynamic_bilstm(
        concat_2, documents_lengths, hyperparameters)

    if hyperparameters.bi_lstm_encoding_dropout:
        BiLSTM_outputs = tf.nn.dropout(BiLSTM_outputs,
                                       keep_prob=hyperparameters.keep_prob)

    if (hyperparameters.squad2_vector or hyperparameters.squad2_lstm):
        with tf.name_scope("SQuAD_2"):
            if (hyperparameters.squad2_vector):
                impossible_encoding = bias_variable(
                    [2 * hyperparameters.hidden_size])
                variable_summaries(impossible_encoding)
                impossible_encoding = tf.expand_dims(tf.expand_dims(
                    impossible_encoding, axis=0),
                                                     axis=0)
                impossible_encoding = tf.tile(
                    impossible_encoding, [hyperparameters.batch_size, 1, 1])
            elif (hyperparameters.squad2_lstm):
                encodings, final_state = dynamic_lstm_with_hidden_size(
                    concat_2, documents_lengths, hyperparameters,
                    2 * hyperparameters.hidden_size, False)
                impossible_encoding = encodings[:, -1]
                variable_summaries(impossible_encoding)
                impossible_encoding = tf.expand_dims(impossible_encoding,
                                                     axis=1)
        BiLSTM_outputs = tf.concat([BiLSTM_outputs, impossible_encoding],
                                   axis=1)

    return L, BiLSTM_outputs
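
bias_variable (used here for the sentinels and the impossible-answer encoding) is not shown; a common TF 1.x tutorial-style definition, offered as an assumption:

import tensorflow as tf

def bias_variable(shape):
    # Assumed helper: trainable variable initialised to a small constant.
    return tf.Variable(tf.constant(0.1, shape=shape))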
コード例 #30
0
    def model(self, X, Y):
        feature = int(np.prod(X.get_shape()[1:]))
        classes = int(np.prod(Y.get_shape()[1:]))
        x_image = tf.reshape(X, [-1, feature, 1, 1])

        # 1st conv layer
        with tf.name_scope('conv1') as scope:
            w = weight_variable([5, 1, 1, 32])
            b = bias_variable([32])
            h = tf.nn.relu(conv2d(x_image, w) + b)
            conv1 = max_pool_2x2(h)
            # print "conv1 shape: ", h.get_shape()
            # print "pool1 shape: ", conv1.get_shape()

        # 2nd conv layer
        with tf.name_scope('conv2') as scope:
            w = weight_variable([5, 1, 32, 64])
            b = bias_variable([64])
            h = tf.nn.relu(conv2d(conv1, w) + b)
            conv2 = max_pool_2x2(h)
            # print "conv2 shape: ", h.get_shape()
            # print "pool2 shape: ", conv2.get_shape()

        # 3rd conv layer
        with tf.name_scope('conv3') as scope:
            w = weight_variable([5, 1, 64, 64])
            b = bias_variable([64])
            conv3 = tf.nn.relu(conv2d(conv2, w) + b)
            # print "conv3 shape: ", conv3.get_shape()

        # 4th conv layer
        with tf.name_scope('conv4') as scope:
            w = weight_variable([5, 1, 64, 64])
            b = bias_variable([64])
            conv4 = tf.nn.relu(conv2d(conv3, w) + b)
            # print "conv4 shape: ", conv4.get_shape()

        # 5th conv layer
        with tf.name_scope('conv5') as scope:
            w = weight_variable([5, 1, 64, 64])
            b = bias_variable([64])
            h = tf.nn.relu(conv2d(conv4, w) + b)
            conv5 = max_pool_2x2(h)
            # print "conv5 shape: ", h.get_shape()
            # print "pool5 shape: ", conv5.get_shape()

        # dropout
        keep_prob = tf.placeholder(tf.float32)

        # 1st fc layer
        with tf.name_scope('fc1') as scope:
            shape = int(np.prod(conv5.get_shape()[1:]))
            print('shape: ', shape)
            conv5_flat = tf.reshape(conv5, [-1, shape])
            w = weight_variable([shape, 1024])
            b = bias_variable([1024])

            h = tf.nn.relu(tf.matmul(conv5_flat, w) + b)
            fc1 = tf.nn.dropout(h, keep_prob)
            # print "fc1 shape: ", fc1.get_shape()

        # 2nd fc layer
        with tf.name_scope('fc2') as scope:
            w = weight_variable([1024, 512])
            b = bias_variable([512])
            h = tf.nn.relu(tf.matmul(fc1, w) + b)
            fc2 = tf.nn.dropout(h, keep_prob)
            # print "fc2 shape: ", fc2.get_shape()

        # 3rd fc layer
        with tf.name_scope('fc3') as scope:
            w = weight_variable([512, classes])
            b = bias_variable([classes])
            logits = tf.matmul(fc2, w) + b
            # print "logits shape: ", logits.get_shape()
            entropy = tf.nn.softmax_cross_entropy_with_logits(labels=Y,
                                                              logits=logits,
                                                              name='loss')
            loss = tf.reduce_mean(entropy)
            variable_summaries(loss, 'loss')

        return logits, loss, keep_prob, "alex"