Example #1
    def __conv(self,
               input,
               filter_width,
               filter_height,
               filters_count,
               stride_x,
               stride_y,
               padding='VALID',
               init_biases_with_the_constant_1=False,
               name='conv'):
        with tf.name_scope(name):
            input_channels = input.get_shape().as_list()[-1]
            filters = tf.Variable(self.__random_values(shape=[
                filter_height, filter_width, input_channels, filters_count
            ]),
                                  name='filters')
            convs = tf.nn.conv2d(input=input,
                                 filter=filters,
                                 strides=[1, stride_y, stride_x, 1],
                                 padding=padding,
                                 name='convs')
            if init_biases_with_the_constant_1:
                biases = tf.Variable(tf.ones(shape=[filters_count],
                                             dtype=tf.float32),
                                     name='biases')
            else:
                biases = tf.Variable(tf.zeros(shape=[filters_count],
                                              dtype=tf.float32),
                                     name='biases')
            preactivations = tf.nn.bias_add(convs,
                                            biases,
                                            name='preactivations')
            activations = tf.nn.relu(preactivations, name='activations')

            with tf.name_scope('filter_summaries'):
                self.__variable_summaries(filters)

            with tf.name_scope('bias_summaries'):
                self.__variable_summaries(biases)

            with tf.name_scope('preactivations_histogram'):
                tf.summary.histogram('preactivations', preactivations)

            with tf.name_scope('activations_histogram'):
                tf.summary.histogram('activations', activations)

            return activations
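
The method above relies on two private helpers that the snippet does not show: self.__random_values and self.__variable_summaries. A minimal sketch of what they plausibly look like, assuming the zero-mean Gaussian initialization (stddev 0.01) described in Example #8 and the usual TensorBoard summary pattern; the exact bodies are an assumption, not part of the original code:

    def __random_values(self, shape):
        # Zero-mean Gaussian initial values; random_mean and random_stddev are the
        # attributes set in the constructor shown in Example #8 (0 and 0.01).
        return tf.random_normal(shape=shape,
                                mean=self.random_mean,
                                stddev=self.random_stddev,
                                dtype=tf.float32)

    def __variable_summaries(self, var):
        # Standard TensorBoard summaries: mean, stddev, min, max, and a histogram.
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.histogram('histogram', var)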
Example #2
def sample_sequence(*, hparams, length, start_token=None, batch_size=None, context=None, temperature=1, top_k=0, top_p=0.0):
    if start_token is None:
        assert context is not None, 'Specify exactly one of start_token and context!'
    else:
        assert context is None, 'Specify exactly one of start_token and context!'
        context = tf.fill([batch_size, 1], start_token)

    def step(hparams, tokens, past=None):
        lm_output = model.model(hparams=hparams, X=tokens, past=past, reuse=tf.AUTO_REUSE)

        logits = lm_output['logits'][:, :, :hparams.n_vocab]
        presents = lm_output['present']
        presents.set_shape(model.past_shape(hparams=hparams, batch_size=batch_size))
        return {
            'logits': logits,
            'presents': presents,
        }

    with tf.name_scope('sample_sequence'):
        # Don't feed the last context token -- leave that to the loop below
        # TODO: Would be slightly faster if we called step on the entire context,
        # rather than leaving the last token transformer calculation to the while loop.
        context_output = step(hparams, context[:, :-1])

        def body(past, prev, output):
            next_outputs = step(hparams, prev[:, tf.newaxis], past=past)
            logits = next_outputs['logits'][:, -1, :] / tf.to_float(temperature)
            if top_p > 0.0:
                logits = top_p_logits(logits, p=top_p)
            else:
                logits = top_k_logits(logits, k=top_k)
            samples = tf.multinomial(logits, num_samples=1, output_dtype=tf.int32)
            return [
                tf.concat([past, next_outputs['presents']], axis=-2),
                tf.squeeze(samples, axis=[1]),
                tf.concat([output, samples], axis=1),
            ]

        def cond(*args):
            return True

        _, _, tokens = tf.while_loop(
            cond=cond, body=body,
            maximum_iterations=length,
            loop_vars=[
                context_output['presents'],
                context[:, -1],
                context,
            ],
            shape_invariants=[
                tf.TensorShape(model.past_shape(hparams=hparams, batch_size=batch_size)),
                tf.TensorShape([batch_size]),
                tf.TensorShape([batch_size, None]),
            ],
            back_prop=False,
        )

        return tokens
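
This is the GPT-2 sampling loop built around tf.while_loop. A minimal usage sketch, assuming the GPT-2 model module, its top_k_logits/top_p_logits helpers, and a downloaded checkpoint are available; the 'models/117M' path and the start token 50256 (<|endoftext|>) are assumptions about the standard release, not part of the snippet above:

import json

with open('models/117M/hparams.json') as f:  # assumed checkpoint directory
    hparams = model.default_hparams()
    hparams.override_from_dict(json.load(f))

with tf.Session(graph=tf.Graph()) as sess:
    # Build the sampling graph: 40 unconditional tokens from the start token.
    output = sample_sequence(hparams=hparams, length=40,
                             start_token=50256,  # <|endoftext|> in the GPT-2 vocabulary
                             batch_size=1, temperature=1.0, top_k=40)
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint('models/117M'))
    tokens = sess.run(output)  # shape [1, 41]: the start token plus 40 sampled tokens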
Example #3
    def __local_response_normalization(self, input, name='lrn'):
        # From article: Local Response Normalization: we used k=2, n=5, α=10^−4, and β=0.75.
        # In tf.nn.local_response_normalization, k corresponds to `bias` and the window size n
        # equals 2 * depth_radius + 1, so n=5 gives depth_radius=2.
        with tf.name_scope(name):
            lrn = tf.nn.local_response_normalization(
                input=input,
                depth_radius=2,
                bias=2,
                alpha=10**-4,
                beta=0.75,
                name='local_response_normalization')
            return lrn
Example #4
    def __fully_connected(self,
                          input,
                          inputs_count,
                          outputs_count,
                          relu=True,
                          init_biases_with_the_constant_1=False,
                          name='fully_connected'):
        with tf.name_scope(name):
            weights = tf.Variable(
                self.__random_values(shape=[inputs_count, outputs_count]),
                name='weights')
            if init_biases_with_the_constant_1:
                biases = tf.Variable(tf.ones(shape=[outputs_count],
                                             dtype=tf.float32),
                                     name='biases')
            else:
                biases = tf.Variable(tf.zeros(shape=[outputs_count],
                                              dtype=tf.float32),
                                     name='biases')
            preactivations = tf.nn.bias_add(tf.matmul(input, weights),
                                            biases,
                                            name='preactivations')
            if relu:
                activations = tf.nn.relu(preactivations, name='activations')

            with tf.name_scope('weight_summaries'):
                self.__variable_summaries(weights)

            with tf.name_scope('bias_summaries'):
                self.__variable_summaries(biases)

            with tf.name_scope('preactivations_histogram'):
                tf.summary.histogram('preactivations', preactivations)

            if relu:
                with tf.name_scope('activations_histogram'):
                    tf.summary.histogram('activations', activations)

            if relu:
                return activations
            else:
                return preactivations
Example #5
    def __max_pool(self,
                   input,
                   filter_width,
                   filter_height,
                   stride_x,
                   stride_y,
                   padding='VALID',
                   name='pool'):
        with tf.name_scope(name):
            pool = tf.nn.max_pool(input,
                                  ksize=[1, filter_height, filter_width, 1],
                                  strides=[1, stride_y, stride_x, 1],
                                  padding=padding,
                                  name='pool')
            return pool
Example #6
@contextlib.contextmanager
def capture_ops():
    """Context manager to capture ops created in the block.
    with capture_ops() as ops:
      # create some ops
    print(ops) # => prints ops created.
    """

    micros = int(time.time() * 10 ** 6)
    scope_name = str(micros)
    op_list = []
    with tf.name_scope(scope_name):
        yield op_list

    g = tf.get_default_graph()
    op_list.extend(ge.select_ops(scope_name + "/.*", graph=g))
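
A short usage sketch for the context manager above, with the imports it needs (ge is tf.contrib.graph_editor in TF 1.x); the ops created inside the block are purely illustrative:

import time
import contextlib
import tensorflow as tf
from tensorflow.contrib import graph_editor as ge

with capture_ops() as ops:
    a = tf.constant(1.0, name='a')
    b = tf.constant(2.0, name='b')
    tf.add(a, b, name='c')

print([op.name for op in ops])  # e.g. ['<micros>/a', '<micros>/b', '<micros>/c']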
Example #7
def pca(x, dim=2):
    '''
        x: input matrix (samples x features)
        dim: number of dimensions to keep after the reduction
    '''
    with tf.name_scope("PCA"):

        m, n = tf.to_float(x.get_shape()[0]), x.get_shape().as_list()[1]
        assert dim <= n, 'dim must not exceed the number of input features'
        # Center every feature (column) at zero mean.
        mean = tf.reduce_mean(x, axis=0)
        x_new = x - mean
        # Sample covariance matrix of the features: (n x n).
        cov = tf.matmul(x_new, x_new, transpose_a=True) / (m - 1)
        # Eigendecomposition; the eigenvectors are the columns of v.
        e, v = tf.linalg.eigh(cov, name="eigh")
        # Indices of the `dim` largest eigenvalues.
        e_index_sort = tf.math.top_k(e, sorted=True, k=dim)[1]
        # Keep the matching eigenvector columns and project the centered data onto them.
        v_new = tf.gather(v, indices=e_index_sort, axis=1)
        pca = tf.matmul(x_new, v_new)
    return pca
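
A quick usage sketch for the PCA helper, with random data standing in for real input (the shapes are illustrative):

import numpy as np
import tensorflow as tf

data = np.random.randn(100, 10).astype(np.float32)  # 100 samples, 10 features
x = tf.constant(data)
projected = pca(x, dim=2)

with tf.Session() as sess:
    print(sess.run(projected).shape)  # (100, 2)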
Example #8
    def __init__(self,
                 input_width=227,
                 input_height=227,
                 input_channels=3,
                 num_classes=1000,
                 learning_rate=0.01,
                 momentum=0.9,
                 keep_prob=0.5):

        # From article: The learning rate was initialized at 0.01.
        # From article: We trained our models using stochastic gradient descent with a batch size of 128 examples,
        # momentum of 0.9, and weight decay of 0.0005

        # From article: We initialized the weights in each layer from a zero-mean Gaussian distribution with standard
        # deviation 0.01.

        self.input_width = input_width
        self.input_height = input_height
        self.input_channels = input_channels
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.keep_prob = keep_prob

        self.random_mean = 0
        self.random_stddev = 0.01

        # ----------------------------------------------------------------------------------------------------

        # From article: We initialized the neuron biases in the second, fourth, and fifth convolutional layers, as well
        # as in the fully-connected hidden layers, with the constant 1. ... We initialized the neuron biases in the
        # remaining layers with the constant 0.

        # Input: 227x227x3.
        with tf.name_scope('input'):
            self.X = tf.placeholder(dtype=tf.float32,
                                    shape=[
                                        None, self.input_height,
                                        self.input_width, self.input_channels
                                    ],
                                    name='X')

        # Labels: 1000.
        with tf.name_scope('labels'):
            self.Y = tf.placeholder(dtype=tf.float32,
                                    shape=[None, self.num_classes],
                                    name='Y')

        # Dropout keep prob.
        with tf.name_scope('dropout'):
            self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                                    shape=(),
                                                    name='dropout_keep_prob')

        # Layer 1.
        # [Input] ==> 227x227x3
        # --> 227x227x3 ==> [Convolution: size=(11x11x3)x96, strides=4, padding=valid] ==> 55x55x96
        # --> 55x55x96 ==> [ReLU] ==> 55x55x96
        # --> 55x55x96 ==> [Local Response Normalization] ==> 55x55x96
        # --> 55x55x96 ==> [Max-Pool: size=3x3, strides=2, padding=valid] ==> 27x27x96
        # --> [Output] ==> 27x27x96
        # Note: 48*2=96, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer1'):
            layer1_activations = self.__conv(
                input=self.X,
                filter_width=11,
                filter_height=11,
                filters_count=96,
                stride_x=4,
                stride_y=4,
                padding='VALID',
                init_biases_with_the_constant_1=False)
            layer1_lrn = self.__local_response_normalization(
                input=layer1_activations)
            layer1_pool = self.__max_pool(input=layer1_lrn,
                                          filter_width=3,
                                          filter_height=3,
                                          stride_x=2,
                                          stride_y=2,
                                          padding='VALID')

        # Layer 2.
        # [Input] ==> 27x27x96
        # --> 27x27x96 ==> [Convolution: size=(5x5x96)x256, strides=1, padding=same] ==> 27x27x256
        # --> 27x27x256 ==> [ReLU] ==> 27x27x256
        # --> 27x27x256 ==> [Local Response Normalization] ==> 27x27x256
        # --> 27x27x256 ==> [Max-Pool: size=3x3, strides=2, padding=valid] ==> 13x13x256
        # --> [Output] ==> 13x13x256
        # Note: 128*2=256, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer2'):
            layer2_activations = self.__conv(
                input=layer1_pool,
                filter_width=5,
                filter_height=5,
                filters_count=256,
                stride_x=1,
                stride_y=1,
                padding='SAME',
                init_biases_with_the_constant_1=True)
            layer2_lrn = self.__local_response_normalization(
                input=layer2_activations)
            layer2_pool = self.__max_pool(input=layer2_lrn,
                                          filter_width=3,
                                          filter_height=3,
                                          stride_x=2,
                                          stride_y=2,
                                          padding='VALID')

        # Layer 3.
        # [Input] ==> 13x13x256
        # --> 13x13x256 ==> [Convolution: size=(3x3x256)x384, strides=1, padding=same] ==> 13x13x384
        # --> 13x13x384 ==> [ReLU] ==> 13x13x384
        # --> [Output] ==> 13x13x384
        # Note: 192*2=384, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer3'):
            layer3_activations = self.__conv(
                input=layer2_pool,
                filter_width=3,
                filter_height=3,
                filters_count=384,
                stride_x=1,
                stride_y=1,
                padding='SAME',
                init_biases_with_the_constant_1=False)

        # Layer 4.
        # [Input] ==> 13x13x384
        # --> 13x13x384 ==> [Convolution: size=(3x3x384)x384, strides=1, padding=same] ==> 13x13x384
        # --> 13x13x384 ==> [ReLU] ==> 13x13x384
        # --> [Output] ==> 13x13x384
        # Note: 192*2=384, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer4'):
            layer4_activations = self.__conv(
                input=layer3_activations,
                filter_width=3,
                filter_height=3,
                filters_count=384,
                stride_x=1,
                stride_y=1,
                padding='SAME',
                init_biases_with_the_constant_1=True)

        # Layer 5.
        # [Input] ==> 13x13x384
        # --> 13x13x384 ==> [Convolution: size=(3x3x384)x256, strides=1, padding=same] ==> 13x13x256
        # --> 13x13x256 ==> [ReLU] ==> 13x13x256
        # --> 13x13x256 ==> [Max-Pool: size=3x3, strides=2, padding=valid] ==> 6x6x256
        # --> [Output] ==> 6x6x256
        # Note: 128*2=256, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer5'):
            layer5_activations = self.__conv(
                input=layer4_activations,
                filter_width=3,
                filter_height=3,
                filters_count=256,
                stride_x=1,
                stride_y=1,
                padding='SAME',
                init_biases_with_the_constant_1=True)
            layer5_pool = self.__max_pool(input=layer5_activations,
                                          filter_width=3,
                                          filter_height=3,
                                          stride_x=2,
                                          stride_y=2,
                                          padding='VALID')

        # Layer 6.
        # [Input] ==> 6x6x256=9216
        # --> 9216 ==> [Fully Connected: neurons=4096] ==> 4096
        # --> 4096 ==> [ReLU] ==> 4096
        # --> 4096 ==> [Dropout] ==> 4096
        # --> [Output] ==> 4096
        # Note: 2048*2=4096, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer6'):
            pool5_shape = layer5_pool.get_shape().as_list()
            flattened_input_size = (pool5_shape[1] * pool5_shape[2] *
                                    pool5_shape[3])
            layer6_fc = self.__fully_connected(
                input=tf.reshape(layer5_pool, shape=[-1,
                                                     flattened_input_size]),
                inputs_count=flattened_input_size,
                outputs_count=4096,
                relu=True,
                init_biases_with_the_constant_1=True)
            layer6_dropout = self.__dropout(input=layer6_fc)

        # Layer 7.
        # [Input] ==> 4096
        # --> 4096 ==> [Fully Connected: neurons=4096] ==> 4096
        # --> 4096 ==> [ReLU] ==> 4096
        # --> 4096 ==> [Dropout] ==> 4096
        # --> [Output] ==> 4096
        # Note: 2048*2=4096, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer7'):
            layer7_fc = self.__fully_connected(
                input=layer6_dropout,
                inputs_count=4096,
                outputs_count=4096,
                relu=True,
                init_biases_with_the_constant_1=True)
            layer7_dropout = self.__dropout(input=layer7_fc)

        # Layer 8.
        # [Input] ==> 4096
        # --> 4096 ==> [Logits: neurons=1000] ==> 1000
        # --> [Output] ==> 1000
        with tf.name_scope('layer8'):
            layer8_logits = self.__fully_connected(
                input=layer7_dropout,
                inputs_count=4096,
                outputs_count=self.num_classes,
                relu=False,
                name='logits')

        # Cross Entropy.
        with tf.name_scope('cross_entropy'):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=layer8_logits, labels=self.Y, name='cross_entropy')
            self.__variable_summaries(cross_entropy)

        # Training.
        with tf.name_scope('training'):
            loss_operation = tf.reduce_mean(cross_entropy,
                                            name='loss_operation')
            tf.summary.scalar(name='loss', tensor=loss_operation)

            optimizer = tf.train.MomentumOptimizer(
                learning_rate=self.learning_rate, momentum=self.momentum)

            # self.training_operation = optimizer.minimize(loss_operation, name='training_operation')

            grads_and_vars = optimizer.compute_gradients(loss_operation)
            self.training_operation = optimizer.apply_gradients(
                grads_and_vars, name='training_operation')

            for grad, var in grads_and_vars:
                if grad is not None:
                    with tf.name_scope(var.op.name + '/gradients'):
                        self.__variable_summaries(grad)

        # Accuracy.
        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(layer8_logits, 1),
                                          tf.argmax(self.Y, 1),
                                          name='correct_prediction')
            self.accuracy_operation = tf.reduce_mean(tf.cast(
                correct_prediction, tf.float32),
                                                     name='accuracy_operation')
            tf.summary.scalar(name='accuracy', tensor=self.accuracy_operation)
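
A hedged sketch of how this constructor might be driven, assuming the surrounding class is called AlexNet and that batch_x/batch_y hold a batch of images and one-hot labels; the class name, the data, and the session handling are assumptions, not part of the snippet above:

tf.reset_default_graph()
net = AlexNet(num_classes=1000, learning_rate=0.01, momentum=0.9, keep_prob=0.5)
merged_summaries = tf.summary.merge_all()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # One training step: dropout active with the configured keep probability.
    _, summaries = sess.run([net.training_operation, merged_summaries],
                            feed_dict={net.X: batch_x,
                                       net.Y: batch_y,
                                       net.dropout_keep_prob: net.keep_prob})
    # Evaluation: disable dropout by feeding keep_prob = 1.0.
    accuracy = sess.run(net.accuracy_operation,
                        feed_dict={net.X: batch_x,
                                   net.Y: batch_y,
                                   net.dropout_keep_prob: 1.0})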
Example #9
    def __dropout(self, input, name='dropout'):
        with tf.name_scope(name):
            return tf.nn.dropout(input,
                                 keep_prob=self.dropout_keep_prob,
                                 name='dropout')