    def inference(self):
        """
        Build inference pipeline, going from the story and question, through the memory cells, to the
        distribution over possible answers.
        """

        # Story Input Encoder
        story_embeddings = tf.nn.embedding_lookup(self.E, self.S) # Shape: [None, story_len, sent_len, embed_sz]
        # story_embeddings = tf.nn.dropout(story_embeddings, self.keep_prob)               # Shape: [None, story_len, sent_len, embed_sz]
        story_embeddings = tf.multiply(story_embeddings, self.story_mask)
        self.story_embeddings = tf.reduce_sum(story_embeddings, axis=[2])                     # Shape: [None, story_len, embed_sz]

        # Query Input Encoder
        query_embedding = tf.nn.embedding_lookup(self.E, self.Q)  # Shape: [None, sent_len, embed_sz]
        query_embedding = tf.multiply(query_embedding, self.query_mask)                  # Shape: [None, sent_len, embed_sz]
        self.query_embedding = tf.reduce_sum(query_embedding, axis=[1])                       # Shape: [None, embed_sz]

        ## To feed the story into a dynamic RNN we need to specify the length of each sequence.
        # length = tf.cast(tf.reduce_sum(tf.sign(tf.reduce_max(tf.abs(self.S), axis=2)), axis=1), tf.int32)
        self.length = self.get_sequence_length()

        # Create Memory Cell
        self.cell = DynamicMemoryCell(self.num_blocks, self.embedding_size,
                                      self.keys, self.query_embedding)
        # self.cell =tf.contrib.rnn.DropoutWrapper(self.cell, output_keep_prob=self.keep_prob)

        # Send Story through Memory Cell
        initial_state = self.cell.zero_state(self.batch_size, dtype=tf.float32)
        self.out, memories = tf.nn.dynamic_rnn(self.cell, self.story_embeddings,
                                        sequence_length=self.length,
                                        initial_state=initial_state)

        # Output Module
        # stacked_memories = tf.stack(memories, axis=1)
        stacked_memories = tf.stack(tf.split(memories, self.num_blocks, 1), 1)


        # Generate Memory Scores
        p_scores = softmax(tf.reduce_sum(tf.multiply(stacked_memories, tf.expand_dims(self.query_embedding, 1)), axis=[2]))  # Shape: [None, mem_slots]

        # Subtract max for numerical stability (softmax is shift invariant)
        p_max = tf.reduce_max(p_scores, axis=-1, keep_dims=True)
        attention = tf.nn.softmax(p_scores - p_max)
        attention = tf.expand_dims(attention, 2)                                         # Shape: [None, mem_slots, 1]


        # Weight memories by attention vectors
        u = tf.reduce_sum(tf.multiply(stacked_memories, attention), axis=1)          # Shape: [None, embed_sz]

        # Output Transformations => Logits
        hidden = self.activation(tf.matmul(u, self.H) + tf.squeeze(self.query_embedding))      # Shape: [None, embed_sz]
        logits = tf.matmul(hidden, self.R)
        return logits
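
# For context, a minimal sketch of how the logits returned above could be wired into a
# training objective. The integer answer placeholder `self.A` and the optimizer settings
# below are assumptions for illustration, not part of the original model.
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.A, logits=logits))
train_op = tf.train.AdamOptimizer(learning_rate=1e-2).minimize(loss)
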
    def __neural_network_model(self, user_class_weight):
        print('Building fully connected network...')

        X = tf.placeholder(tf.float32, [None, 12], name="X")
        Y = tf.placeholder(tf.float32, [None, 2], name="Y")

        MLP1 = fully_connected(X,
                               64,
                               activation=tf.nn.elu,
                               regularizer="L2",
                               name="MLP1")

        MLP2 = fully_connected(MLP1,
                               2,
                               activation='linear',
                               regularizer="L2",
                               name="MLP2")

        output = softmax(MLP2)
        accuracy = tf.reduce_mean(
            tf.cast(tf.equal(tf.argmax(output, 1), tf.argmax(Y, 1)), tf.float32),
            name='acc')

        class_weights = tf.constant([[user_class_weight, 1.0]])
        # deduce weights for batch samples based on their true label
        weights = tf.reduce_sum(class_weights * Y, axis=1)
        # compute your (unweighted) softmax cross entropy loss
        unweighted_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=Y, logits=MLP2)
        # apply the weights, relying on broadcasting of the multiplication
        weighted_losses = unweighted_losses * weights
        # reduce the result to get your final loss
        loss = tf.reduce_mean(weighted_losses)

        return {
            'X': X,
            'Y': Y,
            'output': output,
            'accuracy': accuracy,
            'loss': loss
        }
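
# The dictionary returned above only defines the graph; the caller still has to attach an
# optimizer and run a session. A minimal sketch, assuming it is called from inside the owning
# class and that `batch_x` / `batch_y` are hypothetical NumPy batches, not defined here.
graph = self.__neural_network_model(user_class_weight=2.0)
train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(graph['loss'])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, acc = sess.run([train_op, graph['accuracy']],
                      feed_dict={graph['X']: batch_x, graph['Y']: batch_y})
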
Example 3
    def inference(self):
        """
        Build inference pipeline, going from the story and question, through the memory cells, to the
        distribution over possible answers.  
        """
        # Story Input Encoder
        story_embeddings = tf.nn.embedding_lookup(self.E, self.S)             # Shape: [None, story_len, sent_len, embed_sz]
        story_embeddings = tf.multiply(story_embeddings, self.story_mask)     # Shape: [None, story_len, sent_len, embed_sz]
        story_embeddings = tf.reduce_sum(story_embeddings, axis=[2])          # Shape: [None, story_len, embed_sz]

        # Query Input Encoder
        query_embedding = tf.nn.embedding_lookup(self.E, self.Q)              # Shape: [None, sent_len, embed_sz]
        query_embedding = tf.multiply(query_embedding, self.query_mask)       # Shape: [None, sent_len, embed_sz]
        query_embedding = tf.reduce_sum(query_embedding, axis=[1])            # Shape: [None, embed_sz]

        # Send Story through Memory Cell
        initial_state = self.cell.zero_state(self.bsz, dtype=tf.float32)
        _, memories = tf.nn.dynamic_rnn(self.cell, story_embeddings, sequence_length=self.S_len, 
                                        initial_state=initial_state)

        # Output Module 
        stacked_memories = tf.stack(memories, axis=1)
        
        # Generate Memory Scores
        p_scores = softmax(tf.reduce_sum(tf.multiply(stacked_memories,        # Shape: [None, mem_slots]
                                                     tf.expand_dims(query_embedding, 1)), axis=[2]))
        
        # Subtract max for numerical stability (softmax is shift invariant)
        p_max = tf.reduce_max(p_scores, axis=-1, keep_dims=True)
        attention = tf.nn.softmax(p_scores - p_max)       
        attention = tf.expand_dims(attention, 2)                              # Shape: [None, mem_slots, 1]

        # Weight memories by attention vectors
        u = tf.reduce_sum(tf.multiply(stacked_memories, attention), axis=1)   # Shape: [None, embed_sz]

        # Output Transformations => Logits
        hidden = prelu(tf.matmul(u, self.H) + query_embedding)                # Shape: [None, embed_sz]
        logits = tf.matmul(hidden, self.R)                                    # Shape: [None, vocab_sz]
        
        return logits
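
# Both inference implementations subtract the running max before the final softmax. This is
# safe because softmax is shift invariant: softmax(x - c) == softmax(x), since the factor
# exp(-c) cancels between numerator and denominator. A small NumPy check, illustrative only:
import numpy as np

def naive_softmax(x):
    e = np.exp(x)
    return e / e.sum()

x = np.array([2.0, 5.0, 1.0])
print(np.allclose(naive_softmax(x), naive_softmax(x - x.max())))  # True
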
Example 4
def CRNN(window=1500,nLabels=3,downsampleSecond=True,featureMap=False):
	#Residual block function adapted from TFLearn:
	#https://github.com/tflearn/tflearn/blob/master/tflearn/layers/conv.py
	def residual_block_1D(incoming,out_channels,downsample=False, first=False, filt_len=16, dropout_prob=0.85, downsampleSecond=True):
		resnet = incoming
		in_channels = incoming.shape[-1].value
		strides = (2 if downsample else 1)
		dsLayer = (1 if downsampleSecond else 0)
		identity = resnet

		nConv = 2
		if first:
			resnet = conv_1d(resnet, out_channels, filt_len, strides,weights_init="variance_scaling")
			nConv = 1

		for i in range(nConv):
			resnet = batch_normalization(resnet)
			resnet = relu(resnet)
			resnet = dropout(resnet, dropout_prob)
			if downsample and i == dsLayer:  # dsLayer picks which conv keeps stride 1 (the second, by default)
				resnet = conv_1d(resnet, out_channels, filt_len, strides=1, weights_init="variance_scaling")  # so downsampling happens on the first conv layer only
			else:
				resnet = conv_1d(resnet, out_channels, filt_len, strides, weights_init="variance_scaling")

		#Beginning of skip connection
		identity = max_pool_1d(identity,strides, strides)

		if in_channels != out_channels:

			ch = (out_channels - in_channels) // 2
			identity = tf.pad(identity,[[0,0],[0,0],[ch,ch]])
			in_channels = out_channels

		resnet = resnet + identity
		
		return resnet

	#Begin construction of network
	net = input_data(shape=[None, window, 1])

	net = conv_1d(net, 64, 16, weights_init="variance_scaling")
	net = batch_normalization(net)
	net = relu(net)

	dropoutProb = 0.5
	net = residual_block_1D(net, 64, first=True, dropout_prob=dropoutProb)

	for i in range(0,4):
		downsample = (i%2 == 0)
		k = ((i+1)//4)+1
		net = residual_block_1D(net, 64*k, downsample, downsampleSecond=downsampleSecond, dropout_prob=dropoutProb)
	
	res1 = net

	for i in range(4, 8):
		downsample = (i%2 == 0)
		k = ((i+1)//4)+1
		net = residual_block_1D(net, 64*k, downsample, downsampleSecond=downsampleSecond, dropout_prob=dropoutProb)
	res2 = net

	for i in range(8,12):
		downsample = (i%2 == 0)
		k = ((i+1)//4)+1
		net = residual_block_1D(net, 64*k, downsample, downsampleSecond=downsampleSecond, dropout_prob=dropoutProb)
	res3 = net

	for i in range(12, 15):
		downsample = (i%2 == 0)
		k = ((i+1)//4)+1
		net = residual_block_1D(net, 64*k, downsample, downsampleSecond=downsampleSecond, dropout_prob=dropoutProb)
	res4 = net

	net = batch_normalization(net)
	net = relu(net)
	
	net = fully_connected(net, nLabels)
	net = softmax(net)
	
	net = regression(net, optimizer='adam',loss='categorical_crossentropy',learning_rate=0.001, shuffle_batches=False)

	#Return intermediary activations
	if featureMap:
		return res1, res2, res3, res4, net
	else:
		return net
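
# An illustrative way to train the returned network with TFLearn, assuming `import tflearn`
# alongside the layer imports the snippet already uses. `X_train` / `Y_train` are hypothetical
# arrays of shape [N, window, 1] and [N, nLabels]; they are not defined in the original snippet.
net = CRNN(window=1500, nLabels=3)
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(X_train, Y_train, n_epoch=50, validation_set=0.1,
          show_metric=True, batch_size=32)
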
Example 5
# Convolutional network building
net = input_data(shape=[None, 32, 32, 3],
                 data_preprocessing=img_prep,
                 data_augmentation=img_aug)

filters = [64, 128, 256, 512]
for f in filters:
    net = fractal_conv2d(net, 4, f, 3, normalizer_fn=batch_normalization)
    net = slim.max_pool2d(net, 2, 2)

net = fractal_conv2d(net, 4, 512, 2, normalizer_fn=batch_normalization)

net = conv_2d(net, 10, 1)
net = global_avg_pool(net)
net = softmax(net)

net = regression(net,
                 optimizer='adam',
                 loss='categorical_crossentropy',
                 learning_rate=.002)

# Train using classifier
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(X,
          Y,
          n_epoch=400,
          shuffle=True,
          validation_set=(X_test, Y_test),
          show_metric=True,
          batch_size=32,
          run_id='cifar10_cnn')
Example 6
img_aug.add_random_flip_leftright()
img_aug.add_random_rotation(max_angle=25.)

# Convolutional network building
net = input_data(shape=[None, 32, 32, 3],
                 data_preprocessing=img_prep,
                 data_augmentation=img_aug)

filters = [64, 128, 256, 512]
for f in filters:
    net = fractal_conv2d(net, 4, f, 3,
                         normalizer_fn=batch_normalization)
    net = slim.max_pool2d(net, 2, 2)

net = fractal_conv2d(net, 4, 512, 2,
                     normalizer_fn=batch_normalization)


net = conv_2d(net, 10, 1)
net = global_avg_pool(net)
net = softmax(net)

net = regression(net, optimizer='adam',
                 loss='categorical_crossentropy',
                 learning_rate=.002)

# Train using classifier
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(X, Y, n_epoch=400, shuffle=True, validation_set=(X_test, Y_test),
          show_metric=True, batch_size=32, run_id='cifar10_cnn')
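
# Illustrative follow-up (assumption): evaluating and reusing the trained TFLearn model.
# X_test / Y_test are the sets defined above; the file name is an arbitrary choice.
score = model.evaluate(X_test, Y_test, batch_size=32)
print('Test accuracy:', score)

predictions = model.predict(X_test[:5])      # class probabilities for 5 samples
model.save('cifar10_fractalnet.tflearn')     # persist the learned weights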