def _get_input_tensor_variables(self):
     # x_w: 1D: batch, 2D: n_prds, 3D: n_words, 4D: 5 + window; elem=word id
     # x_p: 1D: batch, 2D: n_prds, 3D: n_words; elem=posit id
     # y: 1D: batch, 2D: n_prds, 3D: n_words; elem=label id
     if self.argv.mark_phi:
         return [T.itensor4('x_w'), T.itensor3('x_p'), T.itensor3('y')]
     return [T.itensor4('x_w'), T.itensor3('y')]
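
# Illustration (not part of the original class): the variables returned above are
# symbolic placeholders; at call time they expect int32 numpy arrays whose rank
# matches the comments, e.g. x_w is (batch, n_prds, n_words, 5 + window). The
# sizes below are hypothetical.
import numpy as np
import theano.tensor as T

x_w_demo = T.itensor4('x_w')
batch, n_prds, n_words, window = 2, 3, 10, 2
sample_x_w = np.zeros((batch, n_prds, n_words, 5 + window), dtype='int32')
assert x_w_demo.ndim == sample_x_w.ndim
assert str(x_w_demo.dtype) == str(sample_x_w.dtype)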
def build(word_embeddings, len_voc, word_emb_dim, args, freeze=False):
	# input theano vars
	posts = T.imatrix()
	post_masks = T.fmatrix()
	ques_list = T.itensor4()
	ques_masks_list = T.ftensor4()
	ans_list = T.itensor4()
	ans_masks_list = T.ftensor4()
	labels = T.imatrix()
	N = args.no_of_candidates

	post_out, post_lstm_params = build_lstm(posts, post_masks, args.post_max_len, \
											word_embeddings, word_emb_dim, args.hidden_dim, len_voc, args.batch_size)	
	ques_out, ques_lstm_params = build_list_lstm_multiqa(ques_list, ques_masks_list, N, args.ques_max_len, \
														word_embeddings, word_emb_dim, args.hidden_dim, len_voc, args.batch_size)
	ans_out, ans_lstm_params = build_list_lstm_multiqa(ans_list, ans_masks_list, N, args.ans_max_len, \
														word_embeddings, word_emb_dim, args.hidden_dim, len_voc, args.batch_size)
	
	pq_out, pq_a_loss, ques_squared_errors, pq_a_squared_errors, post_ques_dense_params = answer_model(post_out, ques_out, ans_out, labels, args)
	#pa_preds, pa_loss, post_ans_dense_params = utility_calculator(post_out, ans_out, labels, args)	
	pa_preds, pa_loss, post_ans_dense_params = utility_calculator(post_out, ques_out, ans_out, labels, args)	

	all_params = post_lstm_params + ques_lstm_params + ans_lstm_params + post_ques_dense_params + post_ans_dense_params
	
	loss = pq_a_loss + pa_loss	
	loss += args.rho * sum(T.sum(l ** 2) for l in all_params)

	updates = lasagne.updates.adam(loss, all_params, learning_rate=args.learning_rate)
	
	train_fn = theano.function([posts, post_masks, ques_list, ques_masks_list, ans_list, ans_masks_list, labels], \
									[loss, pq_a_loss, pa_loss] + pq_out + pq_a_squared_errors + ques_squared_errors + pa_preds, updates=updates)
	test_fn = theano.function([posts, post_masks, ques_list, ques_masks_list, ans_list, ans_masks_list, labels], \
									[loss, pq_a_loss, pa_loss] + pq_out + pq_a_squared_errors + ques_squared_errors + pa_preds,)
	return train_fn, test_fn
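
# Standalone toy sketch (not the model above): the same compile-twice pattern,
# where train_fn applies Adam updates and test_fn evaluates the identical graph
# without touching the parameters.
import numpy as np
import theano
import theano.tensor as T
import lasagne

toy_x = T.matrix('toy_x')
toy_w = theano.shared(np.zeros((3, 1), dtype=theano.config.floatX), name='toy_w')
toy_loss = T.mean((T.nnet.sigmoid(T.dot(toy_x, toy_w)) - 1.0) ** 2)
toy_updates = lasagne.updates.adam(toy_loss, [toy_w], learning_rate=0.01)
toy_train_fn = theano.function([toy_x], toy_loss, updates=toy_updates)  # updates toy_w
toy_test_fn = theano.function([toy_x], toy_loss)                        # evaluation only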
def build_pqa_model(word_embeddings,
                    len_voc,
                    word_emb_dim,
                    N,
                    args,
                    freeze=False):

    # input theano vars
    posts = T.imatrix()
    post_masks = T.fmatrix()
    ques_list = T.itensor4()
    ques_masks_list = T.ftensor4()
    ans_list = T.itensor4()
    ans_masks_list = T.ftensor4()
    labels = T.imatrix()

    post_out, post_lstm_params = build_lstm_posts(posts, post_masks, args.post_max_len, \
                 word_embeddings, word_emb_dim, args.hidden_dim, len_voc, args.batch_size)
    ques_out, ques_lstm_params = build_lstm(ques_list, ques_masks_list, N, args.ques_max_len, \
              word_embeddings, word_emb_dim, args.hidden_dim, len_voc, args.batch_size)
    ans_out, ans_lstm_params = build_lstm(ans_list, ans_masks_list, N, args.ans_max_len, \
              word_embeddings, word_emb_dim, args.hidden_dim, len_voc, args.batch_size)

    pqa_preds, post_ques_ans_dense_params = get_pqa_preds(
        post_out, ques_out, ans_out, N, args)
    loss = 0.0
    for i in range(N):
        loss += T.sum(
            lasagne.objectives.binary_crossentropy(pqa_preds[i * N + i],
                                                   labels[:, i]))

    # squared_errors = [None]*(N*N)
    # for i in range(N):
    # 	for j in range(N):
    # 		squared_errors[i*N+j] = lasagne.objectives.squared_error(ans_out[i][0], ans_out[i][j])

    all_params = post_lstm_params + ques_lstm_params + ans_lstm_params + post_ques_ans_dense_params

    loss += args.rho * sum(T.sum(l**2) for l in all_params)

    updates = lasagne.updates.adam(loss,
                                   all_params,
                                   learning_rate=args.learning_rate)

    train_fn = theano.function([posts, post_masks, ques_list, ques_masks_list, ans_list, ans_masks_list, labels], \
            [loss] + pqa_preds, updates=updates)
    dev_fn = theano.function([posts, post_masks, ques_list, ques_masks_list, ans_list, ans_masks_list, labels], \
            [loss] + pqa_preds,)

    return train_fn, dev_fn
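
# Hedged sketch of the per-candidate loss above (standalone, hypothetical shapes):
# a binary cross-entropy between one candidate's predictions and the matching 0/1
# column of the label matrix.
import numpy as np
import theano
import theano.tensor as T
import lasagne

cand_pred = T.fvector('cand_pred')    # predictions for one candidate, per example
cand_label = T.ivector('cand_label')  # 0/1 labels for that candidate
cand_loss = T.sum(lasagne.objectives.binary_crossentropy(cand_pred, cand_label))
cand_loss_fn = theano.function([cand_pred, cand_label], cand_loss)
print(cand_loss_fn(np.array([0.9, 0.2], dtype='float32'),
                   np.array([1, 0], dtype='int32')))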
Example #4
 def make_node(self, x, x2, x3, x4, x5):
     # check that the theano version has support for __props__.
     # This next line looks like it has a typo,
     # but it's actually a way to detect the theano version
     # is sufficiently recent to support the use of __props__.
     assert hasattr(self, '_props'), "Your version of theano is too old to support __props__."
     x = tensor.as_tensor_variable(x)
     x2 = tensor.as_tensor_variable(x2)
     x3 = tensor.as_tensor_variable(x3)
     x4 = tensor.as_tensor_variable(x4)
     x5 = tensor.as_tensor_variable(x5)
     
     if prm.att_doc:
         if prm.compute_emb:
             td = tensor.itensor4().type()
         else:
             td = tensor.ftensor4().type()
         tm = tensor.ftensor3().type()
     else:
         if prm.compute_emb:
             td = tensor.itensor3().type()
         else:
             td = tensor.ftensor3().type()
         tm = tensor.fmatrix().type()
     return theano.Apply(self, [x,x2,x3,x4,x5], [td, tm, \
                                        tensor.fmatrix().type(), tensor.ivector().type()])
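
# Note on the .type() idiom above (illustration, not from the original Op):
# tensor.itensor4() builds a fresh 4-D int32 variable, .type is its TensorType,
# and calling that type yields another variable of the same kind, which Apply
# uses as an output placeholder.
import theano.tensor as tensor

out_type = tensor.itensor4().type   # TensorType: int32, 4 dimensions
out_var = out_type()                # a fresh symbolic variable of that type
print(out_var.ndim)                 # 4
print(out_var.dtype)                # int32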
def create_theano_function(word_embed, char_embed, values=None):
    char_x = T.itensor4('char_x')
    word_x = T.itensor3('word_x')
    word_mask = T.tensor3('word_mask')
    sent_mask = T.matrix('sent_mask')
    doc_linguistic_x = T.matrix('doc_linguistic')
    label_y = T.ivector('label_y')
    att_out, network_output, loss = fn.build_fn(word_x=word_x, char_x=char_x, word_mask=word_mask, sent_mask=sent_mask,
                                                label_y=label_y, word_embed=word_embed, char_embed=char_embed,
                                                args=args, doc_ling=doc_linguistic_x)
    if values is not None:
        lasagne.layers.set_all_param_values(network_output, values, trainable=True)

    params = lasagne.layers.get_all_params(network_output, trainable=True)
    if args.optimizer == 'sgd':
        updates = lasagne.updates.sgd(loss, params, args.learning_rate)
    elif args.optimizer == 'momentum':
        updates = lasagne.updates.momentum(loss, params, args.learning_rate)
    train_fn = theano.function([word_x, char_x, word_mask, sent_mask, doc_linguistic_x, label_y],
                               loss, updates=updates)

    prediction = lasagne.layers.get_output(network_output, deterministic=True)
    eval_fn = theano.function([word_x, char_x, word_mask, sent_mask, doc_linguistic_x],
                              prediction)
    fn_check_attention = theano.function([word_x, char_x, word_mask, sent_mask],
                                         att_out)
    return fn_check_attention, eval_fn, train_fn, params
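
# General Lasagne pattern used above (standalone sketch, not this model):
# get_output(..., deterministic=True) disables dropout and other stochastic
# layers, which is why the evaluation function is compiled from a separate
# deterministic output expression.
import theano
import theano.tensor as T
import lasagne

demo_in = T.matrix('demo_in')
l_in = lasagne.layers.InputLayer((None, 10), input_var=demo_in)
l_drop = lasagne.layers.dropout(l_in, p=0.5)
l_out = lasagne.layers.DenseLayer(l_drop, num_units=1,
                                  nonlinearity=lasagne.nonlinearities.sigmoid)
train_out = lasagne.layers.get_output(l_out)                     # dropout active
eval_out = lasagne.layers.get_output(l_out, deterministic=True)  # dropout disabled
eval_fn = theano.function([demo_in], eval_out)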
Example #6
    def make_node(self, x, x2, x3, x4, x5):
        # check that the theano version has support for __props__.
        # This next line looks like it has a typo,
        # but it's actually a way to detect the theano version
        # is sufficiently recent to support the use of __props__.
        assert hasattr(
            self, '_props'
        ), "Your version of theano is too old to support __props__."
        x = tensor.as_tensor_variable(x)
        x2 = tensor.as_tensor_variable(x2)
        x3 = tensor.as_tensor_variable(x3)
        x4 = tensor.as_tensor_variable(x4)
        x5 = tensor.as_tensor_variable(x5)

        if prm.att_doc:
            if prm.compute_emb:
                td = tensor.itensor4().type()
            else:
                td = tensor.ftensor4().type()
            tm = tensor.ftensor3().type()
        else:
            if prm.compute_emb:
                td = tensor.itensor3().type()
            else:
                td = tensor.ftensor3().type()
            tm = tensor.fmatrix().type()
        return theano.Apply(self, [x,x2,x3,x4,x5], [td, tm, \
                                           tensor.fmatrix().type(), tensor.ivector().type()])
Example #7
def ndim_itensor(ndim, name=None):
    if ndim == 2:
        return T.imatrix(name)
    elif ndim == 3:
        return T.itensor3(name)
    elif ndim == 4:
        return T.itensor4(name)
    return T.imatrix(name=name)
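
# Possible usage of the helper above (hypothetical variable names): pick an int32
# tensor variable of the requested rank; other ranks fall back to a matrix.
word_ids = ndim_itensor(3, 'word_ids')   # T.itensor3
char_ids = ndim_itensor(4, 'char_ids')   # T.itensor4
print(word_ids.ndim)                     # 3
print(char_ids.ndim)                     # 4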
Example #9
def build_image_only_network(d_word, d_hidden, lr, eps=1e-6):

    # input theano vars
    in_context_fc7 = T.tensor3(name='context_images')
    in_context_bb = T.tensor4(name='context_bb')
    in_bbmask = T.tensor3(name='bounding_box_mask')
    in_context = T.itensor4(name='context')
    in_cmask = T.tensor4(name='context_mask')
    in_answer_fc7 = T.matrix(name='answer_images')
    in_answer_bb = T.matrix(name='answer_bb')
    in_answers = T.itensor3(name='answers')
    in_amask = T.tensor3(name='answer_mask')
    in_labels = T.imatrix(name='labels')

    # define network
    l_context_fc7 = lasagne.layers.InputLayer(shape=(None, 3, 4096),
                                              input_var=in_context_fc7)
    l_answers = lasagne.layers.InputLayer(shape=(None, 3, max_words),
                                          input_var=in_answers)
    l_amask = lasagne.layers.InputLayer(shape=l_answers.shape,
                                        input_var=in_amask)

    # contexts and answers should share embeddings
    l_answer_emb = lasagne.layers.EmbeddingLayer(l_answers, len_voc, d_word)

    l_context_proj = lasagne.layers.DenseLayer(
        l_context_fc7,
        num_units=d_hidden,
        nonlinearity=lasagne.nonlinearities.rectify,
        num_leading_axes=2)
    l_context_final_reps = lasagne.layers.LSTMLayer(l_context_proj,
                                                    num_units=d_hidden,
                                                    only_return_final=True)
    l_ans_reps = SumAverageLayer([l_answer_emb, l_amask],
                                 compute_sum=True,
                                 num_dims=3)
    l_scores = InnerProductLayer([l_context_final_reps, l_ans_reps])

    preds = lasagne.layers.get_output(l_scores)
    loss = T.mean(lasagne.objectives.categorical_crossentropy(
        preds, in_labels))

    all_params = lasagne.layers.get_all_params(l_scores, trainable=True)
    updates = lasagne.updates.adam(loss, all_params, learning_rate=lr)
    train_fn = theano.function([
        in_context_fc7, in_context_bb, in_bbmask, in_context, in_cmask,
        in_answer_fc7, in_answer_bb, in_answers, in_amask, in_labels
    ],
                               loss,
                               updates=updates,
                               on_unused_input='warn')
    pred_fn = theano.function([
        in_context_fc7, in_context_bb, in_bbmask, in_context, in_cmask,
        in_answer_fc7, in_answer_bb, in_answers, in_amask
    ],
                              preds,
                              on_unused_input='warn')
    return train_fn, pred_fn, l_scores
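
# Hedged note on the loss above (standalone sketch): in_labels is a one-hot int
# matrix over the three candidate answers, and categorical_crossentropy also
# accepts such 2-D targets alongside predictions of the same shape.
import numpy as np
import theano
import theano.tensor as T
import lasagne

cand_probs = T.fmatrix('cand_probs')        # predicted distribution over 3 answers
onehot_labels = T.imatrix('onehot_labels')  # 1 at the correct answer
xent = T.mean(lasagne.objectives.categorical_crossentropy(cand_probs, onehot_labels))
xent_fn = theano.function([cand_probs, onehot_labels], xent)
print(xent_fn(np.array([[0.7, 0.2, 0.1]], dtype='float32'),
              np.array([[1, 0, 0]], dtype='int32')))  # ~ -log(0.7)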
Example #10
def train_model_res_V1(results_path, fine_tune=False, batch_size=5, base_lr=0.001, n_epochs=30):

    ftensor5 = T.TensorType('float32', (False,)*5)
    x = ftensor5()
    y = T.itensor4('y')

    network, params, l2_penalty = build_res_V1(x, batch_size)
    
    train_cost = []
    
    if fine_tune is True: # Fine tune the model if this flag is True
        with np.load(os.path.join(results_path,'params.npz')) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            set_all_param_values(network['output'], param_values[0])
            print 'initialization done!'

    prediction = get_output(network['output'])
    loss_layer = LogisticRegression(prediction)
    cost_output = loss_layer.negative_log_likelihood(y)

    lamda=0.0001
    cost = cost_output + lamda * l2_penalty
    updates = lasagne.updates.adadelta(cost, params)
    train = theano.function([x, y], [cost, cost_output], updates=updates)

    print 'function graph done!'

    itr = 0
    test_min = np.inf
    train_cost = []

    data_folder = '/DATA/PATH'
    file_name = results_path + "/log_loss.txt"
    fw = codecs.open(file_name, "w", "utf-8-sig")
    for train_x, train_y in load_train_negative(batch_size=batch_size, n_epochs=n_epochs, patchSize=[48,48,16]):
        print 'train_x shape: {}, positive percentage: {}'.format(train_x.shape, np.mean(train_y))
        n_train_batches = train_x.shape[0] / batch_size
        for minibatch_index in xrange(n_train_batches):
            
            train_x_itr = train_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size,:,:,:]
            train_y_itr = train_y[minibatch_index*batch_size:(minibatch_index+1)*batch_size,:,:,:]
            train_cost_itr, train_cost_itr_classify = train(train_x_itr, train_y_itr)

            train_cost.append([train_cost_itr,train_cost_itr_classify])

            print 'model: {}, itr: {}, train loss overall: {}, train loss classify: {}'.format('resV1', itr, train_cost_itr, train_cost_itr_classify)
            print >> fw, 'model: {}, itr: {}, train loss overall: {}, train loss classify: {}'.format('resV1', itr, train_cost_itr, train_cost_itr_classify)
            itr = itr + 1


        if itr % 200 == 0:
            np.savez(os.path.join(results_path, 'params_'+str(itr)+'.npz'), get_all_param_values(network['output']))
            print 'save model done ...'
    fw.close()
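
# Side note on the ftensor5 definition above (illustration): Theano has no
# built-in 5-D float32 constructor, so one is declared from TensorType directly.
import theano.tensor as T

ftensor5_demo = T.TensorType('float32', (False,) * 5)
vol = ftensor5_demo('vol')      # hypothetical (batch, channel, x, y, z) volume
print(vol.ndim)                 # 5
print(vol.dtype)                # float32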
Example #11
def attention():
#    q = T.fmatrix('q')
#    C = T.ftensor4('C')
    q = T.imatrix('q')
    C = T.itensor4('C')

    d = 2
    W1_c = theano.shared(np.random.randint(-3, 3, (d, d)))
#    W1_c = theano.shared(np.ones((d, d), dtype='int32'))
    W1_h = theano.shared(np.random.randint(-3, 3, (d, d)))
#    W1_h = theano.shared(np.ones((d, d), dtype='int32'))
    w    = theano.shared(np.ones((d,), dtype='float32'))
    W2_r = theano.shared(np.random.randint(-1, 1, (d, d)))
    W2_h = theano.shared(np.random.randint(-1, 1, (d, d)))
#    W2_r = theano.shared(np.ones((d, d), dtype='float32'))
#    W2_h = theano.shared(np.ones((d, d), dtype='float32'))

#    q_in = np.asarray([[1, 2], [3, 4], [-1, -2], [-3, -4]], dtype='int32')
    q_in = np.asarray([[1, 2]], dtype='int32')
#    q_in = np.asarray([[1, 2], [3, 4], [5, 6], [7, 8]], dtype='float32')
    C_in = np.ones((1, 3, 3, 2), dtype='int32')
#    C_in = np.ones((4, 3, 3, 2), dtype='int32')
#    C_in = np.asarray(np.random.randint(-2, 2, (1, 3, 3, 2)), dtype='int32')

    def forward(h_before, _C, eps=1e-8):
        # C: n_queries * n_cands * n_words * dim_h
        # h: n_queries * dim_h

#        M = T.tanh(T.dot(_C, W1_c) + T.dot(h_before, W1_h).dimshuffle(0, 'x', 'x', 1))
        M = T.dot(_C, W1_c) + T.dot(h_before, W1_h).dimshuffle(0, 'x', 'x', 1)  # 4 * 3 * 3 * 2
#        M = T.dot(h_before, W1_h).dimshuffle(0, 'x', 'x', 1)

        # batch * len * 1
        alpha = T.exp(T.dot(M, w))  # 4 * 3 * 3
#        alpha = T.nnet.softmax(T.dot(M, w))  # 4 * 3 * 3
        alpha /= T.sum(alpha, axis=2, keepdims=True) + eps
#        alpha = alpha.reshape((alpha.shape[0], alpha.shape[1], 1))
        alpha = alpha.reshape((alpha.shape[0], alpha.shape[1], alpha.shape[2], 1))

        # batch * d
#        r = T.sum(_C * alpha, axis=1)
        r_in = _C * alpha
        r = T.sum(r_in, axis=1)  # 4 * 3 * 2

        # batch * d
        h_after = T.dot(r, W2_r) + T.dot(h_before, W2_h).dimshuffle((0, 'x', 1))  # 4 * 3 * 2
#        return h_after
        return h_after, r, alpha, M

    y, a, b, m = forward(q, C)
    f = theano.function(inputs=[q, C], outputs=[y, a, b, m], on_unused_input='ignore')
    print f(q_in, C_in)
Example #12
def main(data_path, model_path, save_path):

    print("Preparing Data...")

    # Load data and dictionary
    X = []
    with io.open(data_path,'r',encoding='utf-8') as f:
        for line in f:
            X.append(line.rstrip('\n'))
    with open('%s/dict.pkl' % model_path, 'rb') as f:
        chardict = pkl.load(f)
    n_char = len(chardict.keys()) + 1

    # Prepare data for encoding
    batches = Batch(X)

    # Load model
    print("Loading model params...")
    params = load_params('%s/model.npz' % model_path)

    # Build encoder
    print("Building encoder...")

    # Theano variables
    tweet = T.itensor4()
    t_mask = T.ftensor3()

    # Embeddings
    emb_t = char2word2vec(tweet, t_mask, params, n_char)[0]

    # Theano function
    f_enc = theano.function([tweet, t_mask], emb_t)

    # Encode
    print("Encoding data...")
    print("Input data {} samples".format(len(X)))
    features = np.zeros((len(X),SDIM), dtype='float32')
    it = 0
    for x,i in batches:
        if it % 100 == 0:
            print("Minibatch {}".format(it))
        it += 1

        xp, x_mask = prepare_data_c2w2s(x, chardict)
        ff = f_enc(xp, x_mask)
        for ind, idx in enumerate(i):
            features[idx] = ff[ind]

    # Save
    with open(save_path, 'w') as o:
        np.save(o, features)
Example #13
	def __init__(self, name = "CIFAR10.pixelCNN", input_dim = 3, dims = 32, q_levels = 256, layers = 3,
				grad_clip = 1):
		# self.model = Model(name = model_name)
		self.name = name
		self.grad_clip = grad_clip
		self.is_train = T.scalar()
		self.X = T.tensor4('X') # shape: (batchsize, channels, height, width)
		self.X_r = T.itensor4('X_r')
		# print self.X.shape
		# return 
		self.X_transformed = self.X_r.dimshuffle(0,2,3,1)
		self.input_layer = WrapperLayer(self.X.dimshuffle(0,2,3,1)) # input reshaped to (batchsize, height, width,3)
		self.q_levels = q_levels
		self.pixel_CNN = pixelConv(
			self.input_layer, 
			input_dim, 
			dims,
			Q_LEVELS = q_levels,
			name = self.name + ".pxCNN",
			num_layers = layers,
			)
		print "done1"
		self.params = self.pixel_CNN.get_params()
		self.output_probab = Softmax(self.pixel_CNN).output()
		print "done2"
		self.cost = T.nnet.categorical_crossentropy(
			self.output_probab.reshape((-1,self.output_probab.shape[self.output_probab.ndim - 1])),
			self.X_r.flatten()
			).mean()
		self.output_image = sample_from_softmax(self.output_probab)
		print "done3"
		grads = T.grad(self.cost, wrt=self.params, disconnected_inputs='warn')
		self.grads = [T.clip(g, floatX(-grad_clip), floatX(grad_clip)) for g in grads]
		print "done5"
		# learning_rate = T.scalar('learning_rate')
		self.updates = lasagne.updates.adam(self.grads, self.pixel_CNN.get_params(), learning_rate = 1e-3)
		print "d6"
		self.train_fn = theano.function([self.X, self.X_r], self.cost, updates = self.updates)
		print "done4"
		self.valid_fn = theano.function([self.X, self.X_r], self.cost)
		print "go to hell"
		self.generate_routine = theano.function([self.X], self.output_image)

		self.errors = {'training' : [], 'validation' : []}
		print "yo"
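
# Standalone sketch of the dimshuffle used above: (0, 2, 3, 1) reorders a
# (batchsize, channels, height, width) tensor into (batchsize, height, width,
# channels), matching the comment on self.input_layer.
import numpy as np
import theano
import theano.tensor as T

nchw = T.ftensor4('nchw')
to_nhwc = theano.function([nchw], nchw.dimshuffle(0, 2, 3, 1))
print(to_nhwc(np.zeros((2, 3, 8, 8), dtype='float32')).shape)  # (2, 8, 8, 3)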
Example #14
def add_datasets_to_graph(list_of_datasets, list_of_names, graph, strict=True,
                          list_of_test_values=None):
    assert type(graph) is OrderedDict
    datasets_added = []
    for n, (dataset, name) in enumerate(safe_zip(list_of_datasets,
                                                 list_of_names)):
        if dataset.dtype != "int32":
            if len(dataset.shape) == 1:
                sym = tensor.vector()
            elif len(dataset.shape) == 2:
                sym = tensor.matrix()
            elif len(dataset.shape) == 3:
                sym = tensor.tensor3()
            elif len(dataset.shape) == 4:
                sym = tensor.tensor4()
            else:
                raise ValueError("dataset %s has unsupported shape" % name)
        elif dataset.dtype == "int32":
            if len(dataset.shape) == 1:
                sym = tensor.ivector()
            elif len(dataset.shape) == 2:
                sym = tensor.imatrix()
            elif len(dataset.shape) == 3:
                sym = tensor.itensor3()
            elif len(dataset.shape) == 4:
                sym = tensor.itensor4()
            else:
                raise ValueError("dataset %s has unsupported shape" % name)
        else:
            raise ValueError("dataset %s has unsupported dtype %s" % (
                name, dataset.dtype))
        if list_of_test_values is not None:
            sym.tag.test_value = list_of_test_values[n]
        tag_expression(sym, name, dataset.shape)
        datasets_added.append(sym)
    if DATASETS_ID not in graph.keys():
        graph[DATASETS_ID] = []
    graph[DATASETS_ID] += datasets_added
    if len(list_of_datasets) == 1:
        # Make it easier if you only added a single dataset
        datasets_added = datasets_added[0]
    return datasets_added
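
# Minimal sketch of the dtype/rank dispatch above (standalone, hypothetical data):
# int32 arrays map to i-prefixed symbolic tensors, other dtypes to the default
# float tensors.
import numpy as np
import theano.tensor as tensor

demo_data = np.zeros((4, 5, 6, 7), dtype='int32')
demo_sym = tensor.itensor4() if demo_data.dtype == np.int32 else tensor.tensor4()
print(demo_sym.dtype)  # int32
print(demo_sym.ndim)   # 4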
Example #15
def char_hierarchical_doc_fn(args, word_embed, char_embed, values=None):
    char_x = T.itensor4('char_x')
    word_x = T.itensor3('word_x')
    word_mask = T.tensor3('word_mask')
    sent_mask = T.matrix('sent_mask')
    doc_linguistic_x = T.matrix('doc_linguistic')
    label_y = T.ivector('label_y')

    char_input_layer = lasagne.layers.InputLayer(shape=(None, args.max_sent,
                                                        args.max_word,
                                                        args.max_char),
                                                 input_var=char_x)
    word_input_layer = lasagne.layers.InputLayer(shape=(None, args.max_sent,
                                                        args.max_word),
                                                 input_var=word_x)
    word_mask_layer = lasagne.layers.InputLayer(shape=(None, args.max_sent,
                                                       args.max_word),
                                                input_var=word_mask)
    word_mask_layer = lasagne.layers.reshape(word_mask_layer, (-1, [2]))
    sent_mask_layer = lasagne.layers.InputLayer(shape=(None, args.max_sent),
                                                input_var=sent_mask)
    doc_linguistic_layer = lasagne.layers.InputLayer(
        shape=(None, args.max_ling), input_var=doc_linguistic_x)

    char_cnn = networks.char_cnn(char_input_layer, args.num_filter,
                                 args.conv_window, char_embed, args)
    word_rnn = networks.word_rnn(word_input_layer, word_mask_layer, word_embed,
                                 args, char_cnn)
    if args.dropout_rate > 0:
        word_rnn = lasagne.layers.dropout(word_rnn, p=args.dropout_rate)

    if args.word_att == 'avg':
        word_output = networks.AveragePooling(word_rnn, mask=word_mask_layer)
    elif args.word_att == 'last':
        word_output = word_rnn
    elif args.word_att == 'dot':
        word_att = lasagne.layers.DenseLayer(
            word_rnn,
            num_units=2 * args.hidden_size,
            num_leading_axes=-1,
            nonlinearity=lasagne.nonlinearities.tanh)
        word_att = networks.Attention(word_att,
                                      num_units=2 * args.hidden_size,
                                      mask=word_mask_layer)
        word_output = networks.AttOutput([word_rnn, word_att])

    word_output = lasagne.layers.reshape(word_output, (-1, args.max_sent, [1]))
    sent_rnn = networks.sent_rnn(word_output, sent_mask_layer, args)
    if args.dropout_rate > 0:
        sent_rnn = lasagne.layers.dropout(sent_rnn, p=args.dropout_rate)
    sent_input = lasagne.layers.DenseLayer(
        sent_rnn,
        2 * args.hidden_size,
        num_leading_axes=-1,
        nonlinearity=lasagne.nonlinearities.tanh)

    sent_att = networks.Attention(sent_input,
                                  num_units=2 * args.hidden_size,
                                  mask=sent_mask_layer)

    att_out = lasagne.layers.get_output(sent_att, deterministic=True)
    fn_check_attention = theano.function(
        [char_x, word_x, word_mask, sent_mask], att_out)

    sent_output = networks.AttOutput([sent_rnn, sent_att])

    if args.doc_ling_nonlinear:
        doc_linguistic_layer = lasagne.layers.DenseLayer(
            doc_linguistic_layer,
            60,
            num_leading_axes=-1,
            nonlinearity=lasagne.nonlinearities.rectify)
    if args.dropout_rate > 0:
        doc_linguistic_layer = lasagne.layers.dropout(doc_linguistic_layer,
                                                      p=args.dropout_rate)

    sent_output = lasagne.layers.ConcatLayer(
        [sent_output, doc_linguistic_layer], axis=-1)
    network_output = lasagne.layers.DenseLayer(
        sent_output, num_units=1, nonlinearity=lasagne.nonlinearities.sigmoid)
    regularization = lasagne.regularization.regularize_layer_params(
        network_output, penalty=lasagne.regularization.l2)
    train_pred = lasagne.layers.get_output(network_output)
    loss = lasagne.objectives.binary_crossentropy(
        train_pred, label_y).mean() + regularization * 0.0001

    if values is not None:
        lasagne.layers.set_all_param_values(network_output,
                                            values,
                                            trainable=True)

    params = lasagne.layers.get_all_params(network_output, trainable=True)

    if args.optimizer == 'sgd':
        updates = lasagne.updates.sgd(loss, params, args.learning_rate)
    elif args.optimizer == 'momentum':
        updates = lasagne.updates.momentum(loss, params, args.learning_rate)

    train_fn = theano.function(
        [word_x, char_x, word_mask, sent_mask, doc_linguistic_x, label_y],
        loss,
        updates=updates)

    prediction = lasagne.layers.get_output(network_output, deterministic=True)
    eval_fn = theano.function(
        [word_x, char_x, word_mask, sent_mask, doc_linguistic_x], prediction)
    return fn_check_attention, eval_fn, train_fn, params
Example #16
    def config_theano(self):
        ##################################################################
        ########################### NOT USING NOW ########################
        ##################################################################
        # snapshot and change
        snapshot = T.itensor3('snapshot')
        change_label = T.fmatrix('change_label')

        ##################################################################
        ##################################################################
        ##################################################################
        # trade-off hyperparameters
        _lambda = 0.1
        _alpha = 0.1
        _avgLen = 20.

        # regularization and learning rate
        lr = T.scalar('lr')
        reg = T.scalar('reg')
        beta = T.scalar('beta')

        # semantics
        inf_trk_labels = T.fmatrix('inf_trk_labels')
        req_trk_labels = T.fmatrix('req_trk_labels')

        # DB matching degree
        db_degrees = T.fmatrix('db_degrees')

        # source and target utts
        source = T.imatrix('source')
        target = T.imatrix('target')
        source_len = T.ivector('source_len')
        target_len = T.ivector('target_len')
        utt_group = T.ivector('utt_group')

        # masked source and target utts
        masked_source = T.imatrix('masked_source')
        masked_target = T.imatrix('masked_target')
        masked_source_len = T.ivector('masked_source_len')
        masked_target_len = T.ivector('masked_target_len')

        # tracker features, either n-grams or delexicalised position
        srcfeat = T.itensor4('srcfeat')
        tarfeat = T.itensor4('tarfeat')

        # external samples
        success_rewards = T.fvector('success_reward')
        samples = T.ivector('samples')

        # for numerical stability
        epsln = 1e-10

        # dialog level recurrence
        def dialog_recur(source_t, target_t, source_len_t, target_len_t,
                         masked_source_t, masked_target_t, masked_source_len_t,
                         masked_target_len_t, utt_group_t, snapshot_t,
                         success_reward_t, sample_t, change_label_t,
                         db_degree_t, inf_label_t, req_label_t, source_feat_t,
                         target_feat_t, belief_tm1, masked_target_tm1,
                         masked_target_len_tm1, target_feat_tm1,
                         posterior_tm1):

            ##############################################################
            ##################### Intent encoder #########################
            ##############################################################
            # Intent encoder
            if self.enc == 'lstm':
                masked_intent_t = bidirectional_encode(self.fEncoder,
                                                       self.bEncoder,
                                                       masked_source_t,
                                                       masked_source_len_t)

            ##############################################################
            ########## Belief tracker, informable + requestable ##########
            ##############################################################
            # cost placeholder for accumulation
            print '\tloss function'
            loss_t = theano.shared(np.zeros((1),
                                            dtype=theano.config.floatX))[0]
            companion_loss_t = theano.shared(
                np.zeros((1), dtype=theano.config.floatX))[0]
            prior_loss_t = theano.shared(
                np.zeros((1), dtype=theano.config.floatX))[0]
            posterior_loss_t = theano.shared(
                np.zeros((1), dtype=theano.config.floatX))[0]
            base_loss_t = theano.shared(
                np.zeros((1), dtype=theano.config.floatX))[0]
            # other information to store
            dtmp = 1  #if self.vae_train=='sample' else self.dl
            reward_t = theano.shared(
                np.zeros((dtmp), dtype=theano.config.floatX))
            baseline_t = theano.shared(
                np.zeros((1), dtype=theano.config.floatX))[0]
            posterior_t = theano.shared(
                np.zeros((self.dl), dtype=theano.config.floatX))[0]

            # Informable slot belief tracker
            # belief vector
            belief_t = []

            if self.trk == 'rnn' and self.inf == True:
                for i in range(len(self.infotrackers)):
                    # slice the current belief tracker output
                    cur_belief_tm1 = belief_tm1[self.iseg[i]:self.iseg[i + 1]]
                    if self.trkenc == 'cnn':  # cnn, position features
                        ssrcpos_js = source_feat_t[
                            0, self.iseg[i]:self.iseg[i + 1], :]
                        vsrcpos_js = source_feat_t[
                            1, self.iseg[i]:self.iseg[i + 1], :]
                        starpos_jm1s = target_feat_tm1[
                            0, self.iseg[i]:self.iseg[i + 1], :]
                        vtarpos_jm1s = target_feat_tm1[
                            1, self.iseg[i]:self.iseg[i + 1], :]

                        # tracking
                        cur_belief_t = self.infotrackers[i].recur(
                            cur_belief_tm1, masked_source_t, masked_target_tm1,
                            masked_source_len_t, masked_target_len_tm1,
                            ssrcpos_js, vsrcpos_js, starpos_jm1s, vtarpos_jm1s)

                    # semi label
                    cur_label_t = inf_label_t[self.iseg[i]:self.iseg[i + 1]]
                    # include cost if training tracker
                    if self.learn_mode == 'all' or self.learn_mode == 'trk':
                        print '\t\tincluding informable  tracker loss ...'
                        loss_t += -T.sum(
                            cur_label_t * T.log10(cur_belief_t + epsln))

                    # accumulate belief vector
                    if self.bef == 'full':
                        belief_t.append(cur_label_t)
                    else:
                        # summary belief
                        tmp  = [T.sum(  cur_label_t[:-2],axis=0).dimshuffle('x'),\
                                        cur_label_t[-2].dimshuffle('x')]
                        tmp  = tmp + [cur_label_t[-1].dimshuffle('x')] if\
                                self.bef=='summary' else tmp
                        cur_sum_belief_t = T.concatenate(tmp, axis=0)
                        belief_t.append(cur_sum_belief_t)

            inf_belief_t = inf_label_t

            # Requestable slot belief tracker
            if self.trk == 'rnn' and self.req == True:
                for i in range(len(self.rseg) - 1):
                    # current feature index
                    bn = self.iseg[-1] + 2 * i
                    if self.trkenc == 'cnn':  # cnn, position features
                        ssrcpos_js = source_feat_t[0, bn, :]
                        vsrcpos_js = source_feat_t[1, bn, :]
                        starpos_jm1s = target_feat_tm1[0, bn, :]
                        vtarpos_jm1s = target_feat_tm1[1, bn, :]
                        # tracking
                        cur_belief_t = self.reqtrackers[i].recur(
                            masked_source_t, masked_target_tm1,
                            masked_source_len_t, masked_target_len_tm1,
                            ssrcpos_js, vsrcpos_js, starpos_jm1s, vtarpos_jm1s)

                    # semi label
                    cur_label_t = req_label_t[2 * i:2 * (i + 1)]
                    # include cost if training tracker
                    if self.learn_mode == 'all' or self.learn_mode == 'trk':
                        print '\t\tincluding requestable tracker loss ...'
                        loss_t += -T.sum(
                            cur_label_t * T.log10(cur_belief_t + epsln))
                    # accumulate belief vector
                    if self.bef == 'full':
                        belief_t.append(cur_label_t)
                    else:
                        tmp = cur_label_t if self.bef == 'summary' else cur_label_t[:1]
                        belief_t.append(tmp)

                # offer-change tracker
                minus1 = -T.ones((1), dtype='int32')
                cur_belief_t = self.changeTracker.recur(
                    masked_source_t, masked_target_tm1, masked_source_len_t,
                    masked_target_len_tm1, minus1, minus1, minus1, minus1)
                # cost function
                if self.learn_mode == 'trk' or self.learn_mode == 'all':
                    print '\t\tincluding OfferChange tracker loss ...'
                    loss_t += -T.sum(
                        change_label_t * T.log10(cur_belief_t + epsln))
                # accumulate belief vector
                if self.bef == 'full':
                    belief_t.append(change_label_t)
                else:
                    tmp = change_label_t[:1] if self.bef=='simplified' \
                            else change_label_t
                    belief_t.append(tmp)

            ##############################################################
            ######################## LSTM decoder ########################
            ##############################################################
            bef_t = T.concatenate(belief_t, axis=0)
            # LSTM decoder
            if self.dec == 'lstm' and self.learn_mode != 'trk':
                prob_t, snapCost_t, prior_t, posterior_t, z_t, base_t, debugX = \
                    self.decoder.decode(
                        masked_source_t, masked_source_len_t,
                        masked_target_t, masked_target_len_t,
                        masked_intent_t, belief_t, db_degree_t[-6:],
                        utt_group_t, snapshot_t, sample_t)
                debug_t = prior_t

                # decoder loss
                if self.ply != 'latent':  # deterministic policy
                    print '\t\tincluding decoder loss ...'
                    loss_t += -T.sum(T.log10(prob_t + epsln))
                else:  # variational policy
                    # disconnet gradient flow
                    P = G.disconnected_grad(prior_t)
                    Q = G.disconnected_grad(posterior_t)
                    Qtm1 = G.disconnected_grad(posterior_tm1)

                    # prior network loss
                    if self.learn_mode == 'rl':  # rl fine-tuning
                        print '\t\tincluding RL success reward for fine-tine policy ...'
                        prior_loss_t = -success_reward_t * T.log10(prior_t +
                                                                   epsln)[z_t]
                    else:  # neural variational inference
                        # encoder loss, minimising KL(Q|P) and self-supervised action
                        print '\t\tinclding KL(Q|Pi) to train policy network Pi ...'
                        prior_loss_t = -T.switch(
                            T.lt(utt_group_t, self.dl - 1),
                            T.log10(prior_t + epsln)[z_t],
                            _alpha * T.sum(Q * (T.log10(prior_t + epsln) -
                                                T.log10(Q + epsln))))

                        # decoder loss for current sample/ground truth
                        print '\t\tincluding decoder loss ...'
                        loss_t = -T.sum(T.log10(prob_t + epsln))

                        # define reward function for Q
                        print '\t\tincluding reinforce loss to train inference network Q ...'
                        r_t = G.disconnected_grad(
                            _avgLen * T.mean(T.log10(prob_t + epsln))
                            +  # decoder loglikelihood
                            -_lambda * T.sum(Q *
                                             (T.log10(Q + epsln) -
                                              T.log10(P + epsln))) +  # KL(P|Q)
                            -_lambda *
                            T.sum(Qtm1 * (T.log10(Qtm1 + epsln) -
                                          T.log10(Q + epsln)))  # KL(Qt|Qtm1)
                        )

                        # actual reward after deducting baseline
                        reward_t = G.disconnected_grad(r_t - base_t)
                        baseline_t = base_t
                        #debug_t = r_t-base_t

                        # Q network loss: reinforce objective
                        posterior_loss_t = -T.switch(
                            T.lt(utt_group_t, self.dl - 1),
                            T.log10(posterior_t + epsln)[z_t],  # self-sup
                            _alpha * reward_t *
                            T.log10(posterior_t + epsln)[z_t]  # reinforce
                        )

                        # baseline loss
                        print '\t\tincluding baseline loss ...'
                        base_loss_t = T.switch(T.lt(utt_group_t, self.dl - 1),
                                               0., (r_t - baseline_t)**2)

                # snapshot objective
                if self.use_snap:
                    print '\t\tincluding decoder snapshot loss ...'
                    companion_loss_t += -T.sum(
                        snapCost_t[:masked_target_len_t - 1])

            # dummy, TODO: change it
            if self.ply != 'latent':
                posterior_t = posterior_tm1
                z_t = posterior_tm1
                reward_t = posterior_tm1
                prior_t = posterior_tm1
                debug_t = posterior_tm1

            # take the semi label for next input - like LM
            return inf_belief_t, masked_target_t, masked_target_len_t, \
                    target_feat_t, posterior_t, z_t,\
                    loss_t, companion_loss_t, prior_loss_t, posterior_loss_t, base_loss_t,\
                    reward_t, baseline_t, debug_t

        # initial belief state
        belief_0 = T.zeros((self.iseg[-1]), dtype=theano.config.floatX)
        belief_0 = T.set_subtensor(belief_0[[x - 1 for x in self.iseg[1:]]],
                                   1.0)
        # initial target jm1
        masked_target_tm1 = T.ones_like(masked_target[0])
        masked_target_len_tm1 = T.ones_like(masked_target_len[0])
        # initial target jm1 position features
        tarfeat_tm1 = -T.ones_like(tarfeat[0])
        # initial posterior
        p0 = np.ones((self.dl)) / float(self.dl)
        posterior_0 = theano.shared(p0.astype(theano.config.floatX))

        # Dialogue level forward propagation
        [_,_,_,_,posterior,sample,loss,companion_loss,prior_loss,posterior_loss,base_loss,
                reward,baseline,debug], updates= \
                theano.scan( fn=dialog_recur,
                sequences=[source,target,source_len,target_len,
                        masked_source,masked_target,
                        masked_source_len,masked_target_len,
                        utt_group, snapshot, success_rewards, samples,
                        change_label, db_degrees,
                        inf_trk_labels, req_trk_labels,
                        srcfeat, tarfeat],\
                outputs_info=[belief_0,masked_target_tm1,masked_target_len_tm1,tarfeat_tm1,
                        posterior_0,None,None,None,None,None,None,None,None,None])

        # Theano validation function
        self.valid = theano.function(
                inputs=[source, target, source_len, target_len,
                        masked_source, masked_target,
                        masked_source_len, masked_target_len,
                        utt_group, snapshot, success_rewards, samples,
                        change_label, inf_trk_labels, req_trk_labels,
                        db_degrees, srcfeat, tarfeat],\
                outputs=[loss,prior_loss,posterior],\
                updates=updates,\
                on_unused_input='warn')

        # RL validation function
        self.validRL = theano.function(
                inputs=[source, target, source_len, target_len,
                        masked_source, masked_target,
                        masked_source_len, masked_target_len,
                        utt_group, snapshot, success_rewards, samples,
                        change_label, inf_trk_labels, req_trk_labels,
                        db_degrees, srcfeat, tarfeat],\
                outputs=[prior_loss, debug],\
                updates=updates,\
                on_unused_input='warn')

        # for deterministic case, just loglikelihood
        if self.ply == 'attention' or self.ply == 'normal':

            # flatten parameters
            self.flatten_params = []
            for k in ['inftrk', 'reqtrk', 'dec', 'ply', 'enc']:
                ws = self.params[k]
                if self.learn_mode == 'all':
                    # train whole model
                    print '\tgradient w.r.t %s' % (k)
                    self.flatten_params += ws
                elif self.learn_mode == 'trk' and 'trk' in k:
                    # pretrain tracker
                    print '\tgradient w.r.t %s' % (k)
                    self.flatten_params += ws
                elif self.learn_mode == 'encdec':
                    # train * apart from tracker
                    if 'trk' in k: continue  # tracker
                    else:
                        print '\tgradient w.r.t %s' % (k)
                        self.flatten_params += ws

            # loss function
            self.cost = T.sum(loss) + 0.1 * T.sum(companion_loss)
            # gradients and updates
            updates = adam(self.cost, self.flatten_params, lr=lr, reg=reg)
            # default value for function output
            prior_loss = posterior_loss = baseline_loss = self.cost

        # for NVI
        elif self.ply == 'latent':

            # flatten parameters
            self.flatten_params = []
            for k in ['ply', 'enc', 'dec']:
                # train encoder decoder
                if self.learn_mode == 'encdec':
                    print '\tgradient w.r.t %s' % (k)
                    self.flatten_params += self.params[k]
                # fine-tune policy network by RL
                elif self.learn_mode == 'rl':
                    if k == 'ply':
                        print '\tgradient w.r.t %s prior network' % (k)
                        self.flatten_params += self.params[k][7:10]

            # loss function
            if self.learn_mode == 'rl':
                self.cost = T.sum(prior_loss)
            elif self.learn_mode == 'encdec':
                self.cost = T.sum(loss) + 0.1*T.sum(companion_loss) +\
                            T.sum(prior_loss) + T.sum(posterior_loss)
            # gradients and updates
            for p, q in adam(self.cost, self.flatten_params, lr=lr, reg=reg):
                updates.update({p: q})

            if self.learn_mode == 'encdec':
                # baseline objective
                for p, q in adam(T.sum(base_loss),
                                 self.policy.baseline.params,
                                 lr=lr * 10.,
                                 reg=0.):
                    updates.update({p: q})
                self.flatten_params.extend(self.policy.baseline.params)

        # theano training function
        self.train = theano.function(
                inputs= [source, target, source_len, target_len,
                        masked_source, masked_target,
                        masked_source_len, masked_target_len,
                        utt_group, snapshot, success_rewards, samples,
                        change_label, inf_trk_labels, req_trk_labels,
                        db_degrees, srcfeat, tarfeat, lr, reg],\
                outputs=[loss,prior_loss,posterior_loss,base_loss,
                        posterior,sample,reward,baseline,debug],\
                updates=updates,\
                on_unused_input='warn')

        # RL training function
        self.trainRL = theano.function(
                inputs= [source, target, source_len, target_len,
                        masked_source, masked_target,
                        masked_source_len, masked_target_len,
                        utt_group, snapshot, success_rewards, samples,
                        change_label, inf_trk_labels, req_trk_labels,
                        db_degrees, srcfeat, tarfeat, lr, reg],\
                outputs=[prior_loss,sample, debug],\
                updates=updates,\
                on_unused_input='warn')
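
# Minimal theano.scan sketch (illustration only, unrelated to the dialogue model):
# outputs_info supplies the initial value of each recurrent output, the role
# played by belief_0, masked_target_tm1 and posterior_0 above.
import numpy as np
import theano
import theano.tensor as T

seq = T.vector('seq')
running_sum, scan_updates = theano.scan(
    fn=lambda x_t, acc_tm1: acc_tm1 + x_t,
    sequences=seq,
    outputs_info=T.as_tensor_variable(np.asarray(0.0, dtype=theano.config.floatX)))
cumsum_fn = theano.function([seq], running_sum, updates=scan_updates)
print(cumsum_fn(np.arange(5).astype(theano.config.floatX)))  # [ 0.  1.  3.  6. 10.]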
Example #17
def build_text_only_network(d_word, d_hidden, lr, eps=1e-6):

    # input theano vars
    in_context_fc7 = T.tensor3(
        name='context_images'
    )  # bsz x 3 x 4096 (because 3 context panels, fc7 features each of dim 4096)
    in_context_bb = T.tensor4(
        name='context_bb'
    )  # bsz x 3 x 3 x 4 (because 3 context panels, each contains a max of 3 speech boxes, each box described by 4 coordinates)
    in_bbmask = T.tensor3(
        name='bounding_box_mask'
    )  # bsz x 3 x 3 (because 3 context panels, each contains a max of 3 speech boxes, the mask has an entry of 1 in the ith position if the panel contains the ith speech box)
    in_context = T.itensor4(
        name='context'
    )  # bsz x 3 x 3 x 30 (because 3 context panels, each contains a max of 3 speech boxes, each box contains speech with a max of 30 words)
    in_cmask = T.tensor4(
        name='context_mask'
    )  # bsz x 3 x 3 x 30 (because 3 context panels, each contains a max of 3 speech boxes, each box contains speech with a max of 30 words, where the mask has an entry of 1 in the ith position if the ith word exists in the speech)
    in_answer_fc7 = T.matrix(
        name='answer_images'
    )  # bsz x 4096 (fc7 feature for the panel for which we want to guess the speech)
    in_answer_bb = T.matrix(
        name='answer_bb'
    )  # bsz x 4 (the answer panel has one speech box described by 4 coordinates)
    in_answers = T.itensor3(
        name='answers'
    )  # bsz x 3 x 30 (3 candidate answers each of max 30 words )
    in_amask = T.tensor3(
        name='answer_mask'
    )  # bsz x 3 x 30 (mask for 3 candidates answers, ie, an entry of 1 in the ith position if the ith word exists in the candidate)
    in_labels = T.imatrix(
        name='labels'
    )  # bsz x 3 (out of 3 candidate answers, the correct answer will have a 1)

    # define network
    l_context_fc7 = lasagne.layers.InputLayer(shape=(None, 3, 4096),
                                              input_var=in_context_fc7)
    l_answer_fc7 = lasagne.layers.InputLayer(shape=(None, 4096),
                                             input_var=in_answer_fc7)

    l_context = lasagne.layers.InputLayer(shape=(None, max_panels, max_boxes,
                                                 max_words),
                                          input_var=in_context)
    l_answers = lasagne.layers.InputLayer(shape=(None, 3, max_words),
                                          input_var=in_answers)

    l_cmask = lasagne.layers.InputLayer(shape=l_context.shape,
                                        input_var=in_cmask)
    l_amask = lasagne.layers.InputLayer(shape=l_answers.shape,
                                        input_var=in_amask)
    l_bbmask = lasagne.layers.InputLayer(shape=(None, 3, max_boxes),
                                         input_var=in_bbmask)

    # contexts and answers should share embeddings
    l_context_emb = lasagne.layers.EmbeddingLayer(l_context,
                                                  len_voc,
                                                  d_word,
                                                  name='word_emb')
    l_answer_emb = lasagne.layers.EmbeddingLayer(l_answers,
                                                 len_voc,
                                                 d_word,
                                                 W=l_context_emb.W)

    l_context_box_reps = SumAverageLayer([l_context_emb, l_cmask],
                                         compute_sum=True,
                                         num_dims=4)
    l_box_reshape = lasagne.layers.ReshapeLayer(l_context_box_reps,
                                                (-1, max_boxes, d_word))
    l_bbmask_reshape = lasagne.layers.ReshapeLayer(l_bbmask, (-1, max_boxes))
    l_box_lstm = lasagne.layers.LSTMLayer(l_box_reshape,
                                          num_units=d_word,
                                          mask_input=l_bbmask_reshape,
                                          only_return_final=True)
    l_context_panel_reps = lasagne.layers.ReshapeLayer(l_box_lstm,
                                                       (-1, 3, d_word))
    l_context_final_reps = lasagne.layers.LSTMLayer(l_context_panel_reps,
                                                    num_units=d_word,
                                                    only_return_final=True)

    l_ans_reps = SumAverageLayer([l_answer_emb, l_amask],
                                 compute_sum=True,
                                 num_dims=3)
    l_scores = InnerProductLayer([l_context_final_reps, l_ans_reps])

    preds = lasagne.layers.get_output(l_scores)
    loss = T.mean(lasagne.objectives.categorical_crossentropy(
        preds, in_labels))

    all_params = lasagne.layers.get_all_params(l_scores, trainable=True)
    updates = lasagne.updates.adam(loss, all_params, learning_rate=lr)
    train_fn = theano.function([
        in_context_fc7, in_context_bb, in_bbmask, in_context, in_cmask,
        in_answer_fc7, in_answer_bb, in_answers, in_amask, in_labels
    ],
                               loss,
                               updates=updates,
                               on_unused_input='warn')
    pred_fn = theano.function([
        in_context_fc7, in_context_bb, in_bbmask, in_context, in_cmask,
        in_answer_fc7, in_answer_bb, in_answers, in_amask
    ],
                              preds,
                              on_unused_input='warn')
    return train_fn, pred_fn, l_scores
Example #18
                              inputs=output,
                              mask_type=('b', 1)))

    output = lib.ops.conv2d.Conv2D('Dec2.Out',
                                   input_dim=DIM_PIX_2,
                                   output_dim=2 * LATENT_DIM_1,
                                   filter_size=1,
                                   inputs=output,
                                   mask_type=('b', 1),
                                   he_init=False)

    return output


total_iters = T.iscalar('total_iters')
images = T.itensor4('images')  # shape: (batch size, n channels, height, width)

alpha = T.minimum(
    1,
    T.cast(total_iters, theano.config.floatX) / lib.floatX(ALPHA_ITERS))


def split(mu_and_logsig):
    mu, logsig = mu_and_logsig[:, ::2], mu_and_logsig[:, 1::2]
    logsig = T.log(T.nnet.softplus(logsig))
    return mu, logsig
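
# Quick numeric check of the interleaved split above (standalone): even columns
# are mu, odd columns are logsig.
import numpy as np

packed = np.arange(8).reshape(2, 4)
mu_demo, logsig_demo = packed[:, ::2], packed[:, 1::2]
print(mu_demo)      # columns 0 and 2
print(logsig_demo)  # columns 1 and 3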


def clamp_logsig(logsig):
    beta = T.minimum(
        1,
Example #19
def main(train_path,val_path,save_path,num_epochs=NUM_EPOCHS):
    # save settings
    shutil.copyfile('settings.py','%s/settings.txt'%save_path)

    print("Preparing Data...")

    # Training data
    if not RELOAD_DATA:
        print("Creating Pairs...")
        trainX = batched_tweets.create_pairs(train_path)
        valX = batched_tweets.create_pairs(val_path)
        print("Number of training pairs = {}".format(len(trainX[0])))
        print("Number of validation pairs = {}".format(len(valX[0])))
        with open('%s/train_pairs.pkl'%(save_path),'w') as f:
            pkl.dump(trainX, f)
        with open('%s/val_pairs.pkl'%(save_path),'w') as f:
            pkl.dump(valX, f)
    else:
        print("Loading Pairs...")
        with open(train_path,'r') as f:
            trainX = pkl.load(f)
        with open(val_path,'r') as f:
            valX = pkl.load(f)

    if not RELOAD_MODEL:
        # Build dictionary
        chardict, charcount = batched_tweets.build_dictionary(trainX[0] + trainX[1])
        n_char = len(chardict.keys()) + 1
        batched_tweets.save_dictionary(chardict,charcount,'%s/dict.pkl' % save_path)

        # params
        params = init_params_c2w2s(n_chars=n_char)

    else:
        print("Loading model params...")
        params = load_params_shared('%s/model.npz' % save_path)

        print("Loading dictionary...")
        with open('%s/dict.pkl' % save_path, 'rb') as f:
            chardict = pkl.load(f)
        n_char = len(chardict.keys()) + 1

    train_iter = batched_tweets.BatchedTweets(trainX, batch_size=N_BATCH, maxlen=MAX_LENGTH)
    val_iter = batched_tweets.BatchedTweets(valX, batch_size=512, maxlen=MAX_LENGTH)

    print("Building network...")

    # Tweet variables
    tweet = T.itensor4()
    ptweet = T.itensor4()
    ntweet = T.itensor4()

    # masks
    t_mask = T.ftensor3()
    tp_mask = T.ftensor3()
    tn_mask = T.ftensor3()

    # Embeddings
    emb_t, c2w, w2s = char2word2vec(tweet, t_mask, params, n_char)
    emb_tp, c2w, w2s = char2word2vec(ptweet, tp_mask, params, n_char)
    emb_tn, c2w, w2s = char2word2vec(ntweet, tn_mask, params, n_char)

    # batch loss
    D1 = 1 - T.batched_dot(emb_t, emb_tp)/(tnorm(emb_t)*tnorm(emb_tp))
    D2 = 1 - T.batched_dot(emb_t, emb_tn)/(tnorm(emb_t)*tnorm(emb_tn))
    gap = D1-D2+M
    loss = gap*(gap>0)
    cost = T.mean(loss) + REGULARIZATION*lasagne.regularization.regularize_network_params(c2w, lasagne.regularization.l2) + REGULARIZATION*lasagne.regularization.regularize_network_params(w2s, lasagne.regularization.l2)
    cost_only = T.mean(loss)
    reg_only = REGULARIZATION*lasagne.regularization.regularize_network_params(c2w, lasagne.regularization.l2) + REGULARIZATION*lasagne.regularization.regularize_network_params(w2s, lasagne.regularization.l2)

    # params and updates
    print("Computing updates...")
    lr = LEARNING_RATE
    mu = MOMENTUM
    updates = lasagne.updates.nesterov_momentum(cost, lasagne.layers.get_all_params(w2s), lr, momentum=mu)

    # Theano function
    print("Compiling theano functions...")
    inps = [tweet,t_mask,ptweet,tp_mask,ntweet,tn_mask]
    #dist = theano.function(inps,[D1,D2])
    #l = theano.function(inps,loss)
    cost_val = theano.function(inps,[cost_only, emb_t, emb_tp, emb_tn])
    train = theano.function(inps,cost,updates=updates)
    reg_val = theano.function([],reg_only)

    # Training
    print("Training...")
    uidx = 0
    try:
        for epoch in range(num_epochs):
            n_samples = 0
            train_cost = 0.
            print("Epoch {}".format(epoch))

            if USE_SCHEDULE:
                # schedule
                if epoch > 0 and epoch % 5 == 0:
                    print("Updating Schedule...")
                    lr = max(1e-5,lr/2)
                    mu = mu - 0.05
                    updates = lasagne.updates.nesterov_momentum(cost, lasagne.layers.get_all_params(w2s), lr, momentum=mu)
                    train = theano.function(inps,cost,updates=updates)

            ud_start = time.time()
            for x,y,z in train_iter:
                if not x:
                    print("Minibatch with no valid triples")
                    continue

                n_samples +=len(x)
                uidx += 1
                if DEBUG and uidx > 3:
                    sys.exit()

                if DEBUG:
                    print("Tweets = {}".format(x[:5]))

                x, x_m, y, y_m, z, z_m = batched_tweets.prepare_data_c2w2s(x, y, z, chardict, maxwordlen=MAX_WORD_LENGTH, maxseqlen=MAX_SEQ_LENGTH, n_chars=n_char)

                if x is None:
                    print("Minibatch with zero samples under maxlength.")
                    uidx -= 1
                    continue

                if DEBUG:
                    print("Params before update...")
                    print_params(params)
                    display_actv(x,x_m,y,y_m,z,z_m,inps,w2s,'before')
                    cb, embb, embb_p, embb_n = cost_val(x,x_m,y,y_m,z,z_m)

                curr_cost = train(x,x_m,y,y_m,z,z_m)
                train_cost += curr_cost*len(x)

                if DEBUG:
                    print("Params after update...")
                    print_params(params)
                    display_actv(x,x_m,y,y_m,z,z_m,inps,w2s,'after')
                    ca, emba, emba_p, emba_n = cost_val(x,x_m,y,y_m,z,z_m)
                    print("Embeddings before = {}".format(embb[:5]))
                    print("Embeddings after = {}".format(emba[:5]))
                    print("Cost before update = {} \nCost after update = {}".format(cb, ca))

                if np.isnan(curr_cost) or np.isinf(curr_cost):
                    print("Nan detected.")
                    return

                ud = time.time() - ud_start
                if np.mod(uidx, DISPF) == 0:
                    print("Epoch {} Update {} Cost {} Time {} Samples {}".format(epoch,uidx,curr_cost,ud,len(x)))

                if np.mod(uidx,SAVEF) == 0:
                    print("Saving...")
                    saveparams = OrderedDict()
                    for kk,vv in params.iteritems():
                        saveparams[kk] = vv.get_value()
                    np.savez('%s/model.npz' % save_path,**saveparams)
                    print("Done.")

            print("Computing Validation Cost...")
            validation_cost = 0.
            n_val_samples = 0
            for x,y,z in val_iter:
                if not x:
                    print("Validation: Minibatch with no valid triples")
                    continue

                n_val_samples += len(x)
                x, x_m, y, y_m, z, z_m = batched_tweets.prepare_data_c2w2s(x, y, z, chardict, maxwordlen=MAX_WORD_LENGTH, maxseqlen=MAX_SEQ_LENGTH, n_chars=n_char)

                if x is None:
                    print("Validation: Minibatch with zero samples under maxlength")
                    continue

                curr_cost, _, _, _ = cost_val(x,x_m,y,y_m,z,z_m)
                validation_cost += curr_cost*len(x)

            regularization_cost = reg_val()
            print("Epoch {} Training Cost {} Validation Cost {} Regularization Cost {}".format(epoch, train_cost/n_samples, validation_cost/n_val_samples, regularization_cost))
            print("Seen {} samples.".format(n_samples))

            for kk,vv in params.iteritems():
                print("Param {} Epoch {} Max {} Min {}".format(kk, epoch, np.max(vv.get_value()), np.min(vv.get_value())))

            print("Saving...")
            saveparams = OrderedDict()
            for kk,vv in params.iteritems():
                saveparams[kk] = vv.get_value()
                np.savez('%s/model_%d.npz' % (save_path,epoch),**saveparams)
            print("Done.")
            
            if False:
                # store embeddings and data
                features = np.zeros((len(train_iter.data[0]),3*WDIM))
                distances = np.zeros((len(train_iter.data[0]),2))
                for idx, triple in enumerate(zip(train_iter.data[0],train_iter.data[1],train_iter.data[2])):
                    x, x_m, y, y_m, z, z_m = batched_tweets.prepare_data([triple[0]], [triple[1]], [triple[2]], chardict, maxlen=MAX_LENGTH, n_chars=n_char)
                    if x is None:
                        continue
                    emb1, emb2, emb3 = t2v(x,x_m,y,y_m,z,z_m)
                    emb1 = np.reshape(emb1, (WDIM))
                    emb2 = np.reshape(emb2, (WDIM))
                    emb3 = np.reshape(emb3, (WDIM))
                    features[idx,:] = np.concatenate((emb1,emb2,emb3),axis=0)
                    distances[idx,0] = 1-np.dot(emb1,emb2)/(np.linalg.norm(emb1)*np.linalg.norm(emb2))
                    distances[idx,1] = 1-np.dot(emb1,emb3)/(np.linalg.norm(emb1)*np.linalg.norm(emb3))
                with open('debug/feat_%d.npy'%epoch,'w') as df:
                    np.save(df,features)
                with open('debug/dist_%d.npy'%epoch,'w') as ds:
                    np.save(ds,distances)
        if False:
            with open('debug/data.txt','w') as dd:
                for triple in zip(train_iter.data[0],train_iter.data[1],train_iter.data[2]):
                    dd.write('%s\t%s\t%s\n' % (triple[0],triple[1],triple[2]))

    except KeyboardInterrupt:
        pass
Example #20
    def __init__(self, args):
        '''
        nh :: dimension of the hidden layer
        nc :: number of classes
        ne :: number of word embeddings in the vocabulary
        de :: dimension of the word embeddings
        cs :: word window context size
        '''
        self.container = {}
        
        self.args = args
        self.args['rng'] = numpy.random.RandomState(3435)
        self.args['dropoutTrigger'] = args['dropoutTrigger'] if args['dropoutTrigger'] > 0. else 0.
        self.args['dropoutArg'] = args['dropoutArg'] if args['dropoutArg'] > 0. else 0.
        
        # parameters of the model
        
        self.container['params'], self.container['names'] = [], []
        
        self.container['embDict'] = OrderedDict()
        self.container['vars'] = OrderedDict()
        self.container['dimIn'] = 0
        

        print '******************FEATURES******************'
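        # Feature modes: 0 = index feature looked up in an (optionally trainable) embedding
        # table, fed as an imatrix; 1 = pre-computed real-valued vectors, fed as a tensor3.
        # Negative values appear to disable the feature.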
        for ed in self.args['features']:
            if self.args['features'][ed] == 0:
                self.container['embDict'][ed] = theano.shared(self.args['embs'][ed].astype(theano.config.floatX))
                
                if self.args['updateEmbs']:
                    print '@@@@@@@ Will update embedding table: ', ed
                    self.container['params'] += [self.container['embDict'][ed]]
                    self.container['names'] += [ed]

            if self.args['features'][ed] == 0:
                self.container['vars'][ed] = T.imatrix()
                dimAdding = self.args['embs'][ed].shape[1]
                self.container['dimIn'] += dimAdding         
            elif self.args['features'][ed] == 1:
                self.container['vars'][ed] = T.tensor3()
                dimAdding = self.args['features_dim'][ed]
                self.container['dimIn'] += dimAdding

            if self.args['features'][ed] >= 0:
                print 'representation - ', ed, ' : ', dimAdding
                                
        print 'REPRESENTATION DIMENSION = ', self.container['dimIn']
        
        if self.args['distanceFet'] == 0:
            self.container['embDict']['dist1'] = theano.shared(self.args['embs']['dist1'].astype(theano.config.floatX))
            self.container['embDict']['dist2'] = theano.shared(self.args['embs']['dist2'].astype(theano.config.floatX))
            self.container['embDict']['dist3'] = theano.shared(self.args['embs']['dist3'].astype(theano.config.floatX))
            
            if self.args['updateEmbs']:
                print '@@@@@@@ Will update distance embedding tables'
                self.container['params'] += [self.container['embDict']['dist1'], self.container['embDict']['dist2'], self.container['embDict']['dist3']]
                self.container['names'] += ['dist1', 'dist2', 'dist3']
        
        if self.args['triggerGlob'] == 0:
            self.container['embDict']['trigger'] = theano.shared(self.args['embs']['trigger'].astype(theano.config.floatX))
            
            if self.args['updateEmbs']:
                print '@@@@@@@ Will update trigger embedding table'
                self.container['params'] += [ self.container['embDict']['trigger'] ]
                self.container['names'] += ['trigger']
        
        #self.container['sentLength'] = T.ivector('sentLength')
        
        self.container['triggerAnn'] = T.imatrix('triggerAnn')
        self.container['triggerMaskTrain'] = T.matrix('triggerMaskTrain')
        self.container['triggerMaskTest'] = T.imatrix('triggerMaskTest')
        self.container['triggerMaskTrainArg'] = T.matrix('triggerMaskTrainArg')
        self.container['triggerMaskTestArg'] = T.imatrix('triggerMaskTestArg')
        
        self.container['entities'] = T.imatrix('entities')
        
        self.container['argumentEntityIdAnn'] = T.itensor3('argumentEntityIdAnn')
        self.container['argumentPosAnn'] = T.itensor3('argumentPosAnn')
        self.container['argumentLabelAnn'] = T.itensor3('argumentLabelAnn')
        self.container['argumentMaskTrain'] = T.tensor3('argumentMaskTrain')
        
        self.container['possibleEnityIdByTrigger'] = T.itensor3('possibleEnityIdByTrigger')
        self.container['possibleEnityPosByTrigger'] = T.itensor3('possibleEnityPosByTrigger')
        self.container['argumentMaskTest'] = T.itensor3('argumentMaskTest')
        
        self.container['relDistBinary'] = T.tensor4('relDistBinary') #dimshuffle(1,0,2,3) first
        self.container['relDistIdxs'] = T.itensor3('relDistIdxs') #dimshuffle(1,0,2) first
        
        self.container['NodeFets'] = T.itensor3('NodeFets')
        self.container['EdgeFets'] = T.itensor4('EdgeFets')
        
        #self.container['numEntities'] = T.iscalar('numEntities')
        self.container['lr'] = T.scalar('lr')
        self.container['zeroVector'] = T.vector('zeroVector')
        
        self.glob = {}
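        # Shared buffers holding batch-level "global" trigger/argument feature memories;
        # self.globZero below holds matching zero arrays (presumably for resetting them
        # between batches) and self.globVar holds their symbolic counterparts.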
        self.glob['batch'] = self.args['batch']
        self.glob['maxSentLength'] = self.args['maxSentLength']
        self.glob['numTrigger'] = self.args['numTrigger']
        self.glob['numArg'] = self.args['numArg']
        self.glob['maxNumEntities'] = self.args['maxNumEntities']
        self.glob['eachTrigger'] = theano.shared(numpy.zeros([self.glob['batch'], self.glob['maxSentLength'], self.glob['numTrigger']]).astype(theano.config.floatX))
        self.glob['eachArg'] = theano.shared(numpy.zeros([self.glob['batch'], self.glob['maxSentLength'], self.glob['numArg']]).astype(theano.config.floatX))
        self.glob['eachTriggerId'] = theano.shared(numpy.zeros([self.glob['batch'], self.glob['maxSentLength']]).astype('int32'))
        self.glob['eachArgId'] = theano.shared(numpy.zeros([self.glob['batch'], self.glob['maxSentLength']]).astype('int32'))
        
        self.glob['trigger'] = theano.shared(numpy.zeros([self.glob['batch'], self.glob['numTrigger']]).astype(theano.config.floatX))
        self.glob['arg'] = theano.shared(numpy.zeros([self.glob['batch'], self.glob['numArg']]).astype(theano.config.floatX))
        self.glob['argTrigger'] = theano.shared(numpy.zeros([self.glob['batch'], self.glob['maxNumEntities'], self.glob['numTrigger']]).astype(theano.config.floatX))
        self.glob['argArg'] = theano.shared(numpy.zeros([self.glob['batch'], self.glob['maxNumEntities'], self.glob['numArg']]).astype(theano.config.floatX))
        
        self.globZero = {}
        self.globZero['eachTrigger'] = numpy.zeros([self.glob['batch'], self.glob['maxSentLength'], self.glob['numTrigger']]).astype(theano.config.floatX)
        self.globZero['eachArg'] = numpy.zeros([self.glob['batch'], self.glob['maxSentLength'], self.glob['numArg']]).astype(theano.config.floatX)
        self.globZero['eachTriggerId'] = numpy.zeros([self.glob['batch'], self.glob['maxSentLength']]).astype('int32')
        self.globZero['eachArgId'] = numpy.zeros([self.glob['batch'], self.glob['maxSentLength']]).astype('int32')
        
        self.globZero['trigger'] = numpy.zeros([self.glob['batch'], self.glob['numTrigger']]).astype(theano.config.floatX)
        self.globZero['arg'] = numpy.zeros([self.glob['batch'], self.glob['numArg']]).astype(theano.config.floatX)
        self.globZero['argTrigger'] = numpy.zeros([self.glob['batch'], self.glob['maxNumEntities'], self.glob['numTrigger']]).astype(theano.config.floatX)
        self.globZero['argArg'] = numpy.zeros([self.glob['batch'], self.glob['maxNumEntities'], self.glob['numArg']]).astype(theano.config.floatX)
        
        self.globVar = {}
        self.globVar['eachTrigger'] = T.tensor3()
        self.globVar['eachArg'] = T.tensor3()
        self.globVar['eachTriggerId'] = T.imatrix()
        self.globVar['eachArgId'] = T.imatrix()
        
        self.globVar['trigger'] = T.matrix()
        self.globVar['arg'] = T.matrix()
        self.globVar['argTrigger'] = T.tensor3()
        self.globVar['argArg'] = T.tensor3()
        
        self.globFunc = {}
        
        self.container['setZero'] = OrderedDict()
        self.container['zeroVecs'] = OrderedDict()
Example #21
def main(data_path, model_path):

    print("Loading data...")
    with open(data_path,'rb') as f:
        valX = pkl.load(f)

    print("Preparing data...")
    val_iter = batched_tweets.BatchedTweets(valX, batch_size=512, maxlen=MAX_LENGTH)

    print("Loading dictionary...")
    with open('%s/dict.pkl' % model_path, 'rb') as f:
        chardict = pkl.load(f)
    n_char = len(chardict.keys()) + 1

    # check for model files
    files = sorted(glob.glob('%s/model_*.npz' % model_path))
    print("Found {} model files".format(len(files)))

    for modelf in files:
        print("Computing validation cost on {}".format(modelf))

        print("Loading params...")
        params = load_params(modelf)

        print("Building network...")

        # Tweet variables
        tweet = T.itensor4()
        ptweet = T.itensor4()
        ntweet = T.itensor4()

        # masks
        t_mask = T.ftensor3()
        tp_mask = T.ftensor3()
        tn_mask = T.ftensor3()

        # Embeddings
        emb_t, c2w, w2s = char2word2vec(tweet, t_mask, params, n_char)
        emb_tp = char2word2vec(ptweet, tp_mask, params, n_char)[0]
        emb_tn = char2word2vec(ntweet, tn_mask, params, n_char)[0]
        
        # batch cost
        D1 = 1 - T.batched_dot(emb_t, emb_tp)/(tnorm(emb_t)*tnorm(emb_tp))
        D2 = 1 - T.batched_dot(emb_t, emb_tn)/(tnorm(emb_t)*tnorm(emb_tn))
        gap = D1-D2+M
        loss = gap*(gap>0)
        cost = T.mean(loss)
        reg = REGULARIZATION*lasagne.regularization.regularize_network_params(c2w, lasagne.regularization.l2) + REGULARIZATION*lasagne.regularization.regularize_network_params(w2s, lasagne.regularization.l2)


        # Theano function
        print("Compiling theano function...")
        inps = [tweet,t_mask,ptweet,tp_mask,ntweet,tn_mask]
        cost_val = theano.function(inps,cost)
        reg_val = theano.function([], reg)

        print("Testing...")
        uidx = 0
        try:
            validation_cost = 0.
            reg_cost = 0.
            n_val_samples = 0
            for x,y,z in val_iter:
                if not x:
                    print("Validation: Minibatch with no valid triples")
                    continue

                n_val_samples += len(x)
                x, x_m, y, y_m, z, z_m = batched_tweets.prepare_data_c2w2s(x, y, z, chardict, maxwordlen=MAX_WORD_LENGTH, maxseqlen=MAX_SEQ_LENGTH, n_chars=n_char)

                if x is None:
                    print("Validation: Minibatch with zero samples under maxlength")
                    continue

                curr_cost = cost_val(x,x_m,y,y_m,z,z_m)
                validation_cost += curr_cost*len(x)

            reg_cost = reg_val()
            print("Model {} Validation Cost {} Regularization Cost {}".format(modelf, validation_cost/n_val_samples, reg_cost))
            print("Seen {} samples.".format(n_val_samples))

        except KeyboardInterrupt:
            pass
Example #22
    def create_network(self):
        def save_model(metrics, epoch_nr):
            max_f1_idx = np.argmax(metrics["f1_macro_validate"])
            max_f1 = np.max(metrics["f1_macro_validate"])
            if epoch_nr == max_f1_idx and max_f1 > 0.01:  # saving to network drives takes 5s (to local only 0.5s) -> do not save so often
                print("  Saving weights...")
                for fl in glob.glob(join(self.HP.EXP_PATH, "best_weights_ep*")
                                    ):  # remove weights from previous epochs
                    os.remove(fl)
                try:
                    np.savez(
                        join(self.HP.EXP_PATH,
                             "best_weights_ep" + str(epoch_nr) + ".npz"),
                        *L.layers.get_all_param_values(self.output))
                except IOError:
                    print(
                        "\nERROR: Could not save weights because of IO Error\n"
                    )
                self.HP.BEST_EPOCH = epoch_nr

        def load_model(path):
            ExpUtils.print_verbose(self.HP,
                                   "Loading weights ... ({})".format(path))
            with np.load(
                    path
            ) as f:  # if both paths are absolute and start the same, join will merge the beginning
                param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            L.layers.set_all_param_values(output_layer_for_loss, param_values)

        if self.HP.SEG_INPUT == "Peaks" and self.HP.TYPE == "single_direction":
            # NR_OF_GRADIENTS = 15  # SH-Coeff
            NR_OF_GRADIENTS = 9
        elif self.HP.SEG_INPUT == "Peaks" and self.HP.TYPE == "combined":
            # NR_OF_GRADIENTS = 3
            NR_OF_GRADIENTS = 3 * self.HP.NR_OF_CLASSES
            # NR_OF_GRADIENTS = self.HP.NR_OF_CLASSES
        else:
            NR_OF_GRADIENTS = 33

        print("Building network ...")
        # Lasagne Seed for Reproducibility
        L.random.set_rng(np.random.RandomState(1))

        net = self.get_UNet(n_input_channels=NR_OF_GRADIENTS,
                            num_output_classes=self.HP.NR_OF_CLASSES,
                            input_dim=self.HP.INPUT_DIM,
                            base_n_filters=self.HP.UNET_NR_FILT)

        output_layer_for_loss = net["output_flat"]

        if self.HP.LOAD_WEIGHTS:
            load_model(join(self.HP.EXP_PATH, self.HP.WEIGHTS_PATH))

        X_sym = T.tensor4()
        # y_sym = T.imatrix()     # (bs*x*y, nr_of_classes)
        y_sym = T.itensor4()  # (bs, nr_of_classes, x, y)
        y_sym_flat = y_sym.dimshuffle(
            (0, 2, 3, 1))  # (bs, x, y, nr_of_classes)
        y_sym_flat = y_sym_flat.reshape(
            (-1, y_sym_flat.shape[3]))  # (bs*x*y, nr_of_classes)

        # add some weight decay
        # l2_loss = L.regularization.regularize_network_params(output_layer_for_loss, L.regularization.l2) * 1e-5

        ##Train
        prediction_train = L.layers.get_output(output_layer_for_loss,
                                               X_sym,
                                               deterministic=False)
        loss_vec_train = L.objectives.binary_crossentropy(
            prediction_train, y_sym_flat)
        loss_vec_train = loss_vec_train.mean(
            axis=1
        )  #before: (bs*x*y, nrClasses) (= elementwise binary CE), after: (bs*x*y) (= same shape as output from categorical CE)
        # loss_train = loss_vec_train.mean() + l2_loss
        loss_train = loss_vec_train.mean()

        ##Test
        prediction_test = L.layers.get_output(output_layer_for_loss,
                                              X_sym,
                                              deterministic=True)
        # prediction_test = L.layers.get_output(output_layer_for_loss, X_sym, deterministic=False)   #for Dropout Sampling
        loss_vec_test = L.objectives.binary_crossentropy(
            prediction_test, y_sym_flat)
        loss_vec_test = loss_vec_test.mean(axis=1)
        # loss_test = loss_vec_test.mean() + l2_loss
        loss_test = loss_vec_test.mean()

        ##Parameter Updates
        all_params = L.layers.get_all_params(output_layer_for_loss,
                                             trainable=True)
        learning_rate = theano.shared(np.float32(self.HP.LEARNING_RATE))
        # updates = L.updates.adam(loss_train, all_params, learning_rate)
        updates = L.updates.adamax(loss_train, all_params, learning_rate)

        ##Convenience function
        output_train = L.layers.get_output(net["output"],
                                           X_sym,
                                           deterministic=False)
        output_test = L.layers.get_output(net["output"],
                                          X_sym,
                                          deterministic=True)
        # output_test = L.layers.get_output(net["output"], X_sym, deterministic=False)  #for Dropout Sampling

        #Calc F1 NEW (simpler)
        output_shuff_train = output_train.dimshuffle(
            (0, 3, 1, 2))  # (bs, nrClasses x, y)
        dice_scores_train = theano_binary_dice_per_instance_and_class(
            output_shuff_train, y_sym, dim=2, first_spatial_axis=2
        )  # (bs, nrClasses) -> dice for each class in each batch
        f1_train = T.mean(dice_scores_train)  #average over batches and classes

        output_shuff_test = output_test.dimshuffle(
            (0, 3, 1, 2))  # (bs, nrClasses x, y)
        dice_scores_test = theano_binary_dice_per_instance_and_class(
            output_shuff_test, y_sym, dim=2, first_spatial_axis=2
        )  # (bs, nrClasses) -> dice for each class in each batch
        f1_test = T.mean(dice_scores_test)  # average over batches and classes

        #Define Functions
        train_fn = theano.function(
            [X_sym, y_sym], [loss_train, prediction_train, f1_train],
            updates=updates
        )  # prediction_TEST, because we don't want dropout here when computing the score either??
        predict_fn = theano.function([X_sym, y_sym],
                                     [loss_test, prediction_test, f1_test])

        get_probs = theano.function([X_sym], output_test)

        #Exporting variables
        self.learning_rate = learning_rate
        self.train = train_fn
        self.predict = predict_fn
        self.get_probs = get_probs  # (bs, x, y, nrClasses)
        self.net = net
        # self.output = output_layer_for_loss     # this is used for saving weights (could probably also be simplified)
        self.save_model = save_model
        self.load_model = load_model
Example #23
def main():

    # Where we'll save data to
    fname = sys.argv[0].split('.py')[0]
    curr_time = datetime.now().strftime('%d%H%M')
    save_dir = 'sample-' + fname + curr_time

    lrate = 5e-4
    batch_size = 1
    num_epochs = 100
    crop_size = 360
    input_var = T.tensor4('x')
    target_var = T.itensor4('y')

    images = np.load('images.npz')['arr_0'].astype(
        theano.config.floatX) / 255.0
    labels = np.load('labels.npz')['arr_0'].astype(np.int32)

    num_classes = labels.shape[1]

    idx = np.arange(num_classes)
    idx = idx.reshape(1, num_classes, 1, 1)
    labels = labels / 255
    labels = labels.astype(np.int32) * idx
    labels = np.sum(labels, axis=1, keepdims=True)
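    # The per-class binary masks have now been collapsed into a single integer label map
    # of shape (n_images, 1, H, W), where each pixel holds its class index.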

    np.random.seed(1234)
    idx = np.arange(images.shape[0])
    np.random.shuffle(idx)
    X_train = images[idx[:-10]]
    y_train = labels[idx[:-10]]
    X_valid = images[idx[-10:]]
    y_valid = labels[idx[-10:]]

    # Compute class weights to balance dataset
    counts = []
    for cl in xrange(num_classes):
        class_counts = 0
        for img in y_train:
            class_counts += np.sum(img == cl)
        counts.append(class_counts)
    counts = np.array(counts).astype(theano.config.floatX)

    # We can either upscale the loss (i.e. multiply by a factor > 1), or
    # downscale the loss (multiply by a factor < 1). Here we do the latter
    counts = np.max(counts) / counts
    counts = counts / np.max(counts)
    counts[0] = counts[0] * 1.1  # stem
    counts[1] = counts[1] * 1.1  # tomato
    counts = T.as_tensor_variable(counts)

    # Build DenseNetwork
    input_shape = (None, 3, crop_size, crop_size)
    softmax, network = build_network(input_var, input_shape, num_classes)

    print 'Number of parameters: ', nn.count_params(network)

    preds = nn.get_output(softmax, deterministic=False)
    loss = lasagne.objectives.categorical_crossentropy(preds,
                                                       target_var.flatten())
    loss = loss * counts[target_var.flatten()]
    loss = T.mean(loss) + regularize_network_params(softmax, l2) * 0.0001

    acc = T.mean(T.eq(T.argmax(preds, axis=1), target_var.flatten()))

    params = nn.get_all_params(softmax, trainable=True)
    updates = lasagne.updates.adam(loss, params, lrate)
    train_fn = theano.function([input_var, target_var], [loss, acc],
                               updates=updates,
                               allow_input_downcast=True)

    probs, preds = nn.get_output([softmax, network], deterministic=True)
    loss = lasagne.objectives.categorical_crossentropy(probs,
                                                       target_var.flatten())
    loss = loss * counts[target_var.flatten()]
    loss = T.mean(loss) + regularize_network_params(softmax, l2) * 0.0001

    acc = T.mean(T.eq(T.argmax(probs, axis=1), target_var.flatten()))

    valid_fn = theano.function([input_var, target_var], [loss, acc, preds],
                               allow_input_downcast=True)

    # We iterate over epochs:
    for epoch in range(num_epochs):

        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_acc = 0
        train_batches = 0

        start_time = time.time()
        for batch in iterate_minibatches(X_train,
                                         y_train,
                                         batch_size,
                                         shuffle=True):
            inputs, targets = batch

            inputs, targets = random_crop(inputs, targets, crop_size,
                                          crop_size)

            err, acc = train_fn(inputs, targets)
            train_err += err
            train_acc += acc
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        valid_iou = np.zeros((num_classes, ))
        val_preds, val_inputs, val_targets = [], [], []
        for batch in iterate_minibatches(X_valid,
                                         y_valid,
                                         batch_size,
                                         shuffle=False):
            inputs, targets = batch

            input_crop, target_crop = random_crop(inputs, targets, crop_size,
                                                  crop_size)

            err, acc, preds = valid_fn(input_crop, target_crop)
            val_err += err
            val_acc += acc
            val_batches += 1

            val_preds.append(preds)
            val_inputs.append(input_crop)
            val_targets.append(target_crop)

            valid_iou += meanIOU(preds, target_crop, num_classes)

        if epoch % 2 == 0:
            val_preds = np.vstack(val_preds)
            val_inputs = np.vstack(val_inputs)
            val_targets = np.vstack(val_targets)
            plot_predictions(val_inputs, val_preds, val_targets, epoch,
                             save_dir)

        # Then we print the results for this epoch:
        print "Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time)
        print "  training loss:\t\t{:.6f}".format(train_err / train_batches)
        print "  validation loss:\t\t{:.6f}".format(val_err / val_batches)
        print "  validation accuracy:\t\t{:.2f} %".format(val_acc /
                                                          val_batches * 100)
        print "  validation IOU:\t\t{}".format(valid_iou / val_batches)
Example #24
    ))

    output = T.concatenate([masked_targets, output], axis=1)

    output = T.nnet.relu(lib.ops.conv2d.Conv2D('Dec2.Pix3', input_dim=2*DIM_3, output_dim=DIM_PIX_2, filter_size=3, inputs=output, mask_type=('b', 1)))
    output = T.nnet.relu(lib.ops.conv2d.Conv2D('Dec2.Pix4', input_dim=DIM_PIX_2, output_dim=DIM_PIX_2, filter_size=3, inputs=output, mask_type=('b', 1)))

    output = T.nnet.relu(lib.ops.conv2d.Conv2D('Dec2.Pix7', input_dim=DIM_PIX_2, output_dim=DIM_PIX_2, filter_size=1, inputs=output, mask_type=('b', 1)))
    output = T.nnet.relu(lib.ops.conv2d.Conv2D('Dec2.Pix8', input_dim=DIM_PIX_2, output_dim=DIM_PIX_2, filter_size=1, inputs=output, mask_type=('b', 1)))

    output = lib.ops.conv2d.Conv2D('Dec2.Out', input_dim=DIM_PIX_2, output_dim=2*LATENT_DIM_1, filter_size=1, inputs=output, mask_type=('b', 1), he_init=False)

    return output

total_iters = T.iscalar('total_iters')
images = T.itensor4('images') # shape: (batch size, n channels, height, width)

alpha = T.minimum(1, T.cast(total_iters, theano.config.floatX) / lib.floatX(ALPHA_ITERS))
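# alpha ramps linearly from 0 to 1 over the first ALPHA_ITERS iterations
# (presumably used to anneal part of the objective later in the script).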

def split(mu_and_logsig):
    mu, logsig = mu_and_logsig[:,::2], mu_and_logsig[:,1::2]
    logsig = T.log(T.nnet.softplus(logsig))
    return mu, logsig

def clamp_logsig(logsig):
    beta = T.minimum(1, T.cast(total_iters, theano.config.floatX) / lib.floatX(BETA_ITERS))
    return T.nnet.relu(logsig, alpha=beta)

# Layer 1

mu_and_logsig1 = Enc1(images)
Example #25
PIXEL_CNN_LAYERS = 4

LR = 2e-4

BATCH_SIZE = 100
N_CHANNELS = 1
HEIGHT = 28
WIDTH = 28

TIMES = ('iters', 10 * 500, 1000 * 500)

lib.print_model_settings(locals().copy())

# inputs.shape: (batch size, n channels, height, width)
if MODE == '256ary':
    inputs = T.itensor4('inputs')
    inputs_embed = lib.ops.embedding.Embedding('Embedding', 256, DIM, inputs)
    inputs_embed = inputs_embed.dimshuffle(0, 1, 4, 2, 3)
    inputs_embed = inputs_embed.reshape(
        (inputs_embed.shape[0], inputs_embed.shape[1] * inputs_embed.shape[2],
         inputs_embed.shape[3], inputs_embed.shape[4]))
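    # The 256-way pixel values are embedded into DIM-dimensional vectors and the embedding
    # dimension is folded into the channel axis, giving N_CHANNELS * DIM input channels
    # for the masked convolution below.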

    output = lib.ops.conv2d.Conv2D('InputConv',
                                   input_dim=N_CHANNELS * DIM,
                                   output_dim=DIM,
                                   filter_size=7,
                                   inputs=inputs_embed,
                                   mask_type=('a', N_CHANNELS),
                                   he_init=False)
else:
    inputs = T.tensor4('inputs')
Example #26
from theano_toolkit.parameters import Parameters
from theano_toolkit import updates
import data_io
import model
import math
from pprint import pprint
import vae

if __name__ == "__main__":
    chunk_size = 512
    batch_size = 64
    P = Parameters()
    autoencoder, inpaint = model.build(P)

    parameters = P.values()
    X = T.itensor4('X')
    X_hat, posteriors, priors = autoencoder(T.cast(X, 'float32') / 255.)
    latent_kls = [
        T.mean(vae.kl_divergence(po_m, po_s, pr_m, pr_s), axis=0)
        for (po_m, po_s), (pr_m, pr_s) in zip(posteriors, priors)
    ]

    beta_start = 500 * (np.arange(len(latent_kls)) + 1)
    beta_lin = theano.shared(np.float32(0))
    betas_ = (beta_lin - beta_start) / np.float32(500)
    betas_ = T.switch(betas_ < 0, 0, betas_)
    betas = T.switch(betas_ > 1, 1, betas_)[::-1]
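    # KL warm-up: each term's weight ramps linearly from 0 to 1 over 500 steps of
    # beta_lin, staggered by 500 steps per layer; the [::-1] means the last entry of
    # latent_kls starts ramping first.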
    print betas.eval()
    train_latent_kl = sum(betas[i] * kl for i, kl in enumerate(latent_kls))
    latent_kl = sum(latent_kls)
    recon_loss = model.cost(X_hat, X[:, :, 16:-16, 16:-16])
def classifier(input_vars, network_build_fn, n_target_spatial_dims=0, target_channel_index=None,
               score=dnn_objective.ClassifierObjective.SCORE_ERROR, mask=False, includes_softmax=False,
               params_source=None, *args, **kwargs):
    """
    Construct a classifier, given input variables and a network building function
    and an optional path from which to load parameters.
    :param input_vars: a list of input variables. If `None`, the network will be searched for `InputLayer` instances
        and their input variables will be used.
    :param network_build_fn: network builder function of the form `fn(input_vars) -> lasagne_layer`
        that constructs a network in the form of a Lasagne layer, given an input variable (a Theano variable)
    :param n_target_spatial_dims: the number of spatial dimensions for the target;
        0 for predict per sample with ivector variable type
        1 for 1-dimensional prediction e.g. time series, with imatrix variable type (sample, time),
        2 for 2-dimensional prediction e.g. image, with itensor3 variable type (sample, height, width),
        3 for 3-dimensional prediction e.g. volume, with itensor4 variable type (sample, depth, height, width),
    :param target_channel_index: if None, targets are assumed not to have a channel dimension. If an integer,
        then this channel will be used for the target, e.g.
        for a target with 0 spatial dimensions, if `target_channel_index` is `None` then the targets
        should have shape `(sample,)`, while if there are 5 channels and the target uses channel 2,
        the target should have shape `(sample, 5)` and we will access the target indices in channel, e.g. `y[:,2]`.
        Note that the additional channel dimension adds an additional dimension to target and mask variables, e.g.
        0, 1, 2 and 3 dimensional targets and masks use imatrix, itensor3, itensor4 and itensor5 variable types.
    :param score: the scoring metric used to evaluate classifier performance (see `dnn_objective.ClassifierObjective`)
    :param mask: (default=False) if True, samples will be masked, in which case sample weights/masks should
        be passed during training
    :param includes_softmax: `True` indicates that the final network layer includes the softmax non-linearity,
        `False` indicates that it does not, in which case a non-linearity layer will be added
    :param params_source: [optional] source from which to obtain network parameters; either
        a str/unicode that contains the path of a NumPy array file from which to load the parameters,
        or a `BasicDNN` or Lasagne layer from which to copy the parameters
    :return: a classifier instance
    """
    # Prepare Theano variables for inputs and targets
    n_target_tims = n_target_spatial_dims + (0 if target_channel_index is None else 1)
    if n_target_tims == 0:
        target_var = T.ivector('y')
    elif n_target_tims == 1:
        target_var = T.imatrix('y')
    elif n_target_tims == 2:
        target_var = T.itensor3('y')
    elif n_target_tims == 3:
        target_var = T.itensor4('y')
    else:
        raise ValueError('Valid values for n_target_spatial_dims are in the range 0-3, not {}'.format(
            n_target_spatial_dims))

    if mask:
        if n_target_tims == 0:
            mask_var = T.vector('m')
        elif n_target_tims == 1:
            mask_var = T.matrix('m')
        elif n_target_tims == 2:
            mask_var = T.tensor3('m')
        elif n_target_tims == 3:
            mask_var = T.tensor4('m')
        else:
            raise ValueError('Valid values for n_target_spatial_dims are in the range 0-3, not {}'.format(
                n_target_spatial_dims))
        mask_vars = [mask_var]
    else:
        mask_var = None
        mask_vars = []


    # Build the network
    network = network_build_fn(input_vars=input_vars)
    if input_vars is None:
        input_vars = _get_input_vars(network)

    objective = dnn_objective.ClassifierObjective('y', network, target_var, mask_expr=mask_var,
                                                  n_target_spatial_dims=n_target_spatial_dims,
                                                  target_channel_index=target_channel_index, score=score,
                                                  includes_softmax=includes_softmax)

    return BasicClassifierDNN(input_vars, [target_var] + mask_vars, network, objective,
                              params_source=params_source, *args, **kwargs)
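
# A minimal usage sketch (not from the original source): `build_net` is a hypothetical
# builder following the `network_build_fn` contract described above -- it receives the
# list of input variables (or None) and returns a Lasagne output layer.
#
#   def build_net(input_vars):
#       l_in = lasagne.layers.InputLayer((None, 1, 28, 28))
#       l_out = lasagne.layers.DenseLayer(l_in, num_units=10,
#                                         nonlinearity=lasagne.nonlinearities.softmax)
#       return l_out
#
#   clf = classifier(None, build_net, includes_softmax=True)
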
    def create_network(self):

        if self.HP.SEG_INPUT == "Peaks" and self.HP.TYPE == "single_direction":
            # NR_OF_GRADIENTS = 15  # SH-Coeff
            NR_OF_GRADIENTS = 9
        elif self.HP.SEG_INPUT == "Peaks" and self.HP.TYPE == "combined":
            # NR_OF_GRADIENTS = 3
            NR_OF_GRADIENTS = 3*self.HP.NR_OF_CLASSES    # 54
        else:
            NR_OF_GRADIENTS = 33

        if self.HP.RESOLUTION == "1.25mm":
            input_dim = (144, 144)
        elif self.HP.RESOLUTION == "2mm" or self.HP.RESOLUTION == "2.5mm":
            input_dim = (80, 80)


        print("Building network ...")
        print("(Model UNet)")
        # Lasagne Seed for Reproducibility
        L.random.set_rng(np.random.RandomState(1))

        net = self.get_UNet(n_input_channels=NR_OF_GRADIENTS, num_output_classes=self.HP.NR_OF_CLASSES, input_dim=input_dim, base_n_filters=self.HP.UNET_NR_FILT)

        output_layer_for_loss = net["output_flat"]

        if self.HP.LOAD_WEIGHTS:
            print("Loading weights ... ({})".format(join(self.HP.EXP_PATH, self.HP.WEIGHTS_PATH)))
            with np.load(join(self.HP.EXP_PATH, self.HP.WEIGHTS_PATH)) as f: # if both paths are absolute and start the same, join will merge the beginning
                param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            L.layers.set_all_param_values(output_layer_for_loss, param_values)


        X_sym = T.tensor4()
        w_sym = T.dvector()

        # y_sym = T.dmatrix()
        y_sym = T.itensor4()    # (bs, nr_of_classes, x, y)
        y_sym_flat = y_sym.dimshuffle((0, 2, 3, 1))  # (bs, x, y, nr_of_classes)
        y_sym_flat = y_sym_flat.reshape((-1, y_sym_flat.shape[3]))  # (bs*x*y, nr_of_classes)

        # add some weight decay
        # l2_loss = L.regularization.regularize_network_params(output_layer_for_loss, L.regularization.l2) * 1e-5

        ##Train
        prediction_train = L.layers.get_output(output_layer_for_loss, X_sym, deterministic=False)
        loss_vec_train = L.objectives.squared_error(prediction_train, y_sym_flat)
        loss_vec_train = loss_vec_train.mean(axis=1) #before: (bs*x*y, nrClasses) (= elementwise squared error), after: (bs*x*y) (= same shape as output from categorical CE)
        loss_vec_train *= w_sym
        # loss_train = loss_vec_train.mean() + l2_loss
        loss_train = loss_vec_train.mean()

        ##Test
        prediction_test = L.layers.get_output(output_layer_for_loss, X_sym, deterministic=True)
        loss_vec_test = L.objectives.squared_error(prediction_test, y_sym_flat)
        loss_vec_test = loss_vec_test.mean(axis=1)
        loss_vec_test *= w_sym
        # loss_test = loss_vec_test.mean() + l2_loss
        loss_test = loss_vec_test.mean()

        ##Parameter Updates
        all_params = L.layers.get_all_params(output_layer_for_loss, trainable=True)
        # learning_rate = theano.shared(floatX(0.0001))
        learning_rate = theano.shared(np.float32(self.HP.LEARNING_RATE))
        # updates = L.updates.adam(loss_train, all_params, learning_rate)
        updates = L.updates.adamax(loss_train, all_params, learning_rate)

        ##Convenience function
        output = L.layers.get_output(net["output"], X_sym, deterministic=True)

        #Calc F1
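        # theano.scan loops over the class indices and applies theano_f1_score to each
        # class column of the predictions vs. the flattened targets; the per-class scores
        # are then averaged into a single macro-F1 value.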
        f1_per_call_train, _ = theano.scan(theano_f1_score, outputs_info=None,
                                       sequences=[theano.tensor.arange(y_sym_flat.shape[1])],
                                       non_sequences=[prediction_train, y_sym_flat])
        f1_per_call_test, _ = theano.scan(theano_f1_score, outputs_info=None,
                                        sequences=[theano.tensor.arange(y_sym.shape[1])],
                                        non_sequences=[prediction_test, y_sym_flat])
        f1_train = T.mean(f1_per_call_train)
        f1_test = T.mean(f1_per_call_test)


        train_fn = theano.function([X_sym, y_sym, w_sym], [loss_train, prediction_train, f1_train], updates=updates) # prediction_TEST, because we don't want dropout here when computing the score either??
        predict_fn = theano.function([X_sym, y_sym, w_sym], [loss_test, prediction_test, f1_test])
        get_probs = theano.function([X_sym], output)

        #Exporting variables
        self.learning_rate = learning_rate
        self.train = train_fn
        self.predict = predict_fn
        self.get_probs = get_probs    # (bs, x, y, nrClasses)
        self.net = net
        self.output = output_layer_for_loss     # this is used for saving weights (could probably also be simplified)
    def compileTheanoFunctions(self):
        print(" ----------------- Starting compilation process ----------------- ")        
        
        # ------- Create and initialize sharedVariables needed to compile the training function ------ #
        # -------------------------------------------------------------------------------------------- #
        # For training 
        self.trainingData_x = theano.shared(np.zeros([1,1,1,1,1], dtype="float32"), borrow = True)
        self.trainingData_y = theano.shared(np.zeros([1,1,1,1], dtype="float32") , borrow = True)  
        
        self.trainingData_x_Bottom = theano.shared(np.zeros([1,1,1,1,1], dtype="float32"), borrow = True)
        
        # For testing 
        self.testingData_x_Bottom = theano.shared(np.zeros([1,1,1,1,1], dtype="float32"), borrow = True)
        self.testingData_x = theano.shared(np.zeros([1,1,1,1,1], dtype="float32"), borrow = True)
        
        x_Train = self.inputNetwork_Train
        x_Train_Bottom = self.inputNetwork_Train_Bottom
        x_Test  = self.inputNetwork_Test
        x_Test_Bottom  = self.inputNetwork_Test_Bottom
        y_Train = T.itensor4('y')
        
        # Allocate symbolic variables for the data
        index_Train = T.lscalar()
        index_Test  = T.lscalar()
        
        # ------- Needed to compile the training function ------ #
        # ------------------------------------------------------ #
        trainingData_y_CastedToInt   = T.cast( self.trainingData_y, 'int32') 
        
        # To accommodate the weights in the cost function to account for class imbalance
        weightsOfClassesInCostFunction = T.fvector()  
        weightPerClass = T.fvector() 
        
        # --------- Get trainable parameters (to be fit by gradient descent) ------- #
        # -------------------------------------------------------------------------- #
        
        [paramsTraining, numberParamsPerLayer] = self.getTrainable_Params()
        
        # ------------------ Define the cost function --------------------- #
        # ----------------------------------------------------------------- #
        def negLogLikelihood():
            print (" --- Cost function: negativeLogLikelihood")
            
            costInLastLayer = self.lastLayer.negativeLogLikelihoodWeighted(y_Train,weightPerClass)
            return costInLastLayer
            
        def NotDefined():
            print (" --- Cost function: Not defined!!!!!! WARNING!!!")

        optionsCostFunction = {0 : negLogLikelihood,
                               1 : NotDefined}

        costInLastLayer = optionsCostFunction[self.costFunction]()
        
        # --------------------------- Get costs --------------------------- #
        # ----------------------------------------------------------------- #
        # Get L1 and L2 weights regularization
        costL1 = 0
        costL2 = 0
        
        # Compute the costs
        for l_i in xrange(0, len(self.networkLayers)) :    
                costL1 += abs(self.networkLayers[l_i].W).sum()
                costL2 += (self.networkLayers[l_i].W ** 2).sum()
        
        # Add also the cost of the last layer                     
        cost = (costInLastLayer
                + self.L1_reg_C * costL1
                + self.L2_reg_C * costL2)

        # --------------------- Include all trainable parameters in updates (for optimization) ---------------------- #
        # ----------------------------------------------------------------------------------------------------------- #
        updates = self.getUpdatesOfTrainableParameters(cost, paramsTraining, numberParamsPerLayer)
        
        # --------------------- Include batch normalization params ---------------------- #
        # ------------------------------------------------------------------------------- #
        updates = updates + self.updateParams_BatchNorm()

        # For the testing function we need to get the Feature maps activations
        featMapsActivations = []
        lower_act = 0
        upper_act = 9999
        
        # TODO: Change to output_Test
        for l_i in xrange(0,len(self.networkLayers)):
            featMapsActivations.append(self.networkLayers[l_i].outputTest[:, lower_act : upper_act, :, :, :])

        # For the last layer get the predicted probabilities (p_y_given_x_test)
        featMapsActivations.append(self.lastLayer.p_y_given_x_test)

        # --------------------- Preparing data to compile the functions ---------------------- #
        # ------------------------------------------------------------------------------------ #
        
        givensDataSet_Train = { x_Train: self.trainingData_x[index_Train * self.batch_Size: (index_Train + 1) * self.batch_Size],
                                x_Train_Bottom: self.trainingData_x_Bottom[index_Train * self.batch_Size: (index_Train + 1) * self.batch_Size],
                                y_Train: trainingData_y_CastedToInt[index_Train * self.batch_Size: (index_Train + 1) * self.batch_Size],
                                weightPerClass: weightsOfClassesInCostFunction }

       
        givensDataSet_Test  = { x_Test: self.testingData_x[index_Test * self.batch_Size: (index_Test + 1) * self.batch_Size],
                                x_Test_Bottom: self.testingData_x_Bottom[index_Test * self.batch_Size: (index_Test + 1) * self.batch_Size] }
        
        print(" ...Compiling the training function...")
        
        self.networkModel_Train = theano.function(
                                    [index_Train, weightsOfClassesInCostFunction],
                                    #[cost] + self.lastLayer.doEvaluation(y_Train),
                                    [cost],
                                    updates=updates,
                                    givens = givensDataSet_Train
                                    )
                          
        print(" ...The training function was compiled...")

        #self.getProbabilities = theano.function(
                         #[index],
                         #self.lastLayer.p_y_given_x_Train,
                         #givens={
                            #x: self.trainingData_x[index * _self.batch_size: (index + 1) * _self.batch_size]
                         #}
         #)
     

        print(" ...Compiling the testing function...")
        self.networkModel_Test = theano.function(
                                  [index_Test],
                                  featMapsActivations,
                                  givens = givensDataSet_Test
                                  )
        print(" ...The testing function was compiled...")
Example #30
    def compileTheanoFunctions(self):
        print(
            " ----------------- Starting compilation process ----------------- "
        )

        # ------- Create and initialize sharedVariables needed to compile the training function ------ #
        # -------------------------------------------------------------------------------------------- #
        # For training
        self.trainingData_x = theano.shared(np.zeros([1, 1, 1, 1, 1],
                                                     dtype="float32"),
                                            borrow=True)
        self.trainingData_y = theano.shared(np.zeros([1, 1, 1, 1],
                                                     dtype="float32"),
                                            borrow=True)

        # For testing
        self.testingData_x = theano.shared(np.zeros([1, 1, 1, 1, 1],
                                                    dtype="float32"),
                                           borrow=True)

        x_Train = self.inputNetwork_Train
        x_Test = self.inputNetwork_Test
        y_Train = T.itensor4('y')

        # Allocate symbolic variables for the data
        index_Train = T.lscalar()
        index_Test = T.lscalar()

        # ------- Needed to compile the training function ------ #
        # ------------------------------------------------------ #
        trainingData_y_CastedToInt = T.cast(self.trainingData_y, 'int32')

        # To accommodate the weights in the cost function to account for class imbalance
        weightsOfClassesInCostFunction = T.fvector()
        weightPerClass = T.fvector()

        # --------- Get trainable parameters (to be fit by gradient descent) ------- #
        # -------------------------------------------------------------------------- #

        [paramsTraining, numberParamsPerLayer] = self.getTrainable_Params()

        # ------------------ Define the cost function --------------------- #
        # ----------------------------------------------------------------- #
        def negLogLikelihood():
            print(" --- Cost function: negativeLogLikelihood")

            costInLastLayer = self.lastLayer.negativeLogLikelihoodWeighted(
                y_Train, weightPerClass)
            return costInLastLayer

        def NotDefined():
            print(" --- Cost function: Not defined!!!!!! WARNING!!!")

        optionsCostFunction = {0: negLogLikelihood, 1: NotDefined}

        costInLastLayer = optionsCostFunction[self.costFunction]()

        # --------------------------- Get costs --------------------------- #
        # ----------------------------------------------------------------- #
        # Get L1 and L2 weights regularization
        costL1 = 0
        costL2 = 0

        # Compute the costs
        for l_i in xrange(0, len(self.networkLayers)):
            costL1 += abs(self.networkLayers[l_i].W).sum()
            costL2 += (self.networkLayers[l_i].W**2).sum()

        # Add also the cost of the last layer
        cost = (costInLastLayer + self.L1_reg_C * costL1 +
                self.L2_reg_C * costL2)

        # --------------------- Include all trainable parameters in updates (for optimization) ---------------------- #
        # ----------------------------------------------------------------------------------------------------------- #
        updates = self.getUpdatesOfTrainableParameters(cost, paramsTraining,
                                                       numberParamsPerLayer)

        # --------------------- Include batch normalization params ---------------------- #
        # ------------------------------------------------------------------------------- #
        updates = updates + self.updateParams_BatchNorm()

        # For the testing function we need to get the Feature maps activations
        featMapsActivations = []
        lower_act = 0
        upper_act = 9999

        # TODO: Change to output_Test
        for l_i in xrange(0, len(self.networkLayers)):
            featMapsActivations.append(
                self.networkLayers[l_i].
                outputTest[:, lower_act:upper_act, :, :, :])

        # For the last layer get the predicted probabilities (p_y_given_x_test)
        featMapsActivations.append(self.lastLayer.p_y_given_x_test)

        # --------------------- Preparing data to compile the functions ---------------------- #
        # ------------------------------------------------------------------------------------ #

        givensDataSet_Train = {
            x_Train:
            self.trainingData_x[index_Train *
                                self.batch_Size:(index_Train + 1) *
                                self.batch_Size],
            y_Train:
            trainingData_y_CastedToInt[index_Train *
                                       self.batch_Size:(index_Train + 1) *
                                       self.batch_Size],
            weightPerClass:
            weightsOfClassesInCostFunction
        }

        givensDataSet_Test = {
            x_Test:
            self.testingData_x[index_Test * self.batch_Size:(index_Test + 1) *
                               self.batch_Size]
        }

        print(" ...Compiling the training function...")

        self.networkModel_Train = theano.function(
            [index_Train, weightsOfClassesInCostFunction],
            #[cost] + self.lastLayer.doEvaluation(y_Train),
            [cost],
            updates=updates,
            givens=givensDataSet_Train)

        print(" ...The training function was compiled...")

        #self.getProbabilities = theano.function(
        #[index],
        #self.lastLayer.p_y_given_x_Train,
        #givens={
        #x: self.trainingData_x[index * _self.batch_size: (index + 1) * _self.batch_size]
        #}
        #)

        print(" ...Compiling the testing function...")
        self.networkModel_Test = theano.function([index_Test],
                                                 featMapsActivations,
                                                 givens=givensDataSet_Test)
        print(" ...The testing function was compiled...")
Example #31
print("loading data...")
trainX = pkl.load(open(data_path,'rb'))

# dictionary
chardict, charcount = batched_tweets.build_dictionary(trainX[0] + trainX[1])
n_char = len(chardict.keys()) + 1

# model params
params = init_params_c2w2s(n_chars=n_char)

# batches
print("preparing batches...")
train_iter = batched_tweets.BatchedTweets(trainX, batch_size=N_BATCH, maxlen=MAX_LENGTH)

# Tweet variables
tweet = T.itensor4()
ptweet = T.itensor4()
ntweet = T.itensor4()

# masks
t_mask = T.ftensor3()
tp_mask = T.ftensor3()
tn_mask = T.ftensor3()

# Embeddings
emb_t = char2word2vec(tweet, t_mask, params, n_char)[0]
emb_tp = char2word2vec(ptweet, tp_mask, params, n_char)[0]
emb_tn = char2word2vec(ntweet, tn_mask, params, n_char)[0]

# batch loss
D1 = 1 - T.batched_dot(emb_t, emb_tp)/(tnorm(emb_t)*tnorm(emb_tp))
Example #32
def test_embedding_layer():

    print "Testing embedding layer..."

    for k in xrange(1):
        print "Layer %i..." % k

        # random parameters
        input_dim = np.random.randint(1, 100)
        output_dim = np.random.randint(1, 100)

        # embedding layer
        embedding_layer = layer.EmbeddingLayer(input_dim, output_dim, 'test')

        for i in xrange(40):
            print "%i" % i,

            # tests for dimension 1, 2, 3 and 4
            if i % 4 == 0:
                input = T.ivector('input_test')
                input_value = np.random.randint(
                    low=0,
                    high=input_dim,
                    size=(np.random.randint(low=1, high=50),)
                ).astype(np.int32)
            elif i % 4 == 1:
                input = T.imatrix('input_test')
                input_value = np.random.randint(
                    low=0,
                    high=input_dim,
                    size=(np.random.randint(low=1, high=40),
                          np.random.randint(low=1, high=40))
                ).astype(np.int32)
            elif i % 4 == 2:
                input = T.itensor3('input_test')
                input_value = np.random.randint(
                    low=0,
                    high=input_dim,
                    size=(np.random.randint(low=1, high=30),
                          np.random.randint(low=1, high=30),
                          np.random.randint(low=1, high=30))
                ).astype(np.int32)
            else:
                input = T.itensor4('input_test')
                input_value = np.random.randint(
                    low=0,
                    high=input_dim,
                    size=(np.random.randint(low=1, high=20),
                          np.random.randint(low=1, high=20),
                          np.random.randint(low=1, high=20),
                          np.random.randint(low=1, high=20))
                ).astype(np.int32)

            output = embedding_layer.link(input)

            expected_value = embedding_layer.embeddings.get_value()[input_value]

            assert expected_value.shape == input_value.shape + (output_dim,)
            np.testing.assert_array_almost_equal(
                output.eval({input: input_value}),
                expected_value
            )

        print "OK"

    print "All tests ran successfully for Embedding Layer."
Example #33
    else:
        return lib.ops.conv_decoder.ConvDecoder(
            'Decoder',
            input_dim=LATENT_DIM,
            n_unpools=CONV_N_POOLS,
            base_n_filters=CONV_BASE_N_FILTERS,
            filter_size=CONV_FILTER_SIZE,
            output_size=WIDTH,
            output_n_channels=N_CHANNELS,
            inputs=latents
        )


total_iters = T.iscalar('total_iters')
if MODE=='256ary':
    images = T.itensor4('images')
else:
    images = T.tensor4('images') # shape (batch size, n channels, height, width)

mu, log_sigma = Encoder(images)

if VANILLA:
    latents = mu
else:
    eps = T.cast(theano_srng.normal(mu.shape), theano.config.floatX)
    latents = mu + (eps * T.exp(log_sigma))

outputs = Decoder(latents)

if MODE=='256ary':
    reconst_cost = T.nnet.categorical_crossentropy(
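The snippet is cut off before the reconstruction cost is complete. A hedged sketch of a 256-way categorical cross-entropy term that would fit the '256ary' branch, assuming `outputs` ends in a softmax axis of size 256 and `images` holds integer targets in [0, 255]; the reshape is my assumption, not the original code.

probs_flat = outputs.reshape((-1, 256))       # (batch * channels * height * width, 256)
targets_flat = images.flatten()               # matching int32 class targets
reconst_cost = T.mean(T.nnet.categorical_crossentropy(probs_flat, targets_flat))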
Example #34
    def create_network(self):

        if self.HP.SEG_INPUT == "Peaks" and self.HP.TYPE == "single_direction":
            # NR_OF_GRADIENTS = 15  # SH-Coeff
            NR_OF_GRADIENTS = 9
        elif self.HP.SEG_INPUT == "Peaks" and self.HP.TYPE == "combined":
            # NR_OF_GRADIENTS = 3
            NR_OF_GRADIENTS = 3 * self.HP.NR_OF_CLASSES  # 54
        else:
            NR_OF_GRADIENTS = 33

        if self.HP.RESOLUTION == "1.25mm":
            input_dim = (144, 144)
        elif self.HP.RESOLUTION == "2mm" or self.HP.RESOLUTION == "2.5mm":
            input_dim = (80, 80)

        print("Building network ...")
        print("(Model UNet)")
        # Lasagne Seed for Reproducibility
        L.random.set_rng(np.random.RandomState(1))

        net = self.get_UNet(n_input_channels=NR_OF_GRADIENTS,
                            num_output_classes=self.HP.NR_OF_CLASSES,
                            input_dim=input_dim,
                            base_n_filters=self.HP.UNET_NR_FILT)

        output_layer_for_loss = net["output_flat"]

        if self.HP.LOAD_WEIGHTS:
            print("Loading weights ... ({})".format(
                join(self.HP.EXP_PATH, self.HP.WEIGHTS_PATH)))
            with np.load(
                    join(self.HP.EXP_PATH, self.HP.WEIGHTS_PATH)
            ) as f:  # if both paths are absolute and the beginnings of the paths are the same, join will merge them
                param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            L.layers.set_all_param_values(output_layer_for_loss, param_values)

        X_sym = T.tensor4()
        # y_sym = T.imatrix()     # (bs*x*y, nr_of_classes)
        y_sym = T.itensor4()  # (bs, nr_of_classes, x, y)

        prediction_train = L.layers.get_output(output_layer_for_loss,
                                               X_sym,
                                               deterministic=False)
        prediction_test = L.layers.get_output(output_layer_for_loss,
                                              X_sym,
                                              deterministic=True)
        output_train = L.layers.get_output(net["output"],
                                           X_sym,
                                           deterministic=False)
        output_test = L.layers.get_output(net["output"],
                                          X_sym,
                                          deterministic=True)

        #Calc F1 NEW (simpler)
        output_shuff_train = output_train.dimshuffle(
            (0, 3, 1, 2))  # (bs, nrClasses, x, y)
        dice_scores_train = theano_binary_dice_per_instance_and_class(
            output_shuff_train, y_sym, dim=2, first_spatial_axis=2
        )  # (bs, nrClasses) -> dice for each class in each batch
        f1_train = T.mean(dice_scores_train)  #average over batches and classes
        dice_scores_train_continuous = theano_binary_dice_per_instance_and_class_for_loss(
            output_shuff_train, y_sym, dim=2, first_spatial_axis=2
        )  # (bs, nrClasses) -> dice for each class in each batch
        f1_train_continuous = T.mean(
            dice_scores_train_continuous)  # average over batches and classes

        output_shuff_test = output_test.dimshuffle(
            (0, 3, 1, 2))  # (bs, nrClasses, x, y)
        dice_scores_test = theano_binary_dice_per_instance_and_class(
            output_shuff_test, y_sym, dim=2, first_spatial_axis=2
        )  # (bs, nrClasses) -> dice for each class in each batch
        f1_test = T.mean(dice_scores_test)  # average over batches and classes
        dice_scores_test_continuous = theano_binary_dice_per_instance_and_class_for_loss(
            output_shuff_test, y_sym, dim=2, first_spatial_axis=2
        )  # (bs, nrClasses) -> dice for each class in each batch
        f1_test_continuous = T.mean(
            dice_scores_test_continuous)  # average over batches and classes

        loss_train = 1 - f1_train_continuous
        loss_test = 1 - f1_test_continuous

        ##Parameter Updates
        all_params = L.layers.get_all_params(output_layer_for_loss,
                                             trainable=True)
        learning_rate = theano.shared(np.float32(self.HP.LEARNING_RATE))
        # updates = L.updates.adam(loss_train, all_params, learning_rate)
        updates = L.updates.adamax(loss_train, all_params, learning_rate)

        # Define Functions
        train_fn = theano.function(
            [X_sym, y_sym], [loss_train, prediction_train, f1_train],
            updates=updates
        )  # prediction_TEST, since we also do not want dropout when scoring??
        predict_fn = theano.function([X_sym, y_sym],
                                     [loss_test, prediction_test, f1_test])

        get_probs = theano.function([X_sym], output_test)

        #Exporting variables
        self.learning_rate = learning_rate
        self.train = train_fn
        self.predict = predict_fn
        self.get_probs = get_probs  # (bs, x, y, nrClasses)
        self.net = net
        self.output = output_layer_for_loss  # this is used for saving weights (could probably also be simplified)
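`theano_binary_dice_per_instance_and_class` is used above but not shown. A hedged sketch of one plausible soft-Dice implementation over (bs, nr_classes, x, y) tensors; the real helper may differ.

def soft_dice_per_instance_and_class(y_pred, y_true, eps=1e-7):
    # y_pred, y_true: (bs, nr_classes, x, y); returns (bs, nr_classes)
    spatial_axes = (2, 3)
    intersection = T.sum(y_pred * y_true, axis=spatial_axes)
    denominator = T.sum(y_pred, axis=spatial_axes) + T.sum(y_true, axis=spatial_axes)
    return (2. * intersection + eps) / (denominator + eps)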
def train(args):
    print '\nNEURAL POS TAGGER START\n'

    print '\tINITIAL EMBEDDING\t%s %s' % (args.word_list, args.emb_list)
    print '\tWORD\t\t\tEmb Dim: %d  Hidden Dim: %d' % (args.w_emb_dim, args.w_hidden_dim)
    print '\tCHARACTER\t\tEmb Dim: %d  Hidden Dim: %d' % (args.c_emb_dim, args.c_hidden_dim)
    print '\tOPTIMIZATION\t\tMethod: %s  Learning Rate: %f\n' % (args.opt, args.lr)
    print '\tMINI-BATCH: %d\n' % args.batch_size

    """ load data """
    print 'Loading data sets...\n'
    train_corpus, vocab_word, vocab_char, vocab_tag, max_char_len = io_utils.load_conll(args.train_data)

    """ limit data set """
    train_corpus = train_corpus[:args.data_size]
    train_corpus.sort(key=lambda a: len(a))

    dev_corpus = None
    if args.dev_data:
        dev_corpus, dev_vocab_word, dev_vocab_char, dev_vocab_tag, max_char_len_dev = io_utils.load_conll(args.dev_data)

        for w in dev_vocab_word.i2w:
            if args.vocab_size is None or vocab_word.size() < args.vocab_size:
                vocab_word.add_word(w)
        for c in dev_vocab_char.i2w:
            vocab_char.add_word(c)
        for t in dev_vocab_tag.i2w:
            vocab_tag.add_word(t)

    if args.save:
        io_utils.dump_data(vocab_word, 'vocab_word')
        io_utils.dump_data(vocab_char, 'vocab_char')
        io_utils.dump_data(vocab_tag, 'vocab_tag')

    """ load pre-trained embeddings """
    init_w_emb = None
    if args.emb_list:
        print '\tLoading word embeddings...\n'
        init_w_emb = io_utils.load_init_emb(args.emb_list, args.word_list, vocab_word)
        w_emb_dim = init_w_emb.shape[1]
    else:
        w_emb_dim = args.w_emb_dim

    """ converting into ids """
    print '\nConverting into IDs...\n'
    tr_x, tr_c, tr_b, tr_y = convert_into_ids(train_corpus, vocab_word, vocab_char, vocab_tag, max_char_len)
    tr_x, tr_c, tr_y, tr_b = set_minibatch(tr_x, tr_c, tr_y, max_char_len, args.batch_size)
    tr_x, tr_c, tr_y = shared_samples(tr_x, tr_c, tr_y)

    if args.dev_data:
        dev_x, dev_c, dev_b, dev_y = convert_into_ids(dev_corpus, vocab_word, vocab_char, vocab_tag, max_char_len_dev)
        dev_x, dev_c, dev_y, dev_b = set_minibatch(dev_x, dev_c, dev_y, max_char_len_dev, 1)
        dev_x, dev_c, dev_y = shared_samples(dev_x, dev_c, dev_y)
        print '\tTrain Sentences: %d  Dev Sentences: %d' % (len(train_corpus), len(dev_corpus))
    else:
        print '\tTrain Sentences: %d' % len(train_corpus)

    print '\tWord size: %d  Char size: %d' % (vocab_word.size(), vocab_char.size())

    """ set model parameters """
    w_hidden_dim = args.w_hidden_dim
    c_emb_dim = args.c_emb_dim
    c_hidden_dim = args.c_hidden_dim
    output_dim = vocab_tag.size()
    window = args.window
    opt = args.opt

    """ symbol definition """
    print '\tCompiling Theano Code...'
    bos = T.iscalar('bos')
    eos = T.iscalar('eos')
    n_words = T.iscalar('n_words')
    batch_size = T.iscalar('batch_size')
    x = T.imatrix('x')
    c = T.itensor4('c')
    y = T.ivector('y')
    lr = T.fscalar('lr')

    """ tagger set up """
    tagger = Model(x=x, c=c, y=y, n_words=n_words, batch_size=batch_size, lr=lr, init_emb=init_w_emb,
                   vocab_w_size=vocab_word.size(), w_emb_dim=w_emb_dim, w_hidden_dim=w_hidden_dim,
                   c_emb_dim=c_emb_dim, c_hidden_dim=c_hidden_dim, output_dim=output_dim,
                   vocab_c_size=vocab_char.size(), window=window, opt=opt)

    train_f = theano.function(
        inputs=[bos, eos, n_words, batch_size, lr],
        outputs=[tagger.nll, tagger.result],
        updates=tagger.updates,
        givens={
            x: tr_x[bos: eos],
            c: tr_c[bos: eos],
            y: tr_y[bos: eos]
        },
        mode='FAST_RUN'
    )

    dev_f = theano.function(
        inputs=[bos, eos, n_words, batch_size],
        outputs=tagger.result,
        givens={
            x: dev_x[bos: eos],
            c: dev_c[bos: eos],
            y: dev_y[bos: eos]
        },
        mode='FAST_RUN'
    )

    def _train():
        for epoch in xrange(args.epoch):
            _lr = args.lr / float(epoch+1)
            indices = range(len(tr_b))
            random.shuffle(indices)

            print '\nEpoch: %d' % (epoch + 1)
            print '\tBatch Index: ',
            start = time.time()

            total = 0.0
            correct = 0
            losses = 0.0

            for i, index in enumerate(indices):
                if i % 100 == 0 and i != 0:
                    print i,
                    sys.stdout.flush()

                boundary = tr_b[index]
                loss, corrects = train_f(boundary[0], boundary[1], boundary[2], boundary[3], _lr)

                assert math.isnan(loss) is False, i

                total += len(corrects)
                correct += np.sum(corrects)
                losses += loss

            end = time.time()
            print '\tTime: %f seconds' % (end - start)
            print '\tNegative Log Likelihood: %f' % losses
            print '\tAccuracy:%f  Total:%d  Correct:%d' % ((correct / total), total, correct)

            _dev(dev_f)

    def _dev(model):
        print '\tBatch Index: ',
        start = time.time()

        total = 0.0
        correct = 0

        for index in xrange(len(dev_b)):
            if index % 100 == 0 and index != 0:
                print index,
                sys.stdout.flush()

            boundary = dev_b[index]
            corrects = model(boundary[0], boundary[1], boundary[2], boundary[3])

            total += len(corrects)
            correct += np.sum(corrects)

        end = time.time()
        print '\tTime: %f seconds' % (end - start)
        print '\tAccuracy:%f  Total:%d  Correct:%d' % ((correct / total), total, correct)

    _train()
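`shared_samples` above is not shown; presumably it wraps the padded id arrays in Theano shared variables so they can be sliced inside `givens`. A hedged sketch of such a helper (illustrative, the actual helper may differ):

def as_shared_int32(*arrays):
    # wrap padded numpy int arrays as shared variables for slicing in `givens`
    return [theano.shared(np.asarray(a, dtype='int32'), borrow=True) for a in arrays]

# e.g. tr_x, tr_c, tr_y = as_shared_int32(tr_x, tr_c, tr_y)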
    def create_network(self):

        if self.HP.SEG_INPUT == "Peaks" and self.HP.TYPE == "single_direction":
            # NR_OF_GRADIENTS = 15  # SH-Coeff
            NR_OF_GRADIENTS = 9
        elif self.HP.SEG_INPUT == "Peaks" and self.HP.TYPE == "combined":
            # NR_OF_GRADIENTS = 3
            NR_OF_GRADIENTS = 3*self.HP.NR_OF_CLASSES    # 54
        else:
            NR_OF_GRADIENTS = 33

        if self.HP.RESOLUTION == "1.25mm":
            input_dim = (144, 144)
        elif self.HP.RESOLUTION == "2mm" or self.HP.RESOLUTION == "2.5mm":
            input_dim = (80, 80)


        print("Building network ...")
        print("(Model UNet)")
        # Lasagne Seed for Reproducibility
        L.random.set_rng(np.random.RandomState(1))

        net = self.get_UNet(n_input_channels=NR_OF_GRADIENTS, num_output_classes=self.HP.NR_OF_CLASSES, input_dim=input_dim, base_n_filters=self.HP.UNET_NR_FILT)

        output_layer_for_loss = net["output_flat"]

        if self.HP.LOAD_WEIGHTS:
            print("Loading weights ... ({})".format(join(self.HP.EXP_PATH, self.HP.WEIGHTS_PATH)))
            with np.load(join(self.HP.EXP_PATH, self.HP.WEIGHTS_PATH)) as f:  # if both paths are absolute and the beginnings of the paths are the same, join will merge them
                param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            L.layers.set_all_param_values(output_layer_for_loss, param_values)


        X_sym = T.tensor4()
        # y_sym = T.imatrix()     # (bs*x*y, nr_of_classes)
        y_sym = T.itensor4()    # (bs, nr_of_classes, x, y)

        prediction_train = L.layers.get_output(output_layer_for_loss, X_sym, deterministic=False)
        prediction_test = L.layers.get_output(output_layer_for_loss, X_sym, deterministic=True)
        output_train = L.layers.get_output(net["output"], X_sym, deterministic=False)
        output_test = L.layers.get_output(net["output"], X_sym, deterministic=True)

        #Calc F1 NEW (simpler)
        output_shuff_train = output_train.dimshuffle((0, 3, 1, 2))  # (bs, nrClasses, x, y)
        dice_scores_train = theano_binary_dice_per_instance_and_class(output_shuff_train, y_sym, dim=2, first_spatial_axis=2)  # (bs, nrClasses) -> dice for each class in each batch
        f1_train = T.mean(dice_scores_train)  #average over batches and classes
        dice_scores_train_continuous = theano_binary_dice_per_instance_and_class_for_loss(output_shuff_train, y_sym, dim=2, first_spatial_axis=2)  # (bs, nrClasses) -> dice for each class in each batch
        f1_train_continuous = T.mean(dice_scores_train_continuous)  # average over batches and classes

        output_shuff_test = output_test.dimshuffle((0, 3, 1, 2))  # (bs, nrClasses, x, y)
        dice_scores_test = theano_binary_dice_per_instance_and_class(output_shuff_test, y_sym, dim=2, first_spatial_axis=2)  # (bs, nrClasses) -> dice for each class in each batch
        f1_test = T.mean(dice_scores_test)  # average over batches and classes
        dice_scores_test_continuous = theano_binary_dice_per_instance_and_class_for_loss(output_shuff_test, y_sym, dim=2, first_spatial_axis=2)  # (bs, nrClasses) -> dice for each class in each batch
        f1_test_continuous = T.mean(dice_scores_test_continuous)  # average over batches and classes

        loss_train = 1 - f1_train_continuous
        loss_test = 1 - f1_test_continuous

        ##Parameter Updates
        all_params = L.layers.get_all_params(output_layer_for_loss, trainable=True)
        learning_rate = theano.shared(np.float32(self.HP.LEARNING_RATE))
        # updates = L.updates.adam(loss_train, all_params, learning_rate)
        updates = L.updates.adamax(loss_train, all_params, learning_rate)

        # Define Functions
        train_fn = theano.function([X_sym, y_sym], [loss_train, prediction_train, f1_train], updates=updates)  # prediction_TEST, since we also do not want dropout when scoring??
        predict_fn = theano.function([X_sym, y_sym], [loss_test, prediction_test, f1_test])

        get_probs = theano.function([X_sym], output_test)

        #Exporting variables
        self.learning_rate = learning_rate
        self.train = train_fn
        self.predict = predict_fn
        self.get_probs = get_probs    # (bs, x, y, nrClasses)
        self.net = net
        self.output = output_layer_for_loss     # this is used for saving weights (could probably also be simplified)
Example #37
DIM = 32
GRAD_CLIP = 1.
Q_LEVELS = 256
BATCH_SIZE = 20
PRINT_EVERY = 250
EPOCH = 100

OUT_DIR = '/Tmp/kumarkun/cifar10'
create_folder_if_not_there(OUT_DIR)

model = Model(name = "CIFAR10.pixelCNN")


is_train = T.scalar()
X = T.tensor4('X') # shape: (batchsize, channels, height, width)
X_r = T.itensor4('X_r')

X_transformed = X_r.dimshuffle(0,2,3,1)
input_layer = WrapperLayer(X.dimshuffle(0,2,3,1)) # input reshaped to (batchsize, height, width,3)

pixel_CNN = pixelConv(
	input_layer, 
	3, 
	DIM,
	Q_LEVELS = Q_LEVELS,
	name = model.name + ".pxCNN",
	num_layers = 12,
	)

model.add_layer(pixel_CNN)
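The `dimshuffle(0, 2, 3, 1)` calls above only permute axes (channels-first to channels-last). A tiny illustrative sketch, with `x_nchw` as a hypothetical input:

x_nchw = T.tensor4('x_nchw')             # (batch, channels, height, width)
x_nhwc = x_nchw.dimshuffle(0, 2, 3, 1)   # (batch, height, width, channels)
x_back = x_nhwc.dimshuffle(0, 3, 1, 2)   # back to (batch, channels, height, width)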
Example #38
                                                WIDTH)).dimshuffle(
                                                    0, 2, 3, 4, 1)
        else:
            output = lib.ops.conv2d.Conv2D('OutputConv3',
                                           input_dim=PIX_DIM,
                                           output_dim=N_CHANNELS,
                                           filter_size=1,
                                           inputs=output,
                                           mask_type=('b', N_CHANNELS),
                                           he_init=False)

        return output


total_iters = T.iscalar('total_iters')
images = T.itensor4('images')  # shape: (batch size, n channels, height, width)

mu, log_sigma = Encoder(images)

# mu = lib.debug.print_stats('mu', mu)
# log_sigma = lib.debug.print_stats('log_sigma', log_sigma)

if VANILLA:
    latents = mu
else:
    eps = T.cast(theano_srng.normal(mu.shape), theano.config.floatX)
    latents = mu + (eps * T.exp(log_sigma))

latents = T.minimum(50, latents)
latents = T.maximum(-50, latents)
Example #39
    def create_network(self):

        if self.HP.SEG_INPUT == "Peaks" and self.HP.TYPE == "single_direction":
            # NR_OF_GRADIENTS = 15  # SH-Coeff
            NR_OF_GRADIENTS = 9
        elif self.HP.SEG_INPUT == "Peaks" and self.HP.TYPE == "combined":
            # NR_OF_GRADIENTS = 3
            NR_OF_GRADIENTS = 3*self.HP.NR_OF_CLASSES
            # NR_OF_GRADIENTS = self.HP.NR_OF_CLASSES
        else:
            NR_OF_GRADIENTS = 33

        print("Building network ...")
        print("(Model UNet)")
        # Lasagne Seed for Reproducibility
        L.random.set_rng(np.random.RandomState(1))

        net = self.get_UNet(n_input_channels=NR_OF_GRADIENTS, num_output_classes=self.HP.NR_OF_CLASSES, input_dim=self.HP.INPUT_DIM, base_n_filters=self.HP.UNET_NR_FILT)

        output_layer_for_loss = net["output_flat"]

        if self.HP.LOAD_WEIGHTS:
            print("Loading weights ... ({})".format(join(self.HP.EXP_PATH, self.HP.WEIGHTS_PATH)))
            with np.load(join(self.HP.EXP_PATH, self.HP.WEIGHTS_PATH)) as f:  # if both paths are absolute and the beginnings of the paths are the same, join will merge them
                param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            L.layers.set_all_param_values(output_layer_for_loss, param_values)


        X_sym = T.tensor4()
        # y_sym = T.imatrix()     # (bs*x*y, nr_of_classes)
        y_sym = T.itensor4()    # (bs, nr_of_classes, x, y)
        y_sym_flat = y_sym.dimshuffle((0, 2, 3, 1))  # (bs, x, y, nr_of_classes)
        y_sym_flat = y_sym_flat.reshape((-1, y_sym_flat.shape[3]))  # (bs*x*y, nr_of_classes)

        # add some weight decay
        # l2_loss = L.regularization.regularize_network_params(output_layer_for_loss, L.regularization.l2) * 1e-5

        ##Train
        prediction_train = L.layers.get_output(output_layer_for_loss, X_sym, deterministic=False)
        loss_vec_train = L.objectives.binary_crossentropy(prediction_train, y_sym_flat)
        loss_vec_train = loss_vec_train.mean(axis=1) #before: (bs*x*y, nrClasses) (= elementwise binary CE), after: (bs*x*y) (= same shape as output from categorical CE)
        # loss_train = loss_vec_train.mean() + l2_loss
        loss_train = loss_vec_train.mean()

        ##Test
        prediction_test = L.layers.get_output(output_layer_for_loss, X_sym, deterministic=True)
        # prediction_test = L.layers.get_output(output_layer_for_loss, X_sym, deterministic=False)   #for Dropout Sampling
        loss_vec_test = L.objectives.binary_crossentropy(prediction_test, y_sym_flat)
        loss_vec_test = loss_vec_test.mean(axis=1)
        # loss_test = loss_vec_test.mean() + l2_loss
        loss_test = loss_vec_test.mean()

        ##Parameter Updates
        all_params = L.layers.get_all_params(output_layer_for_loss, trainable=True)
        learning_rate = theano.shared(np.float32(self.HP.LEARNING_RATE))
        updates = L.updates.adamax(loss_train, all_params, learning_rate)

        ##Convenience function
        output_train = L.layers.get_output(net["output"], X_sym, deterministic=False)
        output_test = L.layers.get_output(net["output"], X_sym, deterministic=True)

        #Calc F1 NEW (simpler)
        output_shuff_train = output_train.dimshuffle((0, 3, 1, 2))  # (bs, nrClasses, x, y)
        dice_scores_train = theano_binary_dice_per_instance_and_class(output_shuff_train, y_sym, dim=2, first_spatial_axis=2)  # (bs, nrClasses) -> dice for each class in each batch
        f1_train = T.mean(dice_scores_train)  #average over batches and classes

        output_shuff_test = output_test.dimshuffle((0, 3, 1, 2))  # (bs, nrClasses, x, y)
        dice_scores_test = theano_binary_dice_per_instance_and_class(output_shuff_test, y_sym, dim=2, first_spatial_axis=2)  # (bs, nrClasses) -> dice for each class in each batch
        f1_test = T.mean(dice_scores_test)  # average over batches and classes

        #Define Functions
        train_fn = theano.function([X_sym, y_sym], [loss_train, prediction_train, f1_train], updates=updates)  # prediction_TEST, since we also do not want dropout when scoring??
        predict_fn = theano.function([X_sym, y_sym], [loss_test, prediction_test, f1_test])

        get_probs = theano.function([X_sym], output_test)

        #Exporting variables
        self.learning_rate = learning_rate
        self.train = train_fn
        self.predict = predict_fn
        self.get_probs = get_probs    # (bs, x, y, nrClasses)
        self.net = net
        self.output = output_layer_for_loss     # this is used for saving weights (could probably also be simplified)
Example #40
# MLP
# def Discriminator(inputs):
#     n_samples = inputs.shape[0]

#     output = lib.ops.linear.Linear('Discriminator.In', 64*64*3, DIM, inputs, initialization='glorot_he')
#     output = T.nnet.relu(output)
#     output = lib.ops.linear.Linear('Discriminator.2', DIM, DIM, output, initialization='he')
#     output = T.nnet.relu(output)
#     output = lib.ops.linear.Linear('Discriminator.3', DIM, DIM, output, initialization='he')
#     output = T.nnet.relu(output)
#     output = lib.ops.linear.Linear('Discriminator.4', DIM, 1, output, initialization='he')

#     return output.reshape((n_samples,))


real_data_int = T.itensor4('images')
real_data = (T.cast(real_data_int, 'float32')*(2./255) - 1.).reshape((-1,64*64*3))

fake_data = Generator(BATCH_SIZE)

disc_out = Discriminator(T.concatenate([real_data, fake_data], axis=0))
disc_real = disc_out[:BATCH_SIZE]
disc_fake = disc_out[BATCH_SIZE:]

gen_cost = -T.mean(Discriminator(Generator(2*BATCH_SIZE)))
disc_cost = T.mean(disc_fake) - T.mean(disc_real)

alpha = srng.uniform(
    size=(BATCH_SIZE,1), 
    low=0.,
    high=1.
Example #41
DIM = 32
GRAD_CLIP = 1.
Q_LEVELS = 256
BATCH_SIZE = 20
PRINT_EVERY = 250
EPOCH = 100

OUT_DIR = '/Tmp/kumarkun/cifar10'
create_folder_if_not_there(OUT_DIR)

model = Model(name = "CIFAR10.pixelCNN")


is_train = T.scalar()
X = T.tensor4('X') # shape: (batchsize, channels, height, width)
X_r = T.itensor4('X_r')

X_transformed = X_r.dimshuffle(0,2,3,1)
input_layer = WrapperLayer(X.dimshuffle(0,2,3,1)) # input reshaped to (batchsize, height, width,3)

pixel_CNN = pixelConv(
	input_layer, 
	3, 
	DIM,
	Q_LEVELS = Q_LEVELS,
	name = model.name + ".pxCNN",
	num_layers = 12,
	)

model.add_layer(pixel_CNN)
Example #42
report(v)

# 4-dimensional ndarray
v = T.tensor4(name=None, dtype=T.config.floatX)
report(v)

# constructors with fixed data type. (examples with tensor4)
# b: byte, w: word(16bit), l: int64, i: int32
# d:float64, f: float32, c: complex64, z: complex128
v = T.btensor4(name="v")
report(v)

v = T.wtensor4(name="v")
report(v)

v = T.itensor4(name="v")
report(v)

v = T.ltensor4(name="v")
report(v)

v = T.dtensor4(name="v")
report(v)

v = T.ftensor4(name="v")
report(v)

v = T.ctensor4(name="v")
report(v)

v = T.ztensor4(name="v")
Example #43
# MLP
# def Discriminator(inputs):
#     n_samples = inputs.shape[0]

#     output = lib.ops.linear.Linear('Discriminator.In', 64*64*3, DIM, inputs, initialization='glorot_he')
#     output = T.nnet.relu(output)
#     output = lib.ops.linear.Linear('Discriminator.2', DIM, DIM, output, initialization='he')
#     output = T.nnet.relu(output)
#     output = lib.ops.linear.Linear('Discriminator.3', DIM, DIM, output, initialization='he')
#     output = T.nnet.relu(output)
#     output = lib.ops.linear.Linear('Discriminator.4', DIM, 1, output, initialization='he')

#     return output.reshape((n_samples,))

real_data_int = T.itensor4('images')
real_data = (T.cast(real_data_int, 'float32') * (2. / 255) - 1.).reshape(
    (-1, 64 * 64 * 3))

fake_data = Generator(BATCH_SIZE)

disc_out = Discriminator(T.concatenate([real_data, fake_data], axis=0))
disc_real = disc_out[:BATCH_SIZE]
disc_fake = disc_out[BATCH_SIZE:]

gen_cost = -T.mean(Discriminator(Generator(2 * BATCH_SIZE)))
disc_cost = T.mean(disc_fake) - T.mean(disc_real)

alpha = srng.uniform(size=(BATCH_SIZE, 1), low=0., high=1.)
differences = fake_data - real_data
interpolates = real_data + (alpha * differences)
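The snippet ends right after forming the interpolates. A hedged sketch of the usual WGAN-GP gradient-penalty term computed from them; `LAMBDA` is an assumed penalty weight (commonly 10) and is not part of the snippet.

gradients = T.grad(T.sum(Discriminator(interpolates)), interpolates)
slopes = T.sqrt(T.sum(T.sqr(gradients), axis=1))      # interpolates has shape (batch, 64*64*3)
gradient_penalty = T.mean((slopes - 1.) ** 2)
disc_cost_gp = disc_cost + LAMBDA * gradient_penalty  # LAMBDA assumed, e.g. 10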
Example #44

if __name__ == '__main__':

    data_test_path = 'test'
    save_dir = 'visualisation'

    seq_num = 50
    n_class = 4
    size = 192

    # Build the network

    image_var = tensor5('image')
    image_pred_var = tensor5('image_pred')
    label_var = T.itensor4('label')
    image_seg_var = T.tensor4('image_seg')

    net = build_FCN_triple_branch_rnn(image_var,
                                      image_pred_var,
                                      image_seg_var,
                                      shape=(None, 1, size, size, seq_num),
                                      shape_seg=(None, 1, size, size))
    #model_file = 'model/FCN_VGG16_sz192_flow_simese_rnn_shared.npz'
    model_file = 'model/FCN_VGG16_sz192_triple_3d_rnn_warped_tmp.npz'
    with np.load(model_file) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    L.set_all_param_values([net['out'], net['outs']], param_values)

    test_prediction = L.get_output(net['outs'])
    test_loc = L.get_output(net['out'], deterministic=True)
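`tensor5` is not defined in this snippet; it is presumably a 5-D float TensorType built the same way as the `itensor5` constructed further down this page. A hedged sketch:

import theano
import theano.tensor as T

tensor5 = T.TensorType(theano.config.floatX, (False,) * 5)   # 5-D float constructor
example_5d = tensor5('image_example')  # e.g. (batch, 1, size, size, seq_num) as in the shapes above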
Example #45
PIXEL_CNN_LAYERS = 4

LR = 2e-4

BATCH_SIZE = 100
N_CHANNELS = 1
HEIGHT = 28
WIDTH = 28

TIMES = ('iters', 10*500, 1000*500)

lib.print_model_settings(locals().copy())

# inputs.shape: (batch size, n channels, height, width)
if MODE=='256ary':
    inputs = T.itensor4('inputs')
    inputs_float = inputs.astype(theano.config.floatX) * lib.floatX(2./255)
    inputs_float -= lib.floatX(0.5)
else:
    inputs = T.tensor4('inputs')
    inputs_float = inputs

output = lib.ops.conv2d.Conv2D(
    'InputConv', 
    input_dim=N_CHANNELS, 
    output_dim=DIM, 
    filter_size=7, 
    inputs=inputs_float, 
    mask_type=('a', N_CHANNELS),
    he_init=False
)
Example #46
ne = 225961
de = 50
margin = 3
lr = 0.05
maxLen = 15
batchsize = 800
negative_sample_size = 1

itensor5 = T.TensorType("int32", (False,) * 5)
dtype = theano.config.floatX

"""trainning model"""
matrix_ndarray = np.random.uniform(-0.08, 0.08, (ne + 1, de)).astype(dtype)
subtract = np.array([1, -1])

idxs = T.itensor4("ids")
mask = itensor5("mask")
emb = theano.shared(name="embeddings", value=matrix_ndarray)
subset = emb[idxs]
# mask subset
subset_m = subset * mask
x = T.sum(subset_m, axis=3)
p = T.prod(x, axis=2)
s = T.sum(p, axis=2)
mul = theano.shared(name="mul", value=subtract)
diff = T.dot(s, mul)
cost = T.sum(T.maximum(0, margin - diff))
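The `subtract = [1, -1]` trick above appears to place the positive and negative candidate scores along the last axis of `s`, so the dot product yields their difference and the hinge penalizes positives that do not beat negatives by at least `margin`. A numpy sketch of just that step (scores are illustrative):

import numpy as np

s_val = np.array([[3.2, 1.0],    # (positive score, negative score) per example
                  [0.5, 2.0]])
diff_val = s_val.dot(np.array([1, -1]))          # positive minus negative
cost_val = np.sum(np.maximum(0, 3 - diff_val))   # hinge with margin = 3, as above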

"""testing model"""
idxs_t = T.imatrix("ids")
mask_t = T.itensor3("mask")