Example #1
    def __init__(self, K, vocab_size, num_chars, W_init, 
            nhidden, embed_dim, dropout, train_emb, char_dim, use_feat, gating_fn, 
            save_attn=False):
        self.nhidden = nhidden
        self.embed_dim = embed_dim
        self.dropout = dropout
        self.train_emb = train_emb
        self.char_dim = char_dim
        self.learning_rate = LEARNING_RATE
        self.num_chars = num_chars
        self.use_feat = use_feat
        self.save_attn = save_attn
        self.gating_fn = gating_fn

        self.use_chars = self.char_dim != 0
        if W_init is None:
            W_init = lasagne.init.GlorotNormal().sample((vocab_size, self.embed_dim))

        doc_var, query_var, cand_var = T.itensor3('doc'), T.itensor3('quer'), \
                T.wtensor3('cand')
        docmask_var, qmask_var, candmask_var = T.bmatrix('doc_mask'), T.bmatrix('q_mask'), \
                T.bmatrix('c_mask')
        target_var = T.ivector('ans')
        feat_var = T.imatrix('feat')
        doc_toks, qry_toks = T.imatrix('dchars'), T.imatrix('qchars')
        tok_var, tok_mask = T.imatrix('tok'), T.bmatrix('tok_mask')
        cloze_var = T.ivector('cloze')
        self.inps = [doc_var, doc_toks, query_var, qry_toks, cand_var, target_var, docmask_var,
                qmask_var, tok_var, tok_mask, candmask_var, feat_var, cloze_var]

        self.predicted_probs, predicted_probs_val, self.network, W_emb, attentions = (
                self.build_network(K, vocab_size, W_init))

        self.loss_fn = T.nnet.categorical_crossentropy(self.predicted_probs, target_var).mean()
        self.eval_fn = lasagne.objectives.categorical_accuracy(self.predicted_probs, 
                target_var).mean()

        loss_fn_val = T.nnet.categorical_crossentropy(predicted_probs_val, target_var).mean()
        eval_fn_val = lasagne.objectives.categorical_accuracy(predicted_probs_val, 
                target_var).mean()

        self.params = L.get_all_params(self.network, trainable=True)
        
        updates = lasagne.updates.adam(self.loss_fn, self.params, learning_rate=self.learning_rate)

        self.train_fn = theano.function(self.inps,
                [self.loss_fn, self.eval_fn, self.predicted_probs], 
                updates=updates,
                on_unused_input='warn')
        self.validate_fn = theano.function(self.inps, 
                [loss_fn_val, eval_fn_val, predicted_probs_val]+attentions,
                on_unused_input='warn')
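
A minimal usage sketch for this constructor, assuming a hypothetical instance `model` and iterators `minibatches`/`val_minibatches` that yield one numpy array per entry of `self.inps`, in that order (none of these names appear in the original code):

# Hedged sketch: train_fn performs one Adam update per call; validate_fn
# additionally returns the attention matrices appended to its output list.
for epoch in range(10):
    for batch in minibatches:
        loss, acc, probs = model.train_fn(*batch)
    val_outputs = [model.validate_fn(*b) for b in val_minibatches]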
Example #2
def BuildModel(modelSpecs, forTrain=True):
	rng = np.random.RandomState()

	## x is for sequential features and y for matrix (or pairwise) features
	x = T.tensor3('x')
	y = T.tensor4('y')

	## mask for x and y, respectively
	xmask = T.bmatrix('xmask')
	ymask = T.btensor3('ymask')

	xem = None
	##if any( k in modelSpecs['seq2matrixMode'] for k in ('SeqOnly', 'Seq+SS') ):
	if config.EmbeddingUsed(modelSpecs):
		xem = T.tensor3('xem')
		distancePredictor = ResNet4DistMatrix( rng, seqInput=x,
											   matrixInput=y, mask_seq=xmask, mask_matrix=ymask,
											   embedInput=xem, modelSpecs=modelSpecs )
	else:
		distancePredictor = ResNet4DistMatrix( rng, seqInput=x,
											   matrixInput=y, mask_seq=xmask, mask_matrix=ymask,
											   modelSpecs=modelSpecs )

	## labelList is a list of label tensors, each having shape (batchSize, seqLen, seqLen) or (batchSize, seqLen, seqLen, valueDims[response] )
	labelList = []
	if forTrain:
		## when this model is used for training, we need to define the label variables
		for response in modelSpecs['responses']:
			labelType = Response2LabelType(response)
			rValDims = config.responseValueDims[labelType]

			if labelType.startswith('Discrete'):
				if rValDims > 1:
					## if one response is a vector, then we use a 4-d tensor
					## wtensor holds 16-bit integers
					labelList.append( T.wtensor4('Tlabel4' + response ) )
				else:
					labelList.append( T.wtensor3('Tlabel4' + response ) )
			else:
				if rValDims > 1:
					labelList.append( T.tensor4('Tlabel4' + response ) )
				else:
					labelList.append( T.tensor3('Tlabel4' + response ) )

	## weightList is a list of label weight tensors, each having shape (batchSize, seqLen, seqLen)
	weightList = []
	if len(labelList)>0 and modelSpecs['UseSampleWeight']:
		weightList = [ T.tensor3('Tweight4'+response) for response in modelSpecs['responses'] ]

	## for prediction, both labelList and weightList are empty
	return distancePredictor, x, y, xmask, ymask, xem, labelList, weightList
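
A plausible call site for BuildModel at training time is sketched below; only the input wiring is shown, since the loss construction depends on ResNet4DistMatrix's API (see the loss/confusion helpers exercised in Example #5):

# Hypothetical wiring of BuildModel's outputs into a theano.function input list.
predictor, x, y, xmask, ymask, xem, labelList, weightList = BuildModel(modelSpecs, forTrain=True)
inputs = [x, y, xmask, ymask]
if xem is not None:                 # the embedding input is only created when configured
    inputs.append(xem)
inputs += labelList + weightList    # both lists are empty at prediction time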
Example #3
    def __init__(self, K, vocab_size, W_init, regularizer, rlambda, nhidden,
                 embed_dim, dropout, train_emb, subsample):
        self.nhidden = nhidden
        self.embed_dim = embed_dim
        self.dropout = dropout
        self.train_emb = train_emb
        self.subsample = subsample
        norm = lasagne.regularization.l2 if regularizer == 'l2' else lasagne.regularization.l1
        if W_init is None:
            W_init = lasagne.init.GlorotNormal().sample(
                (vocab_size, self.embed_dim))

        doc_var, query_var, cand_var = T.itensor3('doc'), T.itensor3(
            'quer'), T.wtensor3('cand')
        docmask_var, qmask_var, candmask_var = T.bmatrix('doc_mask'), T.bmatrix('q_mask'), \
                T.bmatrix('c_mask')
        target_var = T.ivector('ans')

        if rlambda > 0.:
            W_pert = W_init + lasagne.init.GlorotNormal().sample(W_init.shape)
        else:
            W_pert = W_init
        predicted_probs, predicted_probs_val, self.doc_net, self.q_net, W_emb = self.build_network(
            K, vocab_size, doc_var, query_var, cand_var, docmask_var,
            qmask_var, candmask_var, W_pert)

        loss_fn = T.nnet.categorical_crossentropy(predicted_probs, target_var).mean() + \
                rlambda*norm(W_emb-W_init)
        eval_fn = lasagne.objectives.categorical_accuracy(
            predicted_probs, target_var).mean()

        loss_fn_val = T.nnet.categorical_crossentropy(predicted_probs_val, target_var).mean() + \
                rlambda*norm(W_emb-W_init)
        eval_fn_val = lasagne.objectives.categorical_accuracy(
            predicted_probs_val, target_var).mean()

        params = L.get_all_params(self.doc_net, trainable=True) + \
                L.get_all_params(self.q_net, trainable=True)

        updates = lasagne.updates.adam(loss_fn,
                                       params,
                                       learning_rate=LEARNING_RATE)

        self.train_fn = theano.function([doc_var, query_var, cand_var, target_var, docmask_var, \
                qmask_var, candmask_var],
                [loss_fn, eval_fn, predicted_probs],
                updates=updates)
        self.validate_fn = theano.function([doc_var, query_var, cand_var, target_var, docmask_var, \
                qmask_var, candmask_var],
                [loss_fn_val, eval_fn_val, predicted_probs_val])
    def __init__(self, K, vocab_size, num_chars, W_init, regularizer, rlambda,
                 nhidden, embed_dim, dropout, train_emb, subsample, char_dim,
                 use_feat):
        self.nhidden = nhidden
        self.embed_dim = embed_dim
        self.dropout = dropout
        self.train_emb = train_emb
        self.subsample = subsample
        self.char_dim = char_dim
        self.learning_rate = LEARNING_RATE
        self.num_chars = num_chars
        self.use_feat = use_feat

        norm = lasagne.regularization.l2 if regularizer == 'l2' else lasagne.regularization.l1
        self.use_chars = self.char_dim != 0
        if W_init is None:
            W_init = lasagne.init.GlorotNormal().sample(
                (vocab_size, self.embed_dim))

        doc_var, query_var, cand_var = T.itensor3('doc'), T.itensor3('quer'), \
                T.wtensor3('cand')
        docmask_var, qmask_var, candmask_var = T.bmatrix('doc_mask'), T.bmatrix('q_mask'), \
                T.bmatrix('c_mask')
        target_var = T.ivector('ans')
        feat_var = T.imatrix('feat')
        doc_toks, qry_toks = T.imatrix('dchars'), T.imatrix('qchars')
        tok_var, tok_mask = T.imatrix('tok'), T.bmatrix('tok_mask')
        cloze_var = T.ivector('cloze')
        self.inps = [
            doc_var, doc_toks, query_var, qry_toks, cand_var, target_var,
            docmask_var, qmask_var, tok_var, tok_mask, candmask_var, feat_var,
            cloze_var
        ]

        if rlambda > 0.:
            W_pert = W_init + lasagne.init.GlorotNormal().sample(W_init.shape)
        else:
            W_pert = W_init
        self.predicted_probs, predicted_probs_val, self.doc_net, self.q_net, W_emb = (
            self.build_network(K, vocab_size, W_pert))

        self.loss_fn = T.nnet.categorical_crossentropy(self.predicted_probs, target_var).mean() + \
                rlambda*norm(W_emb-W_init)
        self.eval_fn = lasagne.objectives.categorical_accuracy(
            self.predicted_probs, target_var).mean()

        loss_fn_val = T.nnet.categorical_crossentropy(predicted_probs_val, target_var).mean() + \
                rlambda*norm(W_emb-W_init)
        eval_fn_val = lasagne.objectives.categorical_accuracy(
            predicted_probs_val, target_var).mean()

        self.params = L.get_all_params([self.doc_net] + self.q_net,
                                       trainable=True)

        updates = lasagne.updates.adam(self.loss_fn,
                                       self.params,
                                       learning_rate=self.learning_rate)

        self.train_fn = theano.function(
            self.inps, [self.loss_fn, self.eval_fn, self.predicted_probs],
            updates=updates,
            on_unused_input='warn')
        self.validate_fn = theano.function(
            self.inps, [loss_fn_val, eval_fn_val, predicted_probs_val],
            on_unused_input='warn')
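
Both constructors in this example regularize the drift of the learned embedding matrix from its initializer, rlambda*norm(W_emb - W_init), rather than its absolute magnitude. A toy numpy illustration of that penalty (shapes and values are made up):

import numpy as np

# The penalty is zero at initialization even though W_init itself is non-zero,
# and grows as training moves the embeddings away from W_init.
rng = np.random.RandomState(0)
W_init = rng.randn(5, 3).astype(np.float32)
W_emb = W_init + 0.01 * rng.randn(5, 3).astype(np.float32)  # after some updates
rlambda = 0.5
l2_penalty = rlambda * np.sum((W_emb - W_init) ** 2)   # the 'l2' branch
l1_penalty = rlambda * np.sum(np.abs(W_emb - W_init))  # the 'l1' branch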
Example #5
def TestResNet4DistMatrix():
    x = T.tensor3('x')
    y = T.tensor4('y')
    xmask = T.bmatrix('xmask')
    ymask = T.btensor3('ymask')
    selection = T.wtensor3('selection')

    import cPickle
    # pickled test data; open in binary mode for cPickle
    fh = open('seqDataset4HF.pkl', 'rb')
    data = cPickle.load(fh)
    fh.close()

    distancePredictor = ResNet4DistMatrix(rng=np.random.RandomState(),
                                          seqInput=x,
                                          matrixInput=y,
                                          n_in_seq=data[0][0].shape[2],
                                          n_in_matrix=data[1][0].shape[3],
                                          n_hiddens_seq=[3, 5],
                                          n_hiddens_matrix=[2],
                                          hwsz_seq=4,
                                          hwsz_matrix=4,
                                          mask_seq=xmask,
                                          mask_matrix=ymask)
    """
	f = theano.function([x, y, xmask, ymask], distancePredictor.output_1d)
	g = theano.function([x, y, xmask, ymask], distancePredictor.output_2d)
	"""

    dataLen = 300
    batchSize = 60
    a = np.random.uniform(0, 1, (batchSize, dataLen, 20)).astype(np.float32)
    b = np.random.uniform(0, 1,
                          (batchSize, dataLen, dataLen, 3)).astype(np.float32)
    amask = np.zeros((batchSize, 0)).astype(np.int8)
    bmask = np.zeros((batchSize, 0, dataLen)).astype(np.int8)
    sel = np.ones((batchSize, dataLen, dataLen)).astype(np.int8)
    #print a
    #print b
    c = np.random.uniform(0, 3, (batchSize, dataLen, dataLen)).round().astype(
        np.int8)
    np.putmask(c, c >= 2, 2)
    """
	c[0, 1, 13]=1
	c[0, 2, 15]=1
	c[0, 4, 16]=1
	
	c[0, 1, 27]=1
	c[0, 2, 28]=1
	c[0, 4, 29]=1
	
	c[1, 0, 13]=2
	c[1, 1, 15]=2
	c[1, 3, 16]=2
	
	c[2, 0, 23]=2
	c[2, 1, 25]=2
	c[2, 3, 26]=2
	"""
    #sel = c

    #out1d = f(a, b, amask, bmask)
    #out2d = g(a, b, amask, bmask)

    #print out1d
    #print out2d

    z = T.btensor3('z')
    loss = distancePredictor.loss(z, selection)
    errs = distancePredictor.ErrorsByRange(z)
    accs = distancePredictor.TopAccuracyByRange(z)
    confM = distancePredictor.confusionMatrix(z)

    h = theano.function([x, y, xmask, ymask, selection, z],
                        confM,
                        on_unused_input='ignore')
    #l, e, accu = h(a, b, amask, bmask, sel, c)

    cms = []
    for i in np.arange(5):
        cm = h(data[0][i], data[1][i], data[2][i], data[3][i], data[4][i],
               data[5][i])
        print(cm)
        cms.append(cm)

    # sum the per-batch confusion matrices, then normalize each one so that
    # its entries sum to one
    sumofcms = np.sum(cms, axis=0) * 1.

    for i in range(sumofcms.shape[0]):
        sumofcms[i] /= np.sum(sumofcms[i])

    confusions = sumofcms
    print(confusions)
    print(np.sum(confusions[0]))
    print(np.sum(confusions[1]))
    print(np.sum(confusions[2]))
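
The explicit int8/float32 casts in this test are required: Theano's one-letter type prefixes fix the dtype, and numpy inputs must match it exactly. A quick sketch of the correspondence used throughout these examples:

import theano.tensor as T

# One-letter dtype prefixes; inputs with a different dtype are rejected
# when the compiled function is called.
assert T.bmatrix('m').dtype == 'int8'     # 'b' = int8  (masks)
assert T.wtensor3('t').dtype == 'int16'   # 'w' = int16 (selections, labels)
assert T.itensor3('d').dtype == 'int32'   # 'i' = int32 (indices)
x = T.tensor3('x')   # no prefix: dtype follows theano.config.floatX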
    """
Example #6
    def __init__(
            self,
            rng,
            batchsize=100,
            activation=relu
    ):

        import char_load
        (num_sent, char_cnt, word_cnt, max_word_len, max_sen_len, \
         k_chr, k_wrd, x_chr, x_wrd, y) = char_load.read("tweets_clean.txt")

        dim_word = 30
        dim_char = 5
        cl_word = 300
        cl_char = 50
        k_word = k_wrd
        k_char = k_chr

        data_train_word, \
        data_test_word, \
        data_train_char, \
        data_test_char, \
        target_train, \
        target_test \
            = train_test_split(x_wrd, x_chr, y, random_state=1234, test_size=0.1)

        x_train_word = theano.shared(np.asarray(data_train_word, dtype='int16'), borrow=True)
        x_train_char = theano.shared(np.asarray(data_train_char, dtype='int16'), borrow=True)
        y_train = theano.shared(np.asarray(target_train, dtype='int8'), borrow=True)
        x_test_word = theano.shared(np.asarray(data_test_word, dtype='int16'), borrow=True)
        x_test_char = theano.shared(np.asarray(data_test_char, dtype='int16'), borrow=True)
        y_test = theano.shared(np.asarray(target_test, dtype='int8'), borrow=True)

        # floor division keeps the batch counts integral under both Python 2 and 3
        self.n_train_batches = x_train_word.get_value(borrow=True).shape[0] // batchsize
        self.n_test_batches = x_test_word.get_value(borrow=True).shape[0] // batchsize

        """symbol definition"""
        index = T.iscalar()
        x_wrd = T.wmatrix('x_wrd')
        x_chr = T.wtensor3('x_chr')
        y = T.bvector('y')
        train = T.iscalar('train')

        """network definition"""
        layer_char_embed_input = x_chr  # .reshape((batchsize, max_sen_len, max_word_len))

        layer_char_embed = EmbedIDLayer(
            rng,
            layer_char_embed_input,
            n_input=char_cnt,
            n_output=dim_char
        )

        layer1_input = layer_char_embed.output.reshape(
            (batchsize * max_sen_len, 1, max_word_len, dim_char)
        )

        layer1 = ConvolutionalLayer(
            rng,
            layer1_input,
            filter_shape=(cl_char, 1, k_char, dim_char),  # cl_char = number of filters
            image_shape=(batchsize * max_sen_len, 1, max_word_len, dim_char)
        )

        layer2 = MaxPoolingLayer(
            layer1.output,
            poolsize=(max_word_len - k_char + 1, 1)
        )

        layer_word_embed_input = x_wrd  # .reshape((batchsize, max_sen_len))

        layer_word_embed = EmbedIDLayer(
            rng,
            layer_word_embed_input,
            n_input=word_cnt,
            n_output=dim_word
        )

        layer3_word_input = layer_word_embed.output.reshape((batchsize, 1, max_sen_len, dim_word))
        layer3_char_input = layer2.output.reshape((batchsize, 1, max_sen_len, cl_char))

        layer3_input = T.concatenate(
            [layer3_word_input,
             layer3_char_input],
            axis=3
        )  # .reshape((batchsize, 1, max_sen_len, dim_word+cl_char))

        layer3 = ConvolutionalLayer(
            rng,
            layer3_input,
            filter_shape=(cl_word, 1, k_word, dim_word + cl_char),  # 1 is the number of input channels
            image_shape=(batchsize, 1, max_sen_len, dim_word + cl_char),
            activation=activation
        )

        layer4 = MaxPoolingLayer(
            layer3.output,
            poolsize=(max_sen_len - k_word + 1, 1)
        )

        layer5_input = layer4.output.reshape((batchsize, cl_word))

        layer5 = FullyConnectedLayer(
            rng,
            dropout(rng, layer5_input, train),
            n_input=cl_word,
            n_output=50,
            activation=activation
        )

        layer6_input = layer5.output

        layer6 = FullyConnectedLayer(
            rng,
            dropout(rng, layer6_input, train, p=0.1),
            n_input=50,
            n_output=2,
            activation=None
        )

        result = Result(layer6.output, y)
        loss = result.negative_log_likelihood()
        accuracy = result.accuracy()
        params = layer6.params \
                 + layer5.params \
                 + layer3.params \
                 + layer_word_embed.params \
                 + layer1.params \
                 + layer_char_embed.params
        updates = RMSprop(learning_rate=0.001, params=params).updates(loss)

        self.train_model = theano.function(
            inputs=[index],
            outputs=[loss, accuracy],
            updates=updates,
            givens={
                x_wrd: x_train_word[index * batchsize: (index + 1) * batchsize],
                x_chr: x_train_char[index * batchsize: (index + 1) * batchsize],
                y: y_train[index * batchsize: (index + 1) * batchsize],
                train: np.cast['int32'](1)
            }
        )

        self.test_model = theano.function(
            inputs=[index],
            outputs=[loss, accuracy],
            givens={
                x_wrd: x_test_word[index * batchsize: (index + 1) * batchsize],
                x_chr: x_test_char[index * batchsize: (index + 1) * batchsize],
                y: y_test[index * batchsize: (index + 1) * batchsize],
                train: np.cast['int32'](0)
            }
        )
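
Because the data lives in shared variables, the compiled functions take only a minibatch index. A hedged sketch of an epoch loop, assuming the enclosing class is instantiated as `net` (the class name is not shown in this snippet):

import numpy as np

# Each call slices one minibatch out of the GPU-resident shared variables
# through the givens mapping defined above.
for epoch in range(10):
    train_stats = [net.train_model(i) for i in range(int(net.n_train_batches))]
    test_stats = [net.test_model(i) for i in range(int(net.n_test_batches))]
    print(epoch, np.mean([a for l, a in test_stats]))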
Example #8
def BuildModel(modelSpecs, forTrain=True):
    rng = np.random.RandomState()

    ## x is for sequential features and y for matrix (or pairwise) features
    x = T.tensor3('x')
    y = T.tensor4('y')

    ## mask for x and y, respectively
    xmask = T.bmatrix('xmask')
    ymask = T.btensor3('ymask')

    xem = None
    ##if any( k in modelSpecs['seq2matrixMode'] for k in ('SeqOnly', 'Seq+SS') ):
    if config.EmbeddingUsed(modelSpecs):
        xem = T.tensor3('xem')

    ## bounding box for a crop of a big protein distance matrix; this box allows cropping at any position
    box = None
    if forTrain:
        box = T.ivector('boundingbox')

    ## trainByRefLoss can be either 1 or -1; when this variable exists, we train the model using both the reference loss and the loss on real data
    trainByRefLoss = None
    if forTrain and config.TrainByRefLoss(modelSpecs):
        trainByRefLoss = T.iscalar('trainByRefLoss')

    distancePredictor = ResNet4DistMatrix(rng,
                                          seqInput=x,
                                          matrixInput=y,
                                          mask_seq=xmask,
                                          mask_matrix=ymask,
                                          embedInput=xem,
                                          boundingbox=box,
                                          modelSpecs=modelSpecs)

    ## labelList is a list of label tensors, each having shape (batchSize, seqLen, seqLen) or (batchSize, seqLen, seqLen, valueDims[response] )
    labelList = []
    if forTrain:
        ## when this model is used for training, we need to define the label variables
        for response in modelSpecs['responses']:
            labelType = Response2LabelType(response)
            rValDims = GetResponseValueDims(response)

            if labelType.startswith('Discrete'):
                if rValDims > 1:
                    ## if one response is a vector, then we use a 4-d tensor
                    ## wtensor holds 16-bit integers
                    labelList.append(T.wtensor4('Tlabel4' + response))
                else:
                    labelList.append(T.wtensor3('Tlabel4' + response))
            else:
                if rValDims > 1:
                    labelList.append(T.tensor4('Tlabel4' + response))
                else:
                    labelList.append(T.tensor3('Tlabel4' + response))

    ## weightList is a list of label weight tensors, each having shape (batchSize, seqLen, seqLen)
    weightList = []
    if len(labelList) > 0 and config.UseSampleWeight(modelSpecs):
        weightList = [
            T.tensor3('Tweight4' + response)
            for response in modelSpecs['responses']
        ]

    ## for prediction, both labelList and weightList are empty
    if forTrain:
        return distancePredictor, x, y, xmask, ymask, xem, labelList, weightList, box, trainByRefLoss
    else:
        return distancePredictor, x, y, xmask, ymask, xem
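
This version of BuildModel returns tuples of different lengths for the two modes, so call sites must unpack accordingly; a brief sketch:

# Train mode: two extra variables (box, trainByRefLoss), either of which may be None.
(predictor, x, y, xmask, ymask, xem,
 labelList, weightList, box, trainByRefLoss) = BuildModel(modelSpecs, forTrain=True)

# Prediction mode: labels, weights, and the extra training variables are omitted.
predictor, x, y, xmask, ymask, xem = BuildModel(modelSpecs, forTrain=False)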
Example #9
    def __init__(self, rng, batchsize=100, activation=relu):

        import loader
        (numsent, charcnt, wordcnt, maxwordlen, maxsenlen,\
        kchr, kwrd, xchr, xwrd, y) = loader.read("tweets_clean.txt")

        dimword = 30
        dimchar = 5
        clword = 300
        clchar = 50
        kword = kwrd
        kchar = kchr

        datatrainword,\
        datatestword,\
        datatrainchar,\
        datatestchar,\
        targettrain,\
        targettest\
        = train_test_split(xwrd, xchr, y, random_state=1234, test_size=0.1)

        xtrainword = theano.shared(np.asarray(datatrainword, dtype='int16'),
                                   borrow=True)
        xtrainchar = theano.shared(np.asarray(datatrainchar, dtype='int16'),
                                   borrow=True)
        ytrain = theano.shared(np.asarray(targettrain, dtype='int8'),
                               borrow=True)
        xtestword = theano.shared(np.asarray(datatestword, dtype='int16'),
                                  borrow=True)
        xtestchar = theano.shared(np.asarray(datatestchar, dtype='int16'),
                                  borrow=True)
        ytest = theano.shared(np.asarray(targettest, dtype='int8'),
                              borrow=True)

        # floor division keeps the batch counts integral under both Python 2 and 3
        self.ntrainbatches = xtrainword.get_value(
            borrow=True).shape[0] // batchsize
        self.ntestbatches = xtestword.get_value(
            borrow=True).shape[0] // batchsize

        index = T.iscalar()
        xwrd = T.wmatrix('xwrd')
        xchr = T.wtensor3('xchr')
        y = T.bvector('y')
        train = T.iscalar('train')

        layercharembedinput = xchr

        layercharembed = EmbedIDLayer(rng,
                                      layercharembedinput,
                                      ninput=charcnt,
                                      noutput=dimchar)

        layer1input = layercharembed.output.reshape(
            (batchsize * maxsenlen, 1, maxwordlen, dimchar))

        layer1 = ConvolutionalLayer(rng,
                                    layer1input,
                                    filter_shape=(clchar, 1, kchar, dimchar),
                                    image_shape=(batchsize * maxsenlen, 1,
                                                 maxwordlen, dimchar))

        layer2 = MaxPoolingLayer(layer1.output,
                                 poolsize=(maxwordlen - kchar + 1, 1))

        layerwordembedinput = xwrd

        layerwordembed = EmbedIDLayer(rng,
                                      layerwordembedinput,
                                      ninput=wordcnt,
                                      noutput=dimword)

        layer3wordinput = layerwordembed.output.reshape(
            (batchsize, 1, maxsenlen, dimword))
        layer3charinput = layer2.output.reshape(
            (batchsize, 1, maxsenlen, clchar))

        layer3input = T.concatenate([layer3wordinput, layer3charinput], axis=3)

        layer3 = ConvolutionalLayer(rng,
                                    layer3input,
                                    filter_shape=(clword, 1, kword,
                                                  dimword + clchar),
                                    image_shape=(batchsize, 1, maxsenlen,
                                                 dimword + clchar),
                                    activation=activation)

        layer4 = MaxPoolingLayer(layer3.output,
                                 poolsize=(maxsenlen - kword + 1, 1))

        layer5input = layer4.output.reshape((batchsize, clword))

        layer5 = FullyConnectedLayer(rng,
                                     dropout(rng, layer5input, train),
                                     ninput=clword,
                                     noutput=50,
                                     activation=activation)

        layer6input = layer5.output

        layer6 = FullyConnectedLayer(rng,
                                     dropout(rng, layer6input, train, p=0.1),
                                     ninput=50,
                                     noutput=2,
                                     activation=None)

        result = Result(layer6.output, y)
        loss = result.negativeloglikelihood()
        accuracy = result.accuracy()
        params = layer6.params\
                +layer5.params\
                +layer3.params\
                +layerwordembed.params\
                +layer1.params\
                +layercharembed.params
        updates = RMSprop(learningrate=0.001, params=params).updates(loss)

        self.trainmodel = theano.function(
            inputs=[index],
            outputs=[loss, accuracy],
            updates=updates,
            givens={
                xwrd: xtrainword[index * batchsize:(index + 1) * batchsize],
                xchr: xtrainchar[index * batchsize:(index + 1) * batchsize],
                y: ytrain[index * batchsize:(index + 1) * batchsize],
                train: np.cast['int32'](1)
            })

        self.testmodel = theano.function(
            inputs=[index],
            outputs=[loss, accuracy],
            givens={
                xwrd: xtestword[index * batchsize:(index + 1) * batchsize],
                xchr: xtestchar[index * batchsize:(index + 1) * batchsize],
                y: ytest[index * batchsize:(index + 1) * batchsize],
                train: np.cast['int32'](0)
            })
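
The givens dictionaries substitute slices of the shared variables for the symbolic inputs, so no numpy arrays are transferred per batch. Conceptually, trainmodel(i) operates on slices like the following (a sketch only; the real slicing happens inside Theano, and batchsize defaults to 100):

# Numpy picture of the givens slicing for minibatch index i.
i = 0
batch_words = datatrainword[i * 100:(i + 1) * 100]    # -> xwrd (int16)
batch_chars = datatrainchar[i * 100:(i + 1) * 100]    # -> xchr (int16)
batch_labels = targettrain[i * 100:(i + 1) * 100]     # -> y (int8)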