Exemple #1
0
def dealWithOneDoc(rng, doc):
    """Build the symbolic embedding of a single document.

    Every sentence of ``doc`` is passed through ``dealWithSentence`` inside a
    ``theano.scan``. The stacked results are convolved with a newly created
    shared filter, max-pooled, and flattened into a vector.

    :param rng: random generator providing ``uniform(low, high, size)``; it
        initialises the filter and is forwarded to ``dealWithSentence``.
    :param doc: typed list of sentences; its length is read with
        ``ttList.length`` and it is indexed inside the scan.
    :return: symbolic vector obtained by reshaping the pooled output to
        ``(1, 10)`` and dropping the leading axis.
    """
    def _embed_sentence(idx, sentences):
        # Scan step: process the idx-th sentence of the typed list.
        return dealWithSentence(rng, sentences[idx])

    n_sentences = ttList.length(doc)
    sentence_indices = T.arange(n_sentences, dtype='int64')
    scanned, _ = theano.scan(fn=_embed_sentence,
                             sequences=[sentence_indices],
                             non_sequences=[doc])
    # Keep axes 0 and 2 of the scan output, dropping axis 1.
    sentence_matrix = scanned.dimshuffle([0, 2])

    # Filter weights drawn uniformly from [-0.2, 0.2), shape (2, 1, 2).
    # Created after the scan so rng is consumed in the original order.
    filter_values = numpy.asarray(
        rng.uniform(low=-0.2, high=0.2, size=(2, 1, 2)),
        dtype=theano.config.floatX)
    docW = theano.shared(filter_values, borrow=True)

    convolved = theano.tensor.signal.conv.conv2d(input=sentence_matrix,
                                                 filters=docW)

    # NOTE(review): the 1000-row pooling window presumably covers every
    # sentence row so this acts as a max over axis 0 - confirm.
    pooled = theano.tensor.signal.downsample.max_pool_2d(convolved, (1000, 1))

    # NOTE(review): assumes the pooled output holds exactly 2 * 5 values.
    flat = T.reshape(pooled, (1, 2 * 5))
    return flat.dimshuffle([1])
def dealWithOneDoc(rng, doc):
    """Return the symbolic embedding vector for one document.

    The sentences in ``doc`` are processed one by one with
    ``dealWithSentence`` through ``theano.scan``. The per-sentence results
    are then convolved with a shared filter, max-pooled and flattened.

    :param rng: random generator exposing ``uniform(low, high, size)``; used
        for the filter initialisation and passed to ``dealWithSentence``.
    :param doc: typed list of sentences (measured with ``ttList.length`` and
        indexed by position inside the scan).
    :return: symbolic vector: the pooled output reshaped to ``(1, 10)`` with
        its leading axis removed.
    """
    # One scan step per sentence position in the document.
    positions = T.arange(ttList.length(doc), dtype='int64')
    per_sentence, _ = theano.scan(
        fn=lambda pos, sentences: dealWithSentence(rng, sentences[pos]),
        sequences=[positions],
        non_sequences=[doc],
    )
    # Retain axes 0 and 2 only (axis 1 is dropped).
    per_sentence = per_sentence.dimshuffle([0, 2])

    # Uniform initial weights in [-0.2, 0.2) with shape (2, 1, 2); drawn
    # after the scan so the generator is used in the same order as before.
    initial_weights = rng.uniform(low=-0.2, high=0.2, size=(2, 1, 2))
    docW = theano.shared(
        numpy.asarray(initial_weights, dtype=theano.config.floatX),
        borrow=True,
    )

    doc_out = theano.tensor.signal.conv.conv2d(
        input=per_sentence,
        filters=docW,
    )

    # NOTE(review): pooling window of (1000, 1) looks intended to span all
    # rows of doc_out - verify against the expected sentence count.
    doc_pool = theano.tensor.signal.downsample.max_pool_2d(doc_out, (1000, 1))

    # NOTE(review): the reshape assumes exactly 2 * 5 pooled values.
    return T.reshape(doc_pool, (1, 2 * 5)).dimshuffle([1])
Exemple #3
0
def main2():
    """Compile and run a Theano function that returns a typed list's length.

    Builds a symbolic typed list of typed lists of float matrices, compiles
    a function computing ``ttList.length`` of it, evaluates the function on
    a small literal nested list and prints the result.
    """
    a = [[[[1, 2, 3]], [[4, 5]]]]
    tl = ttList.TypedListType(ttList.TypedListType(theano.tensor.fmatrix))()
    o = ttList.length(tl)
    f = theano.function([tl], o)
    testRes = f(a)
    # Parenthesised single-argument print behaves the same on Python 2 and
    # Python 3; the bare print statement is a SyntaxError on Python 3.
    print(testRes)
    print("All finished!")
def main2():
    """Evaluate ``ttList.length`` on a sample nested list and print it.

    A symbolic typed list (of typed lists of float matrices) is declared,
    its length expression is compiled with ``theano.function``, and the
    compiled function is called on a literal input.
    """
    # Outer list with one entry, which itself holds two single-row matrices.
    a = [
        [
            [[1, 2, 3]],
            [[4, 5]],
        ],
    ]
    inner_type = ttList.TypedListType(theano.tensor.fmatrix)
    tl = ttList.TypedListType(inner_type)()
    length_fn = theano.function([tl], ttList.length(tl))
    testRes = length_fn(a)
    # Use the call form of print: identical output on Python 2 for a single
    # argument, and valid syntax on Python 3 (the statement form is not).
    print(testRes)
    print("All finished!")
Exemple #5
0
def main():
    """Run ``dealWithOneDoc`` over a sample list of documents and print it.

    A symbolic typed list of documents is scanned position by position,
    calling ``dealWithOneDoc`` on each entry. The resulting graph is compiled
    and evaluated on a literal nested list, and the first result is printed.
    """
    rng = numpy.random.RandomState(23455)
    a = [[[[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
          [[1, 2, 3, 4], [1, 2, 3, 4]]]]

    docList = ttList.TypedListType(ttList.TypedListType(T.fmatrix))("docList")
    docSize = ttList.length(docList)

    modelResults, _ = theano.scan(fn=lambda i, tl: dealWithOneDoc(rng, tl[i]),
                                  non_sequences=[docList],
                                  sequences=[T.arange(docSize, dtype='int64')])

    testFunc = theano.function([docList], modelResults)
    rrrr = testFunc(a)
    # Parenthesised single-argument print works on both Python 2 and 3; the
    # bare print statement fails to parse on Python 3.
    print(rrrr[0])
    print("All finished!")
def main():
    """Compile the per-document model, evaluate a sample input, print it.

    Declares a symbolic typed list of documents, applies ``dealWithOneDoc``
    to each entry through ``theano.scan``, compiles the graph and prints the
    first row of the output for a literal nested-list input.
    """
    rng = numpy.random.RandomState(23455)
    # One document containing a 3-row and a 2-row sentence matrix.
    a = [
        [
            [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
            [[1, 2, 3, 4], [1, 2, 3, 4]],
        ],
    ]

    docList = ttList.TypedListType(ttList.TypedListType(T.fmatrix))("docList")
    docSize = ttList.length(docList)

    modelResults, _ = theano.scan(
        fn=lambda i, tl: dealWithOneDoc(rng, tl[i]),
        non_sequences=[docList],
        sequences=[T.arange(docSize, dtype='int64')],
    )

    testFunc = theano.function([docList], modelResults)
    rrrr = testFunc(a)
    # Call-style print: same output as the statement form on Python 2 for a
    # single argument, and the only valid form on Python 3.
    print(rrrr[0])
    print("All finished!")
Exemple #7
0
# File name of the saved embedding matrix (.npy file).
emb_matrix_path = 'embedding_matrix_gensim_300D.npy'

# The vocabulary size handed to the loader is taken from word_dim.
Vocabulary_size = word_dim
x_train, word_to_index, index_to_word = load_data(
    INPUT_DATA_FILE, Vocabulary_size)

# Hold out items [400000, 500000) for testing; keep the first 400000 for
# training. Both slices are taken from the full data before rebinding.
x_train, x_test = x_train[:400000], x_train[400000:500000]

# Iterator counter: shared int32 scalar initialised to 0.
t = theano.shared(value=np.array(0).astype('int32'), name='t')
# Symbolic typed list whose elements are integer vectors.
x = tlist.TypedListType(T.ivector)()

# wl = T.ivector('wl')
# Symbolic length of the typed list.
l = tlist.length(x)

def get_shapes(index, x):
    """Return the size along axis 0 of the ``index``-th element of ``x``.

    :param index: position of the element to inspect.
    :param x: indexable collection (used here as a scan step over a typed
        list) whose elements have a shape.
    :return: first entry of ``T.shape(x[index])``.
    """
    return T.shape(x[index])[0]

# Apply get_shapes at positions 0 .. batch_size - 1 of the typed list x,
# collecting the axis-0 size of each element. The second value returned by
# theano.scan is bound to last_output.
x_shapes, last_output = theano.scan(
    fn=get_shapes,
    sequences=[T.arange(batch_size, dtype='int64')],
    non_sequences=[x],
    outputs_info=None,
)

# f = theano.function([x], T.shape(x[T.argmax(x_shapes)]))

# Position of the largest entry in x_shapes.
max_x_idx = T.argmax(x_shapes)