def dealWithOneDoc(rng, doc):
    """Build the symbolic embedding expression for a single document.

    Each entry of ``doc`` is passed through ``dealWithSentence`` inside a
    ``theano.scan`` loop. The stacked results are convolved with a newly
    created shared filter tensor and then max-pooled.

    rng -- random state exposing ``uniform``; used to initialise the
           convolution filters and forwarded to ``dealWithSentence``.
    doc -- symbolic list of sentences (measured with ``ttList.length`` and
           indexed inside the scan).

    Returns the pooled output reshaped to ``(1, 2 * 5)`` with the leading
    axis dropped.
    """
    num_sentences = ttList.length(doc)
    sentence_indices = T.arange(num_sentences, dtype='int64')

    def _embed_sentence(idx, sentences):
        # One scan step: embed the idx-th sentence of the document.
        return dealWithSentence(rng, sentences[idx])

    per_sentence, _ = theano.scan(
        fn=_embed_sentence,
        sequences=[sentence_indices],
        non_sequences=[doc],
    )
    # Keep axes 0 and 2 only; axis 1 is dropped.
    per_sentence = per_sentence.dimshuffle([0, 2])

    # Filters are drawn after the scan so the rng is consumed in the same
    # order as before.
    initial_filters = rng.uniform(low=-0.2, high=0.2, size=(2, 1, 2))
    docW = theano.shared(
        numpy.asarray(initial_filters, dtype=theano.config.floatX),
        borrow=True,
    )

    conv_out = theano.tensor.signal.conv.conv2d(
        input=per_sentence, filters=docW)

    # Alternative kept from the original author: T.max(doc_out, axis=[0]).
    # NOTE(review): the 1000-row window presumably exceeds any sentence
    # count so the whole first pooled axis collapses -- confirm.
    pooled = theano.tensor.signal.downsample.max_pool_2d(conv_out, (1000, 1))

    # Debug hook used previously: printing.Print('doc_embedding').
    return T.reshape(pooled, (1, 2 * 5)).dimshuffle([1])
def dealWithOneDoc(rng, doc):
    """Return the symbolic embedding vector of one document.

    The document's sentences are embedded one by one with
    ``dealWithSentence`` (driven by ``theano.scan``), the stacked sentence
    results are convolved with a randomly initialised shared filter tensor,
    and the convolution output is max-pooled.

    rng -- random state exposing ``uniform``; also handed to
           ``dealWithSentence``.
    doc -- symbolic list of sentences; its length comes from
           ``ttList.length`` and it is indexed per scan step.

    Returns the pooled tensor reshaped to ``(1, 2 * 5)`` and reduced by
    dropping the leading axis.
    """
    conv2d = theano.tensor.signal.conv.conv2d
    max_pool_2d = theano.tensor.signal.downsample.max_pool_2d

    # Scan over sentence positions; the list itself is a non-sequence input.
    positions = T.arange(ttList.length(doc), dtype='int64')
    stacked, _ = theano.scan(
        fn=lambda pos, sentences: dealWithSentence(rng, sentences[pos]),
        sequences=[positions],
        non_sequences=[doc],
    )
    stacked = stacked.dimshuffle([0, 2])  # axis 1 is dropped here

    # The filter values are drawn only after the scan has been built, which
    # keeps the order of rng draws unchanged.
    filter_values = numpy.asarray(
        rng.uniform(low=-0.2, high=0.2, size=(2, 1, 2)),
        dtype=theano.config.floatX,
    )
    docW = theano.shared(filter_values, borrow=True)

    convolved = conv2d(input=stacked, filters=docW)
    # Earlier alternative: docPool = T.max(doc_out, axis=[0]).
    # NOTE(review): (1000, 1) looks like "pool over everything along the
    # first pooled axis" -- confirm the intended upper bound.
    docPool = max_pool_2d(convolved, (1000, 1))

    doc_embedding = T.reshape(docPool, (1, 2 * 5))
    # Earlier debug aid: wrap doc_embedding in printing.Print('doc_embedding').
    return doc_embedding.dimshuffle([1])
def main2():
    """Smoke-test ``ttList.length`` on a nested typed list.

    Compiles a Theano function that returns the length of a typed list whose
    elements are typed lists of ``fmatrix`` values, evaluates it on a small
    literal, then prints the result followed by a completion message.
    """
    # One outer entry holding two single-row matrices of different widths.
    a = [
        [
            [[1, 2, 3]],
            [[4, 5]],
        ],
    ]

    matrix_list_type = ttList.TypedListType(theano.tensor.fmatrix)
    tl = ttList.TypedListType(matrix_list_type)()
    o = ttList.length(tl)

    f = theano.function([tl], o)
    testRes = f(a)

    # Single-argument print(...) is valid on both Python 2 and Python 3,
    # unlike the bare print statement, which is a SyntaxError on Python 3.
    print(testRes)
    print("All finished!")
def main2():
    """Check that the length of a nested typed list can be computed.

    Builds a symbolic typed list of typed lists of ``fmatrix`` values,
    compiles a function returning its length, runs it on a literal sample
    and prints the outcome and a completion message.
    """
    sample = [
        [
            [[1, 2, 3]],
            [[4, 5]],
        ]
    ]

    inner_type = ttList.TypedListType(theano.tensor.fmatrix)
    outer_type = ttList.TypedListType(inner_type)
    tl = outer_type()

    length_fn = theano.function([tl], ttList.length(tl))
    testRes = length_fn(sample)

    # Parenthesised single-argument print works unchanged on Python 2 and
    # also parses on Python 3, where the print statement does not.
    print(testRes)
    print("All finished!")
def main():
    """Run ``dealWithOneDoc`` over a small literal batch of documents.

    Seeds a NumPy random state, declares a symbolic typed list of typed
    lists of ``fmatrix`` values named ``"docList"``, scans
    ``dealWithOneDoc`` over its entries, compiles the result into a
    function, evaluates it on a one-document sample, and prints the first
    result followed by a completion message.
    """
    rng = numpy.random.RandomState(23455)

    # A single document with two sentences of 3 and 2 rows, 4 columns each.
    a = [
        [
            [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
            [[1, 2, 3, 4], [1, 2, 3, 4]],
        ],
    ]

    docList = ttList.TypedListType(ttList.TypedListType(T.fmatrix))("docList")
    docSize = ttList.length(docList)

    def _embed_doc(i, tl):
        # One scan step: embed the i-th document of the list.
        return dealWithOneDoc(rng, tl[i])

    modelResults, _ = theano.scan(
        fn=_embed_doc,
        sequences=[T.arange(docSize, dtype='int64')],
        non_sequences=[docList],
    )

    testFunc = theano.function([docList], modelResults)
    rrrr = testFunc(a)

    # Single-argument print(...) is valid on both Python 2 and Python 3,
    # unlike the bare print statement, which is a SyntaxError on Python 3.
    print(rrrr[0])
    print("All finished!")
def main():
    """Evaluate the document model on a hard-coded sample and print it.

    A symbolic list of documents (typed list of typed lists of ``fmatrix``)
    is scanned with ``dealWithOneDoc``. The compiled function is then run on
    one literal document, and the first result row is printed together with
    a completion message.
    """
    rng = numpy.random.RandomState(23455)

    first_sentence = [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]]
    second_sentence = [[1, 2, 3, 4], [1, 2, 3, 4]]
    a = [[first_sentence, second_sentence]]

    sentence_list_type = ttList.TypedListType(T.fmatrix)
    docList = ttList.TypedListType(sentence_list_type)("docList")
    doc_indices = T.arange(ttList.length(docList), dtype='int64')

    modelResults, _ = theano.scan(
        fn=lambda i, tl: dealWithOneDoc(rng, tl[i]),
        sequences=[doc_indices],
        non_sequences=[docList],
    )

    # The stray trailing semicolon after this call was removed.
    testFunc = theano.function([docList], modelResults)
    rrrr = testFunc(a)

    # Parenthesised single-argument print works unchanged on Python 2 and
    # also parses on Python 3, where the print statement does not.
    print(rrrr[0])
    print("All finished!")
# File name of the embedding matrix (.npy).
emb_matrix_path = 'embedding_matrix_gensim_300D.npy'
Vocabulary_size = word_dim

x_train, word_to_index, index_to_word = load_data(
    INPUT_DATA_FILE, Vocabulary_size)

# Take the test slice first: x_train is rebound to its own prefix right after.
x_test = x_train[400000:500000]
x_train = x_train[0:400000]

# iterator counter
t = theano.shared(value=np.array(0).astype('int32'), name='t')

# Symbolic typed list of int vectors, and its symbolic length.
x = tlist.TypedListType(T.ivector)()
# Previously considered input: wl = T.ivector('wl')
l = tlist.length(x)


def get_shapes(index, x):
    """Return the symbolic size of the first axis of ``x[index]``."""
    return T.shape(x[index])[0]


# theano.scan returns (outputs, updates); despite its name, ``last_output``
# receives the second of those values.
x_shapes, last_output = theano.scan(
    fn=get_shapes,
    sequences=[T.arange(batch_size, dtype='int64')],
    non_sequences=[x],
    outputs_info=None,
)

# Earlier debugging helper:
# f = theano.function([x], T.shape(x[T.argmax(x_shapes)]))

# Position of the largest first-axis size among the scanned entries.
max_x_idx = T.argmax(x_shapes)