def testData():
    """Debug/smoke-test helper for the data pipeline (Python 2).

    Reads 'train_filtered.data', converts it to index form via
    data2cv.make_idx_data_cv (filter size 3, max sentence length 15),
    decomposes the result into per-bag lists, and prints per-bag debug
    info plus the first bag's fields.  No return value; output goes to
    stdout only.
    """
    # NOTE(review): this block arrived with newlines stripped; statement
    # grouping (which prints sit inside the loop) was reconstructed from
    # which statements reference the loop variables — confirm against the
    # original file.
    from data2cv import make_idx_data_cv

    raw = readData('train_filtered.data')
    indexed = make_idx_data_cv(raw, 3, 15)
    rels, nums, sents, poss, eposs = bags_decompose(indexed)

    # One (row) entry per bag; filled with the entity-position pair of
    # the first instance in each bag.
    sizes = np.zeros((len(rels), 2), dtype='int32')
    for bag_idx, ins_num in enumerate(nums):
        print('bag ' + str(bag_idx))
        print('isNum ' + str(ins_num))
        sizes[bag_idx, :] = eposs[bag_idx][0]

    print(sizes)
    print(indexed[0].positions)
    print(indexed[0].sentences)
    print(indexed[0].entitiesPos)
def testData():
    """Print diagnostic information about the converted training data.

    Duplicate of the testData defined above (this later definition is the
    one that survives at import time).  Loads 'train_filtered.data',
    indexes it with data2cv.make_idx_data_cv(data, 3, 15), splits the
    bags apart with bags_decompose, and dumps per-bag counts and the
    first bag's positions/sentences/entity positions to stdout.
    """
    # NOTE(review): reconstructed from a newline-stripped one-liner;
    # loop-body extent inferred from variable usage — verify against the
    # original file.
    from data2cv import make_idx_data_cv

    data = readData('train_filtered.data')
    newdata = make_idx_data_cv(data, 3, 15)
    decomposed = bags_decompose(newdata)
    train_rels, train_nums = decomposed[0], decomposed[1]
    train_eposs = decomposed[4]

    # Per-bag 2-column array holding the first instance's entity positions.
    pool_size = np.zeros((len(train_rels), 2), dtype='int32')
    for bagIndex, insNum in enumerate(train_nums):
        print('bag ' + str(bagIndex))
        print('isNum ' + str(insNum))
        pool_size[bagIndex, :] = train_eposs[bagIndex][0]

    print(pool_size)
    print(newdata[0].positions)
    print(newdata[0].sentences)
    print(newdata[0].entitiesPos)
# NOTE(review): this span arrived with all newlines/indentation stripped, so it is
# left byte-identical below rather than risk a wrong reconstruction.  Read in order, it:
#   (1) pickles train_data.txt -> train_57w.p via dataset.data2pickle — the leading
#       `import dataset` looks like the body of an `if not os.path.isfile(...)` guard
#       whose header is cut off before this chunk; confirm against the full file;
#   (2) builds ./data/wv.p from ./data/wv.txt (dimension 50) if it does not exist;
#   (3) loads test_57w.p / train_57w.p with cPickle (Python 2);
#   (4) converts both to index form with data2cv.make_idx_data_cv using
#       parameterlist['filter_size'] and parameterlist['max_sentence_word'];
#   (5) sums bag.num over the training bags, prints the sentence count, then loads
#       the word-vector matrix Wv from ./data/wv.p.
# `traindatapath` and `parameterlist` are defined outside this view; `tmp` is
# assigned but not used within this span.
import dataset dataset.data2pickle(traindatapath + '/train_data.txt', traindatapath + '/train_57w.p') if not os.path.isfile(traindatapath + '/wv.p'): import dataset dataset.wv2pickle('./data/wv.txt', 50, './data/wv.p') testData = cPickle.load(open(traindatapath + '/test_57w.p')) trainData = cPickle.load(open(traindatapath + '/train_57w.p')) # testData = testData[1:5] # trainData = trainData[1:15] tmp = traindatapath.split('_') test = data2cv.make_idx_data_cv( testData, parameterlist['filter_size'], int(parameterlist['max_sentence_word'])) train = data2cv.make_idx_data_cv( trainData, parameterlist['filter_size'], int(parameterlist['max_sentence_word'])) num_s = 0 for bag in train: num_s += bag.num print 'training set sentence : %d' % num_s print 'finished. ' print 'load Wv ... ' Wv = cPickle.load(open('./data/wv.p')) print 'finished.'
# NOTE(review): newline-stripped span, kept byte-identical below; it also ends
# mid-expression (`train_conv_net(train, test,` — the remaining arguments are in the
# next chunk of the file).  Read in order, it:
#   (1) creates the result directory;
#   (2) pickles test_filtered.data / train_filtered.data into test.p / train.p
#       on first run (dataset.data2pickle), then loads both with cPickle (Python 2);
#   (3) indexes both sets with data2cv.make_idx_data_cv; the max-sentence-length
#       argument comes from the 4th '_'-separated token of `inputdir` (int(tmp[3]));
#   (4) loads the word-vector matrix Wv from <inputdir>/<dimension>/Wv.p;
#   (5) builds two position-feature embedding tables PF1/PF2: 101 uniform(-1, 1)
#       rows of width 5 (seed 3435), each prepended with an all-zero padding row
#       via np.vstack — note the zero rows are float64, not theano.config.floatX;
#       presumably intentional padding index 0, verify downstream cast;
#   (6) starts the train_conv_net(...) call (arguments continue past this chunk).
# `resultdir`, `inputdir`, `window_size`, `dimension`, and `train_conv_net` are
# defined outside this view.
os.mkdir(resultdir) if not os.path.isfile(inputdir+'/test.p'): import dataset dataset.data2pickle(inputdir+'/test_filtered.data', inputdir+'/test.p') if not os.path.isfile(inputdir+'/train.p'): import dataset dataset.data2pickle(inputdir+'/train_filtered.data', inputdir+'/train.p') testData = cPickle.load(open(inputdir+'/test.p')) trainData = cPickle.load(open(inputdir+'/train.p')) # testData = testData[1:5] # trainData = trainData[1:15] tmp = inputdir.split('_') test = data2cv.make_idx_data_cv(testData, window_size, int(tmp[3])) train = data2cv.make_idx_data_cv(trainData, window_size, int(tmp[3])) print 'load Wv ...' Wv = cPickle.load(open(inputdir+'/'+str(dimension)+'/Wv.p')) rng = np.random.RandomState(3435) PF1 = np.asarray(rng.uniform(low=-1, high=1, size=[101, 5]), dtype=theano.config.floatX) padPF1 = np.zeros((1, 5)) PF1 = np.vstack((padPF1, PF1)) PF2 = np.asarray(rng.uniform(low=-1, high=1, size=[101, 5]), dtype=theano.config.floatX) padPF2 = np.zeros((1, 5)) PF2 = np.vstack((padPF2, PF2)) train_conv_net(train, test,