Ejemplo n.º 1
0
def testData():
    """Smoke-test the data pipeline.

    Reads the filtered training file, converts it to the cross-validation
    index format, decomposes the resulting bags, and prints per-bag
    diagnostics (bag index, instance count, entity-position pool sizes).

    Relies on module-level ``readData``, ``bags_decompose`` and ``np``
    (numpy). Manual diagnostic helper; returns nothing.
    """
    from data2cv import make_idx_data_cv
    data = readData('train_filtered.data')
    # 3 and 15 are presumably filter/window size and max sentence length
    # -- TODO confirm against make_idx_data_cv's signature
    newdata = make_idx_data_cv(data, 3, 15)
    [train_rels, train_nums, train_sents, train_poss, train_eposs] = bags_decompose(newdata)
    pool_size = np.zeros((len(train_rels), 2), dtype='int32')
    for bagIndex, insNum in enumerate(train_nums):
        print('bag ' + str(bagIndex))
        print('isNum ' + str(insNum))
        # Entity positions of the bag's first instance define its pool size.
        pool_size[bagIndex, :] = train_eposs[bagIndex][0]
    # Normalized from a Python-2-only print statement to a print() call;
    # single-argument print(x) behaves identically on Python 2 and 3.
    print(pool_size)

    print(newdata[0].positions)
    print(newdata[0].sentences)
    print(newdata[0].entitiesPos)
Ejemplo n.º 2
0
def testData():
    """Smoke-test the data pipeline.

    Loads 'train_filtered.data', builds the cross-validation index
    representation, decomposes it into bags, and prints each bag's index,
    instance count, and entity-position pool size.

    Depends on module-level ``readData``, ``bags_decompose`` and ``np``
    (numpy); intended as a manual check, returns nothing.
    """
    from data2cv import make_idx_data_cv
    data = readData('train_filtered.data')
    # Args 3 / 15 look like filter size and max sentence length
    # -- TODO confirm with make_idx_data_cv
    newdata = make_idx_data_cv(data, 3, 15)
    [train_rels, train_nums, train_sents, train_poss,
     train_eposs] = bags_decompose(newdata)
    pool_size = np.zeros((len(train_rels), 2), dtype='int32')
    for bagIndex, insNum in enumerate(train_nums):
        print('bag ' + str(bagIndex))
        print('isNum ' + str(insNum))
        # First instance's entity positions give the bag's pooling extents.
        pool_size[bagIndex, :] = train_eposs[bagIndex][0]
    # Fixed: were Python-2-only print statements; print(x) with a single
    # argument is equivalent under both Python 2 and 3.
    print(pool_size)

    print(newdata[0].positions)
    print(newdata[0].sentences)
    print(newdata[0].entitiesPos)
Ejemplo n.º 3
0
            # Cache the raw training text as a pickle for faster reloads.
            import dataset
            dataset.data2pickle(traindatapath + '/train_data.txt',
                                traindatapath + '/train_57w.p')

        # Build the word-vector pickle once if it does not exist yet.
        # NOTE(review): writes to './data/wv.p' but the existence check is on
        # traindatapath + '/wv.p' -- paths may be inconsistent; confirm.
        if not os.path.isfile(traindatapath + '/wv.p'):
            import dataset
            dataset.wv2pickle('./data/wv.txt', 50, './data/wv.p')

        # Load the pickled test/train bags (Python 2: cPickle, text-mode open).
        testData = cPickle.load(open(traindatapath + '/test_57w.p'))
        trainData = cPickle.load(open(traindatapath + '/train_57w.p'))
        # testData = testData[1:5]
        # trainData = trainData[1:15]
        # tmp is unused below in this visible fragment -- possibly consumed later.
        tmp = traindatapath.split('_')

        # Convert both splits to the cross-validation index format using the
        # configured filter size and maximum sentence length.
        test = data2cv.make_idx_data_cv(
            testData, parameterlist['filter_size'],
            int(parameterlist['max_sentence_word']))
        train = data2cv.make_idx_data_cv(
            trainData, parameterlist['filter_size'],
            int(parameterlist['max_sentence_word']))
        # Count total sentences across all training bags for a progress report.
        num_s = 0
        for bag in train:
            num_s += bag.num
        print 'training set sentence : %d' % num_s

        print 'finished. '

        # Load the pre-pickled word-vector matrix.
        print 'load Wv ...  '
        Wv = cPickle.load(open('./data/wv.p'))
        print 'finished.'
Ejemplo n.º 4
0
        os.mkdir(resultdir)

    # Pickle the raw test/train data once, if not already cached.
    if not os.path.isfile(inputdir+'/test.p'):
        import dataset
        dataset.data2pickle(inputdir+'/test_filtered.data', inputdir+'/test.p')
    if not os.path.isfile(inputdir+'/train.p'):
        import dataset
        dataset.data2pickle(inputdir+'/train_filtered.data', inputdir+'/train.p')

    # Load the cached splits (Python 2: cPickle, text-mode open).
    testData = cPickle.load(open(inputdir+'/test.p'))
    trainData = cPickle.load(open(inputdir+'/train.p'))
    # testData = testData[1:5]
    # trainData = trainData[1:15]
    # Directory name encodes parameters separated by '_'; tmp[3] is used
    # below as the max sentence length -- presumably by naming convention,
    # TODO confirm the expected inputdir format.
    tmp = inputdir.split('_')

    test = data2cv.make_idx_data_cv(testData, window_size, int(tmp[3]))
    train = data2cv.make_idx_data_cv(trainData, window_size, int(tmp[3]))

    print 'load Wv ...'
    Wv = cPickle.load(open(inputdir+'/'+str(dimension)+'/Wv.p'))

    # Position-feature embeddings: 101 uniform rows in [-1, 1] plus a zero
    # padding row prepended, 5 dims each. Fixed seed for reproducibility.
    rng = np.random.RandomState(3435)
    PF1 = np.asarray(rng.uniform(low=-1, high=1, size=[101, 5]), dtype=theano.config.floatX)
    padPF1 = np.zeros((1, 5))
    PF1 = np.vstack((padPF1, PF1))
    PF2 = np.asarray(rng.uniform(low=-1, high=1, size=[101, 5]), dtype=theano.config.floatX)
    padPF2 = np.zeros((1, 5))
    PF2 = np.vstack((padPF2, PF2))

    # Call continues past the visible range of this fragment.
    train_conv_net(train,
                    test,