Esempio n. 1
0
# --- context-window configuration (values come from `config`) ----------
sum_window = int(config['sumwindow'])

label_type = config['target']  # nt or ct

max_window = int(config['maxwindow'])
# Floor division: identical to `/` for two ints on Python 2, but it stays
# an int under Python 3 or `from __future__ import division`, where `/`
# would hand a float slot position to the matrix builder below.
slotposition = max_window // 2
logger.info('slotpostion is %d', slotposition)

# The loader returns five values, unpacked here in order.
(typeIndMap, n_targets, wordvecs, vectorsize,
 typefreq_traindev) = utils.loadTypesAndVectors(targetTypesFile, vectorFile)

# --- training split -----------------------------------------------------
# NOTE(review): the trailing -1 is passed through unchanged; presumably it
# means "no limit" -- confirm against utils.read_lines_data.
(traincontexts, resultVectorTrain,
 resultVectorTrainAll) = utils.read_lines_data(
     trainfile, typeIndMap, max_window, label_type, -1)
logger.info("number of training examples: %d", len(traincontexts))
inputMatrixTrain = utils.adeltheanomatrix_flexible(
    slotposition, vectorsize, traincontexts, wordvecs,
    leftsize, rightsize, sum_window, useSum)

# --- validation split: same two calls, fed with the dev file ------------
(contextlistDev, resultVectorDev,
 resultVectorDevAll) = utils.read_lines_data(
     devfile, typeIndMap, max_window, label_type, -1)
logger.info("number of validation examples: %d", len(contextlistDev))

inputMatrixDev = utils.adeltheanomatrix_flexible(
    slotposition, vectorsize, contextlistDev, wordvecs,
    leftsize, rightsize, sum_window, useSum)

# Left words + right words + 1 (presumably the slot itself -- confirm).
contextsize = leftsize + rightsize + 1

# build negative results for hinge loss
Esempio n. 2
0
max_window = int(config['maxwindow'])
# Floor division: identical to `/` for two ints on Python 2, but it stays
# an int under Python 3 or `from __future__ import division`, where `/`
# would produce a float slot position.
slotposition = max_window // 2

label_type = config['target']  # nt or ct

# The loader returns five values, unpacked here in order.
# NOTE(review): the meaning of the trailing -1 is not visible here --
# confirm against utils.loadTypesAndVectors.
(typeIndMap, n_targets, wordvecs, vectorsize,
 typefreq_traindev) = utils.loadTypesAndVectors(
     targetTypesFile, vectorFile, -1)
logger.info('word2vec vectors are loaded')

# --- test split ---------------------------------------------------------
(testcontexts, resultVectorTest,
 resultVectorTestAll) = utils.read_lines_data(
     testsetfile, typeIndMap, max_window, label_type, -1)
logger.info("number of test examples: %d", len(testcontexts))

inputMatrixTest = utils.adeltheanomatrix_flexible(
    slotposition, vectorsize, testcontexts, wordvecs,
    leftsize, rightsize, sum_window, useSum)

# Left words + right words + 1 (presumably the slot itself -- confirm).
contextsize = leftsize + rightsize + 1

################# for memory ############
# Rebind the large inputs to empty lists so the originals can be freed
# once the input matrix has been built.
testcontexts = []
wordvecs = []

##################### the network #######################
# Wrap inputs (floatX) and labels (int32) in Theano shared variables.
test_set_x = theano.shared(
    numpy.matrix(inputMatrixTest,
                 dtype=theano.config.floatX))  # @UndefinedVariable
test_set_y = theano.shared(
    numpy.matrix(resultVectorTestAll, dtype=numpy.dtype(numpy.int32)))

# Fixed seed so runs are repeatable.
rng = numpy.random.RandomState(23455)

# Starts as the number of rows in the test input matrix.
n_test_batches = test_set_x.get_value(borrow=True).shape[0]
Esempio n. 3
0
# --- run configuration (values come from `config`) ----------------------
outputtype = config['outtype']  # hinge or softmax
leftsize = int(config['left'])
rightsize = int(config['right'])
sum_window = int(config['sumwindow'])
max_window = int(config['maxwindow'])
# Floor division: identical to `/` for two ints on Python 2, but it stays
# an int under Python 3 or `from __future__ import division`, where `/`
# would produce a float slot position.
slotposition = max_window // 2

label_type = config['target']  # nt or ct

# The loader returns five values, unpacked here in order.
# NOTE(review): the meaning of the trailing -1 is not visible here --
# confirm against utils.loadTypesAndVectors.
(typeIndMap, n_targets, wordvecs, vectorsize,
 typefreq_traindev) = utils.loadTypesAndVectors(
     targetTypesFile, vectorFile, -1)
logger.info('word2vec vectors are loaded')

# --- test split ---------------------------------------------------------
(testcontexts, resultVectorTest,
 resultVectorTestAll) = utils.read_lines_data(
     testsetfile, typeIndMap, max_window, label_type, -1)
logger.info("number of test examples: %d", len(testcontexts))

inputMatrixTest = utils.adeltheanomatrix_flexible(
    slotposition, vectorsize, testcontexts, wordvecs,
    leftsize, rightsize, sum_window, useSum)

# Left words + right words + 1 (presumably the slot itself -- confirm).
contextsize = leftsize + rightsize + 1

################# for memory ############
# Rebind the large inputs to empty lists so the originals can be freed
# once the input matrix has been built.
testcontexts = []
wordvecs = []

##################### the network #######################
# Wrap inputs (floatX) and labels (int32) in Theano shared variables.
test_set_x = theano.shared(
    numpy.matrix(inputMatrixTest,
                 dtype=theano.config.floatX))  # @UndefinedVariable
test_set_y = theano.shared(
    numpy.matrix(resultVectorTestAll, dtype=numpy.dtype(numpy.int32)))

# Fixed seed so runs are repeatable.
rng = numpy.random.RandomState(23455)

# Rows in the test matrix, converted to a count of whole batches.
# `//=` keeps the count an integer on every interpreter.
# NOTE(review): assumes batch_size is an int -- confirm where it is set.
n_test_batches = test_set_x.get_value(borrow=True).shape[0]
n_test_batches //= batch_size

# allocate symbolic variables for the data
index = T.lscalar()  # index to a [mini]batch
# Symbolic input matrix. NOTE(review): the original comment described the
# data as "rasterized images", which looks like a tutorial leftover.
x = T.matrix('x')
Esempio n. 4
0
# --- run configuration (values come from `config`) ----------------------
outputtype = config['outtype']  # hinge or softmax
leftsize = int(config['left'])
rightsize = int(config['right'])
sum_window = int(config['sumwindow'])

label_type = config['target']  # nt or ct

max_window = int(config['maxwindow'])
# Floor division: identical to `/` for two ints on Python 2, but it stays
# an int under Python 3 or `from __future__ import division`, where `/`
# would hand a float slot position to the matrix builder below.
slotposition = max_window // 2
logger.info('slotpostion is %d', slotposition)

# The loader returns five values, unpacked here in order.
(typeIndMap, n_targets, wordvecs, vectorsize,
 typefreq_traindev) = utils.loadTypesAndVectors(targetTypesFile, vectorFile)

# --- training split -----------------------------------------------------
# NOTE(review): the trailing -1 is passed through unchanged; presumably it
# means "no limit" -- confirm against utils.read_lines_data.
(traincontexts, resultVectorTrain,
 resultVectorTrainAll) = utils.read_lines_data(
     trainfile, typeIndMap, max_window, label_type, -1)
logger.info("number of training examples: %d", len(traincontexts))
inputMatrixTrain = utils.adeltheanomatrix_flexible(
    slotposition, vectorsize, traincontexts, wordvecs,
    leftsize, rightsize, sum_window, useSum)

# --- validation split: same two calls, fed with the dev file ------------
(contextlistDev, resultVectorDev,
 resultVectorDevAll) = utils.read_lines_data(
     devfile, typeIndMap, max_window, label_type, -1)
logger.info("number of validation examples: %d", len(contextlistDev))

inputMatrixDev = utils.adeltheanomatrix_flexible(
    slotposition, vectorsize, contextlistDev, wordvecs,
    leftsize, rightsize, sum_window, useSum)

# Left words + right words + 1 (presumably the slot itself -- confirm).
contextsize = leftsize + rightsize + 1


# build negative results for hinge loss

# Preallocate one row per dev example and one column per target type
# (numpy.empty leaves the cells uninitialised; the loop fills every one).
resultMatrixDevAll = numpy.empty(shape=(len(resultVectorDevAll), n_targets))
# `range` matches the inner loop and also exists on Python 3, unlike the
# Python 2-only `xrange`.
for i in range(len(resultVectorDevAll)):
    for j in range(n_targets):
        resultMatrixDevAll[i][j] = resultVectorDevAll[i][j]