# Prepare the training and validation inputs.
# NOTE(review): config, logger, utils, leftsize, rightsize, useSum and the
# *File path variables are defined before this fragment.
sum_window = int(config['sumwindow'])
label_type = config['target']  # nt or ct
max_window = int(config['maxwindow'])
# Floor division keeps the slot position an int even when true division is
# in effect (Python 3 or `from __future__ import division`).
slotposition = max_window // 2
logger.info('slotpostion is %d', slotposition)

# One call returns the type index map, the number of targets, the word
# vectors, their size and the train/dev type frequencies.
(typeIndMap, n_targets, wordvecs, vectorsize, typefreq_traindev) = utils.loadTypesAndVectors(
    targetTypesFile, vectorFile)

# Training split: read the contexts and labels, then build the input matrix.
(traincontexts, resultVectorTrain, resultVectorTrainAll) = utils.read_lines_data(
    trainfile, typeIndMap, max_window, label_type, -1)
logger.info("number of training examples: %d", len(traincontexts))
inputMatrixTrain = utils.adeltheanomatrix_flexible(
    slotposition, vectorsize, traincontexts, wordvecs,
    leftsize, rightsize, sum_window, useSum)

# Validation split: same pipeline as the training split.
(contextlistDev, resultVectorDev, resultVectorDevAll) = utils.read_lines_data(
    devfile, typeIndMap, max_window, label_type, -1)
logger.info("number of validation examples: %d", len(contextlistDev))
inputMatrixDev = utils.adeltheanomatrix_flexible(
    slotposition, vectorsize, contextlistDev, wordvecs,
    leftsize, rightsize, sum_window, useSum)

# Left context + right context + 1 (presumably the slot itself -- confirm).
contextsize = leftsize + rightsize + 1
# build negative results for hinge loss
# Prepare the test inputs and wrap them in Theano shared variables.
# NOTE(review): config, logger, utils, leftsize, rightsize, sum_window,
# useSum and the *File path variables are defined before this fragment.
max_window = int(config['maxwindow'])
# Floor division keeps the slot position an int even when true division is
# in effect (Python 3 or `from __future__ import division`).
slotposition = max_window // 2
label_type = config['target']  # nt or ct

(typeIndMap, n_targets, wordvecs, vectorsize, typefreq_traindev) = utils.loadTypesAndVectors(
    targetTypesFile, vectorFile, -1)
logger.info('word2vec vectors are loaded')

(testcontexts, resultVectorTest, resultVectorTestAll) = utils.read_lines_data(
    testsetfile, typeIndMap, max_window, label_type, -1)
logger.info("number of test examples: %d", len(testcontexts))
inputMatrixTest = utils.adeltheanomatrix_flexible(
    slotposition, vectorsize, testcontexts, wordvecs,
    leftsize, rightsize, sum_window, useSum)
# Left context + right context + 1 (presumably the slot itself -- confirm).
contextsize = leftsize + rightsize + 1

################# for memory ############
# Rebind to empty lists so the raw contexts and the word vectors can be
# garbage-collected now that the input matrix has been built.
testcontexts = []
wordvecs = []

##################### the network #######################
test_set_x = theano.shared(
    numpy.matrix(inputMatrixTest, dtype=theano.config.floatX))  # @UndefinedVariable
test_set_y = theano.shared(
    numpy.matrix(resultVectorTestAll, dtype=numpy.dtype(numpy.int32)))

# Fixed seed so runs are reproducible.
rng = numpy.random.RandomState(23455)
# Number of test rows; no division by a batch size happens in this fragment.
n_test_batches = test_set_x.get_value(borrow=True).shape[0]
# Read the run configuration, prepare the test inputs and declare the
# symbolic variables of the network.
# NOTE(review): config, logger, utils, useSum, batch_size, T and the *File
# path variables are defined before this fragment.
outputtype = config['outtype']  # hinge or softmax
leftsize = int(config['left'])
rightsize = int(config['right'])
sum_window = int(config['sumwindow'])
max_window = int(config['maxwindow'])
# Floor division keeps the slot position an int even when true division is
# in effect (Python 3 or `from __future__ import division`).
slotposition = max_window // 2
label_type = config['target']  # nt or ct

(typeIndMap, n_targets, wordvecs, vectorsize, typefreq_traindev) = utils.loadTypesAndVectors(
    targetTypesFile, vectorFile, -1)
logger.info('word2vec vectors are loaded')

(testcontexts, resultVectorTest, resultVectorTestAll) = utils.read_lines_data(
    testsetfile, typeIndMap, max_window, label_type, -1)
logger.info("number of test examples: %d", len(testcontexts))
inputMatrixTest = utils.adeltheanomatrix_flexible(
    slotposition, vectorsize, testcontexts, wordvecs,
    leftsize, rightsize, sum_window, useSum)
# Left context + right context + 1 (presumably the slot itself -- confirm).
contextsize = leftsize + rightsize + 1

################# for memory ############
# Rebind to empty lists so the raw contexts and the word vectors can be
# garbage-collected now that the input matrix has been built.
testcontexts = []
wordvecs = []

##################### the network #######################
test_set_x = theano.shared(
    numpy.matrix(inputMatrixTest, dtype=theano.config.floatX))  # @UndefinedVariable
test_set_y = theano.shared(
    numpy.matrix(resultVectorTestAll, dtype=numpy.dtype(numpy.int32)))

# Fixed seed so runs are reproducible.
rng = numpy.random.RandomState(23455)

# Number of whole minibatches in the test set. Floor division keeps the
# count an int; a trailing partial batch is dropped, exactly as with
# Python 2 integer `/`.
n_test_batches = test_set_x.get_value(borrow=True).shape[0]
n_test_batches //= batch_size

# allocate symbolic variables for the data
index = T.lscalar()  # index to a [mini]batch
# Symbolic input matrix (presumably minibatch rows of test_set_x -- confirm).
x = T.matrix('x')
# Read the run configuration, prepare the training and validation inputs and
# start building the dev label matrix.
# NOTE(review): config, logger, utils, useSum and the *File path variables
# are defined before this fragment.
outputtype = config['outtype']  # hinge or softmax
leftsize = int(config['left'])
rightsize = int(config['right'])
sum_window = int(config['sumwindow'])
label_type = config['target']  # nt or ct
max_window = int(config['maxwindow'])
# Floor division keeps the slot position an int even when true division is
# in effect (Python 3 or `from __future__ import division`).
slotposition = max_window // 2
logger.info('slotpostion is %d', slotposition)

(typeIndMap, n_targets, wordvecs, vectorsize, typefreq_traindev) = utils.loadTypesAndVectors(
    targetTypesFile, vectorFile)

# Training split: read the contexts and labels, then build the input matrix.
(traincontexts, resultVectorTrain, resultVectorTrainAll) = utils.read_lines_data(
    trainfile, typeIndMap, max_window, label_type, -1)
logger.info("number of training examples: %d", len(traincontexts))
inputMatrixTrain = utils.adeltheanomatrix_flexible(
    slotposition, vectorsize, traincontexts, wordvecs,
    leftsize, rightsize, sum_window, useSum)

# Validation split: same pipeline as the training split.
(contextlistDev, resultVectorDev, resultVectorDevAll) = utils.read_lines_data(
    devfile, typeIndMap, max_window, label_type, -1)
logger.info("number of validation examples: %d", len(contextlistDev))
inputMatrixDev = utils.adeltheanomatrix_flexible(
    slotposition, vectorsize, contextlistDev, wordvecs,
    leftsize, rightsize, sum_window, useSum)

# Left context + right context + 1 (presumably the slot itself -- confirm).
contextsize = leftsize + rightsize + 1

# build negative results for hinge loss
# Copy the first n_targets entries of every dev label row into a
# (n_examples, n_targets) matrix.
# NOTE(review): the loop body may continue past the end of this fragment, so
# the element-wise structure is deliberately left unchanged.
resultMatrixDevAll = numpy.empty(shape=(len(resultVectorDevAll), n_targets))
for i in xrange(len(resultVectorDevAll)):
    for j in range(n_targets):
        resultMatrixDevAll[i][j] = resultVectorDevAll[i][j]