# ---------------------------------------------------------------------------
# Test-time setup: read settings, load the test data, declare symbolic inputs.
# NOTE(review): this fragment starts mid-script; `config`, `utils`, `sys`,
# `numpy`, `theano` and `T` are expected to be in scope already.
# ---------------------------------------------------------------------------

# File locations and hyper-parameters taken from the config mapping.
targetTypesFile = config['typefile']
vectorFile = config['ent_vectors']
learning_rate = float(config['lrate'])
batch_size = 1  # hard-wired to 1; config['batchsize'] is not consulted
n_epochs = int(config['nepochs'])
num_neg = int(config['numneg'])

# The test file and the output destination are the 2nd and 3rd CLI arguments.
testfile = sys.argv[2]
outf = sys.argv[3]

use_tanh_out = False
outputtype = config['outtype']  # hinge or softmax

# Optional switch: defaults to False when the key is absent from the config.
usetypecosine = (
    utils.str_to_bool(config['typecosine']) if 'typecosine' in config else False
)

# Type index, target count and vectors, all loaded from the two files above.
t2ind, n_targets, wordvectors, vectorsize, typefreq_traindev = (
    utils.loadTypesAndVectors(targetTypesFile, vectorFile)
)

# Test-set data; `upto=-1` is passed through unchanged to the loader.
rvt, input_matrix_test, iet, resvectstnall, ntrn = utils.fillOnlyEntityData(
    testfile,
    vectorsize,
    wordvectors,
    t2ind,
    n_targets,
    upto=-1,
    ds='test',
    binoutvec=True,
)

# Seeded generator, so any random draws made from it are repeatable.
rng = numpy.random.RandomState(23455)

if usetypecosine:
    # Single-argument parenthesised form prints the same text under the
    # Python 2 print statement.
    print('using cosine(e,t) as another input feature')
    # Original note: a matrix with size 102 * dim
    # (presumably one row per target type -- TODO confirm).
    typevecmatrix = utils.buildtypevecmatrix(t2ind, wordvectors, vectorsize)
    e2simmatrix_test = utils.buildcosinematrix(input_matrix_test, typevecmatrix)
    # The similarity matrix is handed to extend_in_matrix together with the
    # existing inputs; the result replaces input_matrix_test.
    input_matrix_test = utils.extend_in_matrix(input_matrix_test, e2simmatrix_test)

dt = theano.config.floatX  # @UndefinedVariable

# Symbolic placeholders for the Theano graph.
index = T.lscalar()  # index to a [mini]batch
x = T.matrix('x')    # symbolic input matrix
y = T.imatrix('y')   # symbolic integer matrix holding the labels
# ---------------------------------------------------------------------------
# Test-time setup for the context-window model: read settings, load the test
# contexts, build the input matrix and wrap the data in Theano shared variables.
# NOTE(review): this fragment starts mid-script; `config`, `utils`, `logger`,
# `targetTypesFile`, `vectorFile`, `testsetfile` and `useSum` are expected to
# be defined earlier. Confirm whether the first assignment below belongs to a
# conditional whose header lies above the visible chunk.
# ---------------------------------------------------------------------------
l_weight = float(config['loss_weight'])

# Enabled purely by the presence of the 'tanh' key; its value is not inspected.
use_tanh_out = 'tanh' in config
outputtype = config['outtype']  # hinge or softmax

# Context-window geometry.
leftsize = int(config['left'])
rightsize = int(config['right'])
sum_window = int(config['sumwindow'])
max_window = int(config['maxwindow'])
# Explicit floor division keeps the slot position an integer even when true
# division is in effect (Python 3 or `from __future__ import division`).
slotposition = max_window // 2
label_type = config['target']  # nt or ct

typeIndMap, n_targets, wordvecs, vectorsize, typefreq_traindev = (
    utils.loadTypesAndVectors(targetTypesFile, vectorFile, -1)
)
logger.info('word2vec vectors are loaded')

testcontexts, resultVectorTest, resultVectorTestAll = utils.read_lines_data(
    testsetfile, typeIndMap, max_window, label_type, -1
)
logger.info("number of test examples: %d", len(testcontexts))

inputMatrixTest = utils.adeltheanomatrix_flexible(
    slotposition,
    vectorsize,
    testcontexts,
    wordvecs,
    leftsize,
    rightsize,
    sum_window,
    useSum,
)
contextsize = leftsize + rightsize + 1  # left words + slot + right words

# ---- for memory: drop the references to the raw contexts and vectors ----
testcontexts = []
wordvecs = []

# ---- the network ----
test_set_x = theano.shared(
    numpy.matrix(inputMatrixTest, dtype=theano.config.floatX)  # @UndefinedVariable
)
test_set_y = theano.shared(
    numpy.matrix(resultVectorTestAll, dtype=numpy.dtype(numpy.int32))
)

# Seeded generator, so any random draws made from it are repeatable.
rng = numpy.random.RandomState(23455)

# The number of rows in the test matrix is used as the batch count.
n_test_batches = test_set_x.get_value(borrow=True).shape[0]
# ---------------------------------------------------------------------------
# Training-time setup for the context-window model: read settings, then load
# the training and validation contexts.
# NOTE(review): this fragment starts mid-script; `config`, `utils`, `logger`,
# `targetTypesFile`, `vectorFile`, `trainfile`, `devfile` and `useSum` are
# expected to be defined earlier.
# ---------------------------------------------------------------------------

# Enabled purely by the presence of the 'tanh' key; its value is not inspected.
use_tanh_out = 'tanh' in config
outputtype = config['outtype']  # hinge or softmax

# Context-window geometry.
leftsize = int(config['left'])
rightsize = int(config['right'])
sum_window = int(config['sumwindow'])
label_type = config['target']  # nt or ct
max_window = int(config['maxwindow'])
# Explicit floor division keeps the slot position an integer even when true
# division is in effect (Python 3 or `from __future__ import division`).
slotposition = max_window // 2
logger.info('slotpostion is %d', slotposition)

typeIndMap, n_targets, wordvecs, vectorsize, typefreq_traindev = (
    utils.loadTypesAndVectors(targetTypesFile, vectorFile)
)

# Training split.
traincontexts, resultVectorTrain, resultVectorTrainAll = utils.read_lines_data(
    trainfile, typeIndMap, max_window, label_type, -1
)
logger.info("number of training examples: %d", len(traincontexts))
inputMatrixTrain = utils.adeltheanomatrix_flexible(
    slotposition,
    vectorsize,
    traincontexts,
    wordvecs,
    leftsize,
    rightsize,
    sum_window,
    useSum,
)

# Validation split.
contextlistDev, resultVectorDev, resultVectorDevAll = utils.read_lines_data(
    devfile, typeIndMap, max_window, label_type, -1
)
logger.info("number of validation examples: %d", len(contextlistDev))
# ---------------------------------------------------------------------------
# Training-time setup for the entity model: read settings, load the training
# and validation data, optionally append cosine-similarity features.
# NOTE(review): this fragment starts mid-script; `config`, `utils`,
# `targetTypesFile`, `vectorFile`, `trainfile` and `devfile` are expected to
# be defined earlier.
# ---------------------------------------------------------------------------

# Regularisation defaults; both are overridden only when 'loss_reg' is set.
l_reg = ''
l_weight = 0.000001
if 'loss_reg' in config:
    l_reg = config['loss_reg']
    l_weight = float(config['loss_weight'])

# Enabled purely by the presence of the 'tanh' key; its value is not inspected.
use_tanh_out = 'tanh' in config
outputtype = config['outtype']  # hinge or softmax

# Optional switch: defaults to False when the key is absent from the config.
usetypecosine = (
    utils.str_to_bool(config['typecosine']) if 'typecosine' in config else False
)

# The same `upto` value is passed to every loader call below.
upto = -1
t2ind, n_targets, wordvectors, vectorsize, typefreq_traindev = (
    utils.loadTypesAndVectors(targetTypesFile, vectorFile, upto=upto)
)

# Training split.
rvt, input_matrix_train, iet, resvectrnall, ntrn = utils.fillOnlyEntityData(
    trainfile,
    vectorsize,
    wordvectors,
    t2ind,
    n_targets,
    upto=upto,
    binoutvec=True,
)
# Single-argument parenthesised form prints the same text under the
# Python 2 print statement.
print("number of training examples:" + str(len(iet)))

# Validation split.
rvd, input_matrix_dev, ied, resvecdevall, ntdev = utils.fillOnlyEntityData(
    devfile,
    vectorsize,
    wordvectors,
    t2ind,
    n_targets,
    upto=upto,
    binoutvec=True,
)
print("number of validation examples:" + str(len(ied)))

if usetypecosine:
    print('using cosine(e,t) as another input feature')
    # Original note: a matrix with size 102 * dim
    # (presumably one row per target type -- TODO confirm).
    typevecmatrix = utils.buildtypevecmatrix(t2ind, wordvectors, vectorsize)
    # Both similarity matrices are computed before either input is extended.
    e2simmatrix_train = utils.buildcosinematrix(input_matrix_train, typevecmatrix)
    e2simmatrix_dev = utils.buildcosinematrix(input_matrix_dev, typevecmatrix)
    input_matrix_train = utils.extend_in_matrix(input_matrix_train, e2simmatrix_train)
    input_matrix_dev = utils.extend_in_matrix(input_matrix_dev, e2simmatrix_dev)