# Casing matrix
# Maps each casing category name to an integer index. The index selects the
# matching row of caseMatrix below.
caseLookup = {
    'numeric': 0,
    'allLower': 1,
    'allUpper': 2,
    'initialUpper': 3,
    'other': 4,
    'mainly_numeric': 5,
    'contains_digit': 6,
    'PADDING': 7,
}
# Square identity matrix with one row per casing category: row i holds a
# single 1 at column i and 0 elsewhere.
caseMatrix = np.identity(len(caseLookup), dtype=theano.config.floatX)

# Read in data
# The parenthesised single-string form prints the same text under both the
# Python 2 print statement and the Python 3 print function.
print("Read in data and create matrices")
train_sentences = GermEvalReader.readFile(trainFile)
dev_sentences = GermEvalReader.readFile(devFile)
test_sentences = GermEvalReader.readFile(testFile)

# Create numpy arrays
# Each call returns three arrays for its split, unpacked as
# (word input, casing input, labels).
# NOTE(review): the roles are inferred from the target names; confirm
# against GermEvalReader.createNumpyArrayWithCasing.
train_x, train_case_x, train_y = GermEvalReader.createNumpyArrayWithCasing(
    train_sentences, windowSize, word2Idx, label2Idx, caseLookup)
dev_x, dev_case_x, dev_y = GermEvalReader.createNumpyArrayWithCasing(
    dev_sentences, windowSize, word2Idx, label2Idx, caseLookup)
test_x, test_case_x, test_y = GermEvalReader.createNumpyArrayWithCasing(
    test_sentences, windowSize, word2Idx, label2Idx, caseLookup)

#####################################
#
# Create the Network
#
#####################################

# Create the train and predict_labels function
# NOTE(review): 2*windowSize+1 presumably counts windowSize tokens on each
# side of the centre token plus the centre token itself; confirm.
n_in = 2*windowSize+1
n_hidden = numHiddenUnits
# One output unit per entry in label2Idx.
n_out = len(label2Idx)
"contains_digit": 6, "PADDING": 7, } caseMatrix = np.identity(len(caseLookup), dtype=theano.config.floatX) # Read in data print "Read in data and create matrices" train_sentences = GermEvalReader.readFile(trainFile) dev_sentences = GermEvalReader.readFile(devFile) test_sentences = GermEvalReader.readFile(testFile) # Create numpy arrays train_x, train_case_x, train_y = GermEvalReader.createNumpyArrayWithCasing( train_sentences, windowSize, word2Idx, label2Idx, caseLookup ) dev_x, dev_case_x, dev_y = GermEvalReader.createNumpyArrayWithCasing( dev_sentences, windowSize, word2Idx, label2Idx, caseLookup ) test_x, test_case_x, test_y = GermEvalReader.createNumpyArrayWithCasing( test_sentences, windowSize, word2Idx, label2Idx, caseLookup ) ##################################### # # Create the Network # #####################################