# Give every BIO-prefix / entity-class / subtype combination its own integer
# id: 2 * 4 * 3 = 24 labels numbered 1..24 in loop order. label2Idx itself is
# created before this fragment.
idx = 1
for bioTag in ['B-', 'I-']:
    for nerClass in ['PER', 'LOC', 'ORG', 'OTH']:
        for subtype in ['', 'deriv', 'part']:
            label2Idx[bioTag + nerClass + subtype] = idx
            idx += 1

# Inverse label mapping: integer id -> label string.
idx2Label = {index: label for label, index in label2Idx.items()}

# Read in data
# Parenthesised so the line parses on Python 3; with a single string argument
# the output on Python 2 is unchanged.
print("Read in data and create matrices")
# NOTE(review): readFile presumably returns the parsed sentences of one
# GermEval file -- confirm in GermEvalReader.
train_sentences = GermEvalReader.readFile(trainFile)
dev_sentences = GermEvalReader.readFile(devFile)
test_sentences = GermEvalReader.readFile(testFile)

# Create numpy arrays: one (x, y) pair per split, all built with the same
# window size and the same word/label index mappings.
train_x, train_y = GermEvalReader.createNumpyArray(
    train_sentences, windowSize, word2Idx, label2Idx)
dev_x, dev_y = GermEvalReader.createNumpyArray(
    dev_sentences, windowSize, word2Idx, label2Idx)
test_x, test_y = GermEvalReader.createNumpyArray(
    test_sentences, windowSize, word2Idx, label2Idx)

#####################################
#
# Create the Lasagne Network
#
#####################################
"numeric": 0, "allLower": 1, "allUpper": 2, "initialUpper": 3, "other": 4, "mainly_numeric": 5, "contains_digit": 6, "PADDING": 7, } caseMatrix = np.identity(len(caseLookup), dtype=theano.config.floatX) # Read in data print "Read in data and create matrices" train_sentences = GermEvalReader.readFile(trainFile) dev_sentences = GermEvalReader.readFile(devFile) test_sentences = GermEvalReader.readFile(testFile) # Create numpy arrays train_x, train_case_x, train_y = GermEvalReader.createNumpyArrayWithCasing( train_sentences, windowSize, word2Idx, label2Idx, caseLookup ) dev_x, dev_case_x, dev_y = GermEvalReader.createNumpyArrayWithCasing( dev_sentences, windowSize, word2Idx, label2Idx, caseLookup ) test_x, test_case_x, test_y = GermEvalReader.createNumpyArrayWithCasing( test_sentences, windowSize, word2Idx, label2Idx, caseLookup )
# NOTE(review): the next two statements look like the innermost body of the
# label-numbering loops whose headers lie above this fragment (bioTag,
# nerClass, subtype and idx are all defined there) -- re-indent them to match
# when splicing this fragment back in.
label2Idx[bioTag + nerClass + subtype] = idx
idx += 1

# Inverse label mapping: integer id -> label string.
idx2Label = {index: label for label, index in label2Idx.items()}

# Casing matrix: caseLookup maps a casing category name to its integer id
# (0..7); caseMatrix is the matching 8x8 identity matrix, so row i is the
# one-hot vector for casing id i.
caseLookup = {
    'numeric': 0,
    'allLower': 1,
    'allUpper': 2,
    'initialUpper': 3,
    'other': 4,
    'mainly_numeric': 5,
    'contains_digit': 6,
    'PADDING': 7,
}
caseMatrix = np.identity(len(caseLookup), dtype=theano.config.floatX)

# Read in data
# Parenthesised so the line parses on Python 3; with a single string argument
# the output on Python 2 is unchanged.
print("Read in data and create matrices")
# NOTE(review): readFile presumably returns the parsed sentences of one
# GermEval file -- confirm in GermEvalReader.
train_sentences = GermEvalReader.readFile(trainFile)
dev_sentences = GermEvalReader.readFile(devFile)
test_sentences = GermEvalReader.readFile(testFile)

# Create numpy arrays: each split yields a (x, case_x, y) triple built with
# the same window size and the same word/label/casing lookups.
train_x, train_case_x, train_y = GermEvalReader.createNumpyArrayWithCasing(
    train_sentences, windowSize, word2Idx, label2Idx, caseLookup)
dev_x, dev_case_x, dev_y = GermEvalReader.createNumpyArrayWithCasing(
    dev_sentences, windowSize, word2Idx, label2Idx, caseLookup)
test_x, test_case_x, test_y = GermEvalReader.createNumpyArrayWithCasing(
    test_sentences, windowSize, word2Idx, label2Idx, caseLookup)

#####################################
#
# Create the Network
#
#####################################
# Create a mapping for our labels: 'O' is id 0, then every BIO-prefix /
# entity-class / subtype combination gets the next free id (2 * 4 * 3 = 24
# labels numbered 1..24 in loop order).
label2Idx = {'O': 0}
idx = 1
for bioTag in ['B-', 'I-']:
    for nerClass in ['PER', 'LOC', 'ORG', 'OTH']:
        for subtype in ['', 'deriv', 'part']:
            label2Idx[bioTag + nerClass + subtype] = idx
            idx += 1

# Inverse label mapping: integer id -> label string.
idx2Label = {index: label for label, index in label2Idx.items()}

# Read in data
# Parenthesised so the line parses on Python 3; with a single string argument
# the output on Python 2 is unchanged.
print("Read in data and create matrices")
# NOTE(review): readFile presumably returns the parsed sentences of one
# GermEval file -- confirm in GermEvalReader.
train_sentences = GermEvalReader.readFile(trainFile)
dev_sentences = GermEvalReader.readFile(devFile)
test_sentences = GermEvalReader.readFile(testFile)

# Create numpy arrays: one (x, y) pair per split, all built with the same
# window size and the same word/label index mappings.
train_x, train_y = GermEvalReader.createNumpyArray(
    train_sentences, windowSize, word2Idx, label2Idx)
dev_x, dev_y = GermEvalReader.createNumpyArray(
    dev_sentences, windowSize, word2Idx, label2Idx)
test_x, test_y = GermEvalReader.createNumpyArray(
    test_sentences, windowSize, word2Idx, label2Idx)

#####################################
#
# Create the Network
#
# Casing matrix: caseLookup maps a casing category name to its integer id
# (0..5); caseMatrix is the matching 6x6 identity matrix, so row i is the
# one-hot vector for casing id i.
caseLookup = {
    'numeric': 0,
    'allLower': 1,
    'allUpper': 2,
    'initialUpper': 3,
    'other': 4,
    'PADDING': 5,
}
caseMatrix = np.identity(len(caseLookup), dtype=theano.config.floatX)

# Read in data
# Parenthesised so the line parses on Python 3; with a single string argument
# the output on Python 2 is unchanged.
print("Read in data and create matrices")
# NOTE(review): readFile presumably returns the parsed sentences of one
# GermEval file -- confirm in GermEvalReader.
train_sentences = GermEvalReader.readFile(trainFile)
dev_sentences = GermEvalReader.readFile(devFile)
test_sentences = GermEvalReader.readFile(testFile)

# Create numpy arrays: each split yields a (x, case_x, y) triple built with
# the same window size and the same word/label/casing lookups.
# NOTE(review): the files are read through GermEvalReader but the arrays are
# built through GermEvalReader_with_casing -- confirm both modules are
# imported and that mixing them is intended.
train_x, train_case_x, train_y = (
    GermEvalReader_with_casing.createNumpyArrayWithCasing(
        train_sentences, windowSize, word2Idx, label2Idx, caseLookup))
dev_x, dev_case_x, dev_y = (
    GermEvalReader_with_casing.createNumpyArrayWithCasing(
        dev_sentences, windowSize, word2Idx, label2Idx, caseLookup))
test_x, test_case_x, test_y = (
    GermEvalReader_with_casing.createNumpyArrayWithCasing(
        test_sentences, windowSize, word2Idx, label2Idx, caseLookup))

#####################################
#
# Create the Lasagne Network
#
# Casing matrix: caseLookup maps a casing category name to its integer id
# (0..7); caseMatrix is the matching 8x8 identity matrix, so row i is the
# one-hot vector for casing id i.
caseLookup = {
    'numeric': 0,
    'allLower': 1,
    'allUpper': 2,
    'initialUpper': 3,
    'other': 4,
    'mainly_numeric': 5,
    'contains_digit': 6,
    'PADDING': 7,
}
caseMatrix = np.identity(len(caseLookup), dtype=theano.config.floatX)

# Read in data
# Parenthesised so the line parses on Python 3; with a single string argument
# the output on Python 2 is unchanged.
print("Read in data and create matrices")
# NOTE(review): readFile presumably returns the parsed sentences of one
# GermEval file -- confirm in GermEvalReader.
train_sentences = GermEvalReader.readFile(trainFile)
dev_sentences = GermEvalReader.readFile(devFile)
test_sentences = GermEvalReader.readFile(testFile)

# Create numpy arrays
# NOTE(review): unlike the windowed array builders, createDataset takes no
# window size and returns a single object per split -- its structure is not
# visible here; confirm in GermEvalReader.
train_data = GermEvalReader.createDataset(
    train_sentences, word2Idx, label2Idx, caseLookup)
dev_data = GermEvalReader.createDataset(
    dev_sentences, word2Idx, label2Idx, caseLookup)
test_data = GermEvalReader.createDataset(
    test_sentences, word2Idx, label2Idx, caseLookup)

#####################################
#
# Create the Network
#
# NOTE(review): this loop reads bioTag, nerClass and idx, none of which are
# set in this fragment -- it appears to be the innermost of the nested
# label-numbering loops that start above. Re-indent it to match when splicing
# this fragment back in.
for subtype in ['', 'deriv', 'part']:
    label2Idx[bioTag + nerClass + subtype] = idx
    idx += 1

# Inverse label mapping: integer id -> label string.
idx2Label = {index: label for label, index in label2Idx.items()}

# Casing matrix: caseLookup maps a casing category name to its integer id
# (0..5); caseMatrix is the matching 6x6 identity matrix, so row i is the
# one-hot vector for casing id i.
caseLookup = {
    'numeric': 0,
    'allLower': 1,
    'allUpper': 2,
    'initialUpper': 3,
    'other': 4,
    'PADDING': 5,
}
caseMatrix = np.identity(len(caseLookup), dtype=theano.config.floatX)

# Read in data
# Parenthesised so the line parses on Python 3; with a single string argument
# the output on Python 2 is unchanged.
print("Read in data and create matrices")
# NOTE(review): readFile presumably returns the parsed sentences of one
# GermEval file -- confirm in GermEvalReader.
train_sentences = GermEvalReader.readFile(trainFile)
dev_sentences = GermEvalReader.readFile(devFile)
test_sentences = GermEvalReader.readFile(testFile)

# Create numpy arrays: each split yields a (x, case_x, y) triple built with
# the same window size and the same word/label/casing lookups.
# NOTE(review): the files are read through GermEvalReader but the arrays are
# built through GermEvalReader_with_casing -- confirm both modules are
# imported and that mixing them is intended.
train_x, train_case_x, train_y = (
    GermEvalReader_with_casing.createNumpyArrayWithCasing(
        train_sentences, windowSize, word2Idx, label2Idx, caseLookup))
dev_x, dev_case_x, dev_y = (
    GermEvalReader_with_casing.createNumpyArrayWithCasing(
        dev_sentences, windowSize, word2Idx, label2Idx, caseLookup))
test_x, test_case_x, test_y = (
    GermEvalReader_with_casing.createNumpyArrayWithCasing(
        test_sentences, windowSize, word2Idx, label2Idx, caseLookup))

#####################################
#
# Create the Lasagne Network
#
#####################################