Esempio n. 1
0

# Casing matrix: every casing category name is mapped to an integer id.
caseLookup = {
    "numeric": 0,
    "allLower": 1,
    "allUpper": 2,
    "initialUpper": 3,
    "other": 4,
    "mainly_numeric": 5,
    "contains_digit": 6,
    "PADDING": 7,
}

# Identity matrix with one row/column per casing category, so row i is the
# one-hot vector for casing id i. Stored in Theano's configured float dtype.
# NOTE(review): presumably used as a fixed casing-embedding table -- confirm
# against the network code that consumes it.
caseMatrix = np.identity(
    len(caseLookup),
    dtype=theano.config.floatX,
)
            
     
# Read in data
# The parenthesised single-argument form prints the same text under Python 2
# and is also valid syntax under Python 3.
print("Read in data and create matrices")

# One readFile call per split; the path variables are defined elsewhere in
# the file. NOTE(review): the structure of the returned sentences is not
# visible here -- see GermEvalReader.readFile.
train_sentences = GermEvalReader.readFile(trainFile)
dev_sentences = GermEvalReader.readFile(devFile)
test_sentences = GermEvalReader.readFile(testFile)

# Create numpy arrays
# Each call yields three values, bound here as *_x, *_case_x and *_y.
# NOTE(review): presumably word-index windows, casing-index windows and label
# ids respectively (inferred from the names and the lookup tables passed in)
# -- confirm against GermEvalReader.createNumpyArrayWithCasing.
train_x, train_case_x, train_y = GermEvalReader.createNumpyArrayWithCasing(
    train_sentences, windowSize, word2Idx, label2Idx, caseLookup
)
dev_x, dev_case_x, dev_y = GermEvalReader.createNumpyArrayWithCasing(
    dev_sentences, windowSize, word2Idx, label2Idx, caseLookup
)
test_x, test_case_x, test_y = GermEvalReader.createNumpyArrayWithCasing(
    test_sentences, windowSize, word2Idx, label2Idx, caseLookup
)

#####################################
#
# Create the  Network
#
#####################################


# Layer sizes used when creating the train and predict_labels functions.

# Input width: 2 * windowSize + 1 positions.
# NOTE(review): presumably windowSize tokens on each side of a centre token
# -- confirm against createNumpyArrayWithCasing.
n_in = 2 * windowSize + 1

# Hidden-layer width, taken from the numHiddenUnits setting defined elsewhere.
n_hidden = numHiddenUnits

# One output unit per entry in label2Idx.
n_out = len(label2Idx)
    "contains_digit": 6,
    "PADDING": 7,
}

# Identity matrix with one row/column per entry in caseLookup, so row i is
# the one-hot vector for casing id i. Stored in Theano's configured float
# dtype.
caseMatrix = np.identity(len(caseLookup), dtype=theano.config.floatX)


# Read in data
# The parenthesised single-argument form prints the same text under Python 2
# and is also valid syntax under Python 3.
print("Read in data and create matrices")

# One readFile call per split; the path variables are defined elsewhere in
# the file. NOTE(review): the structure of the returned sentences is not
# visible here -- see GermEvalReader.readFile.
train_sentences = GermEvalReader.readFile(trainFile)
dev_sentences = GermEvalReader.readFile(devFile)
test_sentences = GermEvalReader.readFile(testFile)

# Create numpy arrays
# Each call yields three values, bound here as *_x, *_case_x and *_y.
# NOTE(review): presumably word-index windows, casing-index windows and label
# ids respectively (inferred from the names and the lookup tables passed in)
# -- confirm against GermEvalReader.createNumpyArrayWithCasing.
train_x, train_case_x, train_y = GermEvalReader.createNumpyArrayWithCasing(
    train_sentences, windowSize, word2Idx, label2Idx, caseLookup
)
dev_x, dev_case_x, dev_y = GermEvalReader.createNumpyArrayWithCasing(
    dev_sentences, windowSize, word2Idx, label2Idx, caseLookup
)
test_x, test_case_x, test_y = GermEvalReader.createNumpyArrayWithCasing(
    test_sentences, windowSize, word2Idx, label2Idx, caseLookup
)


#####################################
#
# Create the  Network
#
#####################################