def buildMultiEmbModel(numClasses, seqLen, contextDim, initWeights, eventMap):
    """
    Builds an RNN over a CNN model with context information
    """
    cnnHidden = 150
    rnnHidden = 64
    cl2 = .000
    drop = .5
    convSize = [2, 3, 4, 5]
    entDim = 20
    distDim = 2 #NOTE hard coded for dataset

    #shape = (seqLen, 300 + entDim + distDim + contextDim)
    shape = (seqLen, 300 + entDim + distDim)

    model = Sequential()

    init = {wordEmbName: [initWeights]} #NOTE hardcoded for dataset
    emb = tripleEmbedding(init, seqLen, entDim, distDim, 12, 13, 0)

    #TODO remove
    print("embedding input {}".format(emb.input_shape))
    print("embedding output {}".format(emb.output_shape))
    print("next level shape {}".format(shape))

    cnn = multiCNN(shape, cnnHidden, convSize, cl2)

    model.add(emb)
    model.add(cnn)
    #model.add(Conv1D(512, 2))
    #model.add(MaxPooling1D(2))
    #model.add(Bidirectional(GRU(128)))

    simple = Input(shape=(contextDim,))
    level2 = Model(input=simple, output=simple)

    level3 = Sequential()
    level3.add(Merge([model, level2], mode="concat"))
    #level3.add(Dense(256))
    #level3.add(Activation("relu"))
    level3.add(Dropout(drop))
    level3.add(Dense(numClasses, W_constraint=MaxNorm(3.0)))
    level3.add(Activation("softmax"))

    level3.compile(loss='categorical_crossentropy', optimizer='adadelta',
        metrics=['accuracy', microF1(eventMap)])

    print(level3.summary())

    return level3
def buildSplitModel(numClasses, seqLen, vecDim, contextDim, eventMap):
    """
    Builds a GRU model on top of word embeddings and doc2vec
    """
    hidden = 512
    rnnHidden = 128
    denseDim = 256
    cl2 = .001
    drop = .5
    convSize = 2
    maxSize = 2
    shape = (seqLen, vecDim)

    left = Sequential()
    #left.add(GRU(rnnHidden, W_regularizer=l2(cl2), U_regularizer=l2(cl2), dropout_W=drop, dropout_U=drop, input_shape=shape, return_sequences=True))
    left.add(Conv1D(denseDim, convSize, W_regularizer=l2(cl2), input_shape=shape))
    left.add(MaxPooling1D(maxSize))
    #left.add(Conv1D(denseDim, convSize, W_regularizer=l2(cl2)))
    #left.add(MaxPooling1D(convSize))
    left.add(GRU(rnnHidden, W_regularizer=l2(cl2), U_regularizer=l2(cl2), dropout_W=drop, dropout_U=drop))

    right = Sequential()
    #right.add(GRU(rnnHidden, W_regularizer=l2(cl2), U_regularizer=l2(cl2), dropout_W=drop, dropout_U=drop, input_shape=shape, return_sequences=True))
    right.add(Conv1D(denseDim, convSize, W_regularizer=l2(cl2), input_shape=shape))
    right.add(MaxPooling1D(maxSize))
    #right.add(Conv1D(denseDim, convSize, W_regularizer=l2(cl2)))
    #right.add(MaxPooling1D(convSize))
    right.add(GRU(rnnHidden, W_regularizer=l2(cl2), U_regularizer=l2(cl2), dropout_W=drop, dropout_U=drop))

    context = Sequential()
    context.add(Dense(denseDim, input_shape=(contextDim,)))
    context.add(LeakyReLU(.01))
    context.add(Dropout(drop))
    #do nothing
    #context.add(Reshape((contextDim,), input_shape=(contextDim,)))

    model = Sequential()
    model.add(Merge([left, right, context], mode="concat"))
    model.add(Dense(denseDim, W_regularizer=l2(cl2)))
    model.add(LeakyReLU(.01))
    #model.add(MaxoutDense(denseDim, W_regularizer=l2(cl2)))
    model.add(Dropout(drop))
    model.add(Dense(numClasses))
    model.add(Activation("softmax"))

    model.compile(loss='categorical_crossentropy', optimizer='adam',
        metrics=['accuracy', microF1(eventMap)])

    return model
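#A minimal usage sketch (not part of the original script): a Merge-based
#Sequential model like the one buildSplitModel returns expects one input
#array per branch, passed as a list in the order the branches were merged.
#The sizes and dummy arrays below are illustrative assumptions only.
def splitModelUsageSketch(eventMap):
    import numpy as np

    seqLen, vecDim, contextDim = 40, 300, 100   #illustrative sizes
    numClasses = len(eventMap)
    model = buildSplitModel(numClasses, seqLen, vecDim, contextDim, eventMap)

    leftX = np.zeros((8, seqLen, vecDim))     #tokens left of the candidate
    rightX = np.zeros((8, seqLen, vecDim))    #tokens right of the candidate
    contextX = np.zeros((8, contextDim))      #doc2vec / context features
    labels = np.zeros((8, numClasses))        #one-hot labels

    #one array per merged branch: [left, right, context]
    model.fit([leftX, rightX, contextX], labels, nb_epoch=1, batch_size=8)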
def buildCNNModel(numClasses, seqLen, vecDim, contextDim, eventMap):
    """
    Reproduces a CNN
    """
    hidden = 512
    cnnHidden = 150
    denseDim = 256
    cl2 = .0
    drop = .5
    convSize = [2, 3, 4, 5]
    shape = (seqLen, vecDim)

    model = Sequential()

    cnn = multiCNN(shape, cnnHidden, convSize, cl2)
    model.add(cnn)

    #TODO remove
    print("cnn shape {}".format(cnn.output_shape))

    #model.add(Flatten())
    #model.add(BatchNormalization())
    model.add(Dropout(drop))
    model.add(Dense(numClasses))
    model.add(Activation("softmax"))

    model.compile(loss='categorical_crossentropy', optimizer='adadelta',
        metrics=['accuracy', microF1(eventMap)])

    print(model.summary())

    return model
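#multiCNN() is defined elsewhere in the project. Given the filter widths
#[2, 3, 4, 5] and the fact that its output feeds a Dense layer directly,
#it presumably builds a multi-window CNN whose pooled branches are
#concatenated. The sketch below is an assumption written in the same
#Keras 1 idioms as the rest of the module, not the project's implementation.
def multiCNNSketch(shape, hidden, convSizes, reg):
    from keras.layers import Input, Conv1D, GlobalMaxPooling1D, merge
    from keras.models import Model
    from keras.regularizers import l2

    #single shared input, one Conv1D branch per filter width
    words = Input(shape=shape)
    branches = []
    for size in convSizes:
        conv = Conv1D(hidden, size, activation="relu", W_regularizer=l2(reg))(words)
        branches.append(GlobalMaxPooling1D()(conv))

    #concatenate the pooled features from every filter width into one vector
    merged = merge(branches, mode="concat")
    return Model(input=words, output=merged)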
def buildMultiEmbModel(numClasses, seqLen, contextDim, initWeights, eventMap):
    """
    Builds a CNN model with additional context information
    """
    drop = .5

    model = buildBaseCNN(seqLen, initWeights, 0)

    simple = Input(shape=(contextDim,))
    level2 = Model(input=simple, output=simple)

    level3 = Sequential()
    level3.add(Merge([model, level2], mode="concat"))
    level3.add(Dropout(drop))
    level3.add(Dense(numClasses, W_constraint=MaxNorm(3.0)))
    level3.add(Activation("softmax"))

    level3.compile(loss='categorical_crossentropy', optimizer='adadelta',
        metrics=['accuracy', microF1(eventMap)])

    level3.summary()

    return level3
def buildCNNEmbModel(numClasses, seqLen, contextDim, initWeights, eventMap):
    """
    Reproduces a CNN with an embedding layer
    """
    drop = .5

    model = buildBaseCNN(seqLen, initWeights, 0)

    model.add(Dropout(drop))
    model.add(Dense(numClasses, W_constraint=MaxNorm(3.0)))
    model.add(Activation("softmax"))

    model.compile(loss='categorical_crossentropy', optimizer='adadelta',
        metrics=['accuracy', microF1(eventMap)])

    model.summary()

    return model
def buildAttentionModel(numClasses, seqLen, vecDim, contextDim, eventMap):
    """
    Constructs a model with two attention portions
    """
    hidden = 512
    rnnHidden = 128
    denseDim = 256
    cl2 = .001
    drop = .5
    convSize = 2
    maxSize = 2
    shape = (seqLen, vecDim)

    #left context model
    leftInput = Input(shape=shape)
    lconv = Conv1D(denseDim, convSize, W_regularizer=l2(cl2))(leftInput)
    lmax = MaxPooling1D(maxSize)(lconv)
    left = GRU(rnnHidden, W_regularizer=l2(cl2), U_regularizer=l2(cl2), dropout_W=drop, dropout_U=drop, return_sequences=True)(lmax)

    #the right context model
    rightInput = Input(shape=shape)
    rconv = Conv1D(denseDim, convSize, W_regularizer=l2(cl2))(rightInput)
    rmax = MaxPooling1D(maxSize)(rconv)
    right = GRU(rnnHidden, W_regularizer=l2(cl2), U_regularizer=l2(cl2), dropout_W=drop, dropout_U=drop, return_sequences=True)(rmax)

    #the model for the candidate
    cInput = Input(shape=(contextDim,))
    cdense = Dense(denseDim)(cInput)
    crelu = LeakyReLU(.01)(cdense)
    context = Dropout(drop)(crelu)

    #get the length after max pooling
    shortLen = right._keras_shape[1]

    #left attention
    leftAttn = attentionLayer(left, context, shortLen, rnnHidden, cl2)

    #right attention
    rightAttn = attentionLayer(right, context, shortLen, rnnHidden, cl2)

    joint = merge([leftAttn, rightAttn, context], mode="concat")
    #model = Sequential()
    #model.add(Merge([leftAttn, rightAttn, context], mode="concat"))

    jointDense = Dense(denseDim, W_regularizer=l2(cl2))(joint)
    #model.add(Dense(denseDim, W_regularizer=l2(cl2)))

    jointRelu = LeakyReLU(.01)(jointDense)
    #model.add(LeakyReLU(.01))
    #model.add(MaxoutDense(denseDim, W_regularizer=l2(cl2)))

    jointDrop = Dropout(drop)(jointRelu)
    #model.add(Dropout(drop))

    jointClass = Dense(numClasses)(jointDrop)
    #model.add(Dense(numClasses))

    jointSoft = Activation("softmax")(jointClass)
    #model.add(Activation("softmax"))

    model = Model(input=[leftInput, rightInput, cInput], output=jointSoft)

    #modelWrapper = Sequential()
    #modelWrapper.add(model)

    #NOTE hack to fix the order of the inputs
    #[left, context, right] = modelWrapper.inputs
    #modelWrapper.inputs = [left, right, context]

    model.compile(loss='categorical_crossentropy', optimizer='adam',
        metrics=['accuracy', microF1(eventMap)])

    return model
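#attentionLayer() is defined elsewhere in the project. The sketch below shows
#one common way a context-conditioned attention layer was written against the
#Keras 1 functional API; it is an illustrative assumption, not necessarily the
#project's implementation. seq is a (seqLen, dim) sequence of GRU states and
#ctx is the candidate's context vector, matching the call signature above.
def attentionSketch(seq, ctx, seqLen, dim, reg):
    from keras import backend as K
    from keras.layers import (Dense, RepeatVector, TimeDistributed, Flatten,
                              Activation, Permute, Lambda, merge)
    from keras.regularizers import l2

    #project the context vector and tile it across every timestep of the sequence
    tiled = RepeatVector(seqLen)(Dense(dim, W_regularizer=l2(reg))(ctx))

    #score each timestep from its hidden state concatenated with the context
    combined = merge([seq, tiled], mode="concat")
    scores = TimeDistributed(Dense(1, W_regularizer=l2(reg)))(combined)

    #softmax over time gives one attention weight per timestep
    weights = Activation("softmax")(Flatten()(scores))

    #broadcast the weights over the feature axis and take the weighted sum over time
    weights = Permute((2, 1))(RepeatVector(dim)(weights))
    weighted = merge([seq, weights], mode="mul")
    return Lambda(lambda t: K.sum(t, axis=1), output_shape=(dim,))(weighted)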
def main(args):
    """
    Runs and evaluates the model
    """
    #show gpu connection info
    #sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

    print("Reading the data")
    dataDict = loadData(args.f)

    trainingData = dataDict["train_x"]
    devData = dataDict["dev_x"]
    testData = dataDict["test_x"]

    rawTrainingLabels = dataDict["train_y"]
    rawDevLabels = dataDict["dev_y"]
    rawTestingLabels = dataDict["test_y"]

    #make the event map
    eventMap = load(open(args.m))

    trainingLabels = eventMap.namesToMatrix(rawTrainingLabels)
    devLabels = eventMap.namesToMatrix(rawDevLabels)
    testingLabels = eventMap.namesToMatrix(rawTestingLabels)

    (samples, length) = trainingData.shape
    print("#instances: {}, vector length: {}".format(samples, length))

    print("Building the model")

    #get the model
    model = buildModel(length, len(eventMap), microF1(eventMap))
    #model = buildModel(length, len(eventMap))

    print(model.summary())

    print("Training the model")
    #train the model
    #TODO include F1 metric
    #TODO try 1/cube root p for weights
    #TODO write out parameters to logger

    #hard coding class weights...
    #weights = defaultdict(lambda: 49.0)
    #weights = defaultdict(lambda: 1.0)
    #weights = defaultdict(lambda: 25.0)
    weights = defaultdict(lambda: 10.0)
    weights[eventMap.nilIndex()] = 1.0

    #make the logger
    logger = makeLogger(args.o, eventMap)

    model.fit(trainingData, trainingLabels, nb_epoch=args.e, batch_size=args.b,
        validation_data=(devData, devLabels), class_weight=weights, callbacks=[logger])

    #get the best model
    best = logger.best()
    print("Best Model round: {} val: {}".format(logger.bestModel, logger.bestScore))

    print("Make Predictions")

    #make predictions
    trainPred = best.predict_classes(trainingData, batch_size=args.b)
    devPred = best.predict_classes(devData, batch_size=args.b)

    print("\nEvaluation")

    #evaluate the model
    print("-----Training Scores-----")
    evaluatePredictions(trainPred, rawTrainingLabels, eventMap)

    print("\n-----Dev Scores------")
    evaluatePredictions(devPred, rawDevLabels, eventMap)

    if args.t:
        testPred = best.predict_classes(testData, batch_size=args.b)
        print("\n\n-----Test Scores------")
        evaluatePredictions(testPred, rawTestingLabels, eventMap)

    print("STD eval {}".format(best.evaluate(devData, devLabels)))
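#A hypothetical command-line entry point matching the attributes main() reads
#(args.f, args.m, args.o, args.e, args.b, args.t). The original argument
#parsing is not shown in this file, so the help text and defaults below are
#assumptions for illustration.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Train and evaluate an event detection model")
    parser.add_argument("-f", help="path to the prepared data (passed to loadData)")
    parser.add_argument("-m", help="path to the serialized event map (read with load)")
    parser.add_argument("-o", help="output path used by makeLogger for model checkpoints")
    parser.add_argument("-e", type=int, default=10, help="number of training epochs")
    parser.add_argument("-b", type=int, default=128, help="batch size")
    parser.add_argument("-t", action="store_true", help="also evaluate on the test set")

    main(parser.parse_args())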