Example #1
def buildMultiEmbModel(numClasses, seqLen, contextDim, initWeights, eventMap):
    """
	Does a RNN over a CNN model with context information
	"""
    cnnHidden = 150
    rnnHidden = 64
    cl2 = .000
    drop = .5
    convSize = [2, 3, 4, 5]

    entDim = 20
    distDim = 2

    #NOTE hard coded for dataset
    #shape = (seqLen, 300 + entDim + distDim + contextDim)
    shape = (seqLen, 300 + entDim + distDim)

    model = Sequential()

    init = {wordEmbName: [initWeights]}

    #NOTE hardcoded for dataset
    emb = tripleEmbedding(init, seqLen, entDim, distDim, 12, 13, 0)

    #TODO remove
    print("embedding input {}".format(emb.input_shape))
    print("embedding output {}".format(emb.output_shape))
    print("next level shape {}".format(shape))

    cnn = multiCNN(shape, cnnHidden, convSize, cl2)

    model.add(emb)
    model.add(cnn)

    #model.add(Conv1D(512, 2))
    #model.add(MaxPooling1D(2))
    #model.add(Bidirectional(GRU(128)))

    simple = Input(shape=(contextDim, ))
    level2 = Model(input=simple, output=simple)

    level3 = Sequential()

    level3.add(Merge([model, level2], mode="concat"))

    #level3.add(Dense(256))
    #level3.add(Activation("relu"))

    level3.add(Dropout(drop))

    level3.add(Dense(numClasses, W_constraint=MaxNorm(3.0)))
    level3.add(Activation("softmax"))

    level3.compile(loss='categorical_crossentropy',
                   optimizer='adadelta',
                   metrics=['accuracy', microF1(eventMap)])

    print(level3.summary())

    return level3
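The helper multiCNN used above (and again in Example #3) is not shown on this page. A minimal sketch of what it might look like, assuming the usual multi-width text-CNN design (one Conv1D branch per filter width in convSize, each globally max-pooled, then concatenated) and the same legacy Keras 1.x API as the surrounding code; this is a guess, not the original implementation:

from keras.layers import Input, Conv1D, GlobalMaxPooling1D, merge
from keras.models import Model
from keras.regularizers import l2

def multiCNN(shape, cnnHidden, convSize, cl2):
	"""
	Hypothetical sketch: parallel Conv1D branches over the same input,
	one per filter width, globally max-pooled and concatenated
	"""
	seq = Input(shape=shape)

	#one convolutional branch per filter width
	branches = []
	for width in convSize:
		conv = Conv1D(cnnHidden, width, activation="relu", W_regularizer=l2(cl2))(seq)
		branches.append(GlobalMaxPooling1D()(conv))

	#concatenate the pooled features from all widths
	joined = merge(branches, mode="concat")

	return Model(input=seq, output=joined)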
Example #2
def buildSplitModel(numClasses, seqLen, vecDim, contextDim, eventMap):
	"""
	Builds a GRU model on top of word embeddings and doc2vec
	"""
	hidden = 512
	rnnHidden = 128
	denseDim = 256
	cl2 = .001
	drop = .5
	convSize = 2
	maxSize = 2
	shape = (seqLen, vecDim)

	left = Sequential()
	#left.add(GRU(rnnHidden, W_regularizer=l2(cl2), U_regularizer=l2(cl2), dropout_W=drop, dropout_U=drop, input_shape=shape, return_sequences=True))

	left.add(Conv1D(denseDim, convSize, W_regularizer=l2(cl2), input_shape=shape))
	left.add(MaxPooling1D(maxSize))

	#left.add(Conv1D(denseDim, convSize, W_regularizer=l2(cl2)))
	#left.add(MaxPooling1D(convSize))

	left.add(GRU(rnnHidden, W_regularizer=l2(cl2), U_regularizer=l2(cl2), dropout_W=drop, dropout_U=drop))
	
	right = Sequential()
	#right.add(GRU(rnnHidden, W_regularizer=l2(cl2), U_regularizer=l2(cl2), dropout_W=drop, dropout_U=drop, input_shape=shape, return_sequences=True))

	right.add(Conv1D(denseDim, convSize, W_regularizer=l2(cl2), input_shape=shape))
	right.add(MaxPooling1D(maxSize))

	#right.add(Conv1D(denseDim, convSize, W_regularizer=l2(cl2)))
	#right.add(MaxPooling1D(convSize))

	right.add(GRU(rnnHidden, W_regularizer=l2(cl2), U_regularizer=l2(cl2), dropout_W=drop, dropout_U=drop))

	context = Sequential()
	context.add(Dense(denseDim, input_shape=(contextDim,)))
	context.add(LeakyReLU(.01))
	context.add(Dropout(drop))
	
	#do nothing
	#context.add(Reshape((contextDim,), input_shape=(contextDim,)))

	model = Sequential()
	model.add(Merge([left, right, context], mode="concat"))

	model.add(Dense(denseDim, W_regularizer=l2(cl2)))
	model.add(LeakyReLU(.01))
	#model.add(MaxoutDense(denseDim, W_regularizer=l2(cl2)))
	model.add(Dropout(drop))

	model.add(Dense(numClasses))
	model.add(Activation("softmax"))
	model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy', microF1(eventMap)])

	return model
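microF1(eventMap) is passed as a metric in every example, but its definition is not included. A plausible sketch, assuming it closes over the event map so the NIL class can be excluded and computes batch-level micro-averaged F1 with Keras backend ops; the original may differ:

from keras import backend as K

def microF1(eventMap):
	nil = eventMap.nilIndex()

	def micro_f1(y_true, y_pred):
		#predicted and gold class indices
		pred = K.argmax(y_pred, axis=-1)
		gold = K.argmax(y_true, axis=-1)

		#non-nil predictions, non-nil gold labels, and correct non-nil hits
		predicted = K.sum(K.cast(K.not_equal(pred, nil), "float32"))
		actual = K.sum(K.cast(K.not_equal(gold, nil), "float32"))
		correct = K.sum(K.cast(K.equal(pred, gold), "float32")
			* K.cast(K.not_equal(gold, nil), "float32"))

		precision = correct / (predicted + K.epsilon())
		recall = correct / (actual + K.epsilon())

		return (2 * precision * recall) / (precision + recall + K.epsilon())

	return micro_f1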
Example #3
def buildCNNModel(numClasses, seqLen, vecDim, contextDim, eventMap):
	"""
	Reproduces a CNN
	"""
	hidden = 512
	cnnHidden = 150
	denseDim = 256
	cl2 = .0
	drop = .5
	convSize = [2,3,4,5]
	shape = (seqLen, vecDim)

	model = Sequential()

	cnn = multiCNN(shape, cnnHidden, convSize, cl2)
	model.add(cnn)

	#TODO remove
	print("cnn shape {}".format(cnn.output_shape))

	#model.add(Flatten())
	#model.add(BatchNormalization())
	model.add(Dropout(drop))
	model.add(Dense(numClasses))
	model.add(Activation("softmax"))

	model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy', microF1(eventMap)])

	print(model.summary())

	return model
Example #4
def buildMultiEmbModel(numClasses, seqLen, contextDim, initWeights, eventMap):
	"""
	Builds the base CNN merged with context information
	"""
	drop = .5
	model = buildBaseCNN(seqLen, initWeights, 0)

	simple = Input(shape=(contextDim,))
	level2 = Model(input=simple, output=simple)

	level3 = Sequential()

	level3.add(Merge([model, level2], mode="concat"))
	level3.add(Dropout(drop))
	
	level3.add(Dense(numClasses, W_constraint=MaxNorm(3.0)))
	level3.add(Activation("softmax"))

	level3.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy', microF1(eventMap)])

	level3.summary()

	return level3
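buildBaseCNN (used here and in Example #5) is another helper that is not shown. Judging from Example #1, it presumably bundles the tripleEmbedding and multiCNN stack into one Sequential; a minimal sketch under that assumption, mirroring Example #1's hard-coded hyperparameters:

def buildBaseCNN(seqLen, initWeights, maskId):
	#hyperparameters copied from Example #1 -- an assumption
	cnnHidden = 150
	cl2 = .000
	convSize = [2, 3, 4, 5]
	entDim = 20
	distDim = 2
	shape = (seqLen, 300 + entDim + distDim)

	init = {wordEmbName: [initWeights]}

	model = Sequential()
	model.add(tripleEmbedding(init, seqLen, entDim, distDim, 12, 13, maskId))
	model.add(multiCNN(shape, cnnHidden, convSize, cl2))

	return model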
Example #5
def buildCNNEmbModel(numClasses, seqLen, contextDim, initWeights, eventMap):
	"""
	Reproduces a CNN with embeddings
	"""
	drop = .5
	model = buildBaseCNN(seqLen, initWeights, 0)
	model.add(Dropout(drop))
	
	model.add(Dense(numClasses, W_constraint=MaxNorm(3.0)))
	model.add(Activation("softmax"))

	model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy', microF1(eventMap)])

	model.summary()

	return model
Example #6
def buildAttentionModel(numClasses, seqLen, vecDim, contextDim, eventMap):
	"""
	Constructs a model with two attention components, one over each context
	"""
	hidden = 512
	rnnHidden = 128
	denseDim = 256
	cl2 = .001
	drop = .5
	convSize = 2
	maxSize = 2
	shape = (seqLen, vecDim)

	#left context model
	leftInput = Input(shape=shape)
	lconv = Conv1D(denseDim, convSize, W_regularizer=l2(cl2))(leftInput)
	lmax = MaxPooling1D(maxSize)(lconv)
	left = GRU(rnnHidden, W_regularizer=l2(cl2), U_regularizer=l2(cl2), dropout_W=drop, dropout_U=drop, return_sequences=True)(lmax)

	#the right context model
	rightInput = Input(shape=shape)
	rconv = Conv1D(denseDim, convSize, W_regularizer=l2(cl2))(rightInput)
	rmax = MaxPooling1D(maxSize)(rconv)
	right = GRU(rnnHidden, W_regularizer=l2(cl2), U_regularizer=l2(cl2), dropout_W=drop, dropout_U=drop, return_sequences=True)(rmax)

	#the model for the candidate
	cInput = Input(shape=(contextDim,))
	cdense = Dense(denseDim)(cInput)
	crelu = LeakyReLU(.01)(cdense)
	context = Dropout(drop)(crelu)

	#get the length after max pooling
	shortLen = right._keras_shape[1]

	#left attention 
	leftAttn = attentionLayer(left, context, shortLen, rnnHidden, cl2)

	#right attention 
	rightAttn = attentionLayer(right, context, shortLen, rnnHidden, cl2)

	joint = merge([leftAttn, rightAttn, context], mode="concat")
	#model = Sequential()
	#model.add(Merge([leftAttn, rightAttn, context], mode="concat"))

	jointDense = Dense(denseDim, W_regularizer=l2(cl2))(joint)
	#model.add(Dense(denseDim, W_regularizer=l2(cl2)))

	jointRelu = LeakyReLU(.01)(jointDense)
	#model.add(LeakyReLU(.01))
	#model.add(MaxoutDense(denseDim, W_regularizer=l2(cl2)))

	jointDrop = Dropout(drop)(jointRelu)
	#model.add(Dropout(drop))

	jointClass = Dense(numClasses)(jointDrop)
	#model.add(Dense(numClasses))

	jointSoft = Activation("softmax")(jointClass)
	#model.add(Activation("softmax"))

	model = Model(input=[leftInput, rightInput, cInput], output=jointSoft)

	#modelWrapper = Sequential()
	#modelWrapper.add(model)

	#NOTE hack to fix the order of the inputs
	#[left, context, right] = modelWrapper.inputs
	#modelWrapper.inputs = [left,right,context]

	model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy', microF1(eventMap)])

	return model
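attentionLayer is the one piece of this model that is not shown. A minimal sketch of one way to write it in the same Keras 1.x functional style, assuming additive attention in which the projected context vector scores each GRU state and the states are summed under the resulting softmax weights; the signature matches the calls above, but the body is an assumption:

from keras import backend as K
from keras.layers import (Dense, RepeatVector, Permute, Flatten,
	Activation, TimeDistributed, Lambda, merge)
from keras.regularizers import l2

def attentionLayer(seq, context, seqLen, hidden, cl2):
	#project the context vector and repeat it across the time steps
	proj = Dense(hidden, W_regularizer=l2(cl2))(context)
	repeated = RepeatVector(seqLen)(proj)

	#score each time step against the projected context
	scored = merge([seq, repeated], mode="concat")
	energy = TimeDistributed(Dense(1, activation="tanh"))(scored)

	#normalize the scores into attention weights over time
	weights = Activation("softmax")(Flatten()(energy))

	#broadcast the weights over the hidden dimension and sum over time
	expanded = Permute((2, 1))(RepeatVector(hidden)(weights))
	weighted = merge([seq, expanded], mode="mul")

	return Lambda(lambda x: K.sum(x, axis=1), output_shape=(hidden,))(weighted)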
def main(args):
    """
	Runs and evaluates the model
	"""
    #show gpu connection info
    #sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

    print("Reading the data")
    dataDict = loadData(args.f)

    trainingData = dataDict["train_x"]
    devData = dataDict["dev_x"]
    testData = dataDict["test_x"]

    rawTrainingLabels = dataDict["train_y"]
    rawDevLabels = dataDict["dev_y"]
    rawTestingLabels = dataDict["test_y"]

    #make the event map
    eventMap = load(open(args.m))

    trainingLabels = eventMap.namesToMatrix(rawTrainingLabels)
    devLabels = eventMap.namesToMatrix(rawDevLabels)
    testingLabels = eventMap.namesToMatrix(rawTestingLabels)

    (samples, length) = trainingData.shape

    print("#instances: {}, vector length: {}".format(samples, length))

    print("Building the model")

    #get the model
    model = buildModel(length, len(eventMap), microF1(eventMap))
    #model = buildModel(length, len(eventMap))
    print(model.summary())

    print("Training the model")
    #train the model
    #TODO include F1 metric
    #TODO try 1/cube root p for weights
    #TODO write out parameters to logger

    #hard coding class weights...
    #weights = defaultdict(lambda: 49.0)
    #weights = defaultdict(lambda: 1.0)
    #weights = defaultdict(lambda: 25.0)
    weights = defaultdict(lambda: 10.0)
    weights[eventMap.nilIndex()] = 1.0

    #make the logger
    logger = makeLogger(args.o, eventMap)

    model.fit(trainingData,
              trainingLabels,
              nb_epoch=args.e,
              batch_size=args.b,
              validation_data=(devData, devLabels),
              class_weight=weights,
              callbacks=[logger])

    #get the best model
    best = logger.best()

    print("Best Model round: {} val: {}".format(logger.bestModel,
                                                logger.bestScore))

    print("Make Predictions")
    #make predictions
    trainPred = best.predict_classes(trainingData, batch_size=args.b)
    devPred = best.predict_classes(devData, batch_size=args.b)

    print("\nEvalutation")
    #evaluate the model

    print("-----Training Scores-----")
    evaluatePredictions(trainPred, rawTrainingLabels, eventMap)

    print("\n-----Dev Scores------")
    evaluatePredictions(devPred, rawDevLabels, eventMap)

    if args.t:
        testPred = best.predict_classes(testData, batch_size=args.b)
        print("\n\n-----Test Scores------")
        evaluatePredictions(testPred, rawTestingLabels, eventMap)

    print("STD eval {}".format(best.evaluate(devData, devLabels)))