Exemple #1
0
def main(args):
    """
    Loads a saved model and evaluates it on the dev or the test split.

    Attributes read from ``args``:
        f, s -- forwarded to ``loadData``
        emb  -- when truthy, both input sets go through ``setupEmbeddings``
        p    -- path of the serialized event map
        m    -- forwarded to ``loadModel`` together with the event map
        t    -- when truthy evaluate on the test split, otherwise on dev
        b    -- forwarded to ``predictClasses`` (presumably the batch size)
    """
    print("Reading the data")
    dataDict = loadData(args.f, args.s)

    devData = dataDict["dev_x"]
    testData = dataDict["test_x"]

    if args.emb:
        devData = setupEmbeddings(devData)
        testData = setupEmbeddings(testData)

    devLabels = dataDict["dev_y"]
    testingLabels = dataDict["test_y"]

    #make the event map; the with-block closes the file handle, which the
    #previous load(open(...)) form left open
    #NOTE(review): if `load` is pickle.load, only point args.p at trusted files
    with open(args.p) as mapFile:
        eventMap = load(mapFile)

    #load the model
    print("Loading model")
    model = loadModel(args.m, eventMap)

    #pick the correct set to eval on
    if args.t:
        print("Using Testing")
        evalLabels = testingLabels
        evalData = testData

    else:
        print("Using Dev")
        evalLabels = devLabels
        evalData = devData

    print("Evaluating")
    #make predictions
    pred = predictClasses(model, evalData, args.b)

    print("\nEvalutation")
    #evaluate the model

    print("-----Scores-----")
    evaluatePredictions(pred, evalLabels, eventMap)
Exemple #2
0
def evalModel(evalData, evalLabels, fileName, eventMap, out, batchSize):
    """
    Loads one saved model, scores it and writes a result line to ``out``.

    The model is loaded with ``loadModel(fileName, eventMap)``, its
    ``predict_classes`` output is scored by ``evaluatePredictions`` and the
    line ``"<basename of fileName> - <score>"`` is written to ``out``.

    ``b.clear_session()`` now runs even when loading, predicting, scoring or
    writing raises, so a failed model does not leave its graph behind for
    the next call. Any exception still propagates to the caller.
    """
    try:
        model = loadModel(fileName, eventMap)

        pred = model.predict_classes(evalData, batch_size=batchSize)

        score = evaluatePredictions(pred, evalLabels, eventMap, False)

        out.write("{} - {}\n".format(basename(fileName), score))

    finally:
        #clear the tensorflow compute graph
        b.clear_session()
def main(args):
    """
    Builds (or loads) an ensemble of models and evaluates it.

    Attributes read from ``args``:
        f, s -- forwarded to ``loadData``
        full -- forwarded to ``setupEmbeddings`` as its second argument
        emb  -- when truthy, all three input sets go through
                ``setupEmbeddings``
        m    -- path of the serialized event map
        f1   -- selects ``parseF1Scores`` instead of ``parseScores``
        e    -- when truthy, forwarded to ``loadEnsemble``
        d, n -- otherwise forwarded to ``loadBest`` with the score parser
        b    -- forwarded to ``majorityPredictions``
        t    -- when truthy, the test split is evaluated as well
    """
    useBothHalves = args.full

    #read the data
    print("Reading the data")
    dataDict = loadData(args.f, args.s)

    trainingData = dataDict["train_x"]
    devData = dataDict["dev_x"]
    testData = dataDict["test_x"]

    rawTrainingLabels = dataDict["train_y"]
    rawDevLabels = dataDict["dev_y"]
    rawTestingLabels = dataDict["test_y"]

    #make the event map; the with-block closes the file handle, which the
    #previous load(open(...)) form left open
    #NOTE(review): if `load` is pickle.load, only point args.m at trusted files
    with open(args.m) as mapFile:
        eventMap = load(mapFile)

    #NOTE(review): these matrices are never read below; the calls are kept
    #because it is not visible here whether namesToMatrix has side effects
    trainingLabels = eventMap.namesToMatrix(rawTrainingLabels)
    devLabels = eventMap.namesToMatrix(rawDevLabels)
    testingLabels = eventMap.namesToMatrix(rawTestingLabels)

    if args.emb:
        trainingData = setupEmbeddings(trainingData, useBothHalves)
        devData = setupEmbeddings(devData, useBothHalves)
        testData = setupEmbeddings(testData, useBothHalves)

    if args.f1:
        parser = parseF1Scores
    else:
        parser = parseScores

    #load the pre-made ensemble
    if args.e:
        models = loadEnsemble(args.e, eventMap)

    #find the top models
    else:
        models = loadBest(args.d, eventMap, args.n, parser)

    #evaluate the ensemble
    #make predictions
    trainPred = majorityPredictions(models, trainingData, args.b,
                                    len(eventMap))
    devPred = majorityPredictions(models, devData, args.b, len(eventMap))

    print("-----Training Scores-----")
    evaluatePredictions(trainPred, rawTrainingLabels, eventMap)

    print("\n-----Dev Scores------")
    evaluatePredictions(devPred, rawDevLabels, eventMap)

    if args.t:
        testPred = majorityPredictions(models, testData, args.b, len(eventMap))
        print("\n\n-----Test Scores------")
        evaluatePredictions(testPred, rawTestingLabels, eventMap)
Exemple #4
0
def trainOnFold(data, labels, outDir, numModels, partition, params):
    """
    Trains ``numModels`` models on one train/dev partition and returns the
    best one.

    ``partition`` unpacks into a training part and a dev part; each is
    handed to ``partitionData`` together with ``data`` and ``labels``.
    Every run gets its own sub-directory ``outDir/<run number>``.

    Returns the largest ``(score, run number, index, model)`` tuple, where
    ``index`` is the second value returned by ``trainModel``. Run numbers
    are unique, so tuple comparison is settled by score and then by run
    number and never reaches the model object.
    """
    trainPart, devPart = partition

    #slice out the two halves of the fold
    trainX, trainY = partitionData(data, labels, trainPart)
    devX, devY = partitionData(data, labels, devPart)

    candidates = []

    for runIdx in range(numModels):

        #each run writes into its own directory
        runDir = join(outDir, str(runIdx))
        mkdir(runDir)

        runLogger = makeLogger(runDir, params.eventMap)

        #fit one model
        trained, bestIndex = trainModel(trainX, trainY, devX, devY, runLogger,
                                        params)

        #score the dev predictions against the dev label names
        devPred = predictClasses(trained, devX, params.batchSize)
        devNames = params.eventMap.matrixToNames(devY)
        devScore = evaluatePredictions(devPred, devNames, params.eventMap,
                                       False)

        candidates.append((devScore, runIdx, bestIndex, trained))

        #need to clean up after building a model
        b.clear_session()

    #highest score wins
    return max(candidates)
Exemple #5
0
def main(args):
    """
    Trains a model, keeps the best round and evaluates it.

    Attributes read from ``args``:
        f, s   -- forwarded to ``loadData``; ``s`` also selects which input
                  array supplies the context dimension
        full   -- use both halves in ``setupEmbeddings``; implies embeddings
        emb    -- run the inputs through ``setupEmbeddings``
        o      -- output directory (created when truthy, and always handed
                  to ``makeLogger``)
        m      -- path of the serialized event map
        realis -- when truthy (embedding mode only), forwarded to
                  ``loadRealisData`` and its arrays are appended to the inputs
        e, b   -- ``nb_epoch`` and ``batch_size`` for ``model.fit``
        t      -- when truthy, the test split is evaluated as well
    """
    #fixed seed; other seeds tried: 13, 17 (better for realis14), 20
    n.random.seed(16)

    print("Reading the data")
    dataDict = loadData(args.f, args.s)

    useBothHalves = args.full
    useEmb = args.emb or args.full

    if args.o:
        mkdir(args.o)

    #unpack the data
    if useEmb:
        trainData = setupEmbeddings(dataDict["train_x"], useBothHalves)
        devData = setupEmbeddings(dataDict["dev_x"], useBothHalves)
        testData = setupEmbeddings(dataDict["test_x"], useBothHalves)
    else:
        trainData = dataDict["train_x"]
        devData = dataDict["dev_x"]
        testData = dataDict["test_x"]

    rawTrainingLabels = dataDict["train_y"]
    rawDevLabels = dataDict["dev_y"]
    rawTestingLabels = dataDict["test_y"]

    #make the event map; the with-block closes the file handle, which the
    #previous load(open(...)) form left open
    #NOTE(review): if `load` is pickle.load, only point args.m at trusted files
    with open(args.m) as mapFile:
        eventMap = load(mapFile)

    trainingLabels = eventMap.namesToMatrix(rawTrainingLabels)
    devLabels = eventMap.namesToMatrix(rawDevLabels)
    #NOTE(review): testingLabels is never read below
    testingLabels = eventMap.namesToMatrix(rawTestingLabels)

    #the first input is 2-d in embedding mode and 3-d otherwise
    if useEmb:
        (samples, seqLen) = trainData[0].shape
    else:
        (samples, seqLen, dim) = trainData[0].shape

    print(trainData[0].shape)

    if args.s:
        (rightSamples, contextDim) = trainData[2].shape
    else:

        if useBothHalves:
            rightSamples = trainData[0].shape[0]
            (_, contextDim) = trainData[-1].shape
        else:
            (rightSamples, contextDim) = trainData[-1].shape

    print("#instances: {}, seq len: {}".format(samples, seqLen))
    print("right side {} {}".format(rightSamples, contextDim))
    print("labels shape {}".format(trainingLabels.shape))

    print("Building the model")

    #get the model
    if useEmb:

        w2vPath = "data/vectors/word2vec/GoogleNews-vectors-negative300.bin.gz"
        indexPath = "data/word_index.p"

        #load the realis data
        if args.realis:
            realisData = loadRealisData(args.realis)
            trainData += [realisData[0]]
            devData += [realisData[1]]
            testData += [realisData[2]]

            #the realis array replaces the context dimension found above
            (_, contextDim) = realisData[0].shape

        #load the weights
        w2v = loadW2V(w2vPath)

        #load the index, closing the file handle afterwards
        with open(indexPath) as indexFile:
            wordIndex = load(indexFile)

        #make the initial weights
        initWeights = makeEmbeddingWeights(w2v, wordIndex)

        if args.full or args.realis:
            model = buildMultiEmbModel(len(eventMap), seqLen, contextDim,
                                       initWeights, eventMap)

        else:
            model = buildCNNEmbModel(len(eventMap), seqLen, contextDim,
                                     initWeights, eventMap)

    else:
        model = buildCNNModel(len(eventMap), seqLen, dim, contextDim, eventMap)

    #train the model
    print("Training the model")

    #hard coding class weights: 5.5 for every class except the nil class
    weights = defaultdict(lambda: 5.5)
    weights[eventMap.nilIndex()] = 1.0

    #make the logger
    logger = makeLogger(args.o, eventMap)

    model.fit(trainData, trainingLabels, nb_epoch=args.e, batch_size=args.b,
              validation_data=(devData, devLabels), callbacks=[logger],
              class_weight=weights)

    #get the best model
    best = logger.best()

    print("Best Model round: {} val: {}".format(logger.bestModel,
                                                logger.bestScore))

    print("Make Predictions")

    #make predictions
    trainPred = predictClasses(best, trainData, args.b)
    devPred = predictClasses(best, devData, args.b)

    print("\nEvalutation")
    #evaluate the model

    print("-----Training Scores-----")
    evaluatePredictions(trainPred, rawTrainingLabels, eventMap)

    print("\n-----Dev Scores------")
    evaluatePredictions(devPred, rawDevLabels, eventMap)

    if args.t:
        testPred = predictClasses(best, testData, args.b)
        print("\n\n-----Test Scores------")
        evaluatePredictions(testPred, rawTestingLabels, eventMap)
Exemple #6
0
def main(args):
    """
    Trains a sequence-labelling model, keeps the best round and evaluates it.

    Attributes read from ``args``:
        f    -- forwarded to ``loadDir``
        m    -- path of the serialized event map
        o    -- forwarded to ``makeLogger``
        e, b -- ``nb_epoch`` and ``batch_size`` for ``model.fit``; ``b`` is
                also forwarded to ``predictClasses``
        t    -- when truthy, the test split is evaluated as well

    Test predictions now go through ``predictClasses`` like the training
    and dev predictions, instead of calling ``best.predict_classes``
    directly, so all three splits are predicted the same way.
    """
    print("Reading the data")
    dataDict = loadDir(args.f)

    trainData = dataDict["train_x"]
    devData = dataDict["dev_x"]
    testData = dataDict["test_x"]

    rawTrainingLabels = dataDict["train_y"]
    rawDevLabels = dataDict["dev_y"]
    rawTestingLabels = dataDict["test_y"]

    #make the event map; the with-block closes the file handle, which the
    #previous load(open(...)) form left open
    #NOTE(review): if `load` is pickle.load, only point args.m at trusted files
    with open(args.m) as mapFile:
        eventMap = load(mapFile)

    #TODO remove (debug output of the training label distribution)
    print(Counter(eventMap.toNames(flatten(rawTrainingLabels))).items())

    trainingLabels = seqLabelsToMatrix(rawTrainingLabels, eventMap)
    devLabels = seqLabelsToMatrix(rawDevLabels, eventMap)
    #NOTE(review): testingLabels is never read below
    testingLabels = seqLabelsToMatrix(rawTestingLabels, eventMap)

    #first input is 3-d, second input is 2-d
    (samples, seqLen, dim) = trainData[0].shape
    (_, ctxDim) = trainData[1].shape

    print("#instances: {}, vector length: {}".format(samples, dim))

    print("Building the model")

    #get the model
    model = buildModel(len(eventMap), seqLen, dim, ctxDim)

    print("Training the model")

    #make the logger
    logger = makeLogger(args.o, eventMap)

    #train the model
    model.fit(trainData,
              trainingLabels,
              nb_epoch=args.e,
              batch_size=args.b,
              validation_data=(devData, devLabels),
              callbacks=[logger])

    #get the best model
    best = logger.best()

    print("Best Model round: {} val: {}".format(logger.bestModel,
                                                logger.bestScore))

    print("Make Predictions")
    #make predictions
    trainPred = predictClasses(best, trainData, args.b)
    devPred = predictClasses(best, devData, args.b)

    print("\nEvalutation")
    #evaluate the model

    print("-----Training Scores-----")
    evaluatePredictions(flatten(trainPred),
                        eventMap.toNames(flatten(rawTrainingLabels)), eventMap)

    print("\n-----Dev Scores------")
    evaluatePredictions(flatten(devPred),
                        eventMap.toNames(flatten(rawDevLabels)), eventMap)

    if args.t:
        #same prediction helper as for the training and dev splits
        testPred = predictClasses(best, testData, args.b)
        print("\n\n-----Test Scores------")
        evaluatePredictions(flatten(testPred),
                            eventMap.toNames(flatten(rawTestingLabels)),
                            eventMap)
Exemple #7
0
def main(args):
    """
    Trains an embedding-based model, evaluates the best round and
    optionally writes out realis predictions.

    Attributes read from ``args``:
        f         -- forwarded to ``loadData`` and to ``padRealis``
        full      -- build ``buildMultiEmbModel`` instead of
                     ``buildCNNEmbModel``; also disables the realis output
        o         -- output directory (created when truthy, handed to
                     ``makeLogger`` and used as root for the realis output)
        m         -- path of the serialized event map
        e, b      -- ``nb_epoch`` and ``batch_size`` for ``model.fit``
        t         -- when truthy, the test split is evaluated as well
        out       -- when truthy (and ``full`` is falsy), realis predictions
                     are written below ``args.o``
        eventPath -- forwarded to ``padRealis``
    """
    #fixed seed; 13 was also tried
    n.random.seed(16)

    print("Reading the data")
    dataDict = loadData(args.f)

    if args.o:
        mkdir(args.o)

    #unpack the data
    trainData = setupEmbeddings(dataDict["train_x"])
    devData = setupEmbeddings(dataDict["dev_x"])
    testData = setupEmbeddings(dataDict["test_x"])

    rawTrainingLabels = dataDict["train_y"]
    rawDevLabels = dataDict["dev_y"]
    rawTestingLabels = dataDict["test_y"]

    #make the event map; the with-block closes the file handle, which the
    #previous load(open(...)) form left open
    #NOTE(review): if `load` is pickle.load, only point args.m at trusted files
    with open(args.m) as mapFile:
        eventMap = load(mapFile)

    trainingLabels = eventMap.namesToMatrix(rawTrainingLabels)
    devLabels = eventMap.namesToMatrix(rawDevLabels)
    #NOTE(review): testingLabels is never read below
    testingLabels = eventMap.namesToMatrix(rawTestingLabels)

    (samples, seqLen) = trainData[0].shape

    print(trainData[0].shape)

    (rightSamples, contextDim) = trainData[-1].shape

    print("#instances: {}, seq len: {}".format(samples, seqLen))
    print("right side {} {}".format(rightSamples, contextDim))
    print("labels shape {}".format(trainingLabels.shape))

    print("Building the model")

    #get the model
    w2vPath = "data/vectors/word2vec/GoogleNews-vectors-negative300.bin.gz"
    indexPath = "data/word_index.p"

    #load the weights
    #maybe it was commented for run 13?
    w2v = loadW2V(w2vPath)

    #load the index, closing the file handle afterwards
    with open(indexPath) as indexFile:
        wordIndex = load(indexFile)

    #make the initial weights
    initWeights = makeEmbeddingWeights(w2v, wordIndex)

    if args.full:
        model = buildMultiEmbModel(len(eventMap), seqLen, contextDim,
                                   initWeights, eventMap)

    else:
        model = buildCNNEmbModel(len(eventMap), seqLen, contextDim,
                                 initWeights, eventMap)

    #train the model
    print("Training the model")

    #hard coding class weights; 5.5 and 9.0 were also tried for class 1
    weights = {0: 1.0, 1: 6.0}

    #make the logger
    logger = makeLogger(args.o, eventMap)

    model.fit(trainData,
              trainingLabels,
              nb_epoch=args.e,
              batch_size=args.b,
              validation_data=(devData, devLabels),
              callbacks=[logger],
              class_weight=weights)

    #get the best model
    best = logger.best()

    print("Best Model round: {} val: {}".format(logger.bestModel,
                                                logger.bestScore))

    print("Make Predictions")
    #make predictions
    trainPred = predictClasses(best, trainData, args.b)
    devPred = predictClasses(best, devData, args.b)

    print("\nEvalutation")
    #evaluate the model

    print("-----Training Scores-----")
    evaluatePredictions(trainPred, rawTrainingLabels, eventMap)

    print("\n-----Dev Scores------")
    evaluatePredictions(devPred, rawDevLabels, eventMap)

    if args.t:
        testPred = predictClasses(best, testData, args.b)
        print("\n\n-----Test Scores------")
        evaluatePredictions(testPred, rawTestingLabels, eventMap)

    #output the embedded layer
    if args.out and not args.full:

        realisOut = "realisOut"
        realisProbOut = "realisProbOut"

        def makeNames(subDir):
            #one output path per split below args.o/<subDir>
            return [
                join(args.o, subDir, name)
                for name in ["training_pred", "dev_pred", "test_pred"]
            ]

        outModel = buildCNNEmbOutput(len(eventMap), seqLen, contextDim,
                                     eventMap, best)

        #do realis layer prediction
        realis = predictRealis(outModel, [trainData, devData, testData])
        realisPaths = makeNames(realisOut)
        mkdir(join(args.o, realisOut))

        #do realis prob prediction
        realisProb = predictRealis(best, [trainData, devData, testData])
        realisProbPaths = makeNames(realisProbOut)
        mkdir(join(args.o, realisProbOut))

        writeRealis(padRealis(args.eventPath, args.f, realis), realisPaths)
        writeRealis(padRealis(args.eventPath, args.f, realisProb),
                    realisProbPaths)