Ejemplo n.º 1
0
def run(ModelType, args):
	"""Train a model on the train split, then evaluate it on the dev and test splits.

	Args:
		ModelType: model class handed to ``trainModel``. When it equals
			``LogisticRegression`` the time labels are mapped into buckets
			built from the training data; otherwise the raw minutes are
			used as labels.
		args: parsed options. ``args.time`` selects the target ('w' uses the
			wait time, any other value uses the help time; 'h' additionally
			filters the data), and ``args.buckets`` is forwarded to
			``make_buckets`` for the logistic model.

	Returns:
		None. Results are reported by ``evaluateModel``.
	"""
	print("\n********* %s %s Model *********" % (("Logistic" if ModelType == LogisticRegression else "Linear"), ("Wait" if args.time == 'w' else "Help")))
	vectorizers = init_vectorizers()
	trainLoader = DataLoader()
	evaluateLoader = DataLoader()
	testLoader = DataLoader()

	# Filter out bad requests if we are running on help time.
	# The filter is only passed when needed so that loadData keeps its own
	# default behaviour in the wait-time case.
	loadKwargs = {}
	if args.time == 'h':
		loadKwargs['filterFn'] = lambda x: x.getHelpTimeMinutes() >= 2.0

	splits = (
		(trainLoader, '../dataset/dataset-train.npy'),
		(evaluateLoader, '../dataset/dataset-dev.npy'),
		(testLoader, '../dataset/dataset-test.npy'),
	)
	for loader, path in splits:
		loader.loadData(path, **loadKwargs)

	if ModelType == LogisticRegression:
		# Buckets are derived from the training split only and then reused
		# to label the dev and test splits.
		buckets = make_buckets(trainLoader, args.buckets, args.time)
		mapper = make_bucket_mapper(buckets)
	else:
		mapper = lambda x: x

	labelFn = lambda x: mapper(x.getWaitTimeMinutes() if args.time == 'w' else x.getHelpTimeMinutes())

	# Keep the train -> dev -> test order: the vectorizers are shared and are
	# told which split they are processing (presumably fitted on "train" and
	# reused afterwards -- confirm against applyVectorizers).
	trainLabels = trainLoader.getLabels(labelFn)
	trainInputs = trainLoader.applyVectorizers(vectorizers, "train", args.time)
	devLabels = evaluateLoader.getLabels(labelFn)
	devInputs = evaluateLoader.applyVectorizers(vectorizers, "dev", args.time)
	testLabels = testLoader.getLabels(labelFn)
	# Fix: test inputs must come from the test loader. Previously they were
	# built from the dev loader, so test labels were scored against dev features.
	testInputs = testLoader.applyVectorizers(vectorizers, "test", args.time)

	trainedModel = trainModel(ModelType, trainInputs, trainLabels)
	evaluateModel(trainedModel, devInputs, devLabels)
	evaluateModel(trainedModel, testInputs, testLabels)