Ejemplo n.º 1
0
def init_vectorizers(log=True):
	"""
	Build and return the list of feature vectorizers.

	When `log` is truthy, a progress message is printed first; it is
	terminated with a carriage return instead of a newline.

	Returns a list of seven vectorizer instances in a fixed order.
	"""
	if log:
		print("Initializing vectorizers...", end="\r")

	# Training split: the only data handed to the TF-IDF text vectorizer.
	train_data = DataLoader()
	train_data.loadData('../dataset/dataset-train.npy')

	# Full dataset: handed to the helper, student and past-request vectorizers.
	full_data = DataLoader()
	full_data.loadData('../dataset/dataset.npy')

	# Instantiate the vectorizers one by one, preserving their order.
	vectorizers = []
	vectorizers.append(TFIDFRequestTextVectorizer(train_data))
	vectorizers.append(HelperIDVectorizer(full_data))
	vectorizers.append(CourseIDVectorizer())
	vectorizers.append(RequestTimeVectorizer())
	vectorizers.append(StudentVectorizer(full_data))
	vectorizers.append(PastRequestsVectorizer(full_data))
	vectorizers.append(DueDateVectorizer())
	return vectorizers
Ejemplo n.º 2
0
import tensorflow as tf
from data.DataLoader import DataLoader
from ClassificationNNModel import ClassificationNNModel
from random import uniform
from RunNN import Config

if __name__ == "__main__":
    # Random search over the learning rate: every trial draws a rate of the
    # form 10**e with e sampled between -6 and -2, runs the model, and
    # records the value returned by the second (train=False) run.
    config = Config("h", 10, "classification", 5)
    trials = []

    for trial in range(100):
        print("Iteration %i" % trial)
        exponent = uniform(-2, -6)
        config.lr = 10 ** exponent

        # Each trial builds its model inside its own default graph.
        with tf.Graph().as_default():
            model = ClassificationNNModel(config)

            # Only keep requests whose help time is at least two minutes.
            data = DataLoader()
            data.loadData(
                "../dataset/dataset-train.npy",
                filterFn=lambda req: req.getHelpTimeMinutes() >= 2.0,
                log=False,
            )

            # First pass with train=True, second pass with train=False on
            # the same loader; the second pass yields the recorded loss.
            model.run(data, "h", train=True, log=False)
            trial_loss = model.run(data, "h", train=False, log=False)
            trials.append((config.lr, trial_loss))

    # Report (learning rate, loss) pairs ordered by ascending loss.
    trials.sort(key=lambda entry: entry[1])
    print(trials)
Ejemplo n.º 3
0
def run(ModelType, args):
	"""
	Train a model on the training split, then evaluate it on the dev and
	test splits.

	ModelType: model class handed to trainModel. When it equals
		LogisticRegression, labels are mapped into buckets built from the
		training data; otherwise the raw minute values are used as labels.
	args: object providing `time` and `buckets`. A `time` of 'w' selects
		wait time as the label; any other value selects help time. A `time`
		of 'h' additionally drops requests whose help time is under 2.0
		minutes. `buckets` is forwarded to make_buckets and is only read
		for LogisticRegression.
	"""
	useWaitTime = args.time == 'w'
	print("\n********* %s %s Model *********" % (("Logistic" if ModelType == LogisticRegression else "Linear"), ("Wait" if useWaitTime else "Help")))
	vectorizers = init_vectorizers()
	trainLoader = DataLoader()
	evaluateLoader = DataLoader()
	testLoader = DataLoader()

	# Filter out bad requests if we are running on help time. The filter is
	# passed only when needed so the unfiltered calls stay exactly as before.
	loadOptions = {}
	if args.time == 'h':
		loadOptions['filterFn'] = lambda x: x.getHelpTimeMinutes() >= 2.0
	trainLoader.loadData('../dataset/dataset-train.npy', **loadOptions)
	evaluateLoader.loadData('../dataset/dataset-dev.npy', **loadOptions)
	testLoader.loadData('../dataset/dataset-test.npy', **loadOptions)

	# Logistic regression predicts a bucket index; other models predict the
	# raw number of minutes.
	if ModelType == LogisticRegression:
		buckets = make_buckets(trainLoader, args.buckets, args.time)
		mapper = make_bucket_mapper(buckets)
	else:
		mapper = lambda x: x

	labelFn = lambda x: mapper(x.getWaitTimeMinutes() if useWaitTime else x.getHelpTimeMinutes())
	trainLabels = trainLoader.getLabels(labelFn)
	trainInputs = trainLoader.applyVectorizers(vectorizers, "train", args.time)
	devLabels = evaluateLoader.getLabels(labelFn)
	devInputs = evaluateLoader.applyVectorizers(vectorizers, "dev", args.time)
	testLabels = testLoader.getLabels(labelFn)
	# Vectorize the test split from testLoader (not evaluateLoader) so the
	# test inputs line up with testLabels.
	testInputs = testLoader.applyVectorizers(vectorizers, "test", args.time)

	trainedModel = trainModel(ModelType, trainInputs, trainLabels)
	evaluateModel(trainedModel, devInputs, devLabels)
	evaluateModel(trainedModel, testInputs, testLabels)
Ejemplo n.º 4
0
from data.DataLoader import DataLoader
import matplotlib.pyplot as plt
from collections import Counter
from util import make_buckets, make_bucket_mapper

if __name__ == "__main__":
    # Load the full dataset and plot help and wait times as a grouped
    # histogram.
    loader = DataLoader()
    loader.loadData('../dataset/dataset.npy')

    help_minutes = [req.getHelpTimeMinutes() for req in loader.laIRRequests]
    wait_minutes = [req.getWaitTimeMinutes() for req in loader.laIRRequests]

    # Bin edges every 10 minutes from 0 to 110, plus an open-ended last bin.
    bin_edges = list(range(0, 120, 10))
    bin_edges.append(float('inf'))

    plt.hist(
        [help_minutes, wait_minutes],
        bins=bin_edges,
        label=["Help Time", "Wait Time"],
    )
    plt.xlabel("Time (minutes)")
    plt.ylabel("# Requests")
    plt.title("CS106 LaIR Wait and Help Times")
    plt.legend()
    plt.show()
Ejemplo n.º 5
0
    if 'func' not in ARGS or ARGS.func is None:
        parser.print_help()
    elif ARGS.time not in ['w', 'h', 't']:
        print("ERROR: invalid time '%s'" % ARGS.time)
    else:
        with tf.Graph().as_default():

            model = createModel(ARGS)
            loader = DataLoader()
            vectorizers = init_vectorizers()

            # Filter out bad requests if we are training on help time
            if ARGS.time == 'h':
                loader.loadData(
                    ARGS.data.name,
                    filterFn=lambda x: x.getHelpTimeMinutes() >= 2.0)
            else:
                loader.loadData(ARGS.data.name)

            # Training
            if ARGS.func() == 'train':
                model.run(loader, vectorizers, ARGS.time, run_type='train')
                train_loss = model.run(loader,
                                       vectorizers,
                                       ARGS.time,
                                       run_type='dev')
                print("Train accuracy = %f" % (1 - train_loss))

            # Dev / Test
            else: