def init_vectorizers(log=True):
    """
    Builds the list of feature vectorizers.

    Two DataLoaders are created here: one over the training split, which
    is handed to the TF-IDF request-text vectorizer, and one over the full
    dataset, which is handed to the helper-ID, student and past-requests
    vectorizers. The remaining vectorizers take no data.

    log: when true, prints a progress message (terminated with a carriage
        return, so the next print overwrites it).

    Returns the vectorizers as a list, in a fixed order.
    """
    if log:
        print("Initializing vectorizers...", end="\r")

    # Loader over the training split only
    train_loader = DataLoader()
    train_loader.loadData('../dataset/dataset-train.npy')

    # Loader over the complete dataset
    full_loader = DataLoader()
    full_loader.loadData('../dataset/dataset.npy')

    # Assemble the vectorizers one at a time, keeping the original order
    vectorizers = []
    vectorizers.append(TFIDFRequestTextVectorizer(train_loader))
    vectorizers.append(HelperIDVectorizer(full_loader))
    vectorizers.append(CourseIDVectorizer())
    vectorizers.append(RequestTimeVectorizer())
    vectorizers.append(StudentVectorizer(full_loader))
    vectorizers.append(PastRequestsVectorizer(full_loader))
    vectorizers.append(DueDateVectorizer())
    return vectorizers
import tensorflow as tf
from data.DataLoader import DataLoader
from ClassificationNNModel import ClassificationNNModel
from random import uniform
from RunNN import Config

# Random search over the learning rate of ClassificationNNModel: 100 trials,
# each with a freshly sampled rate, a fresh TensorFlow graph and a fresh
# DataLoader. The (learning rate, loss) pairs are printed sorted by loss.
if __name__ == "__main__":
    config = Config("h", 10, "classification", 5)
    trials = []

    for trial in range(100):
        print("Iteration %i" % trial)

        # Sample the exponent uniformly, so the rate itself is spread
        # log-uniformly between 1e-6 and 1e-2.
        exponent = uniform(-2, -6)
        config.lr = 10**exponent

        # A new graph per trial keeps each model's ops separate.
        graph = tf.Graph()
        with graph.as_default():
            model = ClassificationNNModel(config)

            # Only keep requests whose help time is at least two minutes.
            loader = DataLoader()
            loader.loadData(
                "../dataset/dataset-train.npy",
                filterFn=lambda x: x.getHelpTimeMinutes() >= 2.0,
                log=False,
            )

            # First pass trains; the second pass over the same loader
            # returns the loss that is recorded for this trial.
            model.run(loader, "h", train=True, log=False)
            loss = model.run(loader, "h", train=False, log=False)
            trials.append((config.lr, loss))

    ranked = sorted(trials, key=lambda pair: pair[1])
    print(ranked)
def run(ModelType, args):
    """
    Trains a baseline model on the train split and evaluates it on the
    dev and test splits.

    ModelType: the model class to train. LogisticRegression gets labels
        mapped into buckets built from the training data; any other type
        gets the raw time in minutes as its label.
    args: parsed arguments. Reads args.time ('w' selects wait time,
        anything else help time; 'h' additionally enables the help-time
        filter) and, for LogisticRegression, args.buckets.

    Returns nothing; results are reported by evaluateModel.
    """
    modelName = "Logistic" if ModelType == LogisticRegression else "Linear"
    timeName = "Wait" if args.time == 'w' else "Help"
    print("\n********* %s %s Model *********" % (modelName, timeName))

    vectorizers = init_vectorizers()

    trainLoader = DataLoader()
    evaluateLoader = DataLoader()
    testLoader = DataLoader()

    # Filter out bad requests (help time under two minutes) if we are
    # running on help time. The filter is passed through a kwargs dict so
    # that the unfiltered case calls loadData with exactly one argument.
    loadKwargs = {}
    if args.time == 'h':
        loadKwargs['filterFn'] = lambda x: x.getHelpTimeMinutes() >= 2.0

    trainLoader.loadData('../dataset/dataset-train.npy', **loadKwargs)
    evaluateLoader.loadData('../dataset/dataset-dev.npy', **loadKwargs)
    testLoader.loadData('../dataset/dataset-test.npy', **loadKwargs)

    # Logistic regression predicts a bucket index, so its labels go through
    # a bucket mapper; otherwise the label is left untouched.
    if ModelType == LogisticRegression:
        buckets = make_buckets(trainLoader, args.buckets, args.time)
        mapper = make_bucket_mapper(buckets)
    else:
        mapper = lambda x: x

    labelFn = lambda x: mapper(
        x.getWaitTimeMinutes() if args.time == 'w' else x.getHelpTimeMinutes())

    trainLabels = trainLoader.getLabels(labelFn)
    trainInputs = trainLoader.applyVectorizers(vectorizers, "train", args.time)

    devLabels = evaluateLoader.getLabels(labelFn)
    devInputs = evaluateLoader.applyVectorizers(vectorizers, "dev", args.time)

    testLabels = testLoader.getLabels(labelFn)
    # Fixed: the test features must come from testLoader. They were
    # previously built from evaluateLoader, pairing test labels with
    # dev-set feature rows.
    testInputs = testLoader.applyVectorizers(vectorizers, "test", args.time)

    trainedModel = trainModel(ModelType, trainInputs, trainLabels)
    evaluateModel(trainedModel, devInputs, devLabels)
    evaluateModel(trainedModel, testInputs, testLabels)
from data.DataLoader import DataLoader
import matplotlib.pyplot as plt
from collections import Counter
from util import make_buckets, make_bucket_mapper

# Plots a side-by-side histogram of help times and wait times (in minutes)
# for every request in the full dataset.
if __name__ == "__main__":
    d = DataLoader()
    d.loadData('../dataset/dataset.npy')

    # Collect both series in a single pass over the requests.
    help_vals = []
    wait_vals = []
    for request in d.laIRRequests:
        help_vals.append(request.getHelpTimeMinutes())
        wait_vals.append(request.getWaitTimeMinutes())

    # Bin edges every 10 minutes from 0 to 110, plus an open-ended last
    # edge so that longer times fall into the final bin.
    bucket_vals = list(range(0, 120, 10))
    bucket_vals.append(float('inf'))

    series = [help_vals, wait_vals]
    series_names = ["Help Time", "Wait Time"]
    plt.hist(series, bucket_vals, label=series_names)

    plt.title("CS106 LaIR Wait and Help Times")
    plt.xlabel("Time (minutes)")
    plt.ylabel("# Requests")
    plt.legend()
    plt.show()
if 'func' not in ARGS or ARGS.func is None: parser.print_help() elif ARGS.time not in ['w', 'h', 't']: print("ERROR: invalid time '%s'" % ARGS.time) else: with tf.Graph().as_default(): model = createModel(ARGS) loader = DataLoader() vectorizers = init_vectorizers() # Filter out bad requests if we are training on help time if ARGS.time == 'h': loader.loadData( ARGS.data.name, filterFn=lambda x: x.getHelpTimeMinutes() >= 2.0) else: loader.loadData(ARGS.data.name) # Training if ARGS.func() == 'train': model.run(loader, vectorizers, ARGS.time, run_type='train') train_loss = model.run(loader, vectorizers, ARGS.time, run_type='dev') print("Train accuracy = %f" % (1 - train_loss)) # Dev / Test else: