Example #1
def evaluateScore(pairs, features, labels):
    vsm_models = dict(models.vsm_models)

    # For every ML algorithm we trained...
    for algorithm, name in algorithms:

        # For every question (each has 5 candidate answers)
        num_eval = len(pairs) // 5
        guesses = []
        for i in range(num_eval):
            correct = []

            # Go through each answer/question pair
            for (answer, question), phi, label in zip(pairs[i*5:i*5+5], features[i*5:i*5+5], labels[i*5:i*5+5]):

                # Get the classifier's prediction for this answer
                prediction = algorithm.predict(phi)[0]

                # If we predict the answer is right
                if prediction == 1:
                    correct.append(answer)

            # If we think no answer is right, we decline to answer the question
            if len(correct) == 0:
                guesses.append((-1, 0))

            # Otherwise, we guess the first answer predicted to be correct
            else:
                guesses.append((correct[0], question.getCorrectWord()))

        # How did this ML algorithm do? (Evaluation)
        scoring.score_model(guesses, verbose=True, modelname=name)
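
Neither the algorithms list nor scoring.score_model is shown in this snippet. A minimal sketch of what the scorer might look like, assuming it takes (guess, correct answer) pairs and applies SAT-style scoring: +1 for a correct answer, -1/4 for a wrong one, 0 for an omitted question (a guess of -1 or None, as in the examples on this page):

def score_model(guess_answer_pairs, verbose=False, modelname=""):
    # Hypothetical reimplementation; the real helper lives in scoring.py
    # of the CS221SAT project and may differ.
    right = wrong = omitted = 0
    for guess, answer in guess_answer_pairs:
        if guess is None or guess == -1:
            omitted += 1  # the model declined to answer
        elif guess == answer:
            right += 1
        else:
            wrong += 1
    sat_score = right - 0.25 * wrong
    if verbose:
        print "%s: %d right, %d wrong, %d omitted, SAT score %.2f" % \
            (modelname, right, wrong, omitted, sat_score)
    return sat_score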
Example #2
def evaluateScore(pairs, features, labels):
    vsm_models = dict(models.vsm_models)

    # For every ML Algorithm we trained...
    for algorithm, name in algorithms:

        # For every question (it has 5 answers)
        num_eval = len(pairs) // 5
        guesses = []
        for i in range(num_eval):
            correct = []

            # Go through each answer/question pair
            for (answer, question), phi, label in zip(pairs[i * 5:i * 5 + 5],
                                                      features[i * 5:i * 5 + 5],
                                                      labels[i * 5:i * 5 + 5]):

                # Get prediction
                prediction = algorithm.predict(phi)[0]

                # If we predict the answer is right
                if prediction == 1:
                    correct.append(answer)

            # If we think no answer is right, we decline to answer the question
            if len(correct) == 0:
                guesses.append((-1, 0))

            # Otherwise, we guess the first answer predicted to be correct
            else:
                guesses.append((correct[0], question.getCorrectWord()))

        # How did this ML algorithm do? (Evaluation)
        scoring.score_model(guesses, verbose=True, modelname=name)
Example #3
def main(questions, glove):

    #####################################################################################################################
    ################################################### MODELS ##########################################################
    #####################################################################################################################

    # Returns an answer word chosen uniformly at random from the question's answers
    def randomModel(question, distfunc=cosine, threshold=2, rev=False):
        return question.answers[random.randint(0, len(question.answers) - 1)]

    #####################################################################################################################
    ################################################# EVAL MODELS #######################################################
    #####################################################################################################################

    distances = [
        (kldist, "kldist"),
        (jsd, "jsd"),
        (cosine, "cosine"),
        (L2, "L2"),
        (jaccard, "jaccard")
    ]

    param_models = [
        ("Sentence", sentenceModel),
        ("Distance Model", distanceModel),
        ("Weighted VSM", weightedSentenceModel),
        ("Double Blank Combo VSM", doubleSentenceModel),
        ("Double Blank Max VSM", doubleSentenceMaxModel),
        ("Adjective Model", adjectiveModel),
        ("Noun Model", nounModel),
        ("Verb Model", verbModel)
    ]

    for name, model in param_models:
        if name == "Weighted VSM":
            scoring.score_model([(model(glove, unigrams, q, threshold=.9)[0], q.getCorrectWord()) for q in questions], verbose=True, modelname=name)
        else:
            scoring.score_model([(model(glove, q, threshold=.9)[0], q.getCorrectWord()) for q in questions], verbose=True, modelname=name)

    os.system("say Finished")
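
None of the VSM models in param_models are shown here. A hedged sketch of the shared signature, modeled on how they are called above; glove.getVec and question.getSentence are assumed interfaces, and the real implementations may differ:

import numpy as np

def sentenceModel(glove, question, distfunc=cosine, threshold=.9, rev=False):
    # Average the GloVe vectors of the words in the question's sentence
    vecs = [glove.getVec(w) for w in question.getSentence()]
    sentence_vec = np.mean([v for v in vecs if v is not None], axis=0)

    # Rank the candidate answers by distance to the sentence vector
    best, best_dist = -1, float("inf")
    for answer in question.answers:
        vec = glove.getVec(answer)
        if vec is not None and distfunc(sentence_vec, vec) < best_dist:
            best, best_dist = answer, distfunc(sentence_vec, vec)

    # Abstain (-1) if even the closest answer is beyond the threshold
    return (best, best_dist) if best_dist <= threshold else (-1, best_dist)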
Example #4
File: mlShit.py Project: lxrogers/CS221SAT
def distanceTrainDataDevTest(dev=True):
    train_data = None
    eval_data = None
    if dev:
        train_data, eval_data = get_distance_ml_training.getEvaluatingTrainingData()
    else:
        train_data, eval_data = get_distance_ml_training.getTestingTrainingData()
    train = train_data[0]
    train_labels = train_data[1]
    evals = eval_data[0]
    eval_labels = eval_data[1]

    num_dev = len(evals) // 5

    print "Training the models..."

    for model, name in distance_models:
        model.fit(train, train_labels)

    print "Get Training Error..."  # TODO: need to actually evlauate + get a SAT score + number correct + not correct for reporting purposes. Do same thing as below
    for model, name in distance_models:
        print "\nML Algorithm Training: ", name
        print "Scored: ", model.score(train, train_labels)

    print "Evaluating Models On Dev..."
    for model, name in distance_models:
        num_right = 0
        num_not_answer = 0
        num_wrong = 0
        for i in range(num_dev):
            vals = model.predict(evals[i * 5:i * 5 + 5])
            if 1 in vals:
                pred_index = numpy.where(vals == 1)[0][0]
                answer_index = eval_labels[i * 5:i * 5 + 5].index(1)
                if pred_index == answer_index:
                    num_right += 1
                else:
                    num_wrong += 1
            else:
                num_not_answer += 1
        print "\nML Algorithm Dev: ", name
        print "Answered Correctly: %d Did Not Answer: %d" % (num_right,
                                                             num_not_answer)
        print "Percent Right: ", model.score(evals, eval_labels)
        print "SAT Score: ", scoring.score_model([(1, 1)] * num_right +
                                                 [(None, 1)] * num_not_answer +
                                                 [(0, 1)] * num_wrong)
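
The distance_models list referenced here is defined elsewhere in mlShit.py and not shown. Anything exposing scikit-learn's fit/predict/score interface would fit the way it is used; a hypothetical definition:

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Sketch only; the project's actual classifier list may differ.
distance_models = [
    (LogisticRegression(), "Logistic Regression"),
    (SVC(), "SVM"),
    (RandomForestClassifier(n_estimators=100), "Random Forest"),
]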
Example #5
def evaluateScore(questions, features, labels):
    vsm_models = dict(models.vsm_models)

    # For every ML algorithm we trained...
    for algorithm, name in algorithms:

        # Answer each question with the VSM model the classifier thinks will get it right
        guesses = []
        for question, phi, label in zip(questions, features, labels):

            # The model the algorithm thinks we should use
            prediction = algorithm.predict(phi)[0]

            # If we predict no model can get this question right, decline to answer
            if prediction == "No model":
                guesses.append((-1, 0))
            else:
                # Look up the model we're going to use
                model = vsm_models[prediction]

                # Use that model to answer the question
                guesses.append((model(glove, question)[0], question.getCorrectWord()))

        # How did this ML algorithm do?
        scoring.score_model(guesses, verbose=True, modelname=name)
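
This example assumes each classifier in algorithms was trained to map a question's feature vector phi to the name of a VSM model expected to answer it correctly, or "No model". A hypothetical sketch of how such training labels could be built (the project's actual label construction is not shown):

def buildModelSelectionLabels(questions, glove, vsm_models):
    # Label each question with the first VSM model that answers it
    # correctly, or "No model" if none of them do.
    labels = []
    for question in questions:
        label = "No model"
        for name, model in vsm_models.items():
            if model(glove, question)[0] == question.getCorrectWord():
                label = name
                break
        labels.append(label)
    return labels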
Example #6
File: mlShit.py Project: lxrogers/CS221SAT
def distanceTrainDataDevTest(dev=True):
    train_data = None
    eval_data = None
    if dev:
        train_data, eval_data = get_distance_ml_training.getEvaluatingTrainingData()
    else:
        train_data, eval_data = get_distance_ml_training.getTestingTrainingData()
    train = train_data[0]
    train_labels = train_data[1]
    evals = eval_data[0]
    eval_labels = eval_data[1]
    
    num_dev = len(evals) // 5

    print "Training the models...";

    for model, name in distance_models:
	    model.fit(train, train_labels);

    print "Get Training Error..." # TODO: need to actually evlauate + get a SAT score + number correct + not correct for reporting purposes. Do same thing as below
    for model, name in distance_models:
        print "\nML Algorithm Training: ", name;
        print "Scored: ", model.score(train, train_labels);

    print "Evaluating Models On Dev..."
    for model, name in distance_models:
        num_right = 0
        num_not_answer = 0
        num_wrong = 0
        for i in range(num_dev):
            vals = model.predict(evals[i*5:i*5+5])
            if 1 in vals:
                pred_index = numpy.where(vals == 1)[0][0]
                answer_index = eval_labels[i*5:i*5+5].index(1)
                if pred_index == answer_index:
                    num_right += 1
                else:
                    num_wrong += 1
            else:
                num_not_answer += 1
        print "\nML Algorithm Dev: ", name;
        print "Answered Correctly: %d Did Not Answer: %d" %(num_right, num_not_answer)
        print "Percent Right: ", model.score(evals, eval_labels);
        print "SAT Score: ", scoring.score_model([(1,1)]*num_right + [(None,1)]*num_not_answer + [(0,1)]*num_wrong);
Example #7
def scoring():
    # Check the score of the deployed model
    score = score_model()
    return str(score)
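
Judging by the plain-string return value, scoring() reads like the body of a web endpoint. A minimal sketch of how it might be wired up with Flask; the route path and app setup are assumptions, not shown in the snippet:

from flask import Flask

app = Flask(__name__)

@app.route("/scoring", methods=["GET"])
def scoring():
    # Check the score of the deployed model and return it as plain text
    score = score_model()
    return str(score)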
Example #8
new_files = False
for filename in os.listdir(input_folder_path):
    if input_folder_path + "/" + filename not in ingested_files:
        new_files = True

################## Deciding whether to proceed, part 1
# If we found new data, proceed; otherwise, end the process here
if not new_files:
    print("No new ingested data, exiting")
    exit(0)

################## Checking for model drift
# Check whether the score of the deployed model differs from the score of a model trained on the newest ingested data
ingestion.merge_multiple_dataframe()
scoring.score_model(production=True)

with open(os.path.join(prod_deployment_path, "latestscore.txt"),
          "r") as report_file:
    old_f1 = float(report_file.read())

with open(os.path.join(model_path, "latestscore.txt"), "r") as report_file:
    new_f1 = float(report_file.read())

################## Deciding whether to proceed, part 2
# If we found model drift, proceed; otherwise, end the process here

if new_f1 >= old_f1:
    print(
        "New F1 (%s) is better than or equal to old F1 (%s), no drift detected -> exiting"
        % (new_f1, old_f1))
    exit(0)
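
When new_f1 is lower than old_f1, drift has been detected and the pipeline continues past this check. The snippet ends here; a hedged sketch of the usual continuation (retrain on the merged data, then redeploy), where the training and deployment module names are assumptions in the spirit of the ingestion and scoring modules used above:

print("Model drift detected: new F1 (%s) is worse than old F1 (%s), retraining"
      % (new_f1, old_f1))
training.train_model()                # re-train on the newly merged data
deployment.store_model_into_pickle()  # copy the new model and score into deployment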
Example #9
def main():
    if(v): print "Loading passages...";
    passages = loadPassages(f);

    # Initialize all the external data
    if(v): print "Loading all external data...";
    tfidf_array, allWords = computeTFIDFArray(passages);
    unigrams, bigrams, trigrams = getGrams();
    glove = Glove(g, delimiter=" ", header=False, quoting=csv.QUOTE_NONE);
    cooccurrences = cooccurrence()

    if(v): print "Running models..."
    # Initialize arrays to keep answers
    rand, nn, sent, tfidf, gram, syn, wdn, cc, an = [], [], [], [], [], [], [], [], [];
    
    # Loop through all the questions
    for passage in passages:
        for question in passage.questions:

            # Find relevant word
            targetword = re.findall("[\xe2\x80\x9c\u2019\"\']([A-Za-z\s]+)[\xe2\x80\x9c\u2019\"\']", question.text)[0].lower()

            # Tokenize relevant sentence
            sentence = passage.text.split("\n")[int(re.findall("[0-9]+", question.text)[0]) - 1]
            sentence = re.split("[^A-Za-z0-9]", sentence)
            sentence = filter(lambda x: len(x) > 0, sentence)
            sentence = map(lambda x: x.strip().lower(), sentence)

            # Get correct answer
            correctAnswer = question.answers[question.correctAnswer]

            # Get answers
            randAnswer = randomModel(question.answers)
            nnAnswer = nearestNeighborModel(targetword, question.answers, glove, threshold=.48)
            sentAnswer = sentenceModel(sentence, question.answers, glove, threshold=.44)
            tfidfAnswer = tfidfModel(sentence, question.answers, tfidf_array, allWords, glove, threshold=.44)
            gramAnswer = gramModel(sentence, question.answers, targetword, unigrams, bigrams, trigrams, glove, threshold=.42)
            wdnvec, wdnAnswer = wordnetModel(targetword, sentence, question.answers, glove, threshold=.46)
            synAnswer = synonymModel(targetword, wdnvec, sentence, question.answers, bigrams, trigrams, glove, threshold=.34)
            ccAnswer = cooccurrenceModel(targetword, sentence, question.answers, cooccurrences, glove)
            anAnswer = analogyModel(targetword, sentence, question.answers, cooccurrences, glove)

            # Record each model's (guess, correct answer) pair
            rand.append((randAnswer, correctAnswer))
            nn.append((nnAnswer, correctAnswer))
            sent.append((sentAnswer, correctAnswer))
            tfidf.append((tfidfAnswer, correctAnswer))
            gram.append((gramAnswer, correctAnswer))
            wdn.append((wdnAnswer, correctAnswer))
            syn.append((synAnswer, correctAnswer))
            cc.append((ccAnswer, correctAnswer))
            an.append((anAnswer, correctAnswer))

    score_model(rand, verbose=True, modelname="Random Model")
    score_model(nn, verbose=True, modelname="Nearest Neighbor Model")
    score_model(sent, verbose=True, modelname="Sentence-Based Model")
    score_model(tfidf, verbose=True, modelname="TFIDF Model")
    score_model(gram, verbose=True, modelname="Gram Model")
    score_model(wdn, verbose=True, modelname="WordNet Model")
    score_model(syn, verbose=True, modelname="Synonym Model")
    score_model(cc, verbose=True, modelname="Cooccurrence Model")
    score_model(an, verbose=True, modelname="Analogy Model")
Example #10
File: score.py Project: lxrogers/CS221SAT
def score(question_dir="../data/cayman_all_training.txt",
          glove_file="../data/glove_vectors/glove.6B.300d.txt",
          ngram_path="../data/Holmes_Training_Data/norvig.txt",
          dev=True):
    print "Training N-Grams"  # Load/Generate N-grams
    unigrams, bigrams, cgrams = getGrams(path=ngram_path)

    print "Loading Questions"  # Load questions
    questions = loadQuestions(question_dir)

    # Holds questions to be evaluated
    eval_qs = None

    if dev:
        # Split into train/dev
        split = len(questions) - len(questions) // 10
        inform("Splitting Data: " + str(split) +
               " questions in training and " + str(len(questions) - split) +
               " in dev...")
        train_questions, eval_qs = questions[:split], questions[split:]
    else:
        eval_qs = questions

    print "Loading Glove"  # Loads Glove vectors
    glove = Glove(glove_file,
                  delimiter=" ",
                  header=False,
                  quoting=csv.QUOTE_NONE,
                  v=False)

    # For every VSM model
    for name, model in vsm_models:

        # We get the model's score
        print "Scoring ", name
        answer_guess_pairs = []
        for question in eval_qs:
            guess = None

            # Weighted VSM has an extra parameter
            if name == "Weighted VSM":
                guess = model(glove, question, unigrams)[0]
            else:
                guess = model(glove, question)[0]

            # Get the correct answer
            answer = question.getCorrectWord()

            # Record the (guess, GOLD answer) pair for scoring
            answer_guess_pairs.append((guess, answer))

        print "\n\n"
        scoring.score_model(answer_guess_pairs, verbose=True, modelname=name)

    # Now score Language models
    # For every Language model
    for name, model in language_models:

        # Do the same thing as before
        print "Scoring ", name
        answer_guess_pairs = []

        # For every question
        for question in eval_qs:

            # Generate guess from model
            guess = model(unigrams, bigrams, question)[0]

            # Find GOLD answer (correct answer)
            answer = question.getCorrectWord()

            # Add tuple for scoring
            answer_guess_pairs.append((guess, answer))

        print "\n\n"
        scoring.score_model(answer_guess_pairs, verbose=True, modelname=name)
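
The language_models list is assumed to hold (name, model) pairs matching the call model(unigrams, bigrams, question)[0] above. A hypothetical bigram scorer in that shape, assuming unigrams and bigrams behave like collections.Counter (missing keys count as zero) and question.getFilledSentence is an assumed helper that returns the sentence tokens with the blank filled by the given answer:

import math

def bigramModel(unigrams, bigrams, question):
    # Score each candidate answer by the add-one smoothed bigram
    # log-probability of the filled-in sentence; higher is better.
    best, best_score = None, float("-inf")
    vocab_size = len(unigrams)
    for answer in question.answers:
        words = question.getFilledSentence(answer)
        score = 0.0
        for w1, w2 in zip(words, words[1:]):
            score += math.log((bigrams[(w1, w2)] + 1.0) /
                              (unigrams[w1] + vocab_size))
        if score > best_score:
            best, best_score = answer, score
    return (best, best_score)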
Example #11
def main():
    if (v): print "Loading passages..."
    passages = loadPassages(f)

    # Initialize all the external data
    if (v): print "Loading all external data..."
    tfidf_array, allWords = computeTFIDFArray(passages)
    unigrams, bigrams, trigrams = getGrams()
    glove = Glove(g, delimiter=" ", header=False, quoting=csv.QUOTE_NONE)
    cooccurrences = cooccurrence()

    if (v): print "Running models..."
    # Initialize arrays to keep answers
    rand, nn, sent, tfidf, gram, syn, wdn, cc, an = [], [], [], [], [], [], [], [], []

    # Loop through all the questions
    for passage in passages:
        for question in passage.questions:

            # Find relevant word
            targetword = re.findall(
                "[\xe2\x80\x9c\u2019\"\']([A-Za-z\s]+)[\xe2\x80\x9c\u2019\"\']",
                question.text)[0].lower()

            # Tokenize relevant sentence
            sentence = passage.text.split("\n")[
                int(re.findall("[0-9]+", question.text)[0]) - 1]
            sentence = re.split("[^A-Za-z0-9]", sentence)
            sentence = filter(lambda x: len(x) > 0, sentence)
            sentence = map(lambda x: x.strip().lower(), sentence)

            # Get correct answer
            correctAnswer = question.answers[question.correctAnswer]

            # Get answers
            randAnswer = randomModel(question.answers)
            nnAnswer = nearestNeighborModel(targetword,
                                            question.answers,
                                            glove,
                                            threshold=.48)
            sentAnswer = sentenceModel(sentence,
                                       question.answers,
                                       glove,
                                       threshold=.44)
            tfidfAnswer = tfidfModel(sentence,
                                     question.answers,
                                     tfidf_array,
                                     allWords,
                                     glove,
                                     threshold=.44)
            gramAnswer = gramModel(sentence,
                                   question.answers,
                                   targetword,
                                   unigrams,
                                   bigrams,
                                   trigrams,
                                   glove,
                                   threshold=.42)
            wdnvec, wdnAnswer = wordnetModel(targetword,
                                             sentence,
                                             question.answers,
                                             glove,
                                             threshold=.46)
            synAnswer = synonymModel(targetword,
                                     wdnvec,
                                     sentence,
                                     question.answers,
                                     bigrams,
                                     trigrams,
                                     glove,
                                     threshold=.34)
            ccAnswer = cooccurrenceModel(targetword, sentence,
                                         question.answers, cooccurrences,
                                         glove)
            anAnswer = analogyModel(targetword, sentence, question.answers,
                                    cooccurrences, glove)

            # Record each model's (guess, correct answer) pair
            rand.append((randAnswer, correctAnswer))
            nn.append((nnAnswer, correctAnswer))
            sent.append((sentAnswer, correctAnswer))
            tfidf.append((tfidfAnswer, correctAnswer))
            gram.append((gramAnswer, correctAnswer))
            wdn.append((wdnAnswer, correctAnswer))
            syn.append((synAnswer, correctAnswer))
            cc.append((ccAnswer, correctAnswer))
            an.append((anAnswer, correctAnswer))

    score_model(rand, verbose=True, modelname="Random Model")
    score_model(nn, verbose=True, modelname="Nearest Neighbor Model")
    score_model(sent, verbose=True, modelname="Sentence-Based Model")
    score_model(tfidf, verbose=True, modelname="TFIDF Model")
    score_model(gram, verbose=True, modelname="Gram Model")
    score_model(wdn, verbose=True, modelname="WordNet Model")
    score_model(syn, verbose=True, modelname="Synonym Model")
    score_model(cc, verbose=True, modelname="Cooccurrence Model")
    score_model(an, verbose=True, modelname="Analogy Model")
Example #12
File: score.py Project: lxrogers/CS221SAT
def score(question_dir="../data/cayman_all_training.txt", glove_file="../data/glove_vectors/glove.6B.300d.txt", ngram_path="../data/Holmes_Training_Data/norvig.txt", dev=True):
    print "Training N-Grams" # Load/Generate N-grams
    unigrams, bigrams, cgrams = getGrams(path=ngram_path)

    print "Loading Questions" # Load questions
    questions = loadQuestions(question_dir)

    # Holds questions to be evaluated
    eval_qs = None

    if dev:
        # Split into train/dev
        split = len(questions) - len(questions) // 10
        inform("Splitting Data: " + str(split) + " questions in training and " + str(len(questions) - split) + " in dev...")
        train_questions, eval_qs = questions[:split], questions[split:]
    else:
        eval_qs = questions

    print "Loading Glove" # Loads Glove vectors
    glove = Glove(glove_file, delimiter=" ", header=False, quoting=csv.QUOTE_NONE, v=False)

    # For every VSM model
    for name, model in vsm_models:

        # We get the model's score
        print "Scoring ", name
        answer_guess_pairs = []
        for question in eval_qs:
            guess = None

            # Weighted VSM has an extra parameter
            if name == "Weighted VSM":
                guess = model(glove, question, unigrams)[0]
            else:
                guess = model(glove, question)[0]

            # Get the correct answer
            answer = question.getCorrectWord()

            # Record the (guess, GOLD answer) pair for scoring
            answer_guess_pairs.append((guess, answer))
		
        print "\n\n"
        scoring.score_model(answer_guess_pairs, verbose=True, modelname=name)

    # Now score Language models
    # For every Language model
    for name, model in language_models:

        # Do the same thing as before
        print "Scoring ", name
        answer_guess_pairs = []

        # For every question
        for question in eval_qs:

            # Generate guess from model
            guess = model(unigrams, bigrams, question)[0]

            # Find GOLD answer (correct answer)
            answer = question.getCorrectWord()

            # Add tuple for scoring
            answer_guess_pairs.append((guess, answer))
		
        print "\n\n"
        scoring.score_model(answer_guess_pairs, verbose=True, modelname=name)
Example #13
def main():
    if(v): print "Loading passages...";
    passages = loadPassages(f);

    # Initialize all the external data
    if(v): print "Loading all external data...";
    tfidf_array, allWords = computeTFIDFArray(passages);
    unigrams, bigrams, trigrams = getGrams();
    glove = Glove(g, delimiter=" ", header=False, quoting=csv.QUOTE_NONE);
    cooccurrences = cooccurrence()

    if(v): print "Running models..."
    # Initialize arrays to keep answers
    rand, nn, sent, tfidf, gram, syn, wdn, cc, an = [], [], [], [], [], [], [], [], [];
    
    # Loop through all the questions
    for passage in passages:
        for question in passage.questions:

            # Find relevant word
            targetword = re.findall("[\xe2\x80\x9c\u2019\"\']([A-Za-z\s]+)[\xe2\x80\x9c\u2019\"\']", question.text)[0].lower()

            # Tokenize relevant sentence
            sentence = passage.text.split("\n")[int(re.findall("[0-9]+", question.text)[0]) - 1]
            sentence = re.split("[^A-Za-z0-9]", sentence)
            sentence = filter(lambda x: len(x) > 0, sentence)
            sentence = map(lambda x: x.strip().lower(), sentence)

            # Get correct answer
            correctAnswer = question.answers[question.correctAnswer]

            # Get answers
            randAnswer = randomModel(question.answers)
            nnAnswer = nearestNeighborModel(targetword, question.answers, glove)
            sentAnswer = sentenceModel(sentence, question.answers, glove)
            tfidfAnswer = tfidfModel(sentence, question.answers, tfidf_array, allWords, glove)
            gramAnswer = gramModel(sentence, question.answers, targetword, unigrams, bigrams, trigrams, glove)
            synAnswer = synonymModel(targetword, sentence, question.answers, bigrams, trigrams, glove)
            wdnAnswer = wordnetModel(targetword, sentence, question.answers, glove, threshold=0.3)
            ccAnswer = cooccurrenceModel(targetword, sentence, question.answers, cooccurrences, glove)
            anAnswer = analogyModel(targetword, sentence, question.answers, cooccurrences, glove)

            # Record each model's (guess, correct answer) pair
            rand.append((randAnswer, correctAnswer))
            nn.append((nnAnswer, correctAnswer))
            sent.append((sentAnswer, correctAnswer))
            tfidf.append((tfidfAnswer, correctAnswer))
            gram.append((gramAnswer, correctAnswer))
            syn.append((synAnswer, correctAnswer))
            wdn.append((wdnAnswer, correctAnswer))
            cc.append((ccAnswer, correctAnswer))
            an.append((anAnswer, correctAnswer))

    print "NN: ", percentWrong(nn);
    print "Sent: ", percentWrong(sent);
    print "gram: ", percentWrong(gram);
    print "tfidf: ", percentWrong(tfidf);
    print "syn: ", percentWrong(syn);
    print "wdn: ", percentWrong(wdn);
    print "cc: ", percentWrong(cc);
    print "an: ", percentWrong(an);

    # names = ["NN","sent","gram","tfidf","syn","wdn","cc","an"]
    # for i, m1 in enumerate(zip(names, [nn, sent, gram, tfidf, syn, wdn, cc, an])):
    #     for j, m2 in enumerate(zip(names, [nn, sent, gram, tfidf, syn, wdn, cc, an])):
    #         if(i > j):
    #             print m1[0], m2[0], percentWrong(combineModels(m1[1], m2[1])), len(combineModels(m1[1], m2[1]));

    score_model(rand, verbose=True, modelname="Random Model")
    score_model(nn, verbose=True, modelname="Nearest Neighbor Model")
    score_model(sent, verbose=True, modelname="Sentence-Based Model")
    score_model(tfidf, verbose=True, modelname="TFIDF Model")
    score_model(gram, verbose=True, modelname="Gram Model")
    score_model(syn, verbose=True, modelname="Synonym Model")
    score_model(wdn, verbose=True, modelname="WordNet Model")
    score_model(cc, verbose=True, modelname="Cooccurrence Model")
    score_model(an, verbose=True, modelname="Analogy Model")
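
Neither percentWrong nor the commented-out combineModels is shown in this example. Minimal sketches consistent with how they are called on lists of (guess, correct answer) pairs:

def percentWrong(pairs):
    # Fraction of answered questions (guess is not None/-1) that were wrong
    answered = [(g, a) for g, a in pairs if g is not None and g != -1]
    if len(answered) == 0:
        return 0.0
    return float(sum(1 for g, a in answered if g != a)) / len(answered)

def combineModels(pairs1, pairs2):
    # Back off to the second model's guess whenever the first one abstained
    return [(g2, a2) if g1 in (None, -1) else (g1, a1)
            for (g1, a1), (g2, a2) in zip(pairs1, pairs2)]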