def p2v_iterations():
    """Train p2v.out_score as a classifier over one-hot encoded scores.

    Builds training and validation arrays from the module-level data
    source ``d`` and fits the model for ``config.p2v_training_iterations``
    epochs. Relies on module globals: ``config``, ``training_set``, ``d``,
    ``p2v`` and the filter factories.
    """
    # Number of distinct scores in the inclusive configured range.
    scores_count = (config.score_range[1] - config.score_range[0]) + 1
    training_count = sum(training_set.values())

    X_prompt = np.empty([training_count, 1])
    X_ess = np.empty([training_count, config.essay_embedding_size])
    y_score = np.empty([training_count, scores_count])
    # NOTE(review): the other p2v_iterations definition in this file passes
    # config.targeted_prompts to the filter factory — confirm whether this
    # no-argument call is intentional.
    for i, essay in enumerate(d.get_essays(filterer = training_and_prompt_filter())):
        X_prompt[i] = np.array([ essay["prompt_id"] ])
        X_ess[i] = np.array(d.essay2vec(essay["essay_id"]))
        one_hot = np.zeros(scores_count)
        # Offset by the range minimum so any score_range works; the previous
        # `- 1` silently assumed config.score_range[0] == 1.
        one_hot[essay[config.targeted_field] - config.score_range[0]] = 1.0
        y_score[i] = one_hot

    # Assumes training_set values are 0/1 membership flags, so that
    # total keys minus the training sum leaves the test count — TODO confirm.
    testing_count = len(training_set.keys()) - training_count
    X_prompt_val = np.empty([testing_count, 1])
    X_ess_val = np.empty([testing_count, config.essay_embedding_size])
    y_score_val = np.empty([testing_count, scores_count])
    for i, essay in enumerate(d.get_essays(filterer = testing_and_prompt_filter())):
        X_prompt_val[i] = np.array([ essay["prompt_id"] ])
        X_ess_val[i] = np.array(d.essay2vec(essay["essay_id"]))
        one_hot = np.zeros(scores_count)
        one_hot[essay[config.targeted_field] - config.score_range[0]] = 1.0
        y_score_val[i] = one_hot

    # nb_epoch is the legacy (Keras 1.x) spelling of epochs; kept for
    # compatibility with the Keras version this file targets.
    p2v.out_score.fit([X_prompt, X_ess], y_score,
                      verbose=1,
                      batch_size=config.batch_size,
                      nb_epoch=config.p2v_training_iterations,
                      validation_data=([X_prompt_val, X_ess_val], y_score_val))
def evaluate(essays_it): total_ct = 0 correct_ct = 0 acceptable_ct = 0 diff_g_1 = 0 for essay in essays_it: X_ess = np.array([ d.essay2vec(essay["essay_id"]) ]) X_prompt = np.array([ [ essay["prompt_id"] ] ]) prediction_raw = p2v.out_score.predict([ X_prompt, X_ess ], batch_size=1)[0] actual = essay[config.targeted_field] predicted = float_to_score(prediction_raw) total_ct+=1 acceptable = [essay[f] for f in config.validation_fields] if actual == predicted: correct_ct+=1 if predicted in acceptable: acceptable_ct+=1 if abs(actual - predicted) > 1: diff_g_1+=1 print "Absolute accuracy over set", (correct_ct / float(total_ct)) print "Acceptable accuracy over set", (acceptable_ct / float(total_ct)) print "Difference > 1 over set", (diff_g_1 / float(total_ct))
def p2v_iterations():
    """Train p2v.out_score on scalar (regression) score targets.

    NOTE(review): this redefines the p2v_iterations declared earlier in
    the file; only this later definition is live at import time.

    Relies on module globals: ``config``, ``training_set``, ``d``, ``p2v``,
    ``score_to_float`` and the filter factories.
    """
    def build_arrays(row_count, filterer):
        # Assemble prompt-id, essay-embedding and scalar-score arrays for
        # every essay the given filterer accepts.
        prompts = np.empty([row_count, 1])
        embeddings = np.empty([row_count, config.essay_embedding_size])
        targets = np.empty([row_count, 1])
        for row, essay in enumerate(d.get_essays(filterer = filterer)):
            prompts[row] = np.array([ essay["prompt_id"] ])
            embeddings[row] = np.array(d.essay2vec(essay["essay_id"]))
            targets[row] = np.array([ score_to_float(essay[config.targeted_field]) ])
        return prompts, embeddings, targets

    train_rows = sum(training_set.values())
    X_prompt, X_ess, y_score = build_arrays(
        train_rows, training_and_prompt_filter(config.targeted_prompts))

    val_rows = len(training_set.keys()) - train_rows
    X_prompt_val, X_ess_val, y_score_val = build_arrays(
        val_rows, testing_and_prompt_filter(config.targeted_prompts))

    p2v.out_score.fit([X_prompt, X_ess], y_score,
                      verbose=1,
                      batch_size=config.batch_size,
                      nb_epoch=config.p2v_training_iterations,
                      validation_data=([X_prompt_val, X_ess_val], y_score_val))