def main():
    feature_functions = [get_character_count, get_word_count]
    predictions = {}

    for essay_set in cio.get_essay_sets():
        print("Making Predictions for Essay Set %s" % essay_set)
        train = list(cio.essays_by_set(essay_set))
        features = extract_features([x["EssayText"] for x in train],
                                    feature_functions)
        rf = RandomForestRegressor(n_estimators=50)
        rf.fit(features, [float(x["Score1"]) for x in train])

        test = list(
            cio.essays_by_set(essay_set,
                              "../Data/public_leaderboard_rel_2.tsv"))
        features = extract_features([x["EssayText"] for x in test],
                                    feature_functions)
        predicted_scores = rf.predict(features)
        for essay_id, pred_score in zip([x["Id"] for x in test],
                                        predicted_scores):
            predictions[essay_id] = round(pred_score)

    output_file = "../Submissions/length_benchmark.csv"
    print("Writing submission to %s" % output_file)
    f = open(output_file, "w")
    f.write("id,essay_score\n")
    for key in sorted(predictions.keys()):
        f.write("%d,%d\n" % (key, predictions[key]))
    f.close()
Exemple #2
0
def main():
    predictions = {}

    for essay_set in cio.get_essay_sets():
        print("Making Predictions for Essay Set %s" % essay_set)
        train = list(cio.essays_by_set(essay_set))
        bag = features.train_bag(" ".join(x["EssayText"] for x in train), 500)
        fea = [features.bag_representation(bag, x["EssayText"]) for x in train]
        rf = RandomForestRegressor(n_estimators=50)
        rf.fit(fea, [float(x["Score1"]) for x in train])

        test = list(
            cio.essays_by_set(essay_set,
                              "../Data/public_leaderboard_rel_2.tsv"))
        fea = [features.bag_representation(bag, x["EssayText"]) for x in test]
        predicted_scores = rf.predict(fea)
        for essay_id, pred_score in zip([x["Id"] for x in test],
                                        predicted_scores):
            predictions[essay_id] = round(pred_score)

    output_file = "../Submissions/bag_of_words_benchmark.csv"
    print("Writing submission to %s" % output_file)
    f = open(output_file, "w")
    f.write("id,essay_score\n")
    for key in sorted(predictions.keys()):
        f.write("%d,%d\n" % (key, predictions[key]))
    f.close()
def main():    
    feature_functions = [get_character_count, get_word_count]
    predictions = {}

    for essay_set in cio.get_essay_sets():
        print("Making Predictions for Essay Set %s" % essay_set)
        train = list(cio.essays_by_set(essay_set))
        features = extract_features([x["EssayText"] for x in train], feature_functions)
        rf = RandomForestRegressor(n_estimators = 50)
        rf.fit(features,[float(x["Score1"]) for x in train])

        test = list(cio.essays_by_set(essay_set, "../Data/public_leaderboard_rel_2.tsv"))
        features = extract_features([x["EssayText"] for x in test], feature_functions)
        predicted_scores = rf.predict(features)
        for essay_id, pred_score in zip([x["Id"] for x in test], predicted_scores):
            predictions[essay_id] = round(pred_score)
    
    output_file = "../Submissions/length_benchmark.csv"
    print("Writing submission to %s" % output_file)
    f = open(output_file, "w")
    f.write("id,essay_score\n")
    for key in sorted(predictions.keys()):
        f.write("%d,%d\n" % (key,predictions[key]))
    f.close()
def main():    
    predictions = {}

    for essay_set in cio.get_essay_sets():
        print("Making Predictions for Essay Set %s" % essay_set)
        train = list(cio.essays_by_set(essay_set))
        bag = features.train_bag(" ".join(x["EssayText"] for x in train), 500)
        fea =  [features.bag_representation(bag, x["EssayText"]) for x in train]
        rf = RandomForestRegressor(n_estimators = 50)
        rf.fit(fea,[float(x["Score1"]) for x in train])

        test = list(cio.essays_by_set(essay_set, "../Data/public_leaderboard_rel_2.tsv"))
        fea = [features.bag_representation(bag, x["EssayText"]) for x in test]
        predicted_scores = rf.predict(fea)
        for essay_id, pred_score in zip([x["Id"] for x in test], predicted_scores):
            predictions[essay_id] = round(pred_score)
    
    output_file = "../Submissions/bag_of_words_benchmark.csv"
    print("Writing submission to %s" % output_file)
    f = open(output_file, "w")
    f.write("id,essay_score\n")
    for key in sorted(predictions.keys()):
        f.write("%d,%d\n" % (key,predictions[key]))
    f.close()