예제 #1
0
def train():
    """Build the subjectivity classifier from the configured training set.

    Loads data with ``d.get_data(General.TRAIN_SET, General.TEST_SET)`` and
    constructs a ``BaseMethod`` from the training portion, passing the entries
    of ``SubjectivityFeatures.CLASSIFIER`` as keyword arguments. Judging by the
    progress message, constructing ``BaseMethod`` is presumably what performs
    the training -- confirm against its definition.

    Elapsed time is measured against ``start_time``, which is not defined in
    this function and is expected to exist at module level.

    Returns:
        The constructed ``BaseMethod`` instance.
    """
    # Only the training portion is used; the test portion is discarded.
    # The local is not called ``train`` so it does not shadow this function.
    train_set, _ = d.get_data(General.TRAIN_SET, General.TEST_SET)

    clf = BaseMethod(train_set, **SubjectivityFeatures.CLASSIFIER)
    # Alternative combined model, kept for reference:
    # clf = Combined(SubjectivityFeatures.CLASSIFIER, PolarityFeatures.CLASSIFIER, train_set)

    # A single pre-formatted argument prints identical text on Python 2 and 3.
    print("Finished training in %.2f sec" % (time.time() - start_time))
    return clf
예제 #2
0
    f = open("results.txt", "w")
    f.write("Generated at: " + str(strftime("%Y-%m-%d %H:%M")) + "\n")
    f.write("Performed SVM grid search in %0.3fs\n" % (time() - t0))
    f.write("Best grid search CV score: {:0.3f}\n".format(100*grid.best_score_))
    f.write("Best parameters set:\n")

    best_parameters = grid.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        f.write("\t%s: %r\n" % (param_name, best_parameters[param_name]))

    cat_score = defaultdict(list)
    for s in grid.grid_scores_:
        for cat in s.parameters.keys():
            cat_score[cat + "=" + str(s.parameters[cat])].append(s.cv_validation_scores.mean()*100)

    f.write("\nEfficient params:\n")
    cat_score = {key: sum(val)/len(val) for key, val in cat_score.items()}
    for k, v in sorted(cat_score.items(), key=itemgetter(0), reverse=True):
        f.write("\t" + k + "\t" + str(v) + "\n")

    f.write("\nParam scores:\n")
    for s in grid.grid_scores_:
        f.write(str.format("{0:.3f}", s.cv_validation_scores.mean()*100) + "\t" +
                str.format("{0:.3f}", s.cv_validation_scores.std()*100) + "\t" + str(s.parameters) + "\n")
    f.close()


if __name__ == '__main__':
    # Script entry point: load the data from ../data/all.tsv and run the
    # grid search with the subjectivity classifier configuration.
    train, test = d.get_data("../data/all.tsv", General.TEST_SET)

    config = SubjectivityFeatures.CLASSIFIER
    estimator = config['clf']
    feature_union = config['feature_union']
    # Columns 0 and 1 of the training array are the last two arguments.
    first_column = train[:, 0]
    second_column = train[:, 1]
    grid_search(estimator, feature_union, first_column, second_column)