コード例 #1
0
def baseline(person, debug=1, mcnemar=False):
    """Score the party-line baseline predictor for one representative.

    The baseline predicts a yes vote (1) when the bill's sponsor belongs to
    the same party as the representative, and a no vote (0) otherwise. The
    predictions are compared against the votes recorded in the
    ``all_no_summary_linear_`` test split, which is used because that
    experiment generated data for all representatives. Only the test split
    is scored; train and test data are not concatenated.

    Args:
        person: Key into the ``representatives`` JSON; ``str(person)`` is
            also the file name read from
            ``data_set/all_no_summary_linear_test/``.
        debug: Verbosity level. ``>= 1`` prints the error percentage and
            accuracy; ``>= 2`` additionally prints the raw error counts.
        mcnemar: When true, the bill number of every wrong prediction is
            collected and returned under ``"WrongPredictions"``.

    Returns:
        A dict with the keys ``"Total Errors"``, ``"Accuracy"``,
        ``"Error Rate"``, ``"Total False Positives"``,
        ``"Total False Negatives"`` and ``"Dataset Size"`` (a string), plus
        ``"WrongPredictions"`` when ``mcnemar`` is true. Accuracy and error
        rate are percentages; both are NaN when the test set is empty.
    """
    rep_data = load_json("representatives")[person]
    rep_party = rep_data["current_role"]["party"]
    path = "all_no_summary_linear_"

    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open("data_set/" + path + "test/" + str(person)) as test_file:
        data_points = json.load(test_file)["data"]
    dataset_length = len(data_points)

    numfalseyes = 0
    numfalseno = 0
    badpredict = []

    for point in data_points:
        bill = point["bill"]
        bill_number = bill["number"]
        sponsor_party = bill["sponsor_role"]["party"]
        # presumably getVoteOutcome maps the vote option to 0 (no) or a
        # non-zero value (yes) -- confirm against svm.getVoteOutcome.
        vote = svm.getVoteOutcome(point["vote_obj"]["option"])

        # Predict the vote from the sponsor's party alone.
        predicted_vote = 1 if rep_party == sponsor_party else 0

        # Evaluate the prediction; anything that is not a 0 vote is treated
        # as a yes vote, exactly as in the original else-branch.
        if vote == 0 and predicted_vote == 1:
            numfalseyes += 1
        elif vote != 0 and predicted_vote == 0:
            numfalseno += 1
        else:
            continue  # correct prediction, nothing to record
        if mcnemar:
            badpredict.append(bill_number)

    numerrors = numfalseyes + numfalseno

    if dataset_length:
        errorrate = float(numerrors) / float(dataset_length) * 100
        accuracy = float(100) - errorrate
    else:
        # No test points: the rates are undefined. Report NaN rather than
        # dividing by zero or claiming a perfect score on no data.
        errorrate = float("nan")
        accuracy = float("nan")

    # NOTE(review): `name` is not defined inside this function; it has to
    # exist as a module-level global for the banner to print -- confirm.
    # The single-argument parenthesised form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("")
    print("=====================================")
    print("       " + name)
    print("=====================================")
    print("")
    if debug >= 2:
        print("Number of errors: " + str(numerrors))
    if debug >= 1:
        print("Error Percentage: " + str(errorrate))
        print("Accuracy: " + str(accuracy))
    if debug >= 2:
        print("Number of false predictions of a yes vote: " + str(numfalseyes))
        print("Number of false predictions of a no vote: " + str(numfalseno))

    stats = {
        "Total Errors": numerrors,
        "Accuracy": accuracy,
        "Error Rate": errorrate,
        "Total False Positives": numfalseyes,
        "Total False Negatives": numfalseno,
        # Kept as a string because that is what existing callers receive.
        "Dataset Size": str(dataset_length),
    }
    if mcnemar:
        stats["WrongPredictions"] = badpredict
    return stats
コード例 #2
0
import gen_feature_data
import svm
import config
from gen_feature_data import load_json

'''
Generic experiment with no summary features.
'''
    
# Experiment identity: `name` labels the banner and is passed on as the
# experiment name; `rep_id` selects the representative for this run.
name = "summary"
rep_id = "400404"
rep_data = load_json("representatives")[rep_id]

# Settings applied to the shared config module before any data is generated.
config.features_to_ignore = []
config.force_preprocess = True
config.use_sparse_data = True

# Banner: a blank line, a rule, the indented experiment name, a rule, and a
# trailing blank line. Emitted as one joined string; the output is identical
# to five separate print statements.
print("\n".join((
    "",
    "=====================================",
    "       " + name,
    "=====================================",
    "",
)))

# Generate our feature vectors
# TODO(john): Cache this before we start running massive tests, to save time
gen_feature_data.genExperimentData(rep_id, experiment_name=name)

# Train the SVM and print results
# TODO(john): Return results in such a way that we can analyze multiple reps