def baseline(person, debug=1, mcnemar=False):
    """Score a party-line baseline predictor on one representative's test set.

    The baseline predicts a "yes" vote (1) whenever the bill sponsor's party
    equals the representative's party and a "no" vote (0) otherwise.  Each
    prediction is compared with ``svm.getVoteOutcome(vote_obj["option"])``;
    an outcome of 0 is treated as "no" and any other value as "yes".

    Args:
        person: Representative id.  Used as the key into the
            "representatives" JSON and, via ``str()``, as the name of the
            test-data file under ``data_set/all_no_summary_linear_test/``.
        debug: Verbosity.  ``>= 1`` prints the error percentage and the
            accuracy; ``>= 2`` additionally prints the raw error counts.
            The banner is printed regardless of this value.
        mcnemar: When true, the bill number of every wrong prediction is
            collected and returned under the "WrongPredictions" key.

    Returns:
        A dict with the keys "Total Errors", "Accuracy", "Error Rate",
        "Total False Positives", "Total False Negatives" and "Dataset Size"
        (stored as a string), plus "WrongPredictions" when ``mcnemar`` is
        true.  "Accuracy" and "Error Rate" are percentages.  For an empty
        test set the error rate is reported as 0.0 (accuracy 100.0) instead
        of raising ZeroDivisionError; check "Dataset Size" to detect that
        case.
    """
    rep_data = load_json("representatives")[person]
    rep_party = rep_data["current_role"]["party"]

    # Use the data from the all_no_summary_linear experiment because that
    # experiment has generated data for every representative.
    path = "all_no_summary_linear_"
    test_file = "data_set/" + path + "test/" + str(person)
    # The context manager closes the file even if JSON parsing fails.
    with open(test_file) as handle:
        data_set_test = json.load(handle)

    # Only the test split is scored; train and test are not concatenated.
    data_points = data_set_test["data"]
    dataset_length = len(data_points)

    numerrors = 0
    numfalseyes = 0
    numfalseno = 0
    badpredict = []

    for point in data_points:
        bill = point["bill"]
        bill_number = bill["number"]
        sponsor_party = bill["sponsor_role"]["party"]
        vote = svm.getVoteOutcome(point["vote_obj"]["option"])

        # Predict the vote purely from the sponsor's party.
        predicted_vote = 1 if rep_party == sponsor_party else 0

        # Classify the mistake, if any.  The counters are updated whether or
        # not `mcnemar` is set; only the bill-number collection is optional.
        if vote == 0 and predicted_vote == 1:
            numfalseyes += 1
        elif vote != 0 and predicted_vote == 0:
            numfalseno += 1
        else:
            continue  # correct prediction

        numerrors += 1
        if mcnemar:
            badpredict.append(bill_number)

    if dataset_length:
        errorrate = float(numerrors) / float(dataset_length) * 100
    else:
        # No data points: avoid dividing by zero.
        errorrate = 0.0
    accuracy = float(100) - errorrate

    # `name` is not a parameter; it is looked up as a module-level global.
    # Single-argument print(...) yields the same output under Python 2.
    print("")
    print("=====================================")
    print(" " + name)
    print("=====================================")
    print("")
    if debug >= 2:
        print("Number of errors: " + str(numerrors))
    if debug >= 1:
        print("Error Percentage: " + str(errorrate))
        print("Accuracy: " + str(accuracy))
    if debug >= 2:
        print("Number of false predictions of a yes vote: " + str(numfalseyes))
        print("Number of false predictions of a no vote: " + str(numfalseno))

    stats = {
        "Total Errors": numerrors,
        "Accuracy": accuracy,
        "Error Rate": errorrate,
        "Total False Positives": numfalseyes,
        "Total False Negatives": numfalseno,
        "Dataset Size": str(dataset_length),
    }
    if mcnemar:
        stats["WrongPredictions"] = badpredict
    return stats
import gen_feature_data
import svm
import config
from gen_feature_data import load_json

"""
Generic experiment with no summary features.
"""
# NOTE(review): the experiment is named "summary" and no features are
# ignored below -- confirm that the description above is still accurate.

# Experiment name (also shown in the banner) and the representative to run.
name = "summary"
rep_id = "400404"
# Record for the chosen representative; not referenced in the statements
# visible below.
rep_data = load_json("representatives")[rep_id]

# Experiment configuration: ignore no features, force preprocessing, and
# use sparse data.
config.features_to_ignore = []
config.force_preprocess = True
config.use_sparse_data = True

# Banner: blank line, rule, indented experiment name, rule, blank line.
# Emitted with one print; the output matches five separate prints.
print("\n".join([
    "",
    "=====================================",
    " " + name,
    "=====================================",
    "",
]))

# Generate our feature vectors
# TODO(john): Cache this before we start running massive tests, to save time
gen_feature_data.genExperimentData(rep_id, experiment_name=name)

# Train the SVM and print results
# TODO(john): Return results in such a way that we can analyze multiple reps