def test_data_size_vs_diff(dm, given_dict, infer_dict):
    """Plot the absolute estimation error for growing prefixes of the data.

    The data model is (re)loaded without normalisation, then for each prefix
    ``dm.data[:i]`` a fresh ``DataModel``/``Experimenter`` pair is built and
    ``abs(estimation - actual)`` is recorded.  The resulting curve is shown
    with matplotlib.

    Args:
        dm: Data model providing ``read_data`` and a sliceable ``data``.
        given_dict: Forwarded unchanged to the experimenter's
            ``get_actual_result`` / ``generic_get_estimated_result``.
        infer_dict: Forwarded unchanged to the same two calls.

    Note:
        The prefix sizes come from ``range(step, data_size, step)``; the stop
        is exclusive, so the complete data set itself is never evaluated.
    """
    # Read all data from the data model.
    dm.read_data(normalize_data=False)

    # Attribute sets tried previously:
    #   [U_UNIVERSITY_CODE, PROGRAM_CODE, UNIVERSITY, MAJOR_CODE, TERM]
    #   [MAJOR_CODE, PROGRAM_CODE, TERM]
    attr_list = [U_UNIVERSITY_CODE, PROGRAM_CODE, UNIVERSITY]

    data_size = len(dm.data)
    # Aim for ten steps.  Clamp to 1 so that fewer than ten rows does not
    # produce a zero step, which range() rejects with ValueError.
    step_size = max(1, data_size // 10)

    size = []
    accuracy = []
    for i in range(step_size, data_size, step_size):
        dm_test = DataModel("")
        dm_test.set_data(dm.data[:i])
        exp_test = Experimenter(dm_test, attr_list)

        actual = exp_test.get_actual_result(given_dict, infer_dict)
        estimation = exp_test.generic_get_estimated_result(given_dict, infer_dict)

        size.append(i)
        accuracy.append(abs(estimation - actual))

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Step:%d--->Actual:%f--->Estimate:%f" % (i, actual, estimation))
        print("-------------------------------------------------------------")

    plt.figure()
    plt.plot(size, accuracy)
    plt.title("Data Size vs Accuracy")
    plt.show()
def perform_datasize_vs_efficiency(self, given_dict, infer_dict, max_datasize=None, steps=10):
    """Measure mean query time for data sets of increasing size.

    For each of ``steps`` evenly spaced sizes up to ``max_datasize`` a fresh
    ``DataModel``/``Experimenter`` pair is built on the first ``size`` rows
    and ``time_n_queries`` is run on it.  When a requested size exceeds the
    available data, the data is repeated end-to-end until it is long enough.

    Args:
        given_dict: Forwarded unchanged to ``time_n_queries``.
        infer_dict: Forwarded unchanged to ``time_n_queries``.
        max_datasize: Largest data size to test; defaults to
            ``len(self.dm.data)``.
        steps: Number of sizes to test.

    Returns:
        Tuple ``(sizes, est_times, acc_times)`` of equal-length lists: the
        tested sizes, and the arithmetic means of the first and second
        sequences returned by ``time_n_queries`` (presumably estimated-query
        and actual-query timings -- confirm against ``Experimenter``).

    Raises:
        ValueError: If padding is required but ``self.dm.data`` is empty.
    """
    base_data = self.dm.data
    if max_datasize is None:
        max_datasize = len(base_data)
    # Floor division keeps the step an int on Python 3 too; a float step
    # would make the slice below fail.
    data_step = max_datasize // steps

    # Pad a private copy.  Extending self.dm.data in place would permanently
    # enlarge the model's data set for every later experiment.
    pool = list(base_data)

    sizes, est_times, acc_times = [], [], []
    for i in range(steps):
        cur_datasize = (i + 1) * data_step

        if len(pool) < cur_datasize:
            if len(base_data) == 0:
                # Repeating an empty data set can never reach the target size.
                raise ValueError("cannot pad an empty data set to size %d" % cur_datasize)
            while len(pool) < cur_datasize:
                pool.extend(base_data)

        cur_dm = DataModel("")
        cur_dm.set_data(pool[:cur_datasize])
        cur_exp = Experimenter(cur_dm, self.attr_list)
        (cur_est, cur_acc) = cur_exp.time_n_queries(given_dict, infer_dict)

        sizes.append(cur_datasize)
        est_times.append(float(sum(cur_est)) / len(cur_est))
        acc_times.append(float(sum(cur_acc)) / len(cur_acc))

    return (sizes, est_times, acc_times)
def perform_datasize_vs_accuracy(self, given_dict, infer_dict, max_datasize=None, steps=10):
    """Measure absolute estimation error for data sets of increasing size.

    For each of ``steps`` evenly spaced sizes up to ``max_datasize`` a fresh
    ``DataModel``/``Experimenter`` pair is built on the first ``size`` rows,
    and ``abs(estimation - actual)`` is recorded.  When a requested size
    exceeds the available data, the data is repeated end-to-end until it is
    long enough.

    Args:
        given_dict: Forwarded unchanged to ``get_actual_result`` and
            ``generic_get_estimated_result``.
        infer_dict: Forwarded unchanged to the same two calls.
        max_datasize: Largest data size to test; defaults to
            ``len(self.dm.data)``.
        steps: Number of sizes to test.

    Returns:
        Tuple ``(size, accuracy)`` of equal-length lists: the tested sizes
        and the absolute difference between estimated and actual result at
        each size.

    Raises:
        ValueError: If padding is required but ``self.dm.data`` is empty.
    """
    source_rows = self.dm.data
    if max_datasize is None:
        max_datasize = len(source_rows)
    # Floor division keeps the step an int on Python 3 too; a float step
    # would make the slice below fail.
    data_step = max_datasize // steps

    # Pad a private copy.  Extending self.dm.data in place would permanently
    # enlarge the model's data set for every later experiment.
    padded_rows = list(source_rows)

    size = []
    accuracy = []
    for i in range(steps):
        cur_datasize = (i + 1) * data_step

        if len(padded_rows) < cur_datasize:
            if len(source_rows) == 0:
                # Repeating an empty data set can never reach the target size.
                raise ValueError("cannot pad an empty data set to size %d" % cur_datasize)
            while len(padded_rows) < cur_datasize:
                padded_rows.extend(source_rows)

        cur_dm = DataModel("")
        cur_dm.set_data(padded_rows[:cur_datasize])
        cur_exp = Experimenter(cur_dm, self.attr_list)

        actual = cur_exp.get_actual_result(given_dict, infer_dict)
        estimation = cur_exp.generic_get_estimated_result(given_dict, infer_dict)

        size.append(cur_datasize)
        accuracy.append(abs(estimation - actual))

    return (size, accuracy)