def decision_tree_tests():
    """Sanity-check ``dt.entropy_calc`` on small hand-built binary data.

    Builds one label vector and four six-element feature vectors, asks
    ``dt.entropy_calc`` for the entropy associated with each, and prints a
    message for every expectation that does not hold. The function returns
    nothing; failures are only reported on stdout.

    Equality expectations are checked with ``np.isclose`` instead of ``!=``
    so that floating-point rounding inside the entropy computation cannot
    trigger a spurious failure.
    """
    ys = np.array([1, 1, 0, 0, 1, 0])
    xs1 = np.array([1, 1, 1, 1, 1, 1])  # constant feature
    xs2 = np.array([1, 1, 0, 0, 1, 0])  # identical to ys
    xs3 = np.array([1, 0, 1, 0, 1, 0])  # agrees with ys in 4 of 6 positions
    xs4 = np.array([1, 0, 0, 0, 1, 0])  # agrees with ys in 5 of 6 positions

    # NOTE(review): the third argument looks like the list of values the
    # feature may take -- confirm against dt.entropy_calc.
    ent_base = dt.entropy_calc(ys, [], [])
    ent_x1 = dt.entropy_calc(ys, xs1, [0, 1])
    ent_x2 = dt.entropy_calc(ys, xs2, [0, 1])
    ent_x3 = dt.entropy_calc(ys, xs3, [0, 1])
    ent_x4 = dt.entropy_calc(ys, xs4, [0, 1])
    # Same feature as ent_x3, but with an extra value (2) that never occurs.
    ent_x3_2 = dt.entropy_calc(ys, xs3, [0, 1, 2])

    if not np.isclose(ent_base, 1.0):
        print('ERROR: Base case for entropy calculation incorrect.')

    if not np.isclose(ent_x1, 1.0):
        print('Error: Random data should give entropy = 1!')
        print('Calculated entropy is ' + str(ent_x1))

    if not np.isclose(ent_x2, 0.0):
        print('Error: Perfect data should give entropy = 0!')
        print('Calculated entropy is ' + str(ent_x2))

    if ent_x3 < ent_x2:
        print('Error: Imperfect data should do worse than perfect data!')
        print('Malformed expression: ' + str(ent_x3) + '<' + str(ent_x2))

    if ent_x3 > ent_x1:
        print('Error: Imperfect data should do better than random data!')
        print('Malformed expression: ' + str(ent_x3) + '>' + str(ent_x1))

    # xs4 matches ys more closely than xs3, so its entropy should not exceed
    # the entropy obtained with xs3.
    if ent_x3 < ent_x4:
        print('Error: Malformed expression: ' + str(ent_x3) + '<' + str(ent_x4))

    if not np.isclose(ent_x3_2, ent_x3):
        print('Error: Empty class should not matter')
tag_master = tg.string_tags(xy_train['KC(Default)']) [tag_array,opp_array] = tg.tags_to_array(xy_train['KC(Default)'],xy_train['Opportunity(Default)'],tag_master) #Look up location of index in array #tag_master.index(SOME STRING) int_s = ['Correct First Attempt','Incorrects','Hints','Corrects'] for i in range(len(int_s)): xy_train[int_s[i]] = map(int,xy_train[int_s[i]]) y_pred = xy_train['Correct First Attempt'] #Check entropy of the data ent = dt.entropy_calc(y_pred,[0],[]) def step_normalize(stud_IDs,stud_dict,step_start_time,first_trans_time,corr_trans_time,step_end_time): # Normalizes the step start time by student's first transation time aa = np.copy(step_start_time) bb = np.copy(first_trans_time) dd = np.copy(step_end_time) cc = np.copy(corr_trans_time) for stud in stud_dict: print('Processing student ' + str(stud)) rel_steps = [step_start_time[i] for i in np.where(stud_IDs == stud)][0] rel_ind = np.where(stud_IDs == stud)[0] # In case this array isn't sorted... rel_steps_ind_sort = np.argsort(rel_steps) fnz = [i for i in rel_steps if i > 0]