예제 #1
0
def decision_tree_tests():
    """Sanity-check ``dt.entropy_calc`` on a small hand-built data set.

    Six binary labels (``ys``) are paired with four binary feature vectors:

    * ``xs1`` -- constant (all ones)
    * ``xs2`` -- identical to ``ys``
    * ``xs3`` -- agrees with ``ys`` in 4 of 6 positions
    * ``xs4`` -- agrees with ``ys`` in 5 of 6 positions

    Each failed expectation prints a diagnostic message; nothing is raised
    and nothing is returned.
    """
    xs1 = np.array([1, 1, 1, 1, 1, 1])
    xs2 = np.array([1, 1, 0, 0, 1, 0])
    xs3 = np.array([1, 0, 1, 0, 1, 0])
    xs4 = np.array([1, 0, 0, 0, 1, 0])
    ys = np.array([1, 1, 0, 0, 1, 0])

    # Base case: empty feature vector and empty class list.
    ent_base = dt.entropy_calc(ys, [], [])
    ent_x1 = dt.entropy_calc(ys, xs1, [0, 1])
    ent_x2 = dt.entropy_calc(ys, xs2, [0, 1])
    ent_x3 = dt.entropy_calc(ys, xs3, [0, 1])
    ent_x4 = dt.entropy_calc(ys, xs4, [0, 1])

    # Same feature as ent_x3, but with an extra class value (2) that never
    # occurs in xs3.
    ent_x3_2 = dt.entropy_calc(ys, xs3, [0, 1, 2])

    # Entropies are floats, so equality checks use a tolerance rather than
    # exact comparison to avoid spurious failures from rounding.
    if not np.isclose(ent_base, 1.0):
        print('ERROR: Base case for entropy calculation incorrect.')
    if not np.isclose(ent_x1, 1.0):
        print('Error: Random data should give entropy = 1!')
        print('Calculated entropy is ' + str(ent_x1))
    if not np.isclose(ent_x2, 0.0):
        print('Error: Perfect data should give entropy = 0!')
        print('Calculated entropy is ' + str(ent_x2))
    if ent_x3 < ent_x2:
        print('Error: Imperfect data should do worse than perfect data!')
        print('Malformed expression: ' + str(ent_x3) + '<' + str(ent_x2))
    if ent_x3 > ent_x1:
        print('Error: Imperfect data should do better than random data!')
        print('Malformed expression: ' + str(ent_x3) + '>' + str(ent_x1))
    # xs3 matches ys less often than xs4 does, so its entropy must not be
    # lower than that of xs4.
    if ent_x3 < ent_x4:
        print('Error: Malformed expression: ' + str(ent_x3) + '<' + str(ent_x4))
    if not np.isclose(ent_x3_2, ent_x3):
        print('Error: Empty class should not matter')
예제 #2
0
# Build the master tag collection from the KC(Default) column.
tag_master = tg.string_tags(xy_train['KC(Default)'])

# Convert the KC and opportunity columns into arrays using tag_master.
tag_array, opp_array = tg.tags_to_array(
    xy_train['KC(Default)'],
    xy_train['Opportunity(Default)'],
    tag_master,
)

#Look up location of index in array
#tag_master.index(SOME STRING)
int_s = ['Correct First Attempt','Incorrects','Hints','Corrects']

# Cast each listed column to int. A list comprehension is used instead of
# map() so the result is a concrete list on Python 3 as well as Python 2
# (on Python 3, map() returns a lazy iterator).
for col in int_s:
    xy_train[col] = [int(val) for val in xy_train[col]]

y_pred = xy_train['Correct First Attempt']

#Check entropy of the data
# NOTE(review): other callers pass an empty feature list for the base-case
# entropy; confirm that [0] is the intended second argument here.
ent = dt.entropy_calc(y_pred,[0],[])

def step_normalize(stud_IDs,stud_dict,step_start_time,first_trans_time,corr_trans_time,step_end_time):
# Normalizes the step start time by student's first transaction time
    aa = np.copy(step_start_time)
    bb = np.copy(first_trans_time)
    dd = np.copy(step_end_time)
    cc = np.copy(corr_trans_time)
    for stud in stud_dict:
        print('Processing student ' + str(stud))
        rel_steps = [step_start_time[i] for i in np.where(stud_IDs == stud)][0]
        rel_ind = np.where(stud_IDs == stud)[0]

# In case this array isn't sorted...
        rel_steps_ind_sort = np.argsort(rel_steps)
        fnz = [i for i in rel_steps if i > 0]