def best_threshold(X, Y, D):
    '''
        Choose the cutting point of a continuous attribute that gives the largest weighted information gain.
        Input:
            X: the attribute values, a numpy array of int/float values.
            Y: the labels, a numpy array of int/float/string values.
            D: the weights of instances, a numpy float vector of length n
        Output:
            th: the best threshold, a float scalar (-inf when every candidate is -inf).
            g: the weighted information gain obtained with th, a float scalar (-1 when every candidate is -inf).
    '''
    candidates = DT.cutting_points(X, Y)

    # Every candidate being -inf is treated as "nothing to split on".
    if np.all(candidates == -np.inf):
        return -float('inf'), -1

    # Score the binary split "X >= candidate" for each candidate in turn.
    gains = [DS.information_gain(Y, X >= candidate, D)
             for candidate in np.nditer(candidates)]

    # np.argmax picks the first maximum, so ties go to the earliest candidate.
    g = max(gains)
    th = candidates[np.argmax(gains)]
    return th, g
def best_threshold(X, Y, D):
    '''
        Find the best threshold among all possible cutting points in the continuous attribute of X.
        The data instances are weighted.
        Input:
            X: the attribute values, a list or numpy array of int/float values.
            Y: the labels, a numpy array of int/float/string values.
            D: the weights of instances, a numpy float vector of length n
        Output:
            th: the best threshold, a float scalar (-inf when there is no candidate).
            g: the weighted information gain by using the best threshold, a float scalar
               (-1 when there is no candidate).
    '''
    th, g = float('-inf'), -1

    cp = DT.cutting_points(X, Y)
    # Anything that is not an array of candidates is treated as "no cutting
    # point available" and reported with the (-inf, -1) sentinel.
    if not isinstance(cp, np.ndarray):
        return th, g

    values = np.asarray(X)
    for point in cp:
        # A plain comparison always yields a full-length boolean array, whereas
        # np.ma.masked_where(...).mask collapses to the scalar `nomask` when no
        # element lies above the cut.
        gain = DS.information_gain(Y, values > point, D)
        # Strict comparison: on ties the earliest candidate is kept.
        if gain > g:
            th, g = point, gain
    return th, g
def best_threshold(X, Y, D):
    '''
        Find the best threshold among all possible cutting points in the continuous attribute of X.
        The data instances are weighted.
        Input:
            X: the attribute values, a list or numpy array of int/float values.
            Y: the labels, a numpy array of int/float/string values.
            D: the weights of instances, a numpy float vector of length n
        Output:
            th: the best threshold, a float scalar (-inf when there is no candidate).
            g: the weighted information gain by using the best threshold, a float scalar
               (-1 when there is no candidate).
    '''
    # Sentinel result, also returned when the candidate collection is empty.
    th, g = -float('inf'), -1

    cp = DT.cutting_points(X, Y)
    # Only the "is it iterable?" probe is guarded, so a genuine TypeError raised
    # while computing the information gain is no longer silently reported as
    # "no threshold".
    try:
        candidates = iter(cp)
    except TypeError:
        return th, g

    values = np.asarray(X)
    for point in candidates:
        # Label each instance by the side of the cut it falls on.
        side = np.where(values > point, "T", "F")
        gain = DS.information_gain(Y, side, D)
        # Strict comparison: on ties the earliest candidate is kept.
        if gain > g:
            th, g = point, gain
    return th, g
def best_threshold(X, Y, D):
    '''
        Find the best threshold among all possible cutting points in the continuous attribute of X.
        The data instances are weighted.
        Input:
            X: the attribute values, a list or numpy array of int/float values.
            Y: the labels, a numpy array of int/float/string values.
            D: the weights of instances, a numpy float vector of length n
        Output:
            th: the best threshold, a float scalar (-inf when there is no candidate).
            g: the weighted information gain by using the best threshold, a float scalar
               (-1 when th is -inf).
    '''
    # atleast_1d lets a bare scalar (e.g. a lone -inf) be handled like a
    # one-element array instead of failing on iteration.
    candidates = np.atleast_1d(DT.cutting_points(X, Y))
    if candidates.size == 0:
        # max() below would raise on an empty list; report "no threshold".
        return -float('inf'), -1

    values = np.asarray(X)
    # Encode the split as 0 (below the cut) / 1 (at or above the cut).
    gains = [DS.information_gain(Y, np.where(values < point, 0, 1), D)
             for point in candidates]

    g = max(gains)
    # list.index returns the first match, so ties go to the earliest candidate.
    th = candidates[gains.index(g)]
    if th == float('-inf'):
        # A -inf threshold means no real cut exists; flag it with gain -1.
        g = -1
    return th, g
def best_threshold(X, Y, D):
    '''
        Find the best threshold among all possible cutting points in the continuous attribute of X.
        The data instances are weighted.
        Input:
            X: the attribute values, a list or numpy array of int/float values.
            Y: the labels, a numpy array of int/float/string values.
            D: the weights of instances, a numpy float vector of length n
        Output:
            th: the best threshold, a float scalar (-inf when there is no candidate).
            g: the weighted information gain by using the best threshold, a float scalar
               (-1 when there is no candidate).
    '''
    # Sentinel result, also returned when the candidate collection is empty.
    th, g = -float('inf'), -1

    cp = DT.cutting_points(X, Y)
    # A bare float stands for "no cutting point". isinstance also accepts
    # numpy float64 scalars, which an exact type() comparison would miss.
    if isinstance(cp, float):
        return th, g

    values = np.asarray(X)
    for point in cp:
        # 'L' marks instances above the cut, 'S' the rest.
        side = np.where(values > point, 'L', 'S')
        gain = DS.information_gain(Y, side, D)
        # Strict comparison: on ties the earliest candidate is kept.
        if gain > g:
            th, g = point, gain
    return th, g
def best_threshold(X, Y, D):
    '''
        Choose the cutting point of a continuous attribute that gives the largest weighted information gain.
        Input:
            X: the attribute values, a numpy array of int/float values.
            Y: the labels, a numpy array of int/float/string values.
            D: the weights of instances, a numpy float vector of length n
        Output:
            th: the best threshold, a float scalar (-inf when every candidate is -inf).
            g: the weighted information gain obtained with th, a float scalar (-1 when every candidate is -inf).
    '''
    candidates = DT.cutting_points(X, Y)

    # Every candidate being -inf is treated as "nothing to split on".
    if np.all(candidates == -np.inf):
        return -float('inf'), -1

    # Score the binary split "X >= point" for each candidate in order.
    gains = []
    for point in candidates:
        at_or_above = X >= point
        gains.append(DS.information_gain(Y, at_or_above, D))

    # np.argmax picks the first maximum, so ties go to the earliest candidate.
    g = max(gains)
    th = candidates[np.argmax(gains)]
    return th, g