def scientist(self, game_ended):
    """Decide the player's next move: play a card or announce a rule.

    Args:
        game_ended: True when the game is already over and only the final
            rule is wanted.

    Returns:
        The rule produced by ``self.hypothesis.get_rule()`` when the game
        has ended or the quitting criterion is met, otherwise the card
        chosen by ``self.pick_card``.
    """
    # Quitting criterion: enough cards seen and a long streak of correct
    # predictions from the current hypothesis.
    quitting = (self.total_cards > 20) and (self.guesses_correct > 20)

    if game_ended or quitting:
        if not game_ended:
            # We are the ones ending the game.
            self.ended_game = True
        if self.hypothesis is None:
            self.hypothesis = DecisionTree()
            self.hypothesis.build_tree(
                self.training_data, self.ATTRIBUTES[-1], self.ATTRIBUTES)
        return self.hypothesis.get_rule()

    # Rebuild the tree only when it has been flagged as out of date.
    if len(self.training_data) > 0 and self.rebuildTree:
        if self.hypothesis is None:
            self.hypothesis = DecisionTree()
        self.hypothesis.build_tree(
            self.training_data, self.ATTRIBUTES[-1], self.ATTRIBUTES)
        # The tree is current again. Without clearing the flag the streak
        # counter feeding the quitting criterion could never advance.
        self.rebuildTree = False

    # Pick a card (pick_card also refills the hand).
    card = self.pick_card(self.BOARD[-2][0], self.BOARD[-1][0])
    # Record which card number we played.
    self.cards_played.append(self.total_cards)
    return card
def fit(self, x_train, y_train):
    """Fit ``self.no_estimators`` decision trees and store them.

    Each fitted tree is appended to ``self.fitted_trees``. When
    ``self.bootstrap`` is truthy every tree is trained on a sample of
    ``len(x_train)`` row indices drawn uniformly with replacement;
    otherwise every tree sees the full training data.

    Args:
        x_train: training features; must support integer-array indexing
            when bootstrapping is enabled.
        y_train: training targets, indexed the same way as ``x_train``.
    """
    n_samples = len(x_train)
    for estimator_no in range(1, self.no_estimators + 1):
        # Carriage return keeps the progress message on a single line.
        progress = "\rRunning estimator {0}/{1}...".format(
            estimator_no, self.no_estimators)
        print(progress, end='')

        if self.bootstrap:
            sample_idx = np.random.randint(0, n_samples, n_samples)
            x_sample, y_sample = x_train[sample_idx], y_train[sample_idx]
        else:
            x_sample, y_sample = x_train, y_train

        estimator = DecisionTree(
            random_state=None,
            split_measure=self.split_measure,
            min_impurity_split=self.min_impurity_split,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            no_splits=self.no_splits,
            max_features=self.max_features,
            print_flag=False,
        )
        estimator.fit(x_sample, y_sample)
        self.fitted_trees.append(estimator)
    # Finish the progress line.
    print("\r")
def rf_train(data, forest_size, tree_depth):
    """Train ``forest_size`` decision trees on ``data`` and return them.

    Args:
        data: training data handed unchanged to ``DecisionTree.train``.
        forest_size: number of trees to build.
        tree_depth: depth argument forwarded to ``DecisionTree.train``.

    Returns:
        A list of trained ``DecisionTree`` objects whose ``root`` holds the
        value returned by ``train``.
    """
    print("rf train")
    forest = []
    for _ in range(forest_size):
        member = DecisionTree()
        # NOTE(review): the third argument presumably switches on the
        # random-forest behaviour of train() -- confirm in DecisionTree.
        member.root = member.train(data, tree_depth, True)
        forest.append(member)
    return forest
def train(self, data, forest_size, tree_depth):
    """Train ``forest_size`` decision trees on ``data`` and return them.

    Args:
        data: training data handed to ``DecisionTree.train``.
        forest_size: number of trees to build.
        tree_depth: depth argument forwarded to ``DecisionTree.train``.

    Returns:
        A list with one trained ``DecisionTree`` per requested tree (empty
        when ``forest_size`` is 0).
    """
    classifiers = []
    for _ in range(forest_size):
        # NOTE(review): every tree is trained on the same, un-resampled
        # data; any randomisation has to come from train() itself.
        tree = DecisionTree()
        # Use the tree_depth parameter; the previous code referenced an
        # undefined name `depth`.
        tree.root = tree.train(data, tree_depth, True)
        # Store the trained tree; the previous code called append() with
        # no argument, which raises TypeError.
        classifiers.append(tree)
    return classifiers
def __init__(self, num_tree, algorithm='ID3', mode='classification'):
    """Create a forest of ``num_tree`` untrained decision trees.

    Args:
        num_tree: number of ``DecisionTree`` instances to create.
        algorithm: algorithm name forwarded to every tree.
        mode: task mode forwarded to every tree and stored on ``self.mode``.
    """
    super().__init__()
    # Names of the two supported modes, kept as attributes for comparison.
    self.classification = 'classification'
    self.regression = 'regression'
    self.mode = mode

    forest = []
    for _ in range(num_tree):
        forest.append(DecisionTree(algorithm, mode, RF=True))
    self.RF = forest
if __name__ == "__main__":
    # Load the data set
    data = load_iris()
    x, y = data['data'], data['target']

    # Split into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=40, shuffle=True)
    # Stack features and labels column-wise into one training array
    train_data = np.c_[X_train, y_train]

    # ----------------------- decision tree run -----------------------
    # Create the decision tree model
    tree = DecisionTree(mode='classification')
    # Train
    tree.train(train_data)
    # Prune (disabled)
    # tree.pruning(train_data, 0.03)
    # Predict
    y_pre = tree(X_test)

    # ----------------------- random forest run -----------------------
    # Create the random forest model
    # RF = RandomForest(num_tree=10)
    # Train the random forest
    # RF.train(train_data, RF_k=3)
    # Predict
    # y_pre = RF.predict(X_test)
def _count_mismatches(labels, predictions):
    """Return ``(errors, total)`` comparing labels with predictions by position."""
    errors = 0
    total = 0
    for position, actual in enumerate(labels):
        if actual != predictions[position]:
            errors += 1
        total += 1
    return errors, total


def main():
    """Run the census experiment selected by the switches below.

    The data returned by ``census()`` is cleaned, shuffled, split at
    ``index`` into validation (first part) and training (rest) and one-hot
    encoded. Depending on the switches a random forest is trained, or
    single trees of depth 1..20 are trained and their validation accuracy
    is plotted to ``census_tests``. Finally either the validation error
    rate of the last predictions is printed or the predictions are written
    to ``spam_test_predictions.csv``.

    Raises:
        ZeroDivisionError: if the validation split is empty.
    """
    # MAKE SURE ALL OF THESE SWITCHES ARE SET CORRECTLY!!!!
    random_forest_switch = False
    error_rate_switch = True
    test_switch = False
    spam_val = False

    input_file, test_input_file, index, label, categorical = census()
    if not spam_val:
        replace_missing_values(input_file, categorical)
        mean, mode = mean_and_mode(input_file)
        impute(input_file, mean, mode, categorical)
        random.shuffle(input_file)
    else:
        input_file = shuffle(input_file)

    train = input_file[index:]
    validation = input_file[:index]
    train_d = pd.get_dummies(pd.DataFrame(train))
    validation_d = pd.get_dummies(pd.DataFrame(validation))

    if test_switch:
        # NOTE(review): unlike the training path these calls do not pass
        # `categorical`, and `test_d` is not consumed below -- confirm the
        # intended test-set handling before enabling this switch.
        replace_missing_values(test_input_file)
        test_mean, test_mode = mean_and_mode(test_input_file)
        impute(test_input_file, test_mean, test_mode)
        test_df = pd.DataFrame(test_input_file)
        test_d = pd.get_dummies(test_df)

    validation_labels = validation_d[label]

    if random_forest_switch:
        all_classifiers = rf_train(train_d, 30, 18)
        predict = rf_predict(all_classifiers, validation_d)
        print("RF PREDICT")
        print(predict[:50])
    else:
        # Sweep tree depth 1..20 and record validation accuracy per depth.
        depths = list(range(1, 21))
        accuracies = []
        for depth in depths:
            classifier = DecisionTree()
            classifier.root = classifier.train(train_d, depth)
            predict = classifier.predict(validation_d)
            errors, total = _count_mismatches(validation_labels, predict)
            accuracy = 1 - (errors / total)
            # The value is an accuracy; it used to be printed under the
            # misleading label "ERROR RATE:".
            print("ACCURACY:")
            print(accuracy)
            accuracies.append(accuracy)
        plot.plot(depths, accuracies, label="census graphs")
        plot.legend()
        plot.grid()
        plot.xlabel("Depth")
        plot.ylabel("Accuracy")
        plot.savefig("census_tests")

    if error_rate_switch:
        # CALCULATE ERROR RATE (of the most recent predictions)
        errors, total = _count_mismatches(validation_labels, predict)
        print("ERROR RATE:")
        print(errors / total)
    else:
        # WRITE TO CSV
        predictions = [["id", "category"]]
        predictions.extend(
            [row_id, value] for row_id, value in enumerate(predict))
        # newline='' is what the csv module requires of files it writes to;
        # without it extra blank rows appear on some platforms.
        with open('spam_test_predictions.csv', 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerows(predictions)
def tree(X,y,i):
    """Fit a decision tree of maximum depth ``i`` and print its training error.

    The tree is fitted on ``(X, y)`` and evaluated on the same ``X``; the
    printed value is the mean of ``y != predictions``.
    """
    classifier = DecisionTree(max_depth=i,
                              stump_class=ds.DecisionStumpErrorRate)
    classifier.fit(X, y)
    predictions = classifier.predict(X)
    training_error = np.mean(y != predictions)
    print(i, ":", training_error)
class Player(object):
    """Card-game player that learns the dealer's rule with a decision tree.

    Every observed card becomes a training datum; ``self.hypothesis`` is a
    ``DecisionTree`` built from those data whose rule is used to pick cards
    and is reported when the game ends.
    """

    def __init__(self, cards):
        """Set up the board, attribute names, learning state and hand.

        Args:
            cards: cards already on the board; each one becomes a
                ``(card, [])`` board entry.
        """
        # These variables replace the global variables in Phase I
        self.BOARD = [(c, []) for c in cards]
        self.VALUES = ["A", "2", "3", "4", "5", "6", "7", "8", "9", "10",
                       "J", "Q", "K"]
        self.SUITS = ["C", "D", "H", "S"]
        self.DECK = [x + y for x in self.VALUES for y in self.SUITS]

        # Helper variables for ATTRIBUTES. The order of ATTRIBUTES must
        # match the order of the features produced by create_datum.
        cards_att = ["previous2", "previous", "current"]
        individuals_att = ["suit", "color", "even", "is_royal"]
        comparisons = ["greater", "equal"]
        self.ATTRIBUTES = [x + "(" + str(y) + ")"
                           for y in cards_att for x in individuals_att]
        self.ATTRIBUTES += [x + "(value(" + y + ")," + z + ")"
                            for y in cards_att
                            for z in self.VALUES
                            for x in comparisons]
        self.ATTRIBUTES += [x + "(current" + "," + y + ")"
                            for y in cards_att[:-1] for x in comparisons]
        self.ATTRIBUTES += [x + "(previous, previous2)" for x in comparisons]
        # The closing parenthesis was missing here, which left these
        # attribute names unbalanced.
        self.ATTRIBUTES += [x + "(value(current)" + ",value(" + y + "))"
                            for y in cards_att[:-1] for x in comparisons]
        self.ATTRIBUTES += [x + "(value(previous), value(previous2))"
                            for x in comparisons]
        self.ATTRIBUTES += ["Legal"]

        # To keep track of running score
        self.game_score = 0
        self.ended_game = False

        # This is for our rule
        self.hypothesis = None
        self.training_data = []
        # A boolean that will tell us if we need to update the tree
        self.rebuildTree = True

        # These are for our quitting criteria
        self.cards_played = []
        self.total_cards = 0
        self.guesses_correct = 0

        # Setup our hand
        self.hand = [self.generate_random_card() for i in range(14)]

    def pick_card(self, prev2, prev):
        """Remove a card from the hand, refill the hand and return the card.

        Assume that our hypothesis is correct, and play a card that the
        hypothesis predicts is illegal, therefore increasing our chances of
        getting a false play. That way we gain information. If all cards in
        our hand are predicted to be legal or if we don't have a
        hypothesis, then play at random.

        Args:
            prev2: the card before the previous legal card.
            prev: the previous legal card.

        Returns:
            The card taken out of ``self.hand``.
        """
        if not self.hypothesis:
            to_play = self.hand.pop(random.randrange(len(self.hand)))
        else:
            hyp = parse(self.hypothesis.get_rule())
            for card in self.hand:
                if not hyp.evaluate((prev2, prev, card)):
                    to_play = self.hand.pop(self.hand.index(card))
                    break
            else:
                # Every card in the hand is predicted legal.
                to_play = self.hand.pop(random.randrange(len(self.hand)))
        self.hand.append(random.choice(self.DECK))
        return to_play

    def generate_random_card(self):
        """Return a random card in the deck."""
        return random.choice(self.VALUES) + random.choice(self.SUITS)

    def update_card_to_boardstate(self, card, result):
        """Record a played card and whether it was legal.

        Updates the training data, decides whether the decision tree needs
        rebuilding, updates the board and, where applicable, the score.

        Args:
            card: the card that was played.
            result: truthy if the card was legal.
        """
        # Construct an element of the training data
        datum = self.create_datum(card)
        datum.append(result)
        datum = tuple(datum)
        self.training_data.append(datum)

        # If we have built a tree
        if len(self.training_data) > 1 and self.hypothesis:
            # Figure out what our rule says about this card
            guess = self.guess_legal(datum)
            if guess != result:
                # We were wrong, so the tree must be rebuilt
                self.rebuildTree = True
                self.guesses_correct = 0
            elif not self.rebuildTree:
                # We were correct, and our tree is not proven wrong
                self.guesses_correct += 1
        else:
            self.rebuildTree = True

        # Now we can update the board state
        if result:
            self.BOARD.append((card, []))
        else:
            self.BOARD[-1][1].append(card)

        # Increase our score (iff we played the card and it counts towards
        # score). Guard against an empty cards_played list, which used to
        # raise IndexError when we had not played any card yet.
        played_by_us = (bool(self.cards_played)
                        and self.cards_played[-1] == self.total_cards)
        if self.total_cards > 20 and played_by_us:
            if result:
                self.game_score += 1
            else:
                self.game_score += 2

        # Increase the total number of cards that we've seen
        self.total_cards += 1

    def create_datum(self, card):
        """Return the feature list for ``card``, without the classification.

        This assumes that card has *NOT* been added to the BOARD, i.e. we
        have not "played" card yet. The feature order matches
        ``self.ATTRIBUTES`` (minus the final "Legal" entry).
        """
        prev2 = self.BOARD[-2][0]
        prev = self.BOARD[-1][0]
        cards = [prev2, prev, card]
        comparisons = [greater, equal]

        # we need suit, parity, color...
        individuals = [suit, color, even, is_royal]
        features = [x(y) for y in cards for x in individuals]

        # unfortunately we need features for comparing values (for each
        # card) to every rank, to encompass numerical differences;
        # this makes the feature list gigantic
        features += [x(str(y[:-1]), str(z))
                     for y in cards for z in self.VALUES for x in comparisons]

        # compare the deck values of the cards to each other
        features += [x(card, y) for y in [prev2, prev] for x in comparisons]
        features += [x(prev, prev2) for x in comparisons]

        # TODO: add anything else here that could possibly be a predicate
        # that we split on
        features += [x(card[:-1], y[:-1])
                     for y in [prev2, prev] for x in comparisons]
        features += [x(prev[:-1], prev2[:-1]) for x in comparisons]
        return features

    def guess_legal(self, datum):
        """Return our hypothesis about whether the datum's card is legal.

        A "Null" prediction from the tree is treated as False.
        """
        guess = self.hypothesis.predict(self.ATTRIBUTES, [datum])[0]
        if guess == "Null":
            guess = False
        return guess

    def scientist(self, game_ended):
        """The core of the Player's decision making.

        Args:
            game_ended: True when the game is already over.

        Returns:
            The hypothesis rule when the game has ended or the quitting
            criterion is met, otherwise the card to play.
        """
        # quitting criteria
        quitting = (self.total_cards > 20) and (self.guesses_correct > 20)

        if game_ended or quitting:
            if not game_ended:
                # we are the ones ending the game
                self.ended_game = True
            if self.hypothesis is None:
                self.hypothesis = DecisionTree()
                self.hypothesis.build_tree(
                    self.training_data, self.ATTRIBUTES[-1], self.ATTRIBUTES)
            return self.hypothesis.get_rule()

        # if we need to rebuild the tree, rebuild it
        if len(self.training_data) > 0 and self.rebuildTree:
            if self.hypothesis is None:
                self.hypothesis = DecisionTree()
            self.hypothesis.build_tree(
                self.training_data, self.ATTRIBUTES[-1], self.ATTRIBUTES)
            # The tree is current again. Without clearing the flag,
            # guesses_correct could never advance and the quitting
            # criterion above could never be met.
            self.rebuildTree = False

        # pick a card (pick_card also refills the hand)
        card = self.pick_card(self.BOARD[-2][0], self.BOARD[-1][0])
        # record what number card we played
        self.cards_played.append(self.total_cards)
        # play the card
        return card

    def score(self, rule):
        """Compute and return the score of the player.

        Subtracts 75 when our hypothesis is equivalent to ``rule`` and 25
        when we ended the game. The adjustments are applied to
        ``self.game_score`` on every call.
        """
        equiv = self.check_equivalence(rule)
        if equiv:
            self.game_score -= 75
        if self.ended_game:
            self.game_score -= 25
        return self.game_score

    def check_equivalence(self, rule):
        """Return True if ``rule`` agrees with our hypothesis on every triple.

        Evaluation errors are treated as "not equivalent" because
        rule.evaluate sometimes fails (like with greater()).

        TODO: Maybe remove this?
        TODO: Maybe change this for vacuous stuff?
            - Maybe use (None, None, x) to see if the dealer could play x.
              This would need error handling because None may fail.
            - Maybe parse the rule to ignore prev2/prev for the first two
              cards, i.e. evaluate only the parts that don't use them.
        """
        if self.hypothesis is None:
            return False
        try:
            hyp = parse(self.hypothesis.get_rule())
            for prev2 in self.DECK:
                for prev in self.DECK:
                    for curr in self.DECK:
                        # should check for vacuous equivalence
                        cards = (prev2, prev, curr)
                        if rule.evaluate(cards) != hyp.evaluate(cards):
                            return False
            return True
        except Exception:
            # Deliberately best-effort, but no longer swallows
            # KeyboardInterrupt/SystemExit like the bare except did.
            return False

    def play(self, game_ended=False):
        """This is mostly a wrapper for scientist."""
        return self.scientist(game_ended)

    def boardState(self):
        """Just returns the board."""
        return self.BOARD
x, y = data['data'], data['target']

# Alternative categorical toy data set (disabled); the last column is the
# yes/no label.
# X = np.array([
#     ['green', 'curled', 'muffled', 'clear', 'sunken', 'hard-smooth', 'yes'],
#     ['black', 'curled', 'dull', 'clear', 'sunken', 'hard-smooth', 'yes'],
#     ['black', 'curled', 'muffled', 'clear', 'sunken', 'hard-smooth', 'yes'],
#     ['green', 'curled', 'dull', 'clear', 'sunken', 'hard-smooth', 'yes'],
#     ['pale', 'curled', 'muffled', 'clear', 'sunken', 'hard-smooth', 'yes'],
#     ['green', 'slightly curled', 'muffled', 'clear', 'slightly sunken', 'soft-sticky', 'yes'],
#     ['black', 'slightly curled', 'muffled', 'slightly blurry', 'slightly sunken', 'soft-sticky', 'yes'],
#     ['black', 'slightly curled', 'muffled', 'clear', 'slightly sunken', 'hard-smooth', 'yes'],
#     ['black', 'slightly curled', 'dull', 'slightly blurry', 'slightly sunken', 'hard-smooth', 'no'],
#     ['green', 'stiff', 'crisp', 'clear', 'flat', 'soft-sticky', 'no'],
#     ['pale', 'stiff', 'crisp', 'blurry', 'flat', 'hard-smooth', 'no'],
#     ['pale', 'curled', 'muffled', 'blurry', 'flat', 'soft-sticky', 'no'],
#     ['green', 'slightly curled', 'muffled', 'slightly blurry', 'sunken', 'hard-smooth', 'no'],
#     ['pale', 'slightly curled', 'dull', 'slightly blurry', 'sunken', 'hard-smooth', 'no'],
#     ['black', 'slightly curled', 'muffled', 'clear', 'slightly sunken', 'soft-sticky', 'no'],
#     ['pale', 'curled', 'muffled', 'blurry', 'flat', 'hard-smooth', 'no'],
#     ['green', 'curled', 'dull', 'slightly blurry', 'slightly sunken', 'hard-smooth', 'no'],
# ])

# Stack features and targets column-wise into one array.
Y = np.c_[x, y]

# Regression variant (disabled):
# tree = DecisionTree(mode='regression')
tree = DecisionTree(mode='classification')

print(tree.train(Y))
print('\n')
print(tree.pruning(Y, 0.03))
# Predict sample 9 (reshaped to a single row) and show its target next to it.
print(tree(x[9].reshape([1, -1])))
print(y[9])
# print(len((8,9)))