def ml_get_zoo_tree(train_size=0.75, max_depth=5, random_state=245245):
    """Train a decision tree classifier on the zoo dataset.

    The data is read from ``data/zoo.csv`` located next to this module. The
    ``animal_name`` column is discarded, ``class`` is used as the target and
    every remaining column is used as a feature.

    Args:
        train_size: Passed to ``train_test_split``; controls how much of the
            data is used for training.
        max_depth: Maximum depth of the fitted tree.
        random_state: Seed used both for the train/test split and for the
            classifier itself, so repeated calls with the same arguments
            yield the same tree.

    Returns:
        The fitted ``DecisionTreeClassifier``. The feature column names are
        attached to it as ``_feature_names``.
    """
    # Load the zoo data
    dataset = pd.read_csv(
        os.path.join(os.path.dirname(__file__), "data", "zoo.csv"))

    # Drop the animal names since this is not a good feature to split the data on
    dataset = dataset.drop("animal_name", axis=1)

    # Split the data into a training and a testing set; only the training
    # part is needed here, so the test halves are discarded.
    features = dataset.drop("class", axis=1)
    targets = dataset["class"]
    train_features, _, train_targets, _ = train_test_split(
        features, targets, train_size=train_size, random_state=random_state)

    # Train the model. The seed is forwarded to the classifier as well:
    # scikit-learn shuffles the features at each split, so without a fixed
    # seed ties between equally good splits may be resolved differently
    # from run to run.
    tree = DecisionTreeClassifier(
        criterion="entropy", max_depth=max_depth, random_state=random_state)
    tree = tree.fit(train_features, train_targets)

    # Add the feature names to the tree for use in predict function
    tree._feature_names = features.columns

    return tree
def ml_get_zoo_tree(train_size=0.75, max_depth=5, random_state=245245):
    """Build and fit an entropy-based decision tree for the zoo data.

    Reads ``zoo.csv`` from the ``data`` directory beside this file, removes
    the ``animal_name`` column, and fits a classifier that predicts the
    ``class`` column from all other columns.

    Args:
        train_size: Forwarded to ``train_test_split`` to size the training
            subset.
        max_depth: Depth limit handed to ``DecisionTreeClassifier``.
        random_state: Seed applied to the data split and to the classifier,
            making the resulting tree reproducible for fixed arguments.

    Returns:
        The fitted ``DecisionTreeClassifier`` with an extra
        ``_feature_names`` attribute holding the feature column index.
    """
    # Load the zoo data
    csv_path = os.path.join(os.path.dirname(__file__), "data", "zoo.csv")
    dataset = pd.read_csv(csv_path)

    # Drop the animal names since this is not a good feature to split the data on
    dataset = dataset.drop("animal_name", axis=1)

    # Separate the inputs from the label column
    features = dataset.drop("class", axis=1)
    targets = dataset["class"]

    # Only the training portion of the split is used by this function.
    train_features, _, train_targets, _ = train_test_split(
        features, targets, train_size=train_size, random_state=random_state)

    # Seeding the classifier too keeps the fit deterministic: feature order
    # is randomly permuted at each split, which otherwise lets equally good
    # splits be chosen differently between runs.
    tree = DecisionTreeClassifier(
        criterion="entropy",
        max_depth=max_depth,
        random_state=random_state,
    ).fit(train_features, train_targets)

    # Add the feature names to the tree for use in predict function
    tree._feature_names = features.columns

    return tree