def test_regressor_toy(regressor):
    """Fit ``regressor`` on a 14-row toy weather table and cross-validate it.

    Each sample is a dict with the keys 'outlook', 'temperature', 'humidity'
    (strings) and 'wind' (bool). The dicts are vectorized with
    ``DictVectorizer``; the vectorizer's ``index_to_feature_type`` mapping is
    handed to the regressor before fitting. Cross-validation is requested
    with one fold per sample (``num_folds=len(Y)``).

    Args:
        regressor: object exposing ``set_index_to_feature_type`` and ``fit``,
            and accepted by ``cross_validation``.
    """
    feature_names = ('outlook', 'temperature', 'humidity', 'wind')
    # One tuple per sample, in the same column order as ``feature_names``.
    table = (
        ('rain', 'hot', 'high', False),
        ('rain', 'hot', 'high', True),
        ('overcast', 'hot', 'high', False),
        ('sunny', 'mild', 'high', False),
        ('sunny', 'cool', 'normal', False),
        ('sunny', 'cool', 'normal', True),
        ('overcast', 'cool', 'normal', True),
        ('rain', 'mild', 'high', False),
        ('rain', 'cool', 'normal', False),
        ('sunny', 'mild', 'normal', False),
        ('rain', 'mild', 'normal', True),
        ('overcast', 'mild', 'high', True),
        ('overcast', 'hot', 'normal', False),
        ('sunny', 'mild', 'high', True),
    )
    samples = [dict(zip(feature_names, row)) for row in table]
    targets = np.array([26, 30, 48, 46, 62, 23, 43, 36, 38, 48, 48, 62, 44, 30])

    encoder = DictVectorizer()
    encoded = encoder.fit_transform(samples)
    print(encoder.feature_to_index)
    print(encoder.index_to_feature_type)

    regressor.set_index_to_feature_type(
        index_to_feature_type=encoder.index_to_feature_type
    )
    regressor.fit(encoded, targets)

    cross_validation(
        regressor,
        encoded,
        targets,
        task_type='regression',
        num_folds=len(targets),
    )
def test_regressor_boston_house(regressor):
    """Cross-validate ``regressor`` with 5 folds on the ``load_boston()`` dataset.

    Every feature index is declared 'numerical' by passing the regressor a
    ``defaultdict`` whose default value is that string.

    Args:
        regressor: object exposing ``set_index_to_feature_type`` and accepted
            by ``cross_validation``.
    """
    # this actually takes many seconds, even though the data is small
    # NOTE(review): if load_boston is scikit-learn's loader, it was removed in
    # scikit-learn 1.2 -- confirm the pinned version.
    boston = load_boston()
    regressor.set_index_to_feature_type(defaultdict(lambda: 'numerical'))
    cross_validation(
        regressor,
        boston.data,
        boston.target,
        task_type='regression',
        num_folds=5,
    )
def test_classifier_breast_cancer(classifier):
    """Cross-validate ``classifier`` with 3 folds on the ``load_breast_cancer()`` dataset.

    Every feature index is declared 'numerical' via a ``defaultdict``; the
    shape of the data matrix is printed before cross-validation starts.

    Args:
        classifier: object exposing ``set_index_to_feature_type`` and accepted
            by ``cross_validation``.
    """
    # this actually takes many seconds, even though the data is small
    dataset = load_breast_cancer()
    classifier.set_index_to_feature_type(defaultdict(lambda: 'numerical'))
    print(dataset.data.shape)
    cross_validation(
        classifier,
        dataset.data,
        dataset.target,
        task_type='classification',
        num_folds=3,
    )
def test_decision_tree_regressor_boston_house():
    """Fit, print and cross-validate a depth-3 ``DecisionTreeRegressor``.

    The dataset comes from ``load_boston()`` and is printed in full. All
    feature indices are declared 'numerical' via a ``defaultdict``. The tree
    is fitted on the whole dataset and printed, then passed to
    ``cross_validation`` with 5 folds.
    """
    # this actually takes many seconds, even though the data is small
    # NOTE(review): if load_boston is scikit-learn's loader, it was removed in
    # scikit-learn 1.2 -- confirm the pinned version.
    boston = load_boston()
    print(boston)

    regressor = DecisionTreeRegressor(max_depth=3)
    regressor.set_index_to_feature_type(defaultdict(lambda: 'numerical'))
    regressor.fit(boston.data, boston.target)
    regressor.print_tree()

    cross_validation(
        regressor,
        boston.data,
        boston.target,
        task_type='regression',
        num_folds=5,
    )
def test_random_forest_classifier_breast_cancer():
    """Cross-validate a 50-tree ``RandomForestClassifier`` with 5 folds.

    The dataset comes from ``load_breast_cancer()``. The forest is built with
    no depth limit (``max_depth=None``) and is told to sample from one third
    of the feature columns (truncated to an int). All feature indices are
    declared 'numerical' via a ``defaultdict``.
    """
    # this actually takes many seconds, even though the data is small
    dataset = load_breast_cancer()

    column_count = dataset.data.shape[1]
    features_per_split = int(column_count / 3)
    classifier = RandomForestClassifier(
        num_trees=50,
        max_depth=None,
        num_features_to_sample_from=features_per_split,
    )
    classifier.set_index_to_feature_type(defaultdict(lambda: 'numerical'))

    cross_validation(
        classifier,
        dataset.data,
        dataset.target,
        num_folds=5,
        task_type='classification',
    )
def test_linear_regressor_diabetes():
    """Cross-validate ``LinearRegressor(max_iters=5000)`` with 10 folds.

    The dataset comes from ``load_diabetes()`` and is printed in full before
    the run.

    The previous version also built a ``LinearRegression()`` instance that
    was never used (only referenced from commented-out debugging code); that
    dead local has been removed.
    """
    dataset = load_diabetes()
    print(dataset)

    regressor = LinearRegressor(max_iters=5000)
    cross_validation(
        regressor,
        dataset.data,
        dataset.target,
        task_type='regression',
        num_folds=10,
    )
def test_classifier_toy(classifier):
    """Cross-validate ``classifier`` with 3 folds on the 14-row "play tennis" table.

    Each sample is a dict with the string-valued keys 'outlook',
    'temperature', 'humidity' and 'wind'; labels are 0/1. The dicts are
    vectorized with ``DictVectorizer``. The vectorizer's
    ``index_to_feature_type`` mapping is offered to the classifier on a
    best-effort basis: if that call raises, the error is ignored and the
    classifier is still cross-validated.

    Args:
        classifier: object accepted by ``cross_validation``; it may or may
            not support ``set_index_to_feature_type``.
    """
    # decision tree people really want to know if they should play tennis
    X = [
        {'outlook': 'sunny', 'temperature': 'hot', 'humidity': 'high', 'wind': 'weak'},
        {'outlook': 'sunny', 'temperature': 'hot', 'humidity': 'high', 'wind': 'strong'},
        {'outlook': 'overcast', 'temperature': 'hot', 'humidity': 'high', 'wind': 'weak'},
        {'outlook': 'rain', 'temperature': 'mild', 'humidity': 'high', 'wind': 'weak'},
        {'outlook': 'rain', 'temperature': 'cool', 'humidity': 'normal', 'wind': 'weak'},
        {'outlook': 'rain', 'temperature': 'cool', 'humidity': 'normal', 'wind': 'strong'},
        {'outlook': 'overcast', 'temperature': 'cool', 'humidity': 'normal', 'wind': 'strong'},
        {'outlook': 'sunny', 'temperature': 'mild', 'humidity': 'high', 'wind': 'weak'},
        {'outlook': 'sunny', 'temperature': 'cool', 'humidity': 'normal', 'wind': 'weak'},
        {'outlook': 'rain', 'temperature': 'mild', 'humidity': 'normal', 'wind': 'weak'},
        {'outlook': 'sunny', 'temperature': 'mild', 'humidity': 'normal', 'wind': 'strong'},
        {'outlook': 'overcast', 'temperature': 'mild', 'humidity': 'high', 'wind': 'strong'},
        {'outlook': 'overcast', 'temperature': 'hot', 'humidity': 'normal', 'wind': 'weak'},
        {'outlook': 'rain', 'temperature': 'mild', 'humidity': 'high', 'wind': 'strong'},
    ]
    Y = np.array([0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0])

    vectorizer = DictVectorizer()
    data = vectorizer.fit_transform(X)
    print(vectorizer.feature_to_index)
    print(vectorizer.index_to_feature_type)

    try:
        classifier.set_index_to_feature_type(
            index_to_feature_type=vectorizer.index_to_feature_type
        )
    except Exception:
        # Deliberately best-effort, but no longer a bare ``except:``, which
        # would also swallow KeyboardInterrupt and SystemExit.
        # TODO: kinda gross. Refactor so the vectorizer does not need to pass
        # this info in; just let the fit methods figure it out. Less work
        # then for the general API.
        pass

    # The result is unused; the call is kept so the vectorizer's transform
    # still runs on a single sample dict, as it did before.
    vectorizer.transform({'outlook': 'rain', 'temperature': 'mild',
                          'humidity': 'normal', 'wind': 'weak'})

    cross_validation(classifier, data, Y, task_type='classification', num_folds=3)