Esempio n. 1
0
def test_regressor_toy(regressor):
    """Fit and cross-validate ``regressor`` on a 14-row weather toy dataset.

    Each row has string-valued ``outlook`` / ``temperature`` / ``humidity``
    and a boolean ``wind``. The rows are vectorized with ``DictVectorizer``,
    the vectorizer's two lookup tables are printed, and its
    ``index_to_feature_type`` is handed to the regressor before fitting.

    Args:
        regressor: Model exposing ``set_index_to_feature_type`` and ``fit``;
            it is also passed on to ``cross_validation``.
    """
    columns = ('outlook', 'temperature', 'humidity', 'wind')
    rows = [
        ('rain', 'hot', 'high', False),
        ('rain', 'hot', 'high', True),
        ('overcast', 'hot', 'high', False),
        ('sunny', 'mild', 'high', False),
        ('sunny', 'cool', 'normal', False),
        ('sunny', 'cool', 'normal', True),
        ('overcast', 'cool', 'normal', True),
        ('rain', 'mild', 'high', False),
        ('rain', 'cool', 'normal', False),
        ('sunny', 'mild', 'normal', False),
        ('rain', 'mild', 'normal', True),
        ('overcast', 'mild', 'high', True),
        ('overcast', 'hot', 'normal', False),
        ('sunny', 'mild', 'high', True),
    ]
    # One dict per row, keyed in the same column order as the tuple layout.
    samples = [dict(zip(columns, row)) for row in rows]
    targets = np.array([26, 30, 48, 46, 62, 23, 43, 36, 38, 48, 48, 62, 44, 30])

    vec = DictVectorizer()
    features = vec.fit_transform(samples)

    print(vec.feature_to_index)
    print(vec.index_to_feature_type)

    regressor.set_index_to_feature_type(
        index_to_feature_type=vec.index_to_feature_type)
    regressor.fit(features, targets)

    # One fold per sample (presumably leave-one-out; depends on how
    # cross_validation interprets num_folds).
    cross_validation(regressor,
                     features,
                     targets,
                     task_type='regression',
                     num_folds=len(targets))
Esempio n. 2
0
def test_regressor_boston_house(regressor):
    """Run 5-fold cross-validation of ``regressor`` on the ``load_boston`` data.

    Every feature index is declared ``'numerical'`` through a ``defaultdict``
    before the regressor is handed to ``cross_validation``.

    Args:
        regressor: Model exposing ``set_index_to_feature_type``.
    """
    # Original author's note: this takes many seconds even though the data
    # is small.
    # NOTE(review): if load_boston is scikit-learn's loader, it was removed in
    # scikit-learn 1.2 -- confirm which version this file targets.
    boston = load_boston()
    regressor.set_index_to_feature_type(defaultdict(lambda: 'numerical'))
    cross_validation(regressor,
                     boston.data,
                     boston.target,
                     task_type='regression',
                     num_folds=5)
Esempio n. 3
0
def test_classifier_breast_cancer(classifier):
    """Run 3-fold cross-validation of ``classifier`` on ``load_breast_cancer``.

    Every feature index is declared ``'numerical'`` through a ``defaultdict``,
    and the shape of the data matrix is printed before cross-validation.

    Args:
        classifier: Model exposing ``set_index_to_feature_type``.
    """
    # Original author's note: this takes many seconds even though the data
    # is small.
    dataset = load_breast_cancer()
    classifier.set_index_to_feature_type(defaultdict(lambda: 'numerical'))
    print(dataset.data.shape)
    cross_validation(classifier,
                     dataset.data,
                     dataset.target,
                     task_type='classification',
                     num_folds=3)
Esempio n. 4
0
def test_decision_tree_regressor_boston_house():
    """Fit a depth-3 ``DecisionTreeRegressor`` on ``load_boston`` and cross-validate.

    The loaded dataset object is printed, the tree is fitted on the full data
    and printed via ``print_tree``, and the same tree object is then passed
    to 5-fold ``cross_validation``.
    """
    # Original author's note: this takes many seconds even though the data
    # is small.
    # NOTE(review): if load_boston is scikit-learn's loader, it was removed in
    # scikit-learn 1.2 -- confirm which version this file targets.
    boston = load_boston()
    print(boston)

    model = DecisionTreeRegressor(max_depth=3)
    # Treat every feature index as numerical.
    model.set_index_to_feature_type(defaultdict(lambda: 'numerical'))
    model.fit(boston.data, boston.target)
    model.print_tree()

    cross_validation(model, boston.data, boston.target,
                     task_type='regression', num_folds=5)
Esempio n. 5
0
def test_random_forest_classifier_breast_cancer():
    """Cross-validate a 50-tree ``RandomForestClassifier`` on ``load_breast_cancer``.

    The forest has no depth limit (``max_depth=None``) and samples from a
    third of the feature columns (truncated to an int). All feature indices
    are declared ``'numerical'`` before 5-fold ``cross_validation``.
    """
    # Original author's note: this takes many seconds even though the data
    # is small.
    dataset = load_breast_cancer()

    num_columns = dataset.data.shape[1]
    features_per_split = int(num_columns / 3)
    model = RandomForestClassifier(num_trees=50,
                                   max_depth=None,
                                   num_features_to_sample_from=features_per_split)
    model.set_index_to_feature_type(defaultdict(lambda: 'numerical'))

    cross_validation(model, dataset.data, dataset.target,
                     num_folds=5, task_type='classification')
Esempio n. 6
0
def test_linear_regressor_diabetes():
    """Run 10-fold cross-validation of ``LinearRegressor`` on ``load_diabetes``.

    The loaded dataset object is printed, then a ``LinearRegressor`` capped
    at 5000 iterations is passed to ``cross_validation``.

    The original also built a ``LinearRegression`` instance that was never
    used (only referenced from commented-out code); it has been removed.
    """
    d = load_diabetes()
    print(d)
    regressor = LinearRegressor(max_iters=5000)
    cross_validation(regressor,
                     d.data,
                     d.target,
                     task_type='regression',
                     num_folds=10)
Esempio n. 7
0
def test_classifier_toy(classifier):
    """Cross-validate ``classifier`` on the 14-row "play tennis" toy dataset.

    The rows (all string-valued features) are vectorized with
    ``DictVectorizer`` and the vectorizer's two lookup tables are printed.
    The classifier is then told the feature types on a best-effort basis and
    evaluated with 3-fold ``cross_validation``.

    Args:
        classifier: Model passed to ``cross_validation``. If calling its
            ``set_index_to_feature_type`` fails (for example because the
            method does not exist), the failure is reported and the test
            carries on.
    """
    # decision tree people really want to know if they should play tennis
    X = [
        {'outlook': 'sunny', 'temperature': 'hot', 'humidity': 'high', 'wind': 'weak'},
        {'outlook': 'sunny', 'temperature': 'hot', 'humidity': 'high', 'wind': 'strong'},
        {'outlook': 'overcast', 'temperature': 'hot', 'humidity': 'high', 'wind': 'weak'},
        {'outlook': 'rain', 'temperature': 'mild', 'humidity': 'high', 'wind': 'weak'},
        {'outlook': 'rain', 'temperature': 'cool', 'humidity': 'normal', 'wind': 'weak'},
        {'outlook': 'rain', 'temperature': 'cool', 'humidity': 'normal', 'wind': 'strong'},
        {'outlook': 'overcast', 'temperature': 'cool', 'humidity': 'normal', 'wind': 'strong'},
        {'outlook': 'sunny', 'temperature': 'mild', 'humidity': 'high', 'wind': 'weak'},
        {'outlook': 'sunny', 'temperature': 'cool', 'humidity': 'normal', 'wind': 'weak'},
        {'outlook': 'rain', 'temperature': 'mild', 'humidity': 'normal', 'wind': 'weak'},
        {'outlook': 'sunny', 'temperature': 'mild', 'humidity': 'normal', 'wind': 'strong'},
        {'outlook': 'overcast', 'temperature': 'mild', 'humidity': 'high', 'wind': 'strong'},
        {'outlook': 'overcast', 'temperature': 'hot', 'humidity': 'normal', 'wind': 'weak'},
        {'outlook': 'rain', 'temperature': 'mild', 'humidity': 'high', 'wind': 'strong'},
    ]

    Y = np.array([0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0])

    vectorizer = DictVectorizer()
    data = vectorizer.fit_transform(X)

    print(vectorizer.feature_to_index)
    print(vectorizer.index_to_feature_type)

    # Best-effort: kinda gross. The author wants to refactor this so the
    # vectorizer need not pass this info in and the fit methods figure it out.
    # Catch Exception rather than using a bare except so KeyboardInterrupt and
    # SystemExit still propagate, and report the skip instead of hiding it.
    try:
        classifier.set_index_to_feature_type(
            index_to_feature_type=vectorizer.index_to_feature_type)
    except Exception as exc:
        print(f"skipping set_index_to_feature_type: {exc!r}")

    # The result is unused; the call is retained so that transform still runs
    # on a single sample, exactly as the original did.
    vectorizer.transform({'outlook': 'rain', 'temperature': 'mild',
                          'humidity': 'normal', 'wind': 'weak'})

    cross_validation(classifier, data, Y, task_type='classification', num_folds=3)