예제 #1
0
def pre_load_fixed_data():
    """
    Read the bundled tweets file with a fixed list of keywords.

    Creates a DataReader with no arguments and returns whatever its
    read() method yields for "static/data/tweets.txt" and the keyword
    list below. How read() uses the keywords is defined by DataReader
    (presumably as a filter -- confirm against that class).
    """
    return DataReader().read(
        "static/data/tweets.txt",
        [
            "music",
            "food",
            "sport",
            "show",
            "movie",
            "car",
            "commercial",
            "party",
            "war",
            "hello",
        ],
    )
예제 #2
0
def test_read_function(url):
    """
    Check DataReader(url).read() against the expected dataset layout.

    Reads (data, columns) from a DataReader built for `url` and uses
    assert_equals to verify, in order, that both results are non-empty,
    that len(data) is 252 and len(columns) is 15, that the 'Age (years)'
    value at index label 0 is 23, and that columns[3] is 'Weight (lbs)'.

    Returns the (data, columns) pair so callers can reuse what was read.
    """
    data, columns = DataReader(url).read()

    # Sanity checks first: something must have been read at all.
    row_count = len(data)
    assert_equals(True, row_count > 0)
    column_count = len(columns)
    assert_equals(True, column_count > 0)

    # Exact size of the expected dataset.
    assert_equals(252, row_count)
    assert_equals(15, column_count)

    # Spot-check one cell and one column name.
    assert_equals(23, data.loc[0, 'Age (years)'])
    assert_equals('Weight (lbs)', columns[3])
    return data, columns
예제 #3
0
def _report_models_mse(features, target, linear_label, tree_label):
    """
    Fit a linear model and a decision tree on one feature set and print
    the train/test mean squared error of each.

    features, target: inputs handed to train_test_split (60/40 split,
        random_state=1).
    linear_label, tree_label: text inserted into the printed
        'MSE for <label> train:' / 'MSE for <label> test:' lines.

    Returns None; the results are only printed.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=0.4, random_state=1)

    # The tree is fitted before the linear model, matching the order the
    # script has always used.
    # NOTE(review): DecisionTreeRegressor is built without random_state, so
    # its MSE may differ between runs -- confirm whether seeding is wanted.
    tree_model = DecisionTreeRegressor()
    tree_model.fit(x_train, y_train)
    linear_model = linear_regression_fit(x_train, y_train)

    # Linear results are printed first, then the decision tree results.
    for label, fitted in ((linear_label, linear_model),
                          (tree_label, tree_model)):
        print('MSE for {} train:'.format(label),
              mean_squared_error(y_train, fitted.predict(x_train)))
        print('MSE for {} test:'.format(label),
              mean_squared_error(y_test, fitted.predict(x_test)))


def main():
    """
    Load the body fat dataset, plot it, and compare regression models.

    Steps:
      1. Read (data, columns) from the CMU StatLib 'bodyfat' URL via
         DataReader.
      2. Call correlation_chart and graphs for plotting (both are defined
         outside this function).
      3. Print train/test MSE of a linear model and a decision tree using
         the columns 'Age (years)' through 'Wrist circumference (cm)'.
      4. Repeat step 3 using only the columns selected by
         high_correlation(all_correlation).

    The target in both runs is
    "Percent body fat from Siri's (1956) equation".
    """
    sns.set(font_scale=0.7)

    # Process data
    url = 'http://lib.stat.cmu.edu/datasets/bodyfat'
    web_data = DataReader(url)
    data, columns = web_data.read()

    # Plotting
    all_correlation = correlation_chart(data, columns)
    graphs(data, all_correlation)

    x = data.loc[:, 'Age (years)':'Wrist circumference (cm)']
    y = data["Percent body fat from Siri's (1956) equation"].to_numpy()

    # Linear regression and decision tree on the full feature range
    _report_models_mse(x, y, 'linear', 'decisiontree')

    # Same comparison restricted to the high-correlation columns
    x_high_correlation = data[high_correlation(all_correlation)].copy()
    _report_models_mse(x_high_correlation, y,
                       'high correlation',
                       'high correlation decisiontree')