Example no. 1
from data_preprocessing import data_preprocessing
from sklearn.linear_model import LinearRegression


def _main():
    X, y = data_preprocessing.import_dataset('Salary_Data.csv', slice(0, -1),
                                             1)

    X_train, X_test, y_train, y_test = data_preprocessing.split_train_test(
        X, y, 1 / 3)

    regressor = LinearRegression()
    regressor.fit(X_train, y_train)

    visualize_performance_on_training(regressor, X_train, y_train)
    visualize_performance_on_test(regressor, X_test, y_test)
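
The two plotting helpers used above are local to this codebase and not shown in the listing. A minimal sketch of what they plausibly look like, assuming they wrap the same matplotlib pattern the later examples use (the axis labels are guesses based on the Salary_Data.csv columns):

from matplotlib import pyplot as plt


def visualize_performance_on_training(regressor, X_train, y_train):
    # Scatter the training examples and overlay the fitted regression line.
    plt.scatter(X_train, y_train, color='red', label='Training examples')
    plt.plot(X_train, regressor.predict(X_train), color='blue', label='Predictions')
    plt.legend()
    plt.xlabel('Years of experience')
    plt.ylabel('Salary')
    plt.show()


def visualize_performance_on_test(regressor, X_test, y_test):
    # Same plot on the held-out set; the line is still the one fitted on
    # the training data.
    plt.scatter(X_test, y_test, color='red', label='Test examples')
    plt.plot(X_test, regressor.predict(X_test), color='blue', label='Predictions')
    plt.legend()
    plt.xlabel('Years of experience')
    plt.ylabel('Salary')
    plt.show()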
Example no. 2
from data_preprocessing import data_preprocessing
from sklearn.linear_model import LinearRegression


def _main():
    features, labels = data_preprocessing.import_dataset('50_Startups.csv', slice(0, 4), 4)

    features, _ = data_preprocessing.one_hot_encode_categorical_features(features, [3])

    features_train, features_test, labels_train, labels_test = \
        data_preprocessing.split_train_test(features, labels, test_size=0.2)

    regressor = LinearRegression()
    regressor.fit(features_train, labels_train)

    labels_test_pred = regressor.predict(features_test)

    features_opt_idxs = backward_elimination(features, labels)
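
backward_elimination is another local helper that is not shown. A minimal sketch of the usual p-value-based approach, assuming statsmodels OLS and a 0.05 significance threshold (both the library choice and the threshold are assumptions, not the author's confirmed implementation):

import numpy as np
import statsmodels.api as sm


def backward_elimination(features, labels, significance_level=0.05):
    # Repeatedly drop the feature with the highest p-value until every
    # remaining feature is significant. OLS needs an explicit intercept,
    # so index 0 below refers to the added constant column and the other
    # indices are offset by one relative to the original feature matrix.
    features = sm.add_constant(features.astype(float))
    kept_idxs = list(range(features.shape[1]))
    while True:
        ols = sm.OLS(labels.astype(float), features[:, kept_idxs]).fit()
        worst = int(np.argmax(ols.pvalues))
        if ols.pvalues[worst] <= significance_level:
            return kept_idxs
        del kept_idxs[worst]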
Example no. 3
import numpy as np
from matplotlib import pyplot as plt
from sklearn.ensemble import RandomForestRegressor

from data_preprocessing import data_preprocessing


def main():
    features, labels = data_preprocessing.import_dataset('Position_Salaries.csv', [1], [2])
    labels = labels.flatten()

    regressor = RandomForestRegressor(n_estimators=100, random_state=0)
    regressor.fit(features, labels)

    plt.scatter(features, labels, color='red', label='Training examples')

    feature_grid = np.arange(features.min(), features.max(), step=0.01)
    feature_grid = feature_grid.reshape((len(feature_grid), 1))
    plt.plot(feature_grid, regressor.predict(feature_grid), color='blue', label='Predictions')

    plt.legend()
    plt.xlabel('Position level')
    plt.ylabel('Salary')
    plt.show()
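
As a quick sanity check of the fitted forest, one can also predict a single intermediate position level (6.5 here is just an illustrative value):

print(regressor.predict([[6.5]]))  # estimated salary for level 6.5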
Example no. 4
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

from data_preprocessing import data_preprocessing


def main():
    features, labels = data_preprocessing.import_dataset(
        'Position_Salaries.csv', [1], [2])

    polynomial_features = PolynomialFeatures(degree=4).fit_transform(features)

    regressor = LinearRegression()
    regressor.fit(polynomial_features, labels)

    plt.scatter(features, labels, color='red', label='Training examples')
    plt.plot(features,
             regressor.predict(polynomial_features),
             color='blue',
             label='Predictions')
    plt.legend()
    plt.xlabel('Position level')
    plt.ylabel('Salary')
    plt.show()
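
One detail worth calling out: predictions for new inputs must go through the same degree-4 feature map the regressor was trained on, e.g. (6.5 is an illustrative position level):

level = [[6.5]]
print(regressor.predict(PolynomialFeatures(degree=4).fit_transform(level)))

In production code it is cleaner to keep the fitted PolynomialFeatures object (or a Pipeline) around and call transform on it instead of rebuilding it.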
Example no. 5
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

from data_preprocessing import data_preprocessing


def main():
    features, labels = data_preprocessing.import_dataset('Position_Salaries.csv', [1], [2])

    feature_scaler = StandardScaler()
    features = feature_scaler.fit_transform(features)

    label_scaler = StandardScaler()
    labels = label_scaler.fit_transform(labels)
    labels = labels.flatten()

    regressor = SVR(kernel='rbf', gamma='scale')
    regressor.fit(features, labels)

    plt.scatter(features, labels, color='red', label='Training examples')
    plt.plot(features, regressor.predict(features), color='blue', label='Predictions')
    plt.legend()
    plt.xlabel('Position level')
    plt.ylabel('Salary')
    plt.show()
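
Because both the feature and the label were standardized, the plot above is in scaled units. To get a prediction back in the original salary units, transform the input and invert the label scaling (6.5 is an illustrative position level):

scaled_level = feature_scaler.transform([[6.5]])
scaled_salary = regressor.predict(scaled_level)
print(label_scaler.inverse_transform(scaled_salary.reshape(-1, 1)))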
Example no. 6
import numpy as np
from matplotlib import pyplot as plt
from sklearn.tree import DecisionTreeRegressor

from data_preprocessing import data_preprocessing


def main():
    features, labels = data_preprocessing.import_dataset('Position_Salaries.csv', [1], [2])

    regressor = DecisionTreeRegressor(random_state=0)
    regressor.fit(features, labels)

    plt.scatter(features, labels, color='red', label='Training examples')

    # Plot on a fine grid: a decision tree predicts a constant (the mean of
    # the training targets in each leaf region), so sampling only at the
    # training points would misleadingly show a perfect fit. The grid reveals
    # the step-shaped prediction function.
    feature_grid = np.arange(features.min(), features.max(), step=0.01)
    feature_grid = feature_grid.reshape((len(feature_grid), 1))
    plt.plot(feature_grid, regressor.predict(feature_grid), color='blue', label='Predictions')

    plt.legend()
    plt.xlabel('Position level')
    plt.ylabel('Salary')
    plt.show()
Example no. 7
from data_preprocessing import data_preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from plots.classification_result_visualizer import visualize_two_feature_classification


features, labels = data_preprocessing.import_dataset(
        'datasets/Social_Network_Ads.csv', [2, 3], 4)

feature_scaler = StandardScaler()
features = feature_scaler.fit_transform(features)

features_train, features_test, labels_train, labels_test = \
    data_preprocessing.split_train_test(features, labels, test_size=0.25)

classifier = LogisticRegression(random_state=0, solver='liblinear')
classifier.fit(features_train, labels_train)

visualize_two_feature_classification(features_train, labels_train, classifier, 
                                     xlabel='Age', ylabel='Estimated salary')
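
The test split created above is otherwise unused; a natural follow-up is to score the classifier on it, for example with a confusion matrix:

from sklearn.metrics import confusion_matrix

labels_test_pred = classifier.predict(features_test)
print(confusion_matrix(labels_test, labels_test_pred))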
Example no. 8
from data_preprocessing import data_preprocessing
from sklearn.cluster import KMeans
from matplotlib import pyplot as plt

features = data_preprocessing.import_dataset('datasets/Mall_Customers.csv', [3, 4])

# choose K using the elbow method: plot the within-cluster sum of squares
# (WCSS) against the number of clusters and pick the K where the curve bends
max_clusters = 10
wcss = []
for i in range(1, max_clusters + 1):
    kmeans = KMeans(n_clusters=i, init='k-means++', n_init=10, max_iter=300)
    kmeans.fit(features)
    wcss.append(kmeans.inertia_)

plt.plot(range(1, max_clusters + 1), wcss)
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()

# look at the plot and choose the value using the elbow method
K = 5
kmeans = KMeans(n_clusters=K, init='k-means++', n_init=10, max_iter=300)
cluster_pred = kmeans.fit_predict(features)

# visualize the clusters. only applicable when the number of features is 2 or 3.
colors = ['red', 'blue', 'green', 'cyan', 'magenta']
for i in range(K):
    cluster_item_indexes = cluster_pred == i
    plt.scatter(features[cluster_item_indexes, 0],
                features[cluster_item_indexes, 1],
                c=colors[i],
                label=f'Cluster {i + 1}')

plt.legend()
plt.xlabel('Annual income (k$)')
plt.ylabel('Spending score (1-100)')
plt.show()
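
To also mark the fitted centroids, the following can be added just before the plt.legend() call above (marker styling is a matter of taste):

plt.scatter(kmeans.cluster_centers_[:, 0],
            kmeans.cluster_centers_[:, 1],
            s=200, c='yellow', edgecolors='black', label='Centroids')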