예제 #1
0
def violin_plot(data, name):
    """
    Draw a violin plot of the 'rel_height' column grouped by the
    'bldg_type' column and save it as a PDF.

    `data` is indexed with the keys 'bldg_type' and 'rel_height'; `name`
    is appended to the output file name. The figure is saved to
    ./Figures only when generate_plots.directory_exists("./Figures") is
    truthy; otherwise a message is printed and nothing is written.
    """
    sns.set_style("ticks")

    fig = plt.figure(figsize=(8, 6))

    ax = sns.violinplot(x=data['bldg_type'], y=data['rel_height'],
                        scale='width', width=0.75, color='steelblue')

    # Slant the category labels and right-align them under their ticks.
    tick_labels = ax.get_xticklabels()
    ax.set_xticklabels(tick_labels, rotation=45, horizontalalignment='right')

    fig.tight_layout()
    sns.despine()

    ax.set_xlabel('Building Type')
    ax.set_ylabel('Building Height [m]')

    if not generate_plots.directory_exists("./Figures"):
        print("Directory: ./Figures does not exist!")
        return

    out_path = "./Figures/Violin_BldTypes_" + name + ".pdf"
    plt.savefig(out_path, bbox_inches="tight", dpi=300, transparent=True)
예제 #2
0
def correlation_matrix(data, name):
    """
    Plot the correlation matrix of the non-geometric features and the
    building height as a lower-triangular heatmap and save it as a PDF.

    `data` must support selecting the columns listed in `columns` below
    and calling `.corr()` on the selection (e.g. a pandas DataFrame);
    `name` is appended to the output file name. The figure is saved to
    ./Figures only when generate_plots.directory_exists("./Figures") is
    truthy; otherwise a message is printed. The current figure is cleared
    at the end in either case.
    """
    sns.set_style("ticks")

    columns = [
        'rel_height', 'avg_hh_income', 'avg_hh_size', 'pop_density', 'h_mean',
        'num_amenities'
    ]
    corr_matrix = data[columns].corr()

    # Display names, in the same order as `columns`.
    features = [
        'Building Height', 'Avg. HH. Income', 'Avg. HH. Size',
        'Population Density', 'Raster Height', '#Amenities'
    ]

    fig = plt.figure(figsize=(5, 5))

    # Create mask to only show one half of the matrix: np.triu keeps the
    # upper triangle including the diagonal, and seaborn hides masked cells.
    # The builtin `bool` is used because the `np.bool` alias was deprecated
    # in NumPy 1.20 and removed in 1.24.
    mask = np.triu(np.ones_like(corr_matrix, dtype=bool))

    # `linewidths` is seaborn's documented argument; the extra `linewidth`
    # keyword that used to be passed alongside it set the same value.
    heatmap = sns.heatmap(corr_matrix,
                          xticklabels=features,
                          yticklabels=features,
                          cmap='RdBu',
                          annot=True,
                          square=True,
                          mask=mask,
                          linewidths=0.5,
                          cbar_kws={
                              "shrink": 0.6,
                              "label": "Correlation"
                          },
                          vmin=-1,
                          vmax=1)
    heatmap.set_xticklabels(heatmap.get_xticklabels(),
                            rotation=45,
                            horizontalalignment='right')
    heatmap.tick_params(left=False, bottom=False)
    fig.tight_layout()

    if generate_plots.directory_exists("./Figures"):
        plt.savefig("./Figures/Correlation_NewFeatures_" + name + ".pdf",
                    bbox_inches="tight",
                    dpi=300,
                    transparent=True)
    else:
        print("Directory: ./Figures does not exist!")

    plt.clf()
예제 #3
0
def rf_min_samples_leaf(train_features, train_labels, test_features, test_labels, name):
    """
    Sweep the `min_samples_leaf` setting of a random forest regressor and
    plot the train and test scores against it.

    The features are scaled with ml_funcs.apply_scaling (fitted on the
    training features, then applied to the test features). One forest is
    fitted per candidate value and scored with compute_accuracy on both
    sets. The figure is saved to ./Figures when that directory is
    reported to exist; otherwise a message is printed.
    """
    sns.set()
    sns.set_style("ticks")

    # 12 evenly spaced values in [2, 24] followed by 30 values in [25, 750].
    leaf_sizes = np.hstack((np.linspace(2, 24, 12, dtype=int),
                            np.linspace(25, 750, num=30, dtype=int)))

    x_train, scaler = ml_funcs.apply_scaling(train_features, 'RF', name, save_scaler=False)
    x_test = scaler.transform(test_features)

    train_scores, test_scores = [], []
    for leaf_size in leaf_sizes:
        print("Samples leaf:", leaf_size)

        forest = RandomForestRegressor(min_samples_leaf=leaf_size, n_jobs=-1, random_state=0)
        forest.fit(x_train, train_labels)

        # Score the fitted model on the data it was trained on, then on the
        # held-out data (compute_accuracy is defined elsewhere).
        train_scores.append(compute_accuracy(forest.predict(x_train), train_labels))
        test_scores.append(compute_accuracy(forest.predict(x_test), test_labels))

    fig = plt.figure(figsize=(10, 6))
    for scores, label in ((train_scores, 'Train'), (test_scores, 'Test')):
        sns.lineplot(x=leaf_sizes, y=scores, label=label)
    plt.legend(frameon=False, loc='upper right')
    plt.xlabel('Minimum samples in leaf')
    plt.ylabel('Accuracy score [%]')

    fig.tight_layout()
    sns.despine()

    if not generate_plots.directory_exists("./Figures"):
        print("Directory: ./Figures does not exist!")
        return

    out_path = "./Figures/Min_Samples_Leaf_" + name + ".pdf"
    plt.savefig(out_path, bbox_inches="tight", dpi=300, transparent=True)
예제 #4
0
def rf_max_depth(train_features, train_labels, test_features, test_labels, name):
    """
    Sweep the `max_depth` setting of a random forest regressor and plot
    the train and test scores against it.

    The features are scaled with ml_funcs.apply_scaling (fitted on the
    training features, then applied to the test features). One forest is
    fitted per depth from 1 to 35 and scored with compute_accuracy on both
    sets. The figure is saved to ./Figures when that directory is
    reported to exist; otherwise a message is printed.
    """
    sns.set()
    sns.set_style("ticks")

    # Candidate tree depths: the integers 1 through 35.
    depths = np.linspace(1, 35, 35, dtype=int)

    x_train, scaler = ml_funcs.apply_scaling(train_features, 'RF', name, save_scaler=False)
    x_test = scaler.transform(test_features)

    train_scores, test_scores = [], []
    for depth in depths:
        print("Depth:", depth)

        forest = RandomForestRegressor(max_depth=depth, n_jobs=-1, random_state=0)
        forest.fit(x_train, train_labels)

        # Score the fitted model on the data it was trained on, then on the
        # held-out data (compute_accuracy is defined elsewhere).
        train_scores.append(compute_accuracy(forest.predict(x_train), train_labels))
        test_scores.append(compute_accuracy(forest.predict(x_test), test_labels))

    fig = plt.figure(figsize=(10, 6))
    for scores, label in ((train_scores, 'Train'), (test_scores, 'Test')):
        sns.lineplot(x=depths, y=scores, label=label)
    plt.legend(frameon=False, loc='lower right')
    plt.xlabel('Maximum tree depth')
    plt.ylabel('Accuracy score [%]')

    fig.tight_layout()
    sns.despine()

    if not generate_plots.directory_exists("./Figures"):
        print("Directory: ./Figures does not exist!")
        return

    out_path = "./Figures/Max_Depth_" + name + ".pdf"
    plt.savefig(out_path, bbox_inches="tight", dpi=300, transparent=True)
예제 #5
0
def rf_n_estimators(train_features, train_labels, test_features, test_labels, name):
    """
    Sweep the `n_estimators` setting of a random forest regressor and plot
    the train and test scores against it.

    The features are scaled with ml_funcs.apply_scaling (fitted on the
    training features, then applied to the test features). One forest is
    fitted per tree count (powers of two from 1 to 512) and scored with
    compute_accuracy on both sets. The figure is saved to ./Figures when
    that directory is reported to exist; otherwise a message is printed.
    """
    sns.set()
    sns.set_style("ticks")

    # Number of trees to try in each forest.
    tree_counts = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

    x_train, scaler = ml_funcs.apply_scaling(train_features, 'RF', name, save_scaler=False)
    x_test = scaler.transform(test_features)

    train_scores, test_scores = [], []
    for tree_count in tree_counts:
        print("Num estimators:", tree_count)

        forest = RandomForestRegressor(n_estimators=tree_count, n_jobs=-1, random_state=0)
        forest.fit(x_train, train_labels)

        # Score the fitted model on the data it was trained on, then on the
        # held-out data (compute_accuracy is defined elsewhere).
        train_scores.append(compute_accuracy(forest.predict(x_train), train_labels))
        test_scores.append(compute_accuracy(forest.predict(x_test), test_labels))

    fig = plt.figure(figsize=(10, 6))
    for scores, label in ((train_scores, 'Train'), (test_scores, 'Test')):
        sns.lineplot(x=tree_counts, y=scores, label=label)
    plt.legend(frameon=False, loc='lower right')
    plt.xlabel('Number of estimators')
    plt.ylabel('Accuracy score [%]')

    fig.tight_layout()
    sns.despine()

    if not generate_plots.directory_exists("./Figures"):
        print("Directory: ./Figures does not exist!")
        return

    out_path = "./Figures/N_Estimators_" + name + ".pdf"
    plt.savefig(out_path, bbox_inches="tight", dpi=300, transparent=True)
예제 #6
0
def svr_C(train_features, train_labels, test_features, test_labels, name):
    """
    Sweep the regularisation parameter C of a linear SVR and plot the
    train and test scores against it.

    The features are scaled with ml_funcs.apply_scaling (fitted on the
    training features, then applied to the test features). One LinearSVR
    is fitted per C value (10 evenly spaced values from 1e-4 to 1) and
    scored with compute_accuracy on both sets. The figure is saved to
    ./Figures when that directory is reported to exist; otherwise a
    message is printed.
    """
    sns.set()
    sns.set_style("ticks")

    candidates = np.linspace(1e-4, 1, 10)

    x_train, scaler = ml_funcs.apply_scaling(train_features, 'SVR', name, save_scaler=False)
    x_test = scaler.transform(test_features)

    train_scores, test_scores = [], []
    for candidate in candidates:
        print("C:", candidate)

        model = LinearSVR(C=candidate, max_iter=2000, random_state=0)
        model.fit(x_train, train_labels)

        # Score the fitted model on the data it was trained on, then on the
        # held-out data (compute_accuracy is defined elsewhere).
        train_scores.append(compute_accuracy(model.predict(x_train), train_labels))
        test_scores.append(compute_accuracy(model.predict(x_test), test_labels))

    fig = plt.figure(figsize=(10, 6))
    for scores, label in ((train_scores, 'Train'), (test_scores, 'Test')):
        sns.lineplot(x=candidates, y=scores, label=label)
    plt.legend(frameon=False, loc='lower right')
    plt.xlabel('C')
    plt.ylabel('Accuracy score [%]')

    fig.tight_layout()
    sns.despine()

    if not generate_plots.directory_exists("./Figures"):
        print("Directory: ./Figures does not exist!")
        return

    out_path = "./Figures/C_" + name + ".pdf"
    plt.savefig(out_path, bbox_inches="tight", dpi=300, transparent=True)
def emperical_cdf(ground_truth, rfr, svr, mlr, city, env):
    """
    Plot the empirical cumulative distribution of the absolute errors of
    three sets of predictions (labelled RFR, SVR and MLR) against the
    ground truth, and save the figure as a PDF.

    For each model the absolute errors are sorted and plotted against
    evenly spaced values from 0 to 1, so the curve gives the fraction of
    samples with an error up to the x value. The x-axis is limited to
    [0, 100] for city 'Seattle' and [0, 8] for any other city. `city` and
    `env` are used in the output file name. The figure is saved to
    ./Figures when that directory is reported to exist; otherwise a
    message is printed.
    """

    sns.set()
    sns.set_style("white")
    sns.set_style("ticks")

    # Build every curve before any figure is created.
    curves = []
    for label, prediction in (('RFR', rfr), ('SVR', svr), ('MLR', mlr)):
        sorted_errors = np.sort(abs(ground_truth - prediction))
        proportions = np.linspace(0, 1, len(sorted_errors))
        curves.append((sorted_errors, proportions, label))

    fig, ax = plt.subplots()
    for sorted_errors, proportions, label in curves:
        ax.plot(sorted_errors, proportions, label=label)
    ax.set_xlabel("Error [m]")
    ax.set_ylabel("Cumulative Frequency")

    x_upper = 100 if city == 'Seattle' else 8
    ax.set_xlim([0, x_upper])
    ax.set_ylim([0, 1])
    ax.legend(frameon=False, loc='lower right')

    fig.tight_layout()
    sns.despine()

    if not generate_plots.directory_exists("./Figures"):
        print("Directory: ./Figures does not exist!")
        return

    out_path = "./Figures/Cumulative_Errors_" + city + "_" + env + ".pdf"
    plt.savefig(out_path, bbox_inches="tight", dpi=300, transparent=True)
예제 #8
0
def svr_maxiter_tolerance(train_features, train_labels, test_features, test_labels, name):
    """
    Plot a combination of the maximum number of iterations and the tolerance
    against the accuracy.

    The features are scaled with ml_funcs.apply_scaling (fitted on the
    training features, then applied to the test features). For every
    tolerance in {1e-3, 1e-4, 1e-5} a LinearSVR is fitted for each of 50
    `max_iter` values between 100 and 5000 and scored with
    compute_accuracy on both sets. Each tolerance contributes one train
    and one test line to the plot. The figure is saved to ./Figures when
    that directory is reported to exist; otherwise a message is printed.
    """
    sns.set()
    sns.set_style("ticks")

    train_results = []
    test_results = []

    # `tol_labels` holds the legend text for the tolerance at the same index.
    tolerances = [1e-3, 1e-4, 1e-5]
    tol_labels = ['1e-3', '1e-4', '1e-5']
    max_iter = np.linspace(100, 5000, 50, dtype=int)

    train_scaled, scaler = ml_funcs.apply_scaling(train_features, 'SVR', name, save_scaler=False)
    test_scaled = scaler.transform(test_features)

    for tolerance in tolerances:
        temp_train = []
        temp_test = []

        print("Tolerance:", tolerance)

        for iteration in max_iter:
            print("Max. iterations:", iteration)

            svr = LinearSVR(tol=tolerance, max_iter=iteration, random_state=0)
            svr.fit(train_scaled, train_labels)
            predict_train = svr.predict(train_scaled)

            # Score on the training data (compute_accuracy is defined elsewhere).
            accuracy_train = compute_accuracy(predict_train, train_labels)
            temp_train.append(accuracy_train)

            predict_test = svr.predict(test_scaled)

            # Score on the test data.
            accuracy_test = compute_accuracy(predict_test, test_labels)
            temp_test.append(accuracy_test)

        train_results.append(temp_train)
        test_results.append(temp_test)

    fig = plt.figure(figsize=(10, 6))
    # Walk the three parallel lists together instead of indexing by position.
    # The labels carry an explicit "tol=" so they read "Train (tol=1e-3)"
    # rather than running the name and value together.
    for tol_label, train_curve, test_curve in zip(tol_labels, train_results, test_results):
        sns.lineplot(x=max_iter, y=train_curve, label='Train (tol=' + tol_label + ')')
        sns.lineplot(x=max_iter, y=test_curve, label='Test (tol=' + tol_label + ')')

    # Anchor the legend's lower-left corner at the axes' lower-right corner,
    # i.e. place it outside the plotting area.
    plt.legend(frameon=False, loc='lower left', bbox_to_anchor=(1.0, 0.0))
    plt.xlabel('Maximum number of iterations')
    plt.ylabel('Accuracy score [%]')

    fig.tight_layout()
    sns.despine()

    if generate_plots.directory_exists("./Figures"):
        plt.savefig("./Figures/MaxIter_Tolerance_" + name + ".pdf", bbox_inches="tight", dpi=300,
                    transparent=True)
    else:
        print("Directory: ./Figures does not exist!")