def multiple_figures():
    mpl.style.use(['seaborn-white', 'seaborn-paper', 'grayscale'])

    #width = 3.39
    #height = 3.39 * (np.sqrt(5) - 1.0) / 2.0
    latexify()


    for title, model in zip(['without', 'gaussian', 'constant'], [without, gaussian, constant]):
        y, y_pred = model(pipe)

        print(title, classifier[0])
        print(class_counter(y))
        print(metrics.classification_report(y, y_pred, labels=labels))


        acc = metrics.accuracy_score(y, y_pred)
        cm = metrics.confusion_matrix(y, y_pred, labels=labels)
        cm = norm_cm(cm)

        cm = pd.DataFrame(cm, index=labels, columns=labels)

        fig, ax = plt.subplots(dpi=92)

        sns.heatmap(cm, vmin=0, vmax=1, annot=True, fmt='.2f', cmap='Greys', ax=ax, cbar=False, square=True)
        format_axes_for_cm(ax)
        ax.set_title(f'accuracy = {acc:.3f}')

        fig.tight_layout()

        ensure_dir('./output/interpolations/')
        fig.savefig(f'./output/interpolations/{title}.pdf', dpi=92, bbox_inches='tight')
        plt.close(fig)
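
The norm_cm and class_counter helpers are defined elsewhere; a minimal sketch, assuming norm_cm row-normalizes the confusion matrix and class_counter simply tallies the labels:

import collections

import numpy as np


def norm_cm(cm):
    # Divide each row by its sum so every true class adds up to 1.0.
    cm = np.asarray(cm, dtype=float)
    return cm / cm.sum(axis=1, keepdims=True)


def class_counter(y):
    # Count how many samples belong to each class.
    return collections.Counter(y)
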
Example 2
def main():
    mpl.style.use(['seaborn-white', 'seaborn-paper', 'grayscale'])
    latexify()

    w_sizes = (5, 10, 50, 100)

    for (w_prr, w_history) in it.product(w_sizes, repeat=2):
        y, y_pred = different_window_sizes(w_prr, w_history)

        acc = metrics.accuracy_score(y, y_pred)
        prec = metrics.precision_score(y,
                                       y_pred,
                                       average='weighted',
                                       labels=labels)
        recall = metrics.recall_score(y,
                                      y_pred,
                                      average='weighted',
                                      labels=labels)
        f1 = metrics.f1_score(y, y_pred, average='weighted', labels=labels)

        # Print one row of a LaTeX results table: W_history, W_prr, accuracy, precision, recall, F1.
        print(
            f'& {w_history}\t& {w_prr}\t& {acc:.3f}\t& {prec:.3f}\t& {recall:.3f}\t& {f1:.3f}'
        )

        #print(f'Wh=={w_history}; Wprr=={w_prr}')
        #print(metrics.classification_report(y, y_pred, labels=labels))

        cm = metrics.confusion_matrix(y, y_pred, labels=labels)
        cm = norm_cm(cm)
        cm = pd.DataFrame(cm, index=labels, columns=labels)

        fig, ax = plt.subplots(dpi=92)
        sns.heatmap(cm,
                    vmin=0,
                    vmax=1,
                    annot=True,
                    fmt='.2f',
                    cmap='Greys',
                    ax=ax,
                    cbar=False,
                    square=True)
        #ax.set_title(f'$\\mathrm{{Acc}}(W_{{\\mathrm{{PRR}}}}={w_prr}, W_{{\\mathrm{{history}}}}={w_history})={acc:.3f}$')
        ax.set_title(
            f'Accuracy = {acc:.3f}\n(prec = {prec:.3f}, rec = {recall:.3f})')
        format_axes_for_cm(ax)

        fig.tight_layout()

        ensure_dir('./output/w_sizes/')
        fig.savefig(f'./output/w_sizes/Wprr{w_prr}_Wh{w_history}.pdf',
                    dpi=92,
                    bbox_inches='tight')
        plt.close(fig)
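
latexify is called before every figure but not shown here; a minimal sketch, consistent with the commented-out width/height in the first example, assuming it only shrinks matplotlib's defaults to paper-column dimensions:

import matplotlib as mpl
import numpy as np


def latexify(columns=1):
    # Size figures for a one- or two-column paper and shrink the fonts to match.
    width = 3.39 if columns == 1 else 6.9      # inches
    height = width * (np.sqrt(5) - 1.0) / 2.0  # golden-ratio height
    mpl.rcParams.update({
        'figure.figsize': (width, height),
        'font.family': 'serif',
        'font.size': 8,
        'axes.labelsize': 8,
        'axes.titlesize': 8,
        'legend.fontsize': 8,
        'xtick.labelsize': 8,
        'ytick.labelsize': 8,
    })
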
Example 3
def multiplots():
    mpl.style.use(['seaborn-white', 'seaborn-paper', 'grayscale'])

    latexify()

    for model, title in zip([no_resample, undersample, oversample], ['none', 'undersample', 'oversample']):
        y, y_pred, c = model(classifier)

        acc = metrics.accuracy_score(y, y_pred)
        #prec = metrics.precision_score(y, y_pred, labels=labels, average='macro')
        #rec = metrics.recall_score(y, y_pred, labels=labels, average='macro')
        #f1 = metrics.f1_score(y, y_pred, labels=labels, average='macro')

        cm = metrics.confusion_matrix(y, y_pred, labels=labels)
        cm = norm_cm(cm)

        cm = pd.DataFrame(cm, index=labels, columns=labels)

        fig, ax = plt.subplots(dpi=92, constrained_layout=True)
        #print(f'{title}\t-- Acc.: {acc:.3f};\t Prec.: {prec:.3f}\t Rec.: {rec:.3f}\t F1: {f1:.3f}')
        print('Resample:', title, classifier[0], f'accuracy={acc:.3f}')
        print(metrics.classification_report(y, y_pred, labels=labels))

        plt.suptitle(f'accuracy={acc:.3f}')
        #plt.suptitle(f'good={c["good"]:,}\ninterm.={c["interm."]:,}\nbad={c["bad"]:,}', ha='left')

        sns.heatmap(cm, vmin=0, vmax=1, annot=True, fmt='.2f', cmap='Greys', ax=ax, cbar=False, square=True)

        #ax.set_title(f'Accuracy = {acc:.3f}', loc='center')
        ax.set_title(
            f'good:    {c["good"]:,}\ninterm.: {c["interm."]:,}\nbad:      {c["bad"]:,}',
            fontdict={'fontsize': 9},
            loc='left'
        )

        format_axes_for_cm(ax)

        #fig.tight_layout()
        ensure_dir('./output/resampling/')
        fig.savefig(f'./output/resampling/{title}.pdf', dpi=92, bbox_inches='tight')
        plt.close(fig)
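
format_axes_for_cm is external to the listing; a plausible minimal version, assuming it only labels the heatmap axes and tidies the ticks:

def format_axes_for_cm(ax):
    # Label the confusion matrix and keep the class names horizontal.
    ax.set_xlabel('predicted class')
    ax.set_ylabel('true class')
    ax.tick_params(axis='both', which='both', length=0)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
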
Example 4
def advanced_charts():
    mpl.style.use(['seaborn-white', 'seaborn-paper', 'grayscale'])
    latexify(columns=2)

    w_sizes = (2, 5, 10, 15, 20, 30, 50, 80,
               100)[::-1]  # [::-1] reverses the order (largest window first)

    fig, ax = plt.subplots(dpi=92)

    colors = sns.color_palette("cubehelix", len(w_sizes))

    for w_prr, color in zip(w_sizes, colors):
        acc = []
        for w_history in w_sizes:
            y, y_pred = different_window_sizes(w_prr, w_history)
            acc.append(metrics.accuracy_score(y, y_pred))

        ax.plot(w_sizes,
                acc,
                label=f'W$_\\mathrm{{PRR}}={w_prr}$',
                color=color)

    ax.set_ylabel('accuracy')
    ax.set_xlabel('W$_\\mathrm{history}$')
    ax.set_xticks(w_sizes[::-1])
    ax.set_xlim(min(w_sizes), max(w_sizes))
    format_axes_for_chart(ax)

    fig.tight_layout()
    fig.legend(loc='right')

    ensure_dir('./output/')
    fig.savefig('./output/different_window_sizes_linechart.pdf',
                dpi=92,
                bbox_inches='tight')

    plt.close(fig)
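
ensure_dir and format_axes_for_chart are likewise imported from elsewhere; minimal sketches, assuming ensure_dir just creates the output directory and format_axes_for_chart applies light despining:

import os

import seaborn as sns


def ensure_dir(path):
    # Create the directory (and any parents) if it does not exist yet.
    os.makedirs(path, exist_ok=True)


def format_axes_for_chart(ax):
    # Drop the top/right spines and draw a faint grid behind the data.
    sns.despine(ax=ax)
    ax.grid(True, linestyle=':', alpha=0.5)
    ax.set_axisbelow(True)
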
Example 5
def multiple_figures():
    mpl.style.use(['seaborn-white', 'seaborn-paper', 'grayscale'])

    latexify()

    pipe = pipe_dtree

    for features in feature_sets:
        y, y_pred = different_features(pipe, features)

        acc = metrics.accuracy_score(y, y_pred)
        prec = metrics.precision_score(y,
                                       y_pred,
                                       average='weighted',
                                       labels=labels)
        recall = metrics.recall_score(y,
                                      y_pred,
                                      average='weighted',
                                      labels=labels)

        #prec = metrics.precision_score(y, y_pred, labels=labels, average='micro')
        #rec = metrics.recall_score(y, y_pred, labels=labels, average='micro')

        cm = metrics.confusion_matrix(y, y_pred, labels=labels)
        cm = norm_cm(cm)

        cm = pd.DataFrame(cm, index=labels, columns=labels)

        fig, ax = plt.subplots(dpi=92)
        sns.heatmap(cm,
                    vmin=0,
                    vmax=1,
                    annot=True,
                    fmt='.2f',
                    cmap='Greys',
                    ax=ax,
                    cbar=False,
                    square=True)
        format_axes_for_cm(ax)

        feature_str = stringify_features(features)
        ax.set_title(
            f'Accuracy = {acc:.3f}\n(prec = {prec:.3f}; rec = {recall:.3f})')

        fig.tight_layout()

        ensure_dir('./output/features/dtree/')
        fig.savefig(f'./output/features/dtree/{feature_str}.pdf',
                    dpi=92,
                    bbox_inches='tight')
        plt.close(fig)
        print(f'Done {features}')

    pipe = pipe_logreg

    for features in feature_sets:
        print('Features', features)

        y, y_pred = different_features(pipe, features)

        acc = metrics.accuracy_score(y, y_pred)
        prec = metrics.precision_score(y,
                                       y_pred,
                                       average='micro',
                                       labels=labels)
        recall = metrics.recall_score(y,
                                      y_pred,
                                      average='micro',
                                      labels=labels)

        #prec = metrics.precision_score(y, y_pred, labels=labels, average='micro')
        #rec = metrics.recall_score(y, y_pred, labels=labels, average='micro')

        cm = metrics.confusion_matrix(y, y_pred, labels=labels)
        cm = norm_cm(cm)

        cm = pd.DataFrame(cm, index=labels, columns=labels)

        fig, ax = plt.subplots(dpi=92)
        sns.heatmap(cm,
                    vmin=0,
                    vmax=1,
                    annot=True,
                    fmt='.2f',
                    cmap='Greys',
                    ax=ax,
                    cbar=False,
                    square=True)
        format_axes_for_cm(ax)

        feature_str = stringify_features(features)
        ax.set_title(
            f'Accuracy = {acc:.3f}\n(prec = {prec:.3f}, rec = {recall:.3f})')

        fig.tight_layout()

        ensure_dir('./output/features/logistic/')
        fig.savefig(f'./output/features/logistic/{feature_str}.pdf',
                    dpi=92,
                    bbox_inches='tight')
        plt.close(fig)
        print(f'Done {features}')
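
stringify_features builds the per-figure filename; a hypothetical one-liner, assuming each feature set is an iterable of column names:

def stringify_features(features):
    # Join feature names into a filesystem-friendly string, e.g. 'rssi-rssi_avg'.
    return '-'.join(features)

Example 6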
def multiple_figures():
    mpl.style.use(['seaborn-white', 'seaborn-paper', 'grayscale'])
    latexify()

    cv = model_selection.StratifiedKFold(n_splits=10, shuffle=True)
    scaler = preprocessing.StandardScaler()
    resample = over_sampling.RandomOverSampler()

    baseline = pipeline.make_pipeline(
        scaler, resample,
        dummy.DummyClassifier(strategy='constant', constant='good'))

    logreg = pipeline.make_pipeline(
        scaler,
        resample,
        linear_model.LogisticRegression(solver='lbfgs', multi_class='ovr'),
    )

    dtree = pipeline.make_pipeline(
        scaler,
        resample,
        tree.DecisionTreeClassifier(),
    )

    knn = pipeline.make_pipeline(
        scaler,
        resample,
        neighbors.KNeighborsClassifier(),
    )

    mlp = pipeline.make_pipeline(
        scaler,
        resample,
        neural_network.MLPClassifier(hidden_layer_sizes=(100, 100, 100),
                                     activation='relu',
                                     solver='adam'),
    )

    svc = pipeline.make_pipeline(
        scaler,
        resample,
        svm.LinearSVC(),
    )

    RForest = pipeline.make_pipeline(
        scaler,
        resample,
        ensemble.RandomForestClassifier(n_estimators=100),
    )

    models = (
        ('Constant', baseline),
        ('Logistic Regression', logreg),
        ('Decision Tree', dtree),
        #('kNN', knn),
        ('Multi-Layer Perceptron', mlp),
        ('linearSVM', svc),
        ('Random Forest', RForest),
    )

    # Special case: use each link's overall class directly as the baseline prediction
    filename = 'baseline-link-overall'
    df = prepare_data()
    y, y_pred = df['class'].ravel(), df['class_overall'].ravel()

    acc = metrics.accuracy_score(y, y_pred)
    prec = metrics.precision_score(y,
                                   y_pred,
                                   average='weighted',
                                   labels=labels)
    recall = metrics.recall_score(y, y_pred, average='weighted', labels=labels)

    cm = metrics.confusion_matrix(y, y_pred, labels=labels)
    cm = norm_cm(cm)

    cm = pd.DataFrame(cm, index=labels, columns=labels)

    fig, ax = plt.subplots(dpi=92)
    sns.heatmap(cm,
                vmin=0,
                vmax=1,
                annot=True,
                fmt='.2f',
                cmap='Greys',
                ax=ax,
                cbar=False,
                square=True)
    ax.set_title(
        f'accuracy = {acc:.3f}\n(prec = {prec:.3f}, rec = {recall:.3f})')
    format_axes_for_cm(ax)

    fig.tight_layout()

    ensure_dir('./output/models/')
    fig.savefig(f'./output/models/{filename}.pdf', dpi=92, bbox_inches='tight')
    plt.close(fig)
    print(f'Done {filename}')

    for name, pipe in models:
        filename = name.lower().replace(' ', '_')

        y, y_pred = different_models(pipe)

        acc = metrics.accuracy_score(y, y_pred)
        #prec = metrics.precision_score(y, y_pred, average='weighted', labels=labels)
        #recall = metrics.recall_score(y, y_pred, average='weighted', labels=labels)
        print(name)
        print(metrics.classification_report(y, y_pred, labels=labels))

        cm = metrics.confusion_matrix(y, y_pred, labels=labels)
        cm = norm_cm(cm)

        cm = pd.DataFrame(cm, index=labels, columns=labels)

        fig, ax = plt.subplots(dpi=92)
        sns.heatmap(cm,
                    vmin=0,
                    vmax=1,
                    annot=True,
                    fmt='.2f',
                    cmap='Greys',
                    ax=ax,
                    cbar=False,
                    square=True)
        ax.set_title(f'accuracy={acc:.3f}')
        format_axes_for_cm(ax)

        fig.tight_layout()

        ensure_dir('./output/models/')
        fig.savefig(f'./output/models/{filename}.pdf',
                    dpi=92,
                    bbox_inches='tight')
        plt.close(fig)
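
different_models is not shown; a plausible sketch, assuming module-level X and y arrays and out-of-fold predictions, which would also explain the otherwise unused cv splitter defined at the top of this example:

from sklearn import model_selection


def different_models(pipe, cv=None):
    # Hypothetical: return the true labels and cross-validated predictions.
    cv = cv or model_selection.StratifiedKFold(n_splits=10, shuffle=True)
    y_pred = model_selection.cross_val_predict(pipe, X, y, cv=cv)  # X, y assumed module-level
    return y, y_pred
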
Example 7
def main():
    mpl.style.use(['seaborn-white', 'seaborn-paper', 'grayscale'])
    latexify(columns=2)

    #cv = model_selection.StratifiedKFold(n_splits=10, shuffle=True)
    #poly = preprocessing.PolynomialFeatures(degree=2)
    scaler = preprocessing.StandardScaler()
    resample = over_sampling.RandomOverSampler()

    baseline = pipeline.make_pipeline(
        scaler, resample, dummy.DummyClassifier(strategy='constant',
                                                constant=0))

    logreg = pipeline.make_pipeline(
        scaler,
        resample,
        linear_model.LogisticRegression(),
    )

    sgd = pipeline.make_pipeline(
        scaler,
        resample,
        linear_model.SGDClassifier(),
    )

    dtree = pipeline.make_pipeline(
        scaler,
        resample,
        tree.DecisionTreeClassifier(),
    )

    mlp = pipeline.make_pipeline(scaler, resample,
                                 neural_network.MLPClassifier())

    svc = pipeline.make_pipeline(scaler, resample, svm.LinearSVC())

    RForest = pipeline.make_pipeline(scaler, resample,
                                     ensemble.RandomForestClassifier())

    models = (
        ('Constant', baseline),
        ('Logistic Reg.', logreg),
        ('Decision Tree', dtree),
        #('kNN', knn),
        ('Multi-Layer Perceptron', mlp),
        ('SVM (linear kernel)', svc),
        ('Random Forest', RForest),
    )

    colors = sns.color_palette("cubehelix", len(models))

    fig, ax = plt.subplots(dpi=92)  # Setup a figure

    #ax.set_title('Precision-Recall curve')

    #ax.set_xlim(0, 1)
    #ax.set_ylim(0, 1)

    ax.set_xlabel('Recall = $\\frac{TP}{TP+FN}$')
    ax.set_ylabel('Precision = $\\frac{TP}{TP+FP}$')

    # Prepare data for processing
    data = prepare_data()
    X, y = data[['rssi', 'rssi_avg', 'rssi_std']].values, data['class'].ravel()
    Y = preprocessing.label_binarize(y, classes=classes)
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, Y, test_size=0.2, random_state=random_state)

    for (name, model), color in zip(models, colors):
        classifier = multiclass.OneVsRestClassifier(
            model)  # wrap so each class exposes a score via decision_function

        classifier.fit(X_train, y_train)

        # generate y_score
        if hasattr(classifier, 'decision_function'):
            y_score = classifier.decision_function(X_test)
        else:
            y_score = classifier.predict_proba(X_test)
            #continue

        # generate probabilities
        #y_proba = classifier.predict_proba(X_test)

        # generate predictions
        y_pred = classifier.predict(X_test)

        precision = dict()
        recall = dict()
        average_precision = dict()

        acc = metrics.accuracy_score(y_test, y_pred)

        for i in [1]:  # plot only the intermediate class (index 1)
            precision[i], recall[i], _ = metrics.precision_recall_curve(
                y_test[:, i], y_score[:, i])
            average_precision[i] = metrics.average_precision_score(
                y_test[:, i], y_score[:, i])

            ax.step(recall[i],
                    precision[i],
                    where='post',
                    color=color,
                    alpha=0.65,
                    label=f'{name}')

        print(f'Plotted {name}')

    ax.legend(loc="best")
    format_axes_for_chart(ax)
    fig.tight_layout()

    ensure_dir('./output/')
    fig.savefig('./output/precision-recall-curve.pdf',
                dpi=92,
                bbox_inches='tight')
    #plt.show()
    plt.close(fig)
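
The snippet also depends on a few module-level names the listing never defines; hypothetical values, consistent with the class names used in the earlier examples:

classes = ['good', 'interm.', 'bad']  # label order for label_binarize; index 1 is the intermediate class
labels = classes                      # same ordering reused by the sklearn.metrics calls
random_state = 42                     # fixed seed for the train/test split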