Example #1
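# LDA: plot cumulative explained-variance curves for both datasets, then run
# the vary_k clustering sweep on the LDA-transformed training data.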
def main():
    # load normalized data
    xtrain1, xtest1, ytrain1, ytest1, xtrain2, xtest2, ytrain2, ytest2 = load_data()
    varianceratioplot(xtrain2, ytrain2, "LDA Cum Sum Variance dat2",
                      "figs/lda/varianceratiodat2.png")
    varianceratioplot(xtrain1, ytrain1, "LDA Cum Sum Variance dat1",
                      "figs/lda/varianceratiodat1.png")
    lda = LinearDiscriminantAnalysis()
    data = lda.fit_transform(xtrain2, ytrain2)
    vary_k(xtrain2, data, 20, ytrain2, "dat2")
    plt.clf()
    data = lda.fit_transform(xtrain1, ytrain1)
    vary_k(xtrain1, data, 50, ytrain1, "dat1", iters=2)
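All of these snippets depend on a shared load_data() helper that is never shown. A minimal sketch of the shape it plausibly has, normalizing and splitting two datasets; the file paths, the StandardScaler choice, and the default test_size are assumptions:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


def load_data(test_size=0.2):
    # assumed paths; the real data files are not shown in any of these examples
    frames = [pd.read_csv("data/dat1.csv"), pd.read_csv("data/dat2.csv")]
    out = []
    for df in frames:
        x, y = df.iloc[:, :-1].values, df.iloc[:, -1].values
        xtr, xte, ytr, yte = train_test_split(x, y, test_size=test_size)
        scaler = StandardScaler().fit(xtr)  # fit the scaler on the training split only
        out += [scaler.transform(xtr), scaler.transform(xte), ytr, yte]
    # order: xtrain1, xtest1, ytrain1, ytest1, xtrain2, xtest2, ytrain2, ytest2
    return tuple(out)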
Example #2
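# PCA: explained-variance and reconstruction-error plots (left commented out),
# then cluster the PCA-reduced data (7 components for dat2, 44 for dat1).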
def main():
    # load normalized data
    xtrain1, xtest1, ytrain1, ytest1, xtrain2, xtest2, ytrain2, ytest2 = load_data()
    # eigenratioplot(xtrain2, "Cum Sum Explained Variance Ratio per Component dat2", "figs/pca/explainedvar_dat2.png")
    # eigenratioplot(xtrain1, "Cum Sum Explained Variance Ratio per Component dat1", "figs/pca/explainedvar_dat1.png")
    # rec_err_plot(xtrain2,18, "Reconstruction Error dat 2", "figs/pca/recon_err_dat2.png")
    # rec_err_plot(xtrain1,54, "Reconstruction Error dat 1", "figs/pca/recon_err_dat1.png")
    pca = PCA(n_components=7)
    data = pca.fit_transform(xtrain2)
    vary_k(xtrain2,data, 20, ytrain2, "dat2")
    pca = PCA(n_components=44)
    data = pca.fit_transform(xtrain1)
    vary_k(xtrain1,data, 50, ytrain1, "dat1")
Example #3
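# ICA: kurtosis and reconstruction-error curves to pick a component count,
# then cluster the FastICA-transformed data.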
def main():
    # load normalized data
    xtrain1, xtest1, ytrain1, ytest1, xtrain2, xtest2, ytrain2, ytest2 = load_data()
    kurt(xtrain1, 55, "ICA Mean Kurtosis vs num_components dat1",
         "figs/ica/kurtdat1.png")
    kurt(xtrain2, 20, "ICA Mean Kurtosis vs num_components dat2",
         "figs/ica/kurtdat2.png")
    rec_err_plot(xtrain2, 18, "Reconstruction Error dat 2",
                 "figs/ica/recon_err_dat2.png")
    rec_err_plot(xtrain1, 55, "Reconstruction Error dat 1",
                 "figs/ica/recon_err_dat1.png")
    ica = FastICA(n_components=3)
    data = ica.fit_transform(xtrain2)
    vary_k(xtrain2, data, 20, ytrain2, "dat2test")
    ica = FastICA(n_components=36)
    data = ica.fit_transform(xtrain1)
    vary_k(xtrain1, data, 50, ytrain1, "dat1")
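The kurt helper is not shown. A sketch of what such a mean-kurtosis-versus-components curve typically computes for FastICA; the exact aggregation (mean absolute excess kurtosis) and the plotting details are assumptions:

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import kurtosis
from sklearn.decomposition import FastICA


def kurt(x, kmax, title, path):
    ks = list(range(2, kmax + 1))
    means = []
    for k in ks:
        s = FastICA(n_components=k).fit_transform(x)
        # mean absolute excess kurtosis of the recovered sources
        means.append(np.mean(np.abs(kurtosis(s, axis=0))))
    plt.plot(ks, means)
    plt.title(title)
    plt.xlabel("num_components")
    plt.ylabel("mean kurtosis")
    plt.savefig(path)
    plt.clf()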
Example #4
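# EM (Gaussian mixtures): sweep cluster counts, then compare models by BIC,
# AIC, and average log-likelihood; the same bic_model_selection helper is
# reused with different score labels.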
def main():
    xtrain1, xtest1, ytrain1, ytest1, xtrain2, xtest2, ytrain2, ytest2 = load_data()
    vary_k(xtrain2, 20, ytrain2, "dat2")
    vary_k(xtrain1, 50, ytrain1, "dat1")
    bic_model_selection(xtrain2, 20, "BIC per model dat2",
                        "figs/em/bic_dat2.png", "BIC Score")
    bic_model_selection(xtrain2, 20, "AIC per model dat2",
                        "figs/em/aic_dat2.png", "AIC Score")
    bic_model_selection(xtrain2, 20, "Average Log likelihood per model dat2",
                        "figs/em/score_dat2.png",
                        "Average Log Likelihood Score")
    bic_model_selection(xtrain1, 100, "BIC per model dat1",
                        "figs/em/bic_dat1.png", "BIC Score")
    bic_model_selection(xtrain1, 100, "AIC per model dat1",
                        "figs/em/aic_dat1.png", "AIC Score")
    bic_model_selection(xtrain1, 100, "Average Log likelihood per model dat1",
                        "figs/em/score_dat1.png",
                        "Average Log Likelihood Score")
Example #5
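# k-means: silhouette visualization, homogeneity against the true labels, and
# elbow plots for distortion, silhouette, and Calinski-Harabasz scores.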
def main():
    xtrain1, xtest1, ytrain1, ytest1, xtrain2, xtest2, ytrain2, ytest2 = load_data()
    km = KMeans(n_clusters=4)
    visualizer = SilhouetteVisualizer(km, colors='yellowbrick')
    visualizer.fit(xtrain1)
    visualizer.show()
    labels = km.fit_predict(xtrain1)  # cluster assignments, not held-out labels
    print(metrics.homogeneity_score(ytrain1, labels))
    score(xtrain2, 20, ytrain2)
    elbowplot(xtrain2, 20, "distortion",
              "K Means Clustering Distortion vs Number of Clusters dat2",
              "figs/kmeans/kmeans_elbow_dat2.png")
    elbowplot(xtrain1, 100, "distortion",
              "K Means Clustering Distortion vs Number of Clusters dat1",
              "figs/kmeans/kmeans_elbow_dat1.png")
    elbowplot(xtrain2,
              40,
              "silhouette",
              "K Means Clustering Silhouette Score vs Number of Clusters dat2",
              "figs/kmeans/kmeans_silhouette_dat2.png",
              elbow=False)
    elbowplot(xtrain1,
              100,
              "silhouette",
              "K Means Clustering Silhouette Score vs Number of Clusters dat1",
              "figs/kmeans/kmeans_silhouette_dat1.png",
              elbow=False)
    elbowplot(
        xtrain2,
        20,
        "calinski_harabasz",
        "K Means Clustering Calinski Harabasz Score vs Number of Clusters dat2",
        "figs/kmeans/kmeans_calinski_dat2.png",
        elbow=False)
    elbowplot(
        xtrain1,
        100,
        "calinski_harabasz",
        "K Means Clustering Calinski Harabasz Score vs Number of Clusters dat1",
        "figs/kmeans/kmeans_calinski_dat1.png",
        elbow=False)
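The metric names ("distortion", "silhouette", "calinski_harabasz") and the elbow= flag line up with Yellowbrick's KElbowVisualizer, and the code already uses Yellowbrick for silhouettes, so elbowplot is plausibly a thin wrapper like this; the k range is an assumption:

from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer


def elbowplot(x, kmax, metric, title, path, elbow=True):
    viz = KElbowVisualizer(KMeans(), k=(2, kmax), metric=metric,
                           locate_elbow=elbow, title=title)
    viz.fit(x)
    viz.show(outpath=path)  # save the figure instead of displaying it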
Example #6
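# Clustering as features for a neural net: grid-search the number of k-means
# clusters / GMM components in pipelines feeding a fixed MLP, writing the
# cross-validation results to CSV, then run the kmeanstack feature-stacking
# experiment (Example #8).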
def cluster_nn():
    out = "csv output/"
    xtrain1, xtest1, ytrain1, ytest1, xtrain2, xtest2, ytrain2, ytest2 = load_data(
        test_size=0.05)
    nn2 = MLPClassifier(activation='relu',
                        alpha=0.001,
                        hidden_layer_sizes=(140, ),
                        learning_rate_init=0.0033333366666666664)
    km = KMeans()
    pipe = Pipeline(steps=[('km', km), ('neuralnet', nn2)])
    grid = {'km__n_clusters': np.arange(1, 20, 1)}
    gs = GridSearchCV(pipe, grid, return_train_score=True, verbose=10, cv=5)
    gs.fit(xtrain2, ytrain2)
    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv(out + 'kmnndat2.csv')

    nn2 = MLPClassifier(activation='relu',
                        alpha=0.001,
                        hidden_layer_sizes=(140, ),
                        learning_rate_init=0.0033333366666666664)
    em = myGMM()
    pipe = Pipeline(steps=[('em', em), ('neuralnet', nn2)])
    grid = {'em__n_components': np.arange(1, 20, 1)}
    gs = GridSearchCV(pipe, grid, return_train_score=True, verbose=10, cv=5)
    gs.fit(xtrain2, ytrain2)
    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv(out + 'emnndat2.csv')

    nn2 = MLPClassifier(activation='relu',
                        alpha=0.001,
                        hidden_layer_sizes=(140, ),
                        learning_rate_init=0.0033333366666666664)
    em = myGMMstack()
    pipe = Pipeline(steps=[('em', em), ('neuralnet', nn2)])
    grid = {'em__n_components': np.arange(1, 20, 1)}
    gs = GridSearchCV(pipe, grid, return_train_score=True, verbose=10, cv=5)
    gs.fit(xtrain2, ytrain2)
    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv(out + 'emstacknndat2.csv')

    kmeanstack()
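myGMM and myGMMstack are custom classes: scikit-learn's GaussianMixture has no transform(), so it cannot sit mid-Pipeline as written. A minimal sketch with the behavior their names suggest, posterior probabilities as features, optionally stacked onto the input; the internals are assumptions:

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.mixture import GaussianMixture


class myGMM(BaseEstimator, TransformerMixin):
    """Pipeline-friendly GMM: transform() emits cluster posteriors as features."""

    def __init__(self, n_components=1):
        self.n_components = n_components

    def fit(self, X, y=None):
        self.gmm_ = GaussianMixture(n_components=self.n_components).fit(X)
        return self

    def transform(self, X):
        return self.gmm_.predict_proba(X)


class myGMMstack(myGMM):
    """Same, but appends the posteriors to the original features."""

    def transform(self, X):
        return np.hstack((X, self.gmm_.predict_proba(X)))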
Example #7
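# Randomized projections: reconstruction-error and silhouette plots, then
# cluster the Gaussian-random-projected data.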
def main():
    # load normalized data
    xtrain1, xtest1, ytrain1, ytest1, xtrain2, xtest2, ytrain2, ytest2 = load_data()
    # dat1: 36, dat2: 3
    rec_err_plot(xtrain2, 18, "Random Projection Reconstruction Error dat2",
                 "figs/rca/recondat2.png")
    rec_err_plot(xtrain1, 54, "Random Projection Reconstruction Error dat1",
                 "figs/rca/recondat1.png")
    sil_plots(xtrain2, 18, 4, "dat2")
    sil_plots(xtrain1, 54, 44, "dat1")
    """0.03288655685198534
    0.017619964663429535
    0.007523731695415414
    0.005271076420350498"""
    transformer = GaussianRandomProjection(n_components=14)
    data = transformer.fit_transform(xtrain2)
    vary_k(xtrain2, data, 20, ytrain2, "dat2")
    transformer = GaussianRandomProjection(n_components=44)
    data = transformer.fit_transform(xtrain1)
    vary_k(xtrain1, data, 50, ytrain1, "dat1")
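For random projections, rec_err_plot presumably reconstructs the data through a pseudo-inverse of the projection matrix (GaussianRandomProjection gained a built-in inverse_transform only in recent scikit-learn releases). A sketch of that computation; the error metric and plotting details are assumptions:

import matplotlib.pyplot as plt
import numpy as np
from sklearn.random_projection import GaussianRandomProjection


def rec_err_plot(x, kmax, title, path):
    ks = list(range(1, kmax + 1))
    errs = []
    for k in ks:
        rp = GaussianRandomProjection(n_components=k)
        z = rp.fit_transform(x)
        # approximate inverse via the pseudo-inverse of the projection matrix
        x_hat = z @ np.linalg.pinv(rp.components_).T
        errs.append(np.mean((x - x_hat) ** 2))
    plt.plot(ks, errs)
    plt.title(title)
    plt.xlabel("n_components")
    plt.ylabel("mean squared reconstruction error")
    plt.savefig(path)
    plt.clf()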
Example #8
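# Stacks k-means cluster-distance features onto the raw features and records
# MLP test accuracy and fit time over 5 resampled splits.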
def kmeanstack():
    frame = np.ones((5, 19))  # test accuracy: one row per trial, one column per k = 1..19
    times = np.ones((5, 19))  # NN fit time, same layout
    for j in range(5):
        xtrain1, xtest1, ytrain1, ytest1, xtrain2, xtest2, ytrain2, ytest2 = load_data(
            test_size=0.2)
        for i in range(1, 20, 1):
            km = KMeans(n_clusters=i)
            xnew = np.hstack((xtrain2, km.fit_transform(xtrain2)))
            xtestnew = np.hstack((xtest2, km.transform(xtest2)))
            nn2 = MLPClassifier(activation='relu',
                                alpha=0.001,
                                hidden_layer_sizes=(140, ),
                                learning_rate_init=0.0033333366666666664)
            start = time.time()
            nn2.fit(xnew, ytrain2)
            fittime = time.time() - start
            times[j][i - 1] = fittime
            frame[j][i - 1] = nn2.score(xtestnew, ytest2)
    np.savetxt("nnkmstack.csv", frame, delimiter=",")
    np.savetxt("nnkmstacktimes.csv", times, delimiter=",")
Example #9
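# Smoke test: store one record via save_data, then print everything that
# load_data returns.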
import datetime
from app import save_data, load_data

save_data("新宿", "渋谷", "テスト", datetime.datetime(2020, 10, 31, 0))

print(load_data())
Example #10
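# unittest fixture: reload the app module and cache its documents,
# directories, and command table (imports and the TestCase class not shown).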
def setUp(self):
    importlib.reload(app)
    documents, directories = app.load_data()
    self.docs = documents
    self.dirs = directories
    self.commands = app.commands
Example #11
import maps
import unidecode as und
import csv
import multicampi
import pandas as pd
import app


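# Maps each (possibly misspelled) survey city name onto a known locality and
# writes the normalized CSV.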
def obter_nome_correto(nome_errado, localidades):
    # return the known locality whose name appears inside the misspelled input
    for localidade in localidades.keys():
        if localidade in und.unidecode(nome_errado.lower()):
            return localidade
    print(nome_errado)  # log any name that could not be matched
    return nome_errado


localidades = maps.load_cities_coordinates('data/localidades.csv')
df = app.load_data('data/dados_pesquisa.csv', 0)
df['cidade'] = df['cidade'].apply(lambda c: obter_nome_correto(c, localidades))

df.to_csv('cidades_normalizadas3.csv', sep=';')
Example #12
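# Dimensionality reduction in front of a neural net: grid-search n_components
# for PCA, ICA, random projection, and LDA in pipelines around the same MLP;
# the first block (an empty grid) is the no-reduction baseline.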
def reduction_nn():
    out = "csv output/"
    xtrain1, xtest1, ytrain1, ytest1, xtrain2, xtest2, ytrain2, ytest2 = load_data(
        test_size=0.05)
    nn2 = MLPClassifier(activation='relu',
                        alpha=0.001,
                        hidden_layer_sizes=(140, ),
                        learning_rate_init=0.0033333366666666664)
    gs = GridSearchCV(nn2, {}, return_train_score=True, verbose=10, cv=5)
    gs.fit(xtrain2, ytrain2)
    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv(out + 'dat2.csv')

    nn2 = MLPClassifier(activation='relu',
                        alpha=0.001,
                        hidden_layer_sizes=(140, ),
                        learning_rate_init=0.0033333366666666664)
    pca = PCA()
    pipe = Pipeline(steps=[('pca', pca), ('neuralnet', nn2)])
    grid = {'pca__n_components': np.arange(1, 19, 1)}
    gs = GridSearchCV(pipe, grid, return_train_score=True, verbose=2, cv=5)
    gs.fit(xtrain2, ytrain2)
    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv(out + 'pcadat2.csv')

    nn2 = MLPClassifier(activation='relu',
                        alpha=0.001,
                        hidden_layer_sizes=(140, ),
                        learning_rate_init=0.0033333366666666664)
    ica = FastICA()
    pipe = Pipeline(steps=[('ica', ica), ('neuralnet', nn2)])
    grid = {'ica__n_components': np.arange(1, 19, 1)}
    gs = GridSearchCV(pipe, grid, return_train_score=True, verbose=10, cv=5)
    gs.fit(xtrain2, ytrain2)
    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv(out + 'icadat2.csv')

    nn2 = MLPClassifier(activation='relu',
                        alpha=0.001,
                        hidden_layer_sizes=(140, ),
                        learning_rate_init=0.0033333366666666664)
    rca = GaussianRandomProjection()
    pipe = Pipeline(steps=[('rca', rca), ('neuralnet', nn2)])
    grid = {'rca__n_components': np.arange(1, 19, 1)}
    gs = GridSearchCV(pipe, grid, return_train_score=True, verbose=10, cv=5)
    gs.fit(xtrain2, ytrain2)
    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv(out + 'rcadat2.csv')

    nn2 = MLPClassifier(activation='relu',
                        alpha=0.001,
                        hidden_layer_sizes=(140, ),
                        learning_rate_init=0.0033333366666666664)

    lda = LinearDiscriminantAnalysis()
    pipe = Pipeline(steps=[('lda', lda), ('neuralnet', nn2)])
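    # LDA yields at most n_classes - 1 components, hence the small grid below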
    grid = {'lda__n_components': np.arange(1, 4, 1)}
    gs = GridSearchCV(pipe, grid, return_train_score=True, verbose=10, cv=5)
    gs.fit(xtrain2, ytrain2)
    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv(out + 'ldadat2.csv')