Ejemplo n.º 1
0
def mlp_titanic():
    """Run ``model1``, ``model2`` and ``model3`` on the Titanic data.

    The train and test splits are loaded with ``complete=False`` and handed,
    together with one shared dictionary of hyper-parameters, to each of the
    three model functions in turn.

    Results noted by the original author for an earlier parameter set::

        constant_params = {
            'random_state': 0,
            'solver': 'lbfgs',  # performed best of all
            'early_stopping': False,  # whether gradient descent may stop early
            'learning_rate': 'invscaling',
            'max_iter': 1000,  # maximum number of gradient-descent iterations
        }
        MLP - model1: acc = 0.8731117824773413 %, tested 331 total.
        MLP - model2: acc = 0.9063444108761329 %, tested 331 total.
        MLP - model3: acc = 0.9274924471299094 %, tested 331 total.
    """
    use_complete = False
    dataset = Titanic('../data/titanic/')
    train = dataset.load_train(use_complete)
    test = dataset.load_test(use_complete)

    # Hyper-parameters common to all three models.
    constant_params = dict(
        random_state=0,
        solver='lbfgs',
        early_stopping=False,
        learning_rate='invscaling',
        learning_rate_init=0.001,
        max_iter=1000,
        activation='relu',
    )

    model1(train, test, constant_params)
    model2(train, test, constant_params)
    model3(train, test, constant_params)
Ejemplo n.º 2
0
def main():
    """Load the Titanic splits and run the ``nb`` and ``regressions`` experiments.

    Both splits are loaded with ``complete=True`` and passed unchanged to
    ``nb`` and then to ``regressions``.
    """
    use_complete = True
    dataset = Titanic('../data/titanic/')
    train = dataset.load_train(use_complete)
    test = dataset.load_test(use_complete)

    nb(train, test)
    regressions(train, test)
def titanic():
    """Show a 2-D t-SNE embedding of the Titanic training data.

    Samples whose target equals 1 are drawn as green dots labelled
    "Survived"; samples whose target equals 0 are drawn as red crosses
    labelled "Died". The figure is displayed with ``plt.show()``.
    """
    train_set = Titanic('../../data/titanic/').load_train(complete=False)
    X = train_set.data
    y = train_set.target

    # Boolean masks selecting each outcome group.
    survived_mask = y == 1
    died_mask = y == 0

    perplexity = 35
    embedder = TSNE(n_components=2, perplexity=perplexity, random_state=0,
                    verbose=1)
    X_emb = embedder.fit_transform(X)

    plt.figure()
    plt.title("t-SNE for Titanic. perplexity={}, persons={}".format(perplexity, len(y)))

    # (mask, colour, marker, legend label) for each scatter series.
    series = (
        (survived_mask, 'g', '.', 'Survived'),
        (died_mask, 'r', 'x', 'Died'),
    )
    for mask, colour, marker, label in series:
        plt.scatter(X_emb[mask, 0],
                    X_emb[mask, 1],
                    c=colour,
                    marker=marker,
                    label=label)

    plt.legend(loc='best', shadow=False, scatterpoints=1)
    plt.show()
Ejemplo n.º 4
0
def titanic_lda_pca():
    """Compare random forests trained on PCA- and LDA-reduced Titanic data.

    For each reduction the transformer is fitted on the training split only
    and the *same fitted transformer* is then applied to the test split, so
    the classifier is evaluated in the feature space it was trained in.
    Previously the test split was reduced by a second transformer fitted on
    the test data itself (and, for LDA, on the test labels), which put the
    test samples on different axes and leaked the labels being predicted.

    Prints PCA statistics and both accuracies, then shows the PCA and LDA
    plots of the training data.
    """
    complete = False
    titanic = Titanic('../data/titanic/')
    train = titanic.load_train(complete)
    test = titanic.load_test(complete)
    X, y = train.data, train.target

    def _forest_predictions(train_X, test_X):
        # Fit a fresh forest on the reduced training data, predict the test rows.
        forest = RandomForestClassifier(random_state=0, max_depth=4,
                                        min_samples_split=25, min_samples_leaf=10)
        forest.fit(train_X, y)
        return forest.predict(test_X)

    # PCA: learn the projection from the training data, reuse it for the test data.
    train_pca = PCA(n_components=2, random_state=0)
    train_pca_X = train_pca.fit_transform(X)
    test_pca_X = train_pca.transform(test.data)

    # TODO: bokeh https://bokeh.pydata.org/en/latest/docs/gallery.html
    survival_prediction = _forest_predictions(train_pca_X, test_pca_X)

    # Statistics
    print('Titanic PCA stats:')
    print('explained variance ratio (first two components): {}'.format(train_pca.explained_variance_ratio_))
    print('Components_: \n', pd.DataFrame(train_pca.components_, ['PC1', 'PC2']))

    # NOTE: the printed accuracy is the mean of a boolean comparison, i.e. a
    # fraction, even though the message ends with '%'.
    print('RF for PCA data: acc = {}%, tested {} total.'.format((survival_prediction == test.target).mean(),
                                                                 len(survival_prediction)))

    # LDA is supervised: it must only ever see the training labels.
    # NOTE(review): scikit-learn documents n_components <= min(n_classes - 1,
    # n_features); if the target is binary, 2 may be rejected -- confirm.
    train_lda = LinearDiscriminantAnalysis(n_components=2)
    traind_lda_X = train_lda.fit_transform(X, y)
    test_lda_X = train_lda.transform(test.data)

    survival_prediction = _forest_predictions(traind_lda_X, test_lda_X)

    print('RF for LDA data: acc = {}%, tested {} total.'.format((survival_prediction == test.target).mean(),
                                                                 len(survival_prediction)))

    pca_viz(train_pca_X, y)
    lda_viz(traind_lda_X, y)
    plt.show()
Ejemplo n.º 5
0
def main():
    """Fit decision trees for several hyper-parameter sets on the Titanic data.

    Data source: https://www.kaggle.com/c/titanic/data

    For every parameter set a ``DecisionTreeClassifier`` is fitted on the
    training split, its accuracy on the test split is printed as a
    percentage, and the fitted tree is passed to ``dt_viz`` together with an
    output name derived from the 1-based model number.
    """
    use_complete = False
    dataset = Titanic('../data/titanic/')
    train = dataset.load_train(use_complete)
    test = dataset.load_test(use_complete)

    # One entry per model; the empty dict means "library defaults".
    params = [
        dict(),                                                                        # 1
        dict(max_depth=10, min_samples_split=10, min_samples_leaf=50, max_features=2),    # 2
        dict(max_depth=4, min_samples_split=10, min_samples_leaf=50, max_features=3),     # 3
        dict(max_depth=10, min_samples_split=40, min_samples_leaf=10, max_features=2),    # 4
        dict(max_depth=10, min_samples_split=2, min_samples_leaf=1, max_features=3),      # 5
        dict(max_depth=10, min_samples_split=25, min_samples_leaf=10, max_features=None), # 6
        dict(max_depth=10, min_samples_split=25, min_samples_leaf=10, max_features=6),    # 7
    ]

    for model_no, tree_kwargs in enumerate(params, start=1):
        tree = DecisionTreeClassifier(random_state=5, **tree_kwargs)
        tree.fit(train.data, train.target)

        survival_prediction = tree.predict(test.data)
        hit_rate = (survival_prediction == test.target).mean()

        print('DT {}: acc = {}%, tested {} total.'.format(
            model_no,
            np.round(hit_rate, 4) * 100,
            len(survival_prediction)))

        dt_viz(tree, train.feature_names, train.target_names,
               "dt_titanic_{}_model.tmp".format(model_no))
def main():
    """Fit SVM classifiers on the Titanic data and print their test accuracy.

    Only the parameter sets in ``params`` are run. For each one an ``SVC`` is
    fitted on the training split and its accuracy on the test split is
    printed as a percentage, prefixed with the 1-based model number.

    An earlier, wider grid used to be assigned to ``params`` and was then
    immediately overwritten, so it never ran; that dead assignment has been
    removed. For reference it contained: library defaults; the ``rbf`` kernel
    with ``C`` in {1, 3}, ``tol`` in {1e-3, 1e-5} and ``max_iter=-1``; and the
    ``linear`` kernel with ``C`` in {1, 3}, ``tol`` in {1e-3, 1e-5} and
    ``max_iter=10 * 10**6``.
    """
    titanic = Titanic('../data/titanic/')
    train, test = titanic.load_train(False), titanic.load_test(False)

    # The parameter sets that are actually evaluated.
    params = [
        {
            "kernel": "linear",
            "C": 1,
            "max_iter": 10 * 10**1,
            "tol": 0.001
        },
    ]

    for i, p in enumerate(params):
        m = SVC(random_state=0, **p)

        m.fit(train.data, train.target)

        survival_prediction = m.predict(test.data)

        print('SVM {}: acc = {}%, tested {} total.'.format(
            i + 1,
            np.round((survival_prediction == test.target).mean(), 4) * 100,
            len(survival_prediction)))
Ejemplo n.º 7
0
def main():
    """Fit random forests for several hyper-parameter sets on the Titanic data.

    The first run uses the library defaults; the remaining nine combine
    ``max_features`` in (2, 3, 7) with ``n_estimators`` in (13, 5, 2) while
    keeping the other settings fixed. For every run the accuracy on the test
    split is printed as a fraction (the mean of a boolean comparison).
    """
    # Defaults first, then the grid in the same order as it is evaluated:
    # max_features varies slowest, n_estimators fastest.
    params = [{}] + [
        {
            "max_depth": 15,
            "n_estimators": tree_count,
            "min_samples_split": 50,
            "min_samples_leaf": 10,
            "max_features": feature_count,
        }
        for feature_count in (2, 3, 7)
        for tree_count in (13, 5, 2)
    ]

    use_complete = True
    dataset = Titanic('../data/titanic/')
    train = dataset.load_train(use_complete)
    test = dataset.load_test(use_complete)

    for forest_kwargs in params:
        forest = RandomForestClassifier(random_state=0, **forest_kwargs)
        forest.fit(train.data, train.target)

        survival_prediction = forest.predict(test.data)
        hit_rate = (survival_prediction == test.target).mean()

        print('Random Forest: acc = {}%, tested {} total.'.format(
            hit_rate,
            len(survival_prediction)))