Example #1
0
def cnn(filters,
        pooling_size=2,
        epochs=15,
        table_folder="/",
        kernel_size=3,
        input_dim=34,
        batch_size=32,
        nb_filters=34,
        time_from=32,
        time_to=8,
        downsample_ratio=None,
        oversample=None):
    """Train and evaluate a 1D-CNN churn classifier.

    Args:
        filters: Filter spec forwarded to import_and_preprocess_table.
        pooling_size: Pool size for the MaxPooling1D layer.
        epochs: Number of training epochs.
        table_folder: Folder holding the source table.
        kernel_size: Width of the 1D convolution kernel.
        input_dim: Number of features per timestep.
        batch_size: Mini-batch size for fit/evaluate.
        nb_filters: Number of convolution filters.
        time_from: Start of the time window (timesteps = time_from - time_to).
        time_to: End of the time window.
        downsample_ratio: Optional downsampling ratio, passed through to
            preprocessing.
        oversample: Optional oversampling flag, passed through to
            preprocessing.

    Returns:
        [score, history, churn_number, total_number, y_pred]
    """
    timesteps = time_from - time_to

    X_train, X_test, y_train, y_test, churn_number, total_number, feature_names = import_and_preprocess_table(
        timesteps, time_from, time_to, filters, table_folder, downsample_ratio,
        oversample)

    print("Creating layers...")
    model = Sequential()
    model.add(
        Conv1D(nb_filters,
               kernel_size=kernel_size,
               input_shape=(timesteps, input_dim),
               activation='relu'))
    model.add(MaxPooling1D(pooling_size))
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))

    print("Compiling model...")
    # NOTE(review): MSE with a sigmoid output trains, but
    # 'binary_crossentropy' is the conventional loss for binary
    # classification -- kept as-is to preserve behavior.
    model.compile(loss='mean_squared_error',
                  optimizer='rmsprop',
                  metrics=[
                      'accuracy',
                      keras_metrics.precision(),
                      keras_metrics.recall(),
                      keras_metrics.f1_score()
                  ])
    print("Fitting model...")
    print(model.summary())

    # Early stopping is deliberately disabled; to re-enable, pass
    # callbacks=[EarlyStopping(monitor='loss', patience=5)] to fit().
    history = model.fit(X_train,
                        y_train,
                        validation_data=(X_test, y_test),
                        batch_size=batch_size,
                        epochs=epochs)
    score = model.evaluate(X_test, y_test, batch_size=batch_size)
    y_pred = model.predict(X_test)

    log_to_csv("cnn", score, history, filters,
               table_folder, input_dim, batch_size, time_from, time_to,
               model.to_json(), nb_filters, kernel_size)

    return [score, history, churn_number, total_number, y_pred]
Example #2
0
def svm_run(filters,
            c_range=1.0,
            kernel_type='rbf',
            gamma='auto',
            train_sizes=(15, 100, 300, 500, 800),
            table_folder="/",
            save_file=None,
            time_from=32,
            time_to=8,
            downsample_ratio=None,
            oversample=None):
    """Train and evaluate an SVM churn classifier, with a training curve.

    Args:
        filters: Filter spec forwarded to import_and_preprocess_table.
        c_range: SVC regularization parameter C.
        kernel_type: SVC kernel ('rbf', 'linear', ...).
        gamma: SVC kernel coefficient.
        train_sizes: Training-set sizes used for the training curve.
            (An immutable tuple default replaces the original mutable
            list default.)
        table_folder: Folder holding the source table.
        save_file: Unused here; kept for interface compatibility.
        time_from: Start of the time window (timesteps = time_from - time_to).
        time_to: End of the time window.
        downsample_ratio: Optional downsampling ratio for preprocessing.
        oversample: Optional oversampling flag for preprocessing.

    Returns:
        [y_pred, y_test, feature_importances, scores, train_sizes,
         train_scores, validation_scores, churn_number, total_number,
         feature_names]
        where scores = [accuracy, precision, recall, hinge_loss, f1].
    """
    timesteps = time_from - time_to

    X_train, X_test, y_train, y_test, churn_number, total_number, feature_names = import_and_preprocess_table(
        timesteps, time_from, time_to, filters, table_folder, downsample_ratio,
        oversample)

    # SVC expects 2D input: flatten each (timesteps, features) sample.
    X_train = list(map(lambda x: x.flatten(), X_train))
    X_test = list(map(lambda x: x.flatten(), X_test))

    clf = svm.SVC(kernel=kernel_type, gamma=gamma, C=c_range)

    train_sizes, train_scores, validation_scores = training_curve(
        clf,
        X_train,
        y_train,
        X_test,
        y_test,
        train_sizes=list(train_sizes),
        shuffle=True,
        scoring='precision',
        train_last=True)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    # Feature weights are only defined for the linear kernel.
    if kernel_type == 'linear':
        feature_importances = clf.coef_.flatten()
    else:
        feature_importances = []
    scores = [
        accuracy_score(y_test, y_pred),
        precision_score(y_test, y_pred),
        recall_score(y_test, y_pred),
        # Fix: hinge_loss requires decision-function margins, not the
        # hard 0/1 labels the original passed.
        hinge_loss(y_test, clf.decision_function(X_test)),
        f1_score(y_test, y_pred)
    ]

    print(y_pred)
    return [
        y_pred, y_test, feature_importances, scores, train_sizes, train_scores,
        validation_scores, churn_number, total_number, feature_names
    ]
Example #3
0
def lstm5(filters,
          epochs=15,
          table_folder="/",
          save_file=None,
          input_dim=34,
          batch_size=32,
          time_from=32,
          time_to=8,
          downsample_ratio=None,
          oversample=None):
    """Train and evaluate a stacked 3-layer LSTM churn classifier.

    Args:
        filters: Filter spec forwarded to import_and_preprocess_table.
        epochs: Number of training epochs.
        table_folder: Folder holding the source table.
        save_file: Unused here; kept for interface compatibility.
        input_dim: Number of features per timestep (also used as the
            LSTM hidden size, as in the original).
        batch_size: Mini-batch size for fit/evaluate.
        time_from: Start of the time window (timesteps = time_from - time_to).
        time_to: End of the time window.
        downsample_ratio: Optional downsampling ratio for preprocessing.
        oversample: Optional oversampling flag for preprocessing.

    Returns:
        [score, history, churn_number, total_number, y_pred]
    """
    timesteps = time_from - time_to

    X_train, X_test, y_train, y_test, churn_number, total_number, feature_names = import_and_preprocess_table(
        timesteps, time_from, time_to, filters, table_folder, downsample_ratio,
        oversample)

    print("Creating layers...")

    model = Sequential()
    # Fix: the first layer hard-coded 34 twice, ignoring the input_dim
    # parameter; use input_dim so non-default feature counts work.
    model.add(
        LSTM(input_dim,
             input_length=timesteps,
             input_dim=input_dim,
             return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(input_dim, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(input_dim))
    model.add(Dropout(0.2))
    model.add(Dense(1, activation='sigmoid'))
    print("Compiling model...")
    # NOTE(review): MSE with a sigmoid output trains, but
    # 'binary_crossentropy' is the conventional loss for binary
    # classification -- kept as-is to preserve behavior.
    model.compile(loss='mean_squared_error',
                  optimizer='rmsprop',
                  metrics=[
                      'accuracy',
                      keras_metrics.precision(),
                      keras_metrics.recall(),
                      keras_metrics.f1_score()
                  ])
    print("Fitting model...")
    print(model.summary())

    # Early stopping is deliberately disabled; to re-enable, pass
    # callbacks=[EarlyStopping(monitor='val_loss', patience=5)] to fit().
    history = model.fit(X_train,
                        y_train,
                        validation_data=(X_test, y_test),
                        batch_size=batch_size,
                        epochs=epochs)
    score = model.evaluate(X_test, y_test, batch_size=batch_size)
    y_pred = model.predict(X_test)

    log_to_csv("lstm", score, history, filters, table_folder, input_dim,
               batch_size, time_from, time_to, model.to_json())

    return [score, history, churn_number, total_number, y_pred]
Example #4
0
def optimize_svm_hyperparameters(filters,
                                 train_sizes=(15, 100, 300, 500, 800),
                                 table_folder="/",
                                 save_file=None,
                                 time_from=32,
                                 time_to=8,
                                 downsample_ratio=None,
                                 oversample=None):
    """Grid-search SVM hyperparameters and report the best result.

    Args:
        filters: Filter spec forwarded to import_and_preprocess_table.
        train_sizes: Unused here; kept for interface compatibility.
            (An immutable tuple default replaces the original mutable
            list default.)
        table_folder: Folder holding the source table.
        save_file: Unused here; kept for interface compatibility.
        time_from: Start of the time window (timesteps = time_from - time_to).
        time_to: End of the time window.
        downsample_ratio: Optional downsampling ratio for preprocessing.
        oversample: Optional oversampling flag for preprocessing.

    Returns:
        The grid-search result (previously only printed; returning it is
        backward-compatible since callers got None before).
    """
    timesteps = time_from - time_to

    X_train, X_test, y_train, y_test, churn_number, total_number, feature_names = import_and_preprocess_table(
        timesteps, time_from, time_to, filters, table_folder, downsample_ratio,
        oversample)

    # SVC expects 2D input: flatten each (timesteps, features) sample.
    X_train = list(map(lambda x: x.flatten(), X_train))
    X_test = list(map(lambda x: x.flatten(), X_test))

    clf = svm.SVC()

    # NOTE(review): c_ranges, gammas and kernels are not defined in this
    # function -- presumably module-level search grids; verify they exist.
    grid_result = grid_search_svm(clf, c_ranges, gammas, kernels, X_train,
                                  y_train, X_test, y_test)
    print(grid_result)
    return grid_result
Example #5
0
def optimize_rf_hyperparameters(filters,
                                train_sizes=(15, 100, 300, 500, 800),
                                table_folder="/",
                                save_file=None,
                                time_from=32,
                                time_to=8,
                                downsample_ratio=None,
                                oversample=None):
    """Grid-search random-forest hyperparameters and report the best result.

    Args:
        filters: Filter spec forwarded to import_and_preprocess_table.
        train_sizes: Unused here; kept for interface compatibility.
            (An immutable tuple default replaces the original mutable
            list default.)
        table_folder: Folder holding the source table.
        save_file: Unused here; kept for interface compatibility.
        time_from: Start of the time window (timesteps = time_from - time_to).
        time_to: End of the time window.
        downsample_ratio: Optional downsampling ratio for preprocessing.
        oversample: Optional oversampling flag for preprocessing.

    Returns:
        The grid-search result (previously only printed; returning it is
        backward-compatible since callers got None before).
    """
    timesteps = time_from - time_to

    X_train, X_test, y_train, y_test, churn_number, total_number, feature_names = import_and_preprocess_table(
        timesteps, time_from, time_to, filters, table_folder, downsample_ratio,
        oversample)

    # The forest expects 2D input: flatten each (timesteps, features) sample.
    X_train = list(map(lambda x: x.flatten(), X_train))
    X_test = list(map(lambda x: x.flatten(), X_test))

    clf = RandomForestClassifier()

    # NOTE(review): n_estimators, max_depth, min_samples_splits,
    # min_samples_leafs and max_featuress are not defined in this function --
    # presumably module-level search grids; verify they exist.
    grid_result = grid_search_rf(clf, n_estimators, max_depth,
                                 min_samples_splits, min_samples_leafs,
                                 max_featuress, X_train, y_train, X_test,
                                 y_test)
    print(grid_result)
    return grid_result
Example #6
0
def rf_run(filters,
           n_estimators=10,
           max_depth=None,
           min_samples_split=2,
           min_samples_leaf=1,
           max_features="auto",
           train_sizes=(15, 100, 300, 500, 800),
           epochs=15,
           table_folder="/",
           save_file=None,
           input_dim=34,
           batch_size=32,
           time_from=32,
           time_to=8,
           downsample_ratio=None,
           oversample=None):
    """Train and evaluate a random-forest churn classifier, with a
    training curve.

    Args:
        filters: Filter spec forwarded to import_and_preprocess_table.
        n_estimators, max_depth, min_samples_split, min_samples_leaf,
            max_features: RandomForestClassifier hyperparameters.
        train_sizes: Training-set sizes used for the training curve.
            (An immutable tuple default replaces the original mutable
            list default.)
        epochs, input_dim, batch_size, save_file: Unused here; kept for
            interface compatibility with the neural-network runners.
        table_folder: Folder holding the source table.
        time_from: Start of the time window (timesteps = time_from - time_to).
        time_to: End of the time window.
        downsample_ratio: Optional downsampling ratio for preprocessing.
        oversample: Optional oversampling flag for preprocessing.

    Returns:
        [y_pred, y_test, feature_importances, scores, train_sizes,
         train_scores, validation_scores, churn_number, total_number,
         feature_names]
        where scores = [accuracy, precision, recall, hinge_loss, f1].
    """
    timesteps = time_from - time_to

    X_train, X_test, y_train, y_test, churn_number, total_number, feature_names = import_and_preprocess_table(
        timesteps, time_from, time_to, filters, table_folder, downsample_ratio,
        oversample)

    # The forest expects 2D input: flatten each (timesteps, features) sample.
    X_train = list(map(lambda x: x.flatten(), X_train))
    X_test = list(map(lambda x: x.flatten(), X_test))

    clf = RandomForestClassifier(n_estimators=n_estimators,
                                 max_depth=max_depth,
                                 min_samples_split=min_samples_split,
                                 min_samples_leaf=min_samples_leaf,
                                 max_features=max_features)

    train_sizes, train_scores, validation_scores = training_curve(
        clf,
        X_train,
        y_train,
        X_test,
        y_test,
        train_sizes=list(train_sizes),
        shuffle=True,
        scoring='precision',
        train_last=True)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    feature_importances = clf.feature_importances_
    scores = [
        accuracy_score(y_test, y_pred),
        precision_score(y_test, y_pred),
        recall_score(y_test, y_pred),
        # NOTE(review): hinge_loss expects decision-function margins;
        # passing hard 0/1 labels makes this value dubious, but forests
        # expose no decision_function, so it is kept to preserve the
        # returned score layout -- consider predict_proba margins.
        hinge_loss(y_test, y_pred),
        f1_score(y_test, y_pred)
    ]

    print(len(y_pred))
    print(len(y_test))
    return [
        y_pred, y_test, feature_importances, scores, train_sizes, train_scores,
        validation_scores, churn_number, total_number, feature_names
    ]
Example #7
0
def lstm(filters,
         epochs=15,
         table_folder="/",
         save_file=None,
         input_dim=34,
         batch_size=32,
         time_from=32,
         time_to=8,
         downsample_ratio=None,
         oversample=None):
    """Train and evaluate a 2-layer LSTM churn classifier with
    stratified k-fold resampling of the training set.

    Args:
        filters: Filter spec forwarded to import_and_preprocess_table.
        epochs: Number of training epochs per fold.
        table_folder: Folder holding the source table.
        save_file: Unused here; kept for interface compatibility.
        input_dim: Number of features per timestep (also the LSTM
            hidden size).
        batch_size: Mini-batch size for fit/evaluate.
        time_from: Start of the time window (timesteps = time_from - time_to).
        time_to: End of the time window.
        downsample_ratio: Optional downsampling ratio for preprocessing.
        oversample: Optional oversampling flag for preprocessing.

    Side effects:
        Logs each fold's score to CSV, writes an accuracy/loss plot to
        file, and pretty-prints the per-fold scores.
    """
    timesteps = time_from - time_to

    # Fix: every other runner in this module unpacks 7 values from
    # import_and_preprocess_table; the original 6-name unpacking here
    # would raise ValueError.
    X_train, X_test, y_train, y_test, churn_number, total_number, feature_names = import_and_preprocess_table(
        timesteps, time_from, time_to, filters, table_folder, downsample_ratio,
        oversample)

    print("Creating layers...")

    kfold = StratifiedKFold(n_splits=5, shuffle=True)
    scores = []
    histories = []
    churn_numbers = []
    total_numbers = []
    history_names = list(range(kfold.get_n_splits()))

    for train, test in kfold.split(np.zeros(len(y_train)), y_train):
        model = Sequential()
        model.add(
            LSTM(input_dim,
                 input_length=timesteps,
                 input_dim=input_dim,
                 return_sequences=True))
        model.add(LSTM(input_dim))
        model.add(Dense(1, activation='sigmoid'))
        print("Compiling model...")
        model.compile(loss='mean_squared_error',
                      optimizer='rmsprop',
                      metrics=['accuracy'])
        print("Fitting model...")
        print(model.summary())

        # NOTE(review): this fits on the *test*-fold indices (the 1/5
        # split), not the train fold -- looks inverted, but is kept
        # as-is to preserve behavior; confirm intent. Early stopping is
        # deliberately disabled (see callbacks comment in sibling runners).
        history = model.fit(X_train[test],
                            y_train[test],
                            validation_data=(X_test, y_test),
                            batch_size=batch_size,
                            epochs=epochs)
        score = model.evaluate(X_test, y_test, batch_size=batch_size)
        scores.append(score)
        histories.append(history)
        churn_numbers.append(churn_number)
        total_numbers.append(total_number)
        log_to_csv("lstm", score, history, filters, table_folder, input_dim,
                   batch_size, time_from, time_to, model.to_json())

    plot_to_file(histories=histories,
                 history_names=history_names,
                 plot_types=["acc", "loss"],
                 algorithm="lstm",
                 save_file="regular")
    pretty_print_scores(scores, history_names, churn_numbers, total_numbers)