Beispiel #1
0
def train_lr():
    """Fit the "ranking" logistic-regression model, save it and predict on test data.

    Builds the parameter dict (a local ``offline_model_dir`` entry overlaid
    with everything in ``params_common``), loads the "train", "vali" and
    "test" splits through ``load_data``, fits the model with the validation
    split, saves the session and finally calls ``predict`` on the test split
    with ``'pred.txt'`` (presumably the output path -- confirm against
    ``LogisticRegression.predict``).

    Returns:
        None.
    """
    # Entries from params_common win when a key is present in both dicts.
    params = {"offline_model_dir": "../weights"}
    params.update(params_common)

    # Load order is kept as train, vali, test.
    train_set = load_data("train")
    valid_set = load_data("vali")
    test_set = load_data("test")

    ranker = LogisticRegression("ranking", params, logger)
    ranker.fit(train_set, validation_data=valid_set)
    ranker.save_session()
    ranker.predict(test_set, 'pred.txt')
Beispiel #2
0
def logistic_accuracy(model: LogisticRegression, X: np.ndarray,
                      targets: np.ndarray):
    """Return the fraction of samples whose rounded prediction equals its target.

    Args:
        model: Object whose ``predict(X)`` returns one value per sample. The
            values are treated as probabilities and rounded to the nearest
            integer to obtain labels.
        X: Inputs, passed unchanged to ``model.predict``.
        targets: Expected labels, one per sample. Any shape; flattened before
            the comparison.

    Returns:
        Number of matching labels divided by the number of targets, as a
        float.

    Raises:
        ValueError: If the number of predictions differs from the number of
            targets.
        ZeroDivisionError: If ``targets`` is empty.
    """
    # NOTE: np.around rounds halves to the nearest even value, so a prediction
    # of exactly 0.5 is labelled 0.
    predictions = np.around(np.asarray(model.predict(X))).reshape(-1)
    targets = np.asarray(targets).reshape(-1)

    if predictions.shape != targets.shape:
        raise ValueError(
            "predictions and targets differ in length: {} != {}".format(
                predictions.shape[0], targets.shape[0]))

    # Count matches inside NumPy; the builtin sum() would walk the array one
    # element at a time in the interpreter.
    correct = int(np.count_nonzero(predictions == targets))
    return correct / len(targets)
Beispiel #3
0
def logistic_loss_function(model: LogisticRegression, X: np.ndarray,
                           targets: np.ndarray):
    """Compute the average binary cross-entropy of the model's predictions.

    Args:
        model: Logistic regression model
        X: array of pca transformed images
        targets: actual labels for the images the predictions are done on

    Returns:
        The average cross entropy loss over all the predictions, i.e. the
        summed loss divided by ``targets.shape[0]``.
    """
    targets = np.asarray(targets)
    predictions = np.asarray(model.predict(X))
    if not np.issubdtype(predictions.dtype, np.floating):
        predictions = predictions.astype(np.float64)

    # Column-vector predictions (n, 1) against flat targets (n,) would
    # broadcast to an (n, n) matrix and silently inflate the loss, so align
    # the shapes whenever the element counts agree.
    if predictions.size == targets.size:
        predictions = predictions.reshape(targets.shape)

    # Keep predictions strictly inside (0, 1): log(0) is -inf and
    # 0 * -inf is nan, which would turn a perfect prediction into a nan loss.
    eps = np.finfo(predictions.dtype).eps
    predictions = np.clip(predictions, eps, 1 - eps)

    # Error when label equal 1
    loss_1 = targets * np.log(predictions)

    # Error when label equal 0
    loss_0 = (1 - targets) * np.log(1 - predictions)

    total_loss = loss_1 + loss_0

    # return the average loss overall
    return -total_loss.sum() / targets.shape[0]
Beispiel #4
0
    ####################################
    # Read and preprocess data from files
    ####################################
    # skipinitialspace=True drops the blanks that follow each delimiter.
    df_train = pd.read_csv(fp_train, skipinitialspace=True)
    df_test = pd.read_csv(fp_test, skipinitialspace=True)

    # Encode the income column as 0/1. The result is assigned back instead of
    # calling replace(..., inplace=True) on the selected column: that chained
    # form mutates a temporary and does not update df_train under pandas
    # Copy-on-Write.
    df_train['income'] = df_train['income'].replace({'<=50K': 0, '>50K': 1})

    Xtrain, ytrain, Xtest = preprocess(df_train, df_test, features)

    ####################################
    # Train the estimator and predict test data
    ####################################
    if estimator == 'logistic':
        regr = LogisticRegression().fit(Xtrain, ytrain)
        # Predictions are rounded to the nearest integer to obtain labels.
        ypred = np.around(regr.predict(Xtest)).astype(int)
    elif estimator == 'generative':
        regr = NaiveBayes().fit(Xtrain, ytrain)
        ypred = regr.predict(Xtest)
    else:
        # Fail here with a clear message; otherwise ypred would be unbound
        # and the code below would raise a NameError.
        raise ValueError("unknown estimator: {!r}".format(estimator))

    ####################################
    # Write the result to file
    ####################################
    df_pred = pd.DataFrame()
    # Ids are 1-based and follow the order of the predictions.
    df_pred['id'] = np.arange(1, len(ypred) + 1)
    df_pred['label'] = ypred
    df_pred.to_csv(fp_ans, index=False)
# Script section: build a worm / no-worm dataset, train a LogisticRegression
# model on it, and report timing and test accuracy.
print('Done')

# Stack both classes into one image array and one label array.
temp_X_train = np.concatenate((worm_images, noworm_images))
y_train = np.concatenate((worm_label, noworm_label))

print('Shuffling images and labels ...')
# presumably shuffles images and labels with the same permutation -- confirm
# against shuffling_files
X_data, y_data = shuffling_files(temp_X_train, y_train)

print('spliting data .....')
# NOTE(review): images and labels are split by two separate calls; this only
# keeps rows aligned if data_split is deterministic -- confirm.
X_train, X_test = data_split(X_data)
y_train, y_test = data_split(y_data)
print('Done')

# presumably rescales 8-bit pixel values into [0, 1] -- TODO confirm input range
X_train = X_train / 255
X_test = X_test / 255

# presumably lr = learning rate, epochs = training passes, lamb = regularisation
# strength; verify against the LogisticRegression class used here
model = LogisticRegression(lr=0.02, epochs=500, lamb=8)
# Wall-clock time spent in fit().
tic1 = time.time()
model.fit(X_train, y_train)
toc1 = time.time()

# Wall-clock time spent in predict().
tic2 = time.time()
y_pred = model.predict(X_test)
toc2 = time.time()
# Take the index of the largest value in each row as the predicted label
# (assumes predict() returns one row of scores per sample -- confirm).
y_pred = np.argmax(y_pred, axis=1)

print('Training Time: {}'.format(toc1 - tic1))
print('Testing Time: {}'.format(toc2 - tic2))
print('acc_test: {}'.format(accuracy_score(y_test, y_pred)))
# NOTE(review): no plotting call is visible in this part of the script; any
# figure shown here must have been created elsewhere.
plt.show()