Example #1
0
    print(train.columns)
    X_train = train[cols_to_use].values
    y_train = train['click'].values
    # X_train = X_train[:100]
    # y_train = y_train[:100]
    testID = test['ID']
    test = test[cols_to_use].values
    print(X_train.shape, y_train.shape)
    print("Saving data.....")
    ls.save_obj(X_train, "X_train")
    ls.save_obj(y_train, "y_train")
    ls.save_obj(testID, "testID")
    ls.save_obj(test, "test")
    exit()
# Training stage: reload the arrays by the same names that the
# ls.save_obj calls above store them under.
print("Training data.....")
X_train = ls.load_obj("X_train")
y_train = ls.load_obj("y_train")
testID = ls.load_obj("testID")
test = ls.load_obj("test")
# Per-class weights for the click labels using scikit-learn's 'balanced'
# heuristic. `classes` and `y` are passed by keyword: scikit-learn made them
# keyword-only (positional use was deprecated in 0.24 and rejected from 1.0),
# and the keyword form is also accepted by older releases.
click_weight = class_weight.compute_class_weight(
    'balanced', classes=np.unique(y_train), y=y_train)


def auc(y_true, y_pred):
    """Return the ROC AUC score of ``y_pred`` measured against ``y_true``.

    Thin wrapper that forwards both arguments unchanged to
    ``metrics.roc_auc_score``.
    """
    score = metrics.roc_auc_score(y_true, y_pred)
    return score


def get_class_weights(y, smooth_factor=0):
    """
     values around 0.1 (smooth factor) are a good default for very imbalanced classes.
    Returns the weights for each class based on the frequencies of the samples
    print(train.columns)
    X_train = train[cols_to_use].values
    y_train = train['click'].values
    # X_train = X_train[:100]
    # y_train = y_train[:100]
    testID = test['ID']
    test = test[cols_to_use].values
    print(X_train.shape, y_train.shape)
    print("Saving data.....")
    ls.save_obj(X_train, "X_train_lstm")
    ls.save_obj(y_train, "y_train_lstm")
    ls.save_obj(testID, "testID_lstm")
    ls.save_obj(test, "test_lstm")
    exit()
# Training stage (LSTM variant): reload the arrays by the same "*_lstm" names
# that the ls.save_obj calls above store them under.
print("Training data.....")
X_train = ls.load_obj("X_train_lstm")
y_train = ls.load_obj("y_train_lstm")
testID = ls.load_obj("testID_lstm")
test = ls.load_obj("test_lstm")
# Distinct values seen in the test set, the training set, and both combined.
testUnique = np.unique(test)
trainUnique = np.unique(X_train)
dataUnique = np.unique(np.concatenate([trainUnique, testUnique]))
print("X_train", X_train.shape)
print("testUnique", testUnique.shape)
print("trainUnique", trainUnique.shape)
print("dataUnique", dataUnique.shape)
# Count of distinct values across train and test.
# NOTE(review): presumably the vocabulary size for a model defined later in
# the file - confirm against its usage.
max_features = dataUnique.shape[0]
# Per-class weights for the click labels using scikit-learn's 'balanced'
# heuristic. `classes` and `y` are passed by keyword: scikit-learn made them
# keyword-only (positional use was deprecated in 0.24 and rejected from 1.0),
# and the keyword form is also accepted by older releases.
click_weight = class_weight.compute_class_weight(
    'balanced', classes=np.unique(y_train), y=y_train)