import numpy as np
from sklearn import metrics
from sklearn.utils import class_weight

# NOTE: `train`, `test`, `cols_to_use` and the pickle helper `ls`
# (exposing save_obj / load_obj) are assumed to be defined/imported earlier.

print(train.columns)

X_train = train[cols_to_use].values
y_train = train['click'].values
# X_train = X_train[:100]
# y_train = y_train[:100]
testID = test['ID']
test = test[cols_to_use].values
print(X_train.shape, y_train.shape)

print("Saving data.....")
ls.save_obj(X_train, "X_train")
ls.save_obj(y_train, "y_train")
ls.save_obj(testID, "testID")
ls.save_obj(test, "test")
exit()  # stop here on the caching run; comment out to continue with training

print("Training data.....")
X_train = ls.load_obj("X_train")
y_train = ls.load_obj("y_train")
testID = ls.load_obj("testID")
test = ls.load_obj("test")

# Balanced per-class weights for the heavily skewed click target
# (keyword arguments are required by newer scikit-learn versions).
click_weight = class_weight.compute_class_weight(
    class_weight='balanced', classes=np.unique(y_train), y=y_train
)


def auc(y_true, y_pred):
    return metrics.roc_auc_score(y_true, y_pred)


def get_class_weights(y, smooth_factor=0):
    """Return the weights for each class based on the frequencies of the samples.

    Values around 0.1 for `smooth_factor` are a good default for very
    imbalanced classes.
    """
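    # The body below is a sketch, not the original implementation (the source
    # is truncated here): a common frequency-based scheme that counts each
    # class, optionally smooths the counts by `smooth_factor`, and weights
    # each class by majority_count / class_count.
    from collections import Counter  # local import to keep the sketch self-contained

    counter = Counter(y)
    if smooth_factor > 0:
        p = max(counter.values()) * smooth_factor
        for k in counter.keys():
            counter[k] += p
    majority = max(counter.values())
    return {cls: float(majority) / count for cls, count in counter.items()}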
# Variant of the preprocessing above: caches under *_lstm names and also
# computes the vocabulary size needed for an embedding-based (LSTM) model.
print(train.columns)

X_train = train[cols_to_use].values
y_train = train['click'].values
# X_train = X_train[:100]
# y_train = y_train[:100]
testID = test['ID']
test = test[cols_to_use].values
print(X_train.shape, y_train.shape)

print("Saving data.....")
ls.save_obj(X_train, "X_train_lstm")
ls.save_obj(y_train, "y_train_lstm")
ls.save_obj(testID, "testID_lstm")
ls.save_obj(test, "test_lstm")
exit()  # stop here on the caching run; comment out to continue with training

print("Training data.....")
X_train = ls.load_obj("X_train_lstm")
y_train = ls.load_obj("y_train_lstm")
testID = ls.load_obj("testID_lstm")
test = ls.load_obj("test_lstm")

# Vocabulary size: number of distinct feature values across train and test.
testUnique = np.unique(test)
trainUnique = np.unique(X_train)
dataUnique = np.unique(np.concatenate([trainUnique, testUnique]))
print("X_train", X_train.shape)
print("testUnique", testUnique.shape)
print("trainUnique", trainUnique.shape)
print("dataUnique", dataUnique.shape)
max_features = dataUnique.shape[0]

click_weight = class_weight.compute_class_weight(
    class_weight='balanced', classes=np.unique(y_train), y=y_train
)
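# A minimal usage sketch, not code from the source: `click_weight` is returned
# in the order of np.unique(y_train), and training APIs such as Keras
# model.fit(class_weight=...) expect it as a {class_label: weight} dict;
# `max_features` would typically become the input dimension of the embedding
# layer suggested by the *_lstm cache names. `click_weight_dict` is a
# hypothetical name introduced here for illustration.
click_weight_dict = dict(zip(np.unique(y_train), click_weight))
print("max_features (embedding vocabulary size):", max_features)
print("class weights:", click_weight_dict)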