class BasePredictions:
    """Hold data, hyper-parameters and a ready-to-fit classifier.

    The constructor stores the feature matrix ``X``, labels ``y``, feature
    combinations ``combs`` and a classifier name ``clf`` (one of
    ``"randomforest"``, ``"xgboost"`` or ``"neural_network"``), then replaces
    ``self.clf`` with the corresponding fitted-ready estimator instance.
    Dictionaries for per-run metrics (ROC, PR, AUC, top features, ...) are
    initialised empty.

    Keyword Args:
        X, y, combs: data inputs (no defaults; may be None).
        clf (str): classifier selector — required.
        top (int): number of top features to keep (default 20).
        n_estimators, class_weight, min_samples_split, min_samples_leaf,
        max_depth, random_state: tree-ensemble hyper-parameters.
        colsample_bytree, learning_rate, objective, scale_pos_weight:
            XGBoost hyper-parameters.
        layers, dropout, nodes, epochs, steps, learning_rate_deep,
        beta_1, beta_2: neural-network hyper-parameters.

    Raises:
        ValueError: if ``clf`` is missing or not a supported name.
    """

    def __init__(self, **kwargs):
        self.X = kwargs.get("X")
        self.y = kwargs.get("y")
        self.combs = kwargs.get("combs")
        self.clf = kwargs.get("clf")
        # Arguments with default values.
        self.top = kwargs.get("top", 20)
        self.mean_fpr = np.linspace(0, 1, 100)
        self.n_estimators = kwargs.get("n_estimators", 500)
        self.class_weight = kwargs.get("class_weight", "balanced")
        self.min_samples_split = kwargs.get("min_samples_split", 3)
        self.min_samples_leaf = kwargs.get("min_samples_leaf", 3)
        self.colsample_bytree = kwargs.get("colsample_bytree", 0.6)
        self.learning_rate = kwargs.get("learning_rate", 0.1)
        self.random_state = kwargs.get("random_state", 125)
        self.max_depth = kwargs.get("max_depth", None)
        self.objective = kwargs.get("objective", 'binary:logistic')
        self.scale_pos_weight = kwargs.get("scale_pos_weight", 1)
        # Neural-network hyper-parameters.
        self.layers = kwargs.get("layers", 1)
        self.dropout = kwargs.get("dropout", 0.1)
        self.nodes = kwargs.get("nodes", 128)
        self.epochs = kwargs.get("epochs", 400)
        self.steps = kwargs.get("steps", 64)
        self.learning_rate_deep = kwargs.get("learning_rate_deep", 0.001)
        self.beta_1 = kwargs.get("beta_1", 0.9)
        self.beta_2 = kwargs.get("beta_2", 0.999)

        # Replace the classifier name with the configured estimator object.
        self._set_classifier()

        # Per-run result containers, filled by downstream prediction code.
        self.predicted = dict()
        self.topfeat = dict()
        self.fpr = dict()
        self.tpr = dict()
        self.tprs = dict()
        self.auc = dict()
        self.precision = dict()
        self.recall = dict()
        self.avprec = dict()

    def _set_classifier(self):
        """Build the estimator named by ``self.clf`` and store it in place.

        Raises:
            ValueError: if ``self.clf`` is None or an unsupported name.
        """
        # Guard: kwargs.get("clf") returns None when omitted, and
        # None.lower() would raise an opaque AttributeError.
        if self.clf is None:
            raise ValueError(
                "only randomforest, xgboost and neural_network are supported")

        if self.clf.lower() == "randomforest":
            self.clf = RandomForestClassifier(
                bootstrap=True,
                class_weight=self.class_weight,
                max_depth=self.max_depth,
                n_estimators=self.n_estimators,
                max_features='sqrt',
                min_samples_split=self.min_samples_split,
                min_samples_leaf=self.min_samples_leaf,
                random_state=self.random_state,
                n_jobs=-1)
        elif self.clf.lower() == "xgboost":
            # XGBoost requires a concrete depth; None means "use 5".
            if self.max_depth is None:
                self.max_depth = 5
            self.clf = xgb.XGBClassifier(
                learning_rate=self.learning_rate,
                colsample_bytree=self.colsample_bytree,
                random_state=self.random_state,
                max_depth=self.max_depth,
                n_estimators=self.n_estimators,
                scale_pos_weight=self.scale_pos_weight,
                objective=self.objective,
                n_jobs=-1)
        elif self.clf.lower() == "neural_network":

            def Neural_network(dropout=0.1, nodes=128, layers=1,
                               learning_rate_deep=0.001,
                               beta_1=0.9, beta_2=0.999):
                # BUGFIX: use the *function parameters* (forwarded by
                # KerasClassifier) instead of closing over self.*; otherwise
                # hyper-parameter search over dropout/nodes/... is a no-op.
                # Defaults now agree with the class-level defaults.
                clf = Sequential()
                for _ in range(layers):
                    clf.add(Dense(nodes, activation="relu"))
                    clf.add(Dropout(dropout))
                # 3-way softmax head paired with a sparse categorical loss.
                clf.add(Dense(3, activation="softmax"))
                opt = keras.optimizers.Adam(
                    learning_rate=learning_rate_deep,
                    beta_1=beta_1,
                    beta_2=beta_2)
                clf.compile(
                    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                    optimizer=opt,
                    metrics=["accuracy"])
                return clf

            self.clf = KerasClassifier(
                Neural_network,
                epochs=self.epochs,
                steps_per_epoch=self.steps,
                dropout=self.dropout,
                nodes=self.nodes,
                layers=self.layers,
                learning_rate_deep=self.learning_rate_deep,
                beta_1=self.beta_1,
                beta_2=self.beta_2)
        else:
            raise ValueError(
                "only randomforest, xgboost and neural_network are supported")