Example #1
import numpy as np
import tensorflow as tf
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
# KerasClassifier below ships with TF < 2.12; on newer stacks use
# scikeras.wrappers.KerasClassifier instead.
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

class BasePredictions:
    def __init__(self, **kwargs):
        self.X = kwargs.get("X")
        self.y = kwargs.get("y")
        self.combs = kwargs.get("combs")
        self.clf = kwargs.get("clf")

        # arguments with default values
        self.top = kwargs.get("top", 20)
        self.mean_fpr = np.linspace(0, 1, 100)  # fixed FPR grid for averaging ROC curves
        self.n_estimators = kwargs.get("n_estimators", 500)
        self.class_weight = kwargs.get("class_weight", "balanced")
        self.min_samples_split = kwargs.get("min_samples_split", 3)
        self.min_samples_leaf = kwargs.get("min_samples_leaf", 3)
        self.colsample_bytree = kwargs.get("colsample_bytree", 0.6)
        self.learning_rate = kwargs.get("learning_rate", 0.1)
        self.random_state = kwargs.get("random_state", 125)
        self.max_depth = kwargs.get("max_depth", None)
        self.objective = kwargs.get("objective", "binary:logistic")
        self.scale_pos_weight = kwargs.get("scale_pos_weight", 1)

        # neural-network hyperparameters
        self.layers = kwargs.get("layers", 1)
        self.dropout = kwargs.get("dropout", 0.1)
        self.nodes = kwargs.get("nodes", 128)
        self.epochs = kwargs.get("epochs", 400)
        self.steps = kwargs.get("steps", 64)
        self.learning_rate_deep = kwargs.get("learning_rate_deep", 0.001)
        self.beta_1 = kwargs.get("beta_1", 0.9)
        self.beta_2 = kwargs.get("beta_2", 0.999)

        self._set_classifier()

        # result containers, populated later during model evaluation
        self.predicted = dict()
        self.topfeat = dict()
        self.fpr = dict()
        self.tpr = dict()
        self.tprs = dict()
        self.auc = dict()
        self.precision = dict()
        self.recall = dict()
        self.avprec = dict()

    def _set_classifier(self):
        if self.clf.lower() == "randomforest":
            self.clf = RandomForestClassifier(
                bootstrap=True,
                class_weight=self.class_weight,
                max_depth=self.max_depth,
                n_estimators=self.n_estimators,
                max_features="sqrt",
                min_samples_split=self.min_samples_split,
                min_samples_leaf=self.min_samples_leaf,
                random_state=self.random_state,
                n_jobs=-1)
        elif self.clf.lower() == "xgboost":
            if self.max_depth is None:
                self.max_depth = 5

            self.clf = xgb.XGBClassifier(
                learning_rate=self.learning_rate,
                colsample_bytree=self.colsample_bytree,
                random_state=self.random_state,
                max_depth=self.max_depth,
                n_estimators=self.n_estimators,
                scale_pos_weight=self.scale_pos_weight,
                objective=self.objective,
                n_jobs=-1)

        elif self.clf.lower() == "neural_network":

            def neural_network(dropout=0.1,
                               nodes=128,
                               layers=1,
                               learning_rate_deep=0.001,
                               beta_1=0.9,
                               beta_2=0.999):
                # Build from the function arguments (not self.*) so that
                # KerasClassifier can actually vary these hyperparameters.
                clf = Sequential()
                for _ in range(layers):
                    # With batch normalization:
                    # clf.add(Dense(nodes, activation="linear", use_bias=False))
                    # clf.add(BatchNormalization())
                    # clf.add(Activation("relu"))
                    # clf.add(Dropout(dropout))

                    # Without batch normalization, LeakyReLU:
                    # clf.add(Dense(nodes))
                    # clf.add(LeakyReLU())

                    # ReLU
                    clf.add(Dense(nodes, activation="relu"))
                    clf.add(Dropout(dropout))
                # output layer is hardcoded for a 3-class problem
                clf.add(Dense(3, activation="softmax"))

                opt = tf.keras.optimizers.Adam(
                    learning_rate=learning_rate_deep,
                    beta_1=beta_1,
                    beta_2=beta_2)
                # SparseCategoricalCrossentropy expects integer labels;
                # use CategoricalCrossentropy for one-hot targets instead.
                clf.compile(
                    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                    optimizer=opt,
                    metrics=["accuracy"])
                return clf

            self.clf = KerasClassifier(
                neural_network,
                epochs=self.epochs,
                steps_per_epoch=self.steps,
                dropout=self.dropout,
                nodes=self.nodes,
                layers=self.layers,
                learning_rate_deep=self.learning_rate_deep,
                beta_1=self.beta_1,
                beta_2=self.beta_2)  # class_weight=self.class_weight

        else:
            raise ValueError(
                f"unknown classifier {self.clf!r}: only randomforest, "
                "xgboost and neural_network are supported")