def cls_create(xs, ys):
    """Fit the classifier selected by ``algo`` and derive a decision threshold.

    ``algo`` and ``self`` are not parameters: they are resolved from the
    enclosing scope, which is outside this snippet.

    Args:
        xs: Training samples, passed unchanged to ``fit`` and
            ``predict_proba``.
        ys: Training labels, passed to ``fit`` and to
            ``best_threshold_for_f1``.

    Returns:
        The fitted classifier.

    Raises:
        ValueError: If ``algo`` is neither ``"SVM"`` nor ``"RF"``.

    Side effects:
        Stores the three values returned by ``best_threshold_for_f1`` in
        ``self.threshold``, ``self.positive`` and ``self.negative``.
    """
    if algo == "SVM":
        # presumably scikit-learn's SVC, where probability=True is what
        # makes predict_proba available -- confirm against the imports.
        classifier = svm.SVC(C=self.parm, probability=True)
    elif algo == "RF":
        classifier = RandomForestClassifier(
            n_estimators=int(self.parm), criterion='entropy', n_jobs=1)
    else:
        # Fail here with a clear message instead of reaching ``fit`` with
        # ``classifier`` unbound.
        raise ValueError("Unknown algorithm: %s" % (algo,))

    classifier.fit(xs, ys)
    # The probabilities are computed on the same samples used for fitting.
    probs = classifier.predict_proba(xs)

    # NOTE(review): the meaning of the literal 20 is defined by
    # best_threshold_for_f1, which is not visible here.
    self.threshold, self.positive, self.negative = best_threshold_for_f1(
        probs, 20, ys)
    return classifier
Esempio n. 2
0
 def cls_create(xs, ys):
     """Fit the classifier selected by ``algo`` and derive a decision threshold.

     ``algo`` and ``self`` are not parameters: they are resolved from the
     enclosing scope, which is outside this snippet.

     Args:
         xs: Training samples, passed unchanged to ``fit`` and
             ``predict_proba``.
         ys: Training labels, passed to ``fit`` and to
             ``best_threshold_for_f1``.

     Returns:
         The fitted classifier.

     Raises:
         ValueError: If ``algo`` is neither ``"SVM"`` nor ``"RF"``.

     Side effects:
         Stores the three values returned by ``best_threshold_for_f1`` in
         ``self.threshold``, ``self.positive`` and ``self.negative``.
     """
     if algo == "SVM":
         # presumably scikit-learn's SVC, where probability=True is what
         # makes predict_proba available -- confirm against the imports.
         classifier = svm.SVC(C=self.parm, probability=True)
     elif algo == "RF":
         classifier = RandomForestClassifier(
             n_estimators=int(self.parm), criterion='entropy', n_jobs=1)
     else:
         # Fail here with a clear message instead of reaching ``fit`` with
         # ``classifier`` unbound.
         raise ValueError("Unknown algorithm: %s" % (algo,))

     classifier.fit(xs, ys)
     # The probabilities are computed on the same samples used for fitting.
     probs = classifier.predict_proba(xs)

     # NOTE(review): the meaning of the literal 20 is defined by
     # best_threshold_for_f1, which is not visible here.
     self.threshold, self.positive, self.negative = best_threshold_for_f1(
         probs, 20, ys)
     return classifier
        def cls_create(xs, ys):
            """Train a random forest on over-sampled data and set the threshold.

            ``self`` is resolved from the enclosing scope, which is outside
            this snippet. The forest is fitted on the output of
            ``over_sample`` but scored on the original ``xs``.

            Args:
                xs: Original training samples.
                ys: Original training labels.

            Returns:
                The fitted ``RandomForestClassifier``.

            Side effects:
                Stores the three values returned by ``best_threshold_for_f1``
                in ``self.threshold``, ``self.positive`` and ``self.negative``.
            """
            forest_options = dict(
                n_estimators=self.num_trees,
                criterion=self.criterion,
                n_jobs=1,
            )
            forest = RandomForestClassifier(**forest_options)

            # presumably over_sample rebalances the classes -- helper not
            # visible here, confirm.
            sampled_xs, sampled_ys = over_sample(xs, ys)
            forest.fit(sampled_xs, sampled_ys)

            # Score the original samples, not the over-sampled ones.
            scores = forest.predict_proba(xs)
            threshold, positive, negative = best_threshold_for_f1(scores, 20, ys)
            self.threshold = threshold
            self.positive = positive
            self.negative = negative

            return forest
        def cls_create(xs, ys):
            """Fit a random forest on over-sampled data; tune on the originals.

            ``self`` comes from the enclosing scope (not visible here).

            Args:
                xs: Original training samples; also used for ``predict_proba``.
                ys: Original training labels.

            Returns:
                The fitted ``RandomForestClassifier``.

            Side effects:
                Assigns ``self.threshold``, ``self.positive`` and
                ``self.negative`` from ``best_threshold_for_f1``.
            """
            tree_count = self.num_trees
            split_criterion = self.criterion
            model = RandomForestClassifier(
                n_estimators=tree_count, criterion=split_criterion, n_jobs=1)

            # Fit on whatever over_sample returns for (xs, ys).
            resampled = over_sample(xs, ys)
            train_xs, train_ys = resampled
            model.fit(train_xs, train_ys)

            # Probabilities are taken on the original xs on purpose.
            original_probs = model.predict_proba(xs)
            tuned = best_threshold_for_f1(original_probs, 20, ys)
            self.threshold, self.positive, self.negative = tuned

            return model
 def cls_create(xs, ys):
     """Fit the classifier selected by ``algo`` on over-sampled data.

     ``algo`` and ``self`` are not parameters: they are resolved from the
     enclosing scope, which is outside this snippet. The classifier is
     fitted on the output of ``over_sample`` but its probabilities are
     computed on the original ``xs``.

     Args:
         xs: Original training samples.
         ys: Original training labels.

     Returns:
         The fitted classifier.

     Raises:
         ValueError: If ``algo`` is neither ``"SVM"`` nor ``"RF"``.

     Side effects:
         Stores the three values returned by ``best_threshold_for_f1`` in
         ``self.threshold``, ``self.positive`` and ``self.negative``.
     """
     if algo == "SVM":
         # presumably scikit-learn's SVC, where probability=True is what
         # makes predict_proba available -- confirm against the imports.
         classifier = svm.SVC(C=self.parm, probability=True)
     elif algo == "RF":
         classifier = RandomForestClassifier(
             n_estimators=int(self.parm), criterion='entropy', n_jobs=1)
     else:
         # Fail here with a clear message instead of reaching ``fit`` with
         # ``classifier`` unbound.
         raise ValueError("Unknown algorithm: %s" % (algo,))

     # presumably over_sample rebalances the classes -- helper not visible
     # here, confirm.
     new_xs, new_ys = over_sample(xs, ys)
     classifier.fit(new_xs, new_ys)

     # Score the original samples, not the over-sampled ones.
     probs = classifier.predict_proba(xs)

     # NOTE(review): the meaning of the literal 5 is defined by
     # best_threshold_for_f1, which is not visible here.
     self.threshold, self.positive, self.negative = best_threshold_for_f1(
         probs, 5, ys)
     return classifier
        def cls_create(xs, ys):
            """Build, fit and threshold-tune the classifier named by ``algo``.

            ``algo`` and ``self`` come from the enclosing scope (not visible
            here). The model is fitted on the output of ``over_sample`` and
            its probabilities are computed on the original ``xs``.

            Args:
                xs: Original training samples.
                ys: Original training labels.

            Returns:
                The fitted classifier.

            Raises:
                Exception: If ``algo`` is not ``"SVM"``, ``"RF"`` or
                    ``"LogisticRegression"``.

            Side effects:
                Assigns ``self.threshold``, ``self.positive`` and
                ``self.negative`` from ``best_threshold_for_f1``.
            """

            def _new_model():
                # Return an unfitted estimator for the configured algorithm.
                if algo == "SVM":
                    return svm.SVC(C=self.parm, probability=True)
                if algo == "RF":
                    return RandomForestClassifier(
                        n_estimators=int(self.parm),
                        criterion='entropy',
                        n_jobs=1,
                    )
                if algo == "LogisticRegression":
                    return LogisticRegression()
                raise Exception("Unknown algorithm: " + algo)

            model = _new_model()

            balanced_xs, balanced_ys = over_sample(xs, ys)
            model.fit(balanced_xs, balanced_ys)

            # Probabilities are taken on the original xs, not the
            # over-sampled set.
            scores = model.predict_proba(xs)
            tuned = best_threshold_for_f1(scores, 5, ys)
            self.threshold, self.positive, self.negative = tuned
            return model