def cls_create(xs, ys):
    """Train the classifier selected by ``algo`` and tune its threshold.

    Reads ``algo`` and ``self`` from the enclosing scope. ``"SVM"`` builds an
    ``svm.SVC`` with ``C=self.parm`` and probability estimates enabled;
    ``"RF"`` builds a ``RandomForestClassifier`` with ``int(self.parm)``
    entropy-criterion trees on a single job.

    Args:
        xs: Training samples, passed unchanged to ``fit`` and
            ``predict_proba``.
        ys: Training labels for ``xs``.

    Returns:
        The fitted classifier.

    Raises:
        ValueError: If ``algo`` is neither ``"SVM"`` nor ``"RF"``.

    Side effects:
        Sets ``self.threshold``, ``self.positive`` and ``self.negative`` from
        the three values returned by ``best_threshold_for_f1``.
    """
    if algo == "SVM":
        classifier = svm.SVC(C=self.parm, probability=True)
    elif algo == "RF":
        classifier = RandomForestClassifier(
            n_estimators=int(self.parm), criterion='entropy', n_jobs=1)
    else:
        # Fail loudly here instead of hitting an UnboundLocalError on
        # `classifier.fit` below.
        raise ValueError("Unknown algorithm: {}".format(algo))

    classifier.fit(xs, ys)

    # Probabilities are computed on the same samples the model was fitted on.
    probs = classifier.predict_proba(xs)

    # The literal 20 is forwarded as-is; its meaning is defined by
    # best_threshold_for_f1, which is not visible from this block.
    self.threshold, self.positive, self.negative = best_threshold_for_f1(
        probs, 20, ys)
    return classifier
def cls_create(xs, ys):
    """Fit a random forest on oversampled data and tune its threshold.

    The forest is configured from ``self.num_trees`` and ``self.criterion``
    (``self`` comes from the enclosing scope) and runs on a single job.

    Args:
        xs: Original training samples.
        ys: Original training labels for ``xs``.

    Returns:
        The fitted ``RandomForestClassifier``.

    Side effects:
        Sets ``self.threshold``, ``self.positive`` and ``self.negative`` from
        the three values returned by ``best_threshold_for_f1``.
    """
    forest_params = {
        "n_estimators": self.num_trees,
        "criterion": self.criterion,
        "n_jobs": 1,
    }
    forest = RandomForestClassifier(**forest_params)

    # Train on the output of over_sample(xs, ys) ...
    sampled_xs, sampled_ys = over_sample(xs, ys)
    forest.fit(sampled_xs, sampled_ys)

    # ... but score the original xs when choosing the threshold.
    original_probs = forest.predict_proba(xs)
    threshold, positive, negative = best_threshold_for_f1(
        original_probs, 20, ys)
    self.threshold = threshold
    self.positive = positive
    self.negative = negative
    return forest
def cls_create(xs, ys):
    """Build, fit and calibrate a random-forest classifier.

    Uses ``self.num_trees`` and ``self.criterion`` from the enclosing scope
    to configure a single-job ``RandomForestClassifier``. The model is fitted
    on the samples returned by ``over_sample(xs, ys)``, while the threshold
    search runs on predictions for the original ``xs``.

    Args:
        xs: Original training samples.
        ys: Original training labels for ``xs``.

    Returns:
        The fitted ``RandomForestClassifier``.

    Side effects:
        Sets ``self.threshold``, ``self.positive`` and ``self.negative`` from
        the three values returned by ``best_threshold_for_f1``.
    """
    model = RandomForestClassifier(
        n_estimators=self.num_trees,
        criterion=self.criterion,
        n_jobs=1,
    )

    balanced_xs, balanced_ys = over_sample(xs, ys)
    model.fit(balanced_xs, balanced_ys)

    # Use the original xs (not the oversampled ones) for threshold selection.
    scores = model.predict_proba(xs)
    tuned = best_threshold_for_f1(scores, 20, ys)
    self.threshold, self.positive, self.negative = tuned
    return model
def cls_create(xs, ys):
    """Train the classifier selected by ``algo`` on oversampled data.

    Reads ``algo`` and ``self`` from the enclosing scope. ``"SVM"`` builds an
    ``svm.SVC`` with ``C=self.parm`` and probability estimates enabled;
    ``"RF"`` builds a ``RandomForestClassifier`` with ``int(self.parm)``
    entropy-criterion trees on a single job.

    Args:
        xs: Original training samples.
        ys: Original training labels for ``xs``.

    Returns:
        The fitted classifier.

    Raises:
        ValueError: If ``algo`` is neither ``"SVM"`` nor ``"RF"``.

    Side effects:
        Sets ``self.threshold``, ``self.positive`` and ``self.negative`` from
        the three values returned by ``best_threshold_for_f1``.
    """
    if algo == "SVM":
        classifier = svm.SVC(C=self.parm, probability=True)
    elif algo == "RF":
        classifier = RandomForestClassifier(
            n_estimators=int(self.parm), criterion='entropy', n_jobs=1)
    else:
        # Fail loudly here instead of hitting an UnboundLocalError on
        # `classifier.fit` below.
        raise ValueError("Unknown algorithm: {}".format(algo))

    # Fit on the output of over_sample(xs, ys) ...
    new_xs, new_ys = over_sample(xs, ys)
    classifier.fit(new_xs, new_ys)

    # ... but pick the threshold from predictions on the original xs.
    probs = classifier.predict_proba(xs)

    # The literal 5 is forwarded as-is; its meaning is defined by
    # best_threshold_for_f1, which is not visible from this block.
    self.threshold, self.positive, self.negative = best_threshold_for_f1(
        probs, 5, ys)
    return classifier
def cls_create(xs, ys):
    """Train the model named by ``algo`` on oversampled data and calibrate it.

    Reads ``algo`` and ``self`` from the enclosing scope. Supported values of
    ``algo`` are ``"SVM"`` (``svm.SVC`` with ``C=self.parm`` and probability
    estimates), ``"RF"`` (``RandomForestClassifier`` with ``int(self.parm)``
    entropy-criterion trees, single job) and ``"LogisticRegression"``
    (``LogisticRegression`` with default arguments).

    Args:
        xs: Original training samples.
        ys: Original training labels for ``xs``.

    Returns:
        The fitted classifier.

    Raises:
        Exception: If ``algo`` is not one of the supported names.

    Side effects:
        Sets ``self.threshold``, ``self.positive`` and ``self.negative`` from
        the three values returned by ``best_threshold_for_f1``.
    """

    def _new_estimator():
        # Guard-clause selection of the unfitted estimator for `algo`.
        if algo == "SVM":
            return svm.SVC(C=self.parm, probability=True)
        if algo == "RF":
            return RandomForestClassifier(
                n_estimators=int(self.parm),
                criterion='entropy',
                n_jobs=1,
            )
        if algo == "LogisticRegression":
            return LogisticRegression()
        raise Exception("Unknown algorithm: " + algo)

    model = _new_estimator()

    # Fit on the output of over_sample(xs, ys); score the original xs.
    balanced_xs, balanced_ys = over_sample(xs, ys)
    model.fit(balanced_xs, balanced_ys)
    scores = model.predict_proba(xs)

    threshold, positive, negative = best_threshold_for_f1(scores, 5, ys)
    self.threshold = threshold
    self.positive = positive
    self.negative = negative
    return model