def __init__(self):
    """Initialise an untrained tree with its default settings.

    No dataset header or root node exists until the classifier is built.
    All node counters start at zero and information gain is the default
    split metric.
    """
    # Nothing has been learned yet: no tree and no dataset header.
    self._root = None
    self._header = None

    # Bookkeeping counters for the different kinds of tree nodes.
    self._decision_node_count = 0
    self._inactive_leaf_count = 0
    self._active_leaf_count = 0

    # Default tuning values.
    self._min_frac_weight_for_two_branches_gain = 0.01
    self._hoeffding_tie_threshold = 0.05
    self._split_confidence = 1e-7
    self._grace_period = 200

    # Identifiers of the supported split metrics and the active choice.
    self.INFO_GAIN_SPLIT = 1
    self.GINI_SPLIT = 0
    self._selected_split_metric = self.INFO_GAIN_SPLIT
    self._split_metric = InfoGainSplitMetric(
        self._min_frac_weight_for_two_branches_gain)
    # To default to the Gini metric instead:
    #   self._selected_split_metric = self.GINI_SPLIT
    #   self._split_metric = GiniSplitMetric()

    # Leaf prediction strategy settings.
    # A threshold (self._nb_threshold = 0) is only needed when the leaf
    # prediction strategy is based on Naive Bayes; it is unused right now.

    # Whether to print leaf models when naive Bayes or naive Bayes
    # adaptive leaves are used.
    self._print_leaf_models = False
def __init__(self):
    """Initialise an untrained tree and configure its logger.

    Sets the default tuning values, selects information gain as the split
    metric, zeroes the node counters and prepares ``self.logger`` (the
    shared ``"vfdt"`` logger) with a console handler.

    The console handler is installed only if the ``"vfdt"`` logger has no
    handler yet. ``logging.getLogger`` returns the same logger object for
    the same name, so attaching a new handler on every instantiation would
    make each message be printed once per instance created so far.
    """
    self._header = None
    self._root = None
    self._grace_period = 200
    self._split_confidence = 0.0000001
    self._hoeffding_tie_threshold = 0.05
    self._min_frac_weight_for_two_branches_gain = 0.01

    # Split metric settings.
    self.GINI_SPLIT = 0
    self.INFO_GAIN_SPLIT = 1
    self._selected_split_metric = self.INFO_GAIN_SPLIT
    self._split_metric = InfoGainSplitMetric(
        self._min_frac_weight_for_two_branches_gain)
    # To default to the Gini metric instead:
    #   self._selected_split_metric = self.GINI_SPLIT
    #   self._split_metric = GiniSplitMetric()

    # Leaf prediction strategy settings.
    # A threshold (self._nb_threshold = 0) is only needed when the leaf
    # prediction strategy is based on Naive Bayes; it is unused right now.

    self._active_leaf_count = 0
    self._inactive_leaf_count = 0
    self._decision_node_count = 0

    # Whether to print leaf models when naive Bayes or naive Bayes
    # adaptive leaves are used.
    self._print_leaf_models = False

    # Root logger configuration. basicConfig does nothing if the root
    # logger already has handlers, so repeated calls are harmless.
    logging.basicConfig(
        level=logging.DEBUG,
        format=
        '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
        # filename='../log/tmp.log',
        datefmt='%a, %d %b %Y %H:%M:%S',
        filemode='w')
    self.logger = logging.getLogger("vfdt")  # self.name

    # Optional: a FileHandler that records messages of level INFO or
    # higher to a log file and is attached to this logger.
    #   fh = logging.FileHandler('../log/' + "vfdt" + '.log', mode='w')  # self.name
    #   formatter = logging.Formatter('%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s')
    #   fh.setFormatter(formatter)
    #   fh.setLevel(logging.INFO)
    #   self.logger.addHandler(fh)

    # A StreamHandler that prints messages of level INFO or higher to
    # standard error. Attach it only once per process: the "vfdt" logger
    # is shared by every instance of this class.
    if not self.logger.handlers:
        console = logging.StreamHandler()
        console.setLevel(logging.INFO)
        formatter = logging.Formatter(
            '%(message)s')  # ('%(name)-12s: %(levelname)-8s )
        console.setFormatter(formatter)
        self.logger.addHandler(console)
def build_classifier(self, dataset, testdataset=None):
    """Build the classifier.

    Resets the model, re-creates the split metric that matches
    ``self._selected_split_metric`` and then feeds every instance of
    ``dataset`` to ``update_classifier`` in order.

    Args:
        dataset (Dataset): The data to start training the classifier.
        testdataset (Dataset, optional): When given, ``valuate_acc`` is
            called on it after every 100 training instances and the
            resulting accuracy curve is saved to ``./respic/acc.jpg``.
            When ``None`` (the default) no evaluation or plotting is done.
    """
    # Local import so this method does not depend on a file-level
    # ``import os`` being present.
    import os

    # Logging
    self.logger.info(
        "Start build classifier--------------------------\n"
        "grace_period = {},\n"
        "split_confidence = {},\n"
        "hoeffding_tie_threshold = {},\n"
        "min_frac_weight_for_two_branches_gain = {}\n"
        "--------------------------------------------------".format(
            self._grace_period, self._split_confidence,
            self._hoeffding_tie_threshold,
            self._min_frac_weight_for_two_branches_gain))

    # Training
    self.reset()
    self._header = dataset
    # Compare by value: the metric identifiers are plain ints, and
    # identity (``is``) only works for them by interpreter coincidence.
    if self._selected_split_metric == self.GINI_SPLIT:
        self._split_metric = GiniSplitMetric()
    else:
        self._split_metric = InfoGainSplitMetric(
            self._min_frac_weight_for_two_branches_gain)

    # ``is not None`` avoids calling a possibly overloaded ``!=`` on the
    # test dataset object.
    evaluate = testdataset is not None
    test_epoch = 100  # evaluate after every ``test_epoch`` instances
    test_acc_list = []
    for i in range(dataset.num_instances()):
        self.update_classifier(dataset.instance(i))
        if evaluate and (i + 1) % test_epoch == 0:
            test_acc_list.append(self.valuate_acc(testdataset))

    # Plotting
    if evaluate:
        figpath = './respic/' + 'acc.jpg'
        # Make sure the output directory exists; otherwise savefig would
        # raise only after all the training work has been done.
        os.makedirs(os.path.dirname(figpath), exist_ok=True)
        plt.plot(
            range(len(test_acc_list)), test_acc_list,
            label='test_acc')  # Plot some data on the (implicit) axes.
        plt.xlabel('iter')
        plt.ylabel('acc')
        plt.title(figpath.split('/')[-1])
        plt.legend()
        plt.savefig(figpath)
        plt.close('all')
def build_classifier(self, dataset):
    """Build the classifier.

    Resets the model, stores ``dataset`` as the header, re-creates the
    split metric that matches ``self._selected_split_metric`` and then
    feeds every instance of ``dataset`` to ``update_classifier`` in order.

    Args:
        dataset (Dataset): The data to start training the classifier.
    """
    self.reset()
    self._header = dataset

    # Compare by value: the metric identifiers are plain ints, and
    # identity (``is``) only works for them by interpreter coincidence.
    if self._selected_split_metric == self.GINI_SPLIT:
        self._split_metric = GiniSplitMetric()
    else:
        self._split_metric = InfoGainSplitMetric(
            self._min_frac_weight_for_two_branches_gain)

    for i in range(dataset.num_instances()):
        self.update_classifier(dataset.instance(i))