def load_args(self, args):
    """Parse the job arguments and load the trained Naive Bayes model.

    Delegates to the parent class first, then:

    * if ``self.options.continuous_features`` is set, splits it on commas
      and stores the values as integers in ``self.continuous``;
    * reads the model file at ``current + '/' + self.options.model`` and
      fills ``self.model`` and ``self.total``.

    ``self.model[cat][feature][key]`` holds the count recorded for one
    feature value within a category; ``self.total[cat]`` holds the total
    recorded for the category (taken from that category's 'all' line).

    Args:
        args: Argument list, forwarded unchanged to the parent
            ``load_args``.

    Invalid or missing options are reported through
    ``self.option_parser.error``.
    """
    super(MRNaiveBayesTest, self).load_args(args)

    # NOTE(review): self.continuous is only created when the option is
    # given -- confirm that readers of it handle the attribute being absent.
    if self.options.continuous_features is not None:
        self.continuous = []
        for num in self.options.continuous_features.split(','):
            try:
                num = int(num)
            except ValueError:
                # int() on a str can only fail with ValueError; a bare
                # except would also trap KeyboardInterrupt/SystemExit.
                self.option_parser.error(
                    "The continuous features number you type in are not integer"
                )
            self.continuous.append(num)

    # Load the model.
    if self.options.model is None:
        self.option_parser.error("please type the path to the model")
    else:
        # Per category: feature -> feature value -> count.
        self.model = {}
        # Per category: total count.
        self.total = {}
        job = NaiveBayes.MRNaiveBayesTrain()
        with open(current + '/' + self.options.model, encoding='utf-8') as src:
            for line in src:
                # Parse each line once. Errors raised by the parser itself
                # (malformed line) propagate instead of being retried.
                (cat, feature), value = job.parse_output_line(line.encode())
                try:
                    # Regular line: the value is a (feature value, count) pair.
                    key, num = value
                except (TypeError, ValueError):
                    # 'all' line: the value is the category total itself,
                    # so it cannot be unpacked into a pair.
                    self.total[cat] = value
                    continue
                # Create the category / feature levels on first sight,
                # then record the count.
                self.model.setdefault(cat, {}).setdefault(feature, {})[key] = num
def load_args(self, args):
    """Parse the job arguments and load the trained Naive Bayes model.

    Steps, in order:

    1. Call the parent class's ``load_args`` with ``args``.
    2. If ``self.options.continuous_features`` is not ``None``, split it on
       commas and store the entries as integers in ``self.continuous``.
    3. Read the model file at ``current + '/' + self.options.model`` line
       by line and populate:

       * ``self.model[cat][feature][key]`` -- the count recorded for one
         feature value within a category;
       * ``self.total[cat]`` -- the total recorded for the category, read
         from that category's 'all' line.

    Args:
        args: Argument list, forwarded unchanged to the parent
            ``load_args``.

    A non-integer feature index or a missing model path is reported
    through ``self.option_parser.error``.
    """
    super(MRNaiveBayesTest, self).load_args(args)

    # NOTE(review): self.continuous is only created when the option is
    # given -- confirm that readers of it handle the attribute being absent.
    if self.options.continuous_features is not None:
        self.continuous = []
        for num in self.options.continuous_features.split(','):
            try:
                num = int(num)
            except ValueError:
                # Only a failed int() conversion is expected here; do not
                # trap unrelated exceptions with a bare except.
                self.option_parser.error(
                    "The continuous features number you type in are not integer"
                )
            self.continuous.append(num)

    # Load the model.
    if self.options.model is None:
        self.option_parser.error("please type the path to the model")
    else:
        # Per category: feature -> feature value -> count.
        self.model = {}
        # Per category: total count.
        self.total = {}
        job = NaiveBayes.MRNaiveBayesTrain()
        with open(current + '/' + self.options.model, encoding='utf-8') as src:
            for line in src:
                # One parse per line; a malformed line raises from the
                # parser directly rather than after a second attempt.
                (cat, feature), value = job.parse_output_line(line.encode())
                try:
                    # Regular line: value is a (feature value, count) pair.
                    key, num = value
                except (TypeError, ValueError):
                    # 'all' line: value is the category total and does not
                    # unpack into a pair.
                    self.total[cat] = value
                    continue
                # Create missing category / feature levels, then store
                # the count.
                self.model.setdefault(cat, {}).setdefault(feature, {})[key] = num