def predictMulticlassBaggingModel_parallel(MatX,nrow,ncol,varnames,num_class,params,multiclassmethod,\
                                           n_gpus,n_parallels,runtimes=300,bool_save=True,savedirbase=""):
    if not bool_save:
        print("Bagging Method has to save models!")
        return
    evalweightsFileName = "Runtime_Model_Evaluation_Weights.csv"
    selectvarnamesfiledir = savedirbase + os.sep + "Runtime_Model_Select_Variables.csv"
    selrunvarspdData = init.readCSVasPandas(selectvarnamesfiledir)
    evalweightsFiledirto = savedirbase + os.sep + evalweightsFileName
    baggingweights = init.getListFromPandas(evalweightsFiledirto, 'weight')
    selectruntimesvarnames = []
    for runtime in range(runtimes):
        selectruntimesvarnames.append(
            init.getListFrompdDataSet(selrunvarspdData,
                                      "SelectVarName_run" + str(runtime)))
    del selrunvarspdData
    bool_mask = init.getMask(MatX)
    #Assign task to worker
    RuntimeLists = [[] for i in range(n_parallels)]
    for runtime in range(runtimes):
        worker_id = runtime % n_parallels
        RuntimeLists[worker_id].append(runtime)
    #Judge bool_gpu
    if 'gpu' in params.get('tree_method'):
        bool_gpu = True
    else:
        bool_gpu = False
    P = Pool(n_parallels)
    results_parallel = []
    manager = Manager()
    CPIDs = manager.list()
    for i in range(n_parallels):
        results_parallel.append(P.apply_async(_predictMulticlassBaggingModel,(CPIDs,RuntimeLists[i],MatX,nrow,ncol,varnames,num_class,params,\
                                selectruntimesvarnames,baggingweights,multiclassmethod,bool_gpu,n_gpus,n_parallels,bool_save,savedirbase)))
    P.close()
    P.join()
    del CPIDs
#    if multiclassmethod=='softmax':
    pred_pY_ense = np.zeros([nrow * ncol, num_class], dtype=np.float32)
#    elif multiclassmethod=='category':
#        pred_pY_ense=np.zeros(nrow*ncol*num_class,dtype=np.float32)
    for i in range(n_parallels):
        temp = results_parallel[i]
        pred_pY_ense_para = temp.get()
        pred_pY_ense = pred_pY_ense + pred_pY_ense_para
#    if multiclassmethod=='softmax':
    [pred_Y, pred_pY] = init.reshapeMulticlassMatrix(pred_pY_ense,
                                                     nrow,
                                                     ncol,
                                                     num_class,
                                                     bool_onearray=False,
                                                     mask=bool_mask.flatten())
#    elif multiclassmethod=='category':
#        [pred_Y,pred_pY]=init.ReshapeMulticlassMatrix(pred_pY_ense,nrow,ncol,num_class,1,bool_stretch=bool_stretch,mask=bool_mask.flatten())
    return [pred_Y, pred_pY]
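#The round-robin split of runtimes across workers used above (RuntimeLists) can be factored
#out as below. This is only an illustrative sketch; the helper name _sketchAssignRuntimes is
#hypothetical and not used elsewhere in this module.
def _sketchAssignRuntimes(runtimes, n_parallels):
    #Distribute runtime indexes 0..runtimes-1 over n_parallels workers in round-robin order,
    #so each worker receives a nearly equal share of the bagging models to predict.
    RuntimeLists = [[] for _ in range(n_parallels)]
    for runtime in range(runtimes):
        RuntimeLists[runtime % n_parallels].append(runtime)
    return RuntimeLists

#Example: _sketchAssignRuntimes(7, 3) returns [[0, 3, 6], [1, 4], [2, 5]].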
def testSingleclassBaggingModel_parallel(Models,TestDataSet,vtname,params,n_gpus,n_parallels,\
                                         single_thres=0.5,runtimes=300,bool_strclass=False,labelHeaderName="",\
                                         bool_save=False,savedirbase=""):
    ModelList = []
    if bool_save:
        evalweightsFileName = vtname + "_Runtime_Evaluation_Weight.csv"
        selectvarnamesfiledir = savedirbase + os.sep + vtname + "_Runtime_Model_Select_Variables.csv"
        evalweightsFiledirto = savedirbase + os.sep + evalweightsFileName
        baggingweights = init.getListFromPandas(evalweightsFiledirto, 'weight')
        selrunvarspdData = init.readCSVasPandas(selectvarnamesfiledir)
        selectruntimesvarnames = []
        for runtime in range(runtimes):
            selectruntimesvarnames.append(
                init.getListFrompdDataSet(selrunvarspdData,
                                          "SelectVarName_run" + str(runtime)))
        del selrunvarspdData
    else:
        #Ensemble weights supplied with the in-memory models are used as baggingweights below
        [ModelList, selectruntimesvarnames, baggingweights] = Models
    #Assign task to worker
    RuntimeLists = [[] for i in range(n_parallels)]
    for runtime in range(runtimes):
        worker_id = runtime % n_parallels
        RuntimeLists[worker_id].append(runtime)
    #Judge bool_gpu
    if 'gpu' in params.get('tree_method'):
        bool_gpu = True
    else:
        bool_gpu = False
    #Open multiprocessing parallel pools
    P = Pool(n_parallels)
    results_parallel = []
    manager = Manager()
    CPIDs = manager.list()
    for i in range(n_parallels):
        results_parallel.append(P.apply_async(_testSingleclassBaggingModel,(CPIDs,RuntimeLists[i],TestDataSet,vtname,runtime,params,ModelList,\
                                bool_gpu,n_gpus,n_parallels,selectruntimesvarnames,baggingweights,single_thres,bool_strclass,labelHeaderName,bool_save,savedirbase)))
    P.close()
    P.join()
    del CPIDs
    pred_pY_ense = np.zeros(len(TestDataSet))
    for i in range(n_parallels):
        temp = results_parallel[i]
        [pred_Y, pred_pY_ense_para, test_Y] = temp.get()
        pred_pY_ense = pred_pY_ense + pred_pY_ense_para
    pred_Y_ense = (pred_pY_ense >= single_thres) * 1
    pred_Y = pred_Y_ense
    return [pred_Y, pred_pY_ense, test_Y]
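#The GPU check above only inspects the XGBoost 'tree_method' entry of params. A minimal
#stand-alone version is sketched below; the name _sketchUsesGPU is hypothetical.
def _sketchUsesGPU(params):
    #True for GPU tree methods such as 'gpu_hist', False for CPU methods such as 'hist'
    #or 'exact'. params is assumed to be a plain dict of XGBoost parameters.
    return 'gpu' in params.get('tree_method', '')

#Example: _sketchUsesGPU({'tree_method': 'gpu_hist'}) is True,
#         _sketchUsesGPU({'tree_method': 'hist'}) is False.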
def testMulticlassBaggingModel_parallel(TestDataSet,VegeTypes,params,multiclassmethod,n_gpus,n_parallels,runtimes=300,\
                                        bool_strclass=False,labelHeaderName="",bool_save=True,savedirbase=""):
    if not bool_save:
        print("Bagging Method has to save models!")
        return
    evalweightsFileName = "Runtime_Model_Evaluation_Weights.csv"
    selectvarnamesfiledir = savedirbase + os.sep + "Runtime_Model_Select_Variables.csv"
    evalweightsFiledirto = savedirbase + os.sep + evalweightsFileName
    baggingweights = init.getListFromPandas(evalweightsFiledirto, 'weight')
    selrunvarspdData = init.readCSVasPandas(selectvarnamesfiledir)
    selectruntimesvarnames = []
    for runtime in range(runtimes):
        selectruntimesvarnames.append(
            init.getListFrompdDataSet(selrunvarspdData,
                                      "SelectVarName_run" + str(runtime)))
    del selrunvarspdData
    #Assign task to worker
    RuntimeLists = [[] for i in range(n_parallels)]
    for runtime in range(runtimes):
        worker_id = runtime % n_parallels
        RuntimeLists[worker_id].append(runtime)
    #Judge bool_gpu
    if 'gpu' in params.get('tree_method'):
        bool_gpu = True
    else:
        bool_gpu = False
    P = Pool(n_parallels)
    results_parallel = []
    manager = Manager()
    CPIDs = manager.list()
    for i in range(n_parallels):
        results_parallel.append(P.apply_async(_testMulticlassBaggingModel,(CPIDs,RuntimeLists[i],TestDataSet,VegeTypes,params,multiclassmethod,bool_gpu,n_gpus,n_parallels,\
                                selectruntimesvarnames,baggingweights,bool_strclass,labelHeaderName,bool_save,savedirbase)))
    P.close()
    P.join()
    del CPIDs
    pred_pY_ense = np.zeros([len(TestDataSet), len(VegeTypes)])
    for i in range(n_parallels):
        temp = results_parallel[i]
        [pred_Y, pred_pY_ense_para, test_Y] = temp.get()
        pred_pY_ense = pred_pY_ense + pred_pY_ense_para
    pred_Y = np.argmax(pred_pY_ense, axis=1)
    return [pred_Y, pred_pY_ense, test_Y]
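#Each worker returns a partial (n_samples, n_classes) matrix of weighted votes; the final
#class is the argmax of their elementwise sum, as done above. A minimal sketch, assuming the
#partial results are already numpy arrays of identical shape; _sketchCombineMulticlass is a
#hypothetical name.
def _sketchCombineMulticlass(partial_matrices):
    import numpy as np
    #Sum the per-worker weighted-vote matrices and pick the class with the largest total.
    pred_pY_ense = np.zeros_like(partial_matrices[0])
    for part in partial_matrices:
        pred_pY_ense = pred_pY_ense + part
    pred_Y = np.argmax(pred_pY_ense, axis=1)
    return pred_Y, pred_pY_ense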
def testSingleclassBaggingModel(Models,TestDataSet,vtname,params,single_thres=0.5,runtimes=300,\
                                bool_strclass=False,labelHeaderName="",bool_save=False,savedirbase=""):
    ModelList = []
    if bool_save:
        evalweightsFileName = vtname + "_Runtime_Evaluation_Weight.csv"
        selectvarnamesfiledir = savedirbase + os.sep + vtname + "_Runtime_Model_Select_Variables.csv"
        evalweightsFiledirto = savedirbase + os.sep + evalweightsFileName
        ense_weights = init.getListFromPandas(evalweightsFiledirto, 'weight')
        selrunvarspdData = init.readCSVasPandas(selectvarnamesfiledir)
        selectruntimesvarnames = []
        for runtime in range(runtimes):
            selectruntimesvarnames.append(
                init.getListFrompdDataSet(selrunvarspdData,
                                          "SelectVarName_run" + str(runtime)))
        del selrunvarspdData
    else:
        [ModelList, selectruntimesvarnames, ense_weights] = Models
    pred_pY_ense = np.zeros(len(TestDataSet))
    for runtime in range(runtimes):
        print("Predicting runtime = %d" % runtime)
        if bool_save:
            savedir = savedirbase + os.sep + "runtime_" + str(runtime)
            modelName = vtname + '_xgboost_singleclass_run' + str(
                runtime) + ".model"
            modeldir = savedir + os.sep + modelName
            model = xgbf.loadModel(modeldir, params)
        else:
            model = ModelList[runtime]
        varnames = selectruntimesvarnames[runtime]
        [test_Y,test_X]=xgbf.trainingDataSet(TestDataSet,[vtname],varnames,\
                                             bool_strclass=bool_strclass,labelHeaderName=labelHeaderName,bool_binary=True)
        [pred_Y, pred_pY] = xgbf.Predict(model,
                                         test_X,
                                         bool_binary=1,
                                         threshold=single_thres)
        pred_pY_ense = pred_pY_ense + pred_pY * ense_weights[runtime]
    pred_Y_ense = (pred_pY_ense >= single_thres) * 1
    pred_Y = pred_Y_ense
    pred_pY = pred_pY_ense
    if len(test_Y.shape) > 1:
        test_Y = test_Y[:, 0]
    return [pred_Y, pred_pY, test_Y]
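#For the single-class (binary) ensemble above, the weighted probabilities are accumulated and
#then cut at single_thres to obtain 0/1 labels. A minimal sketch of that last step, with the
#hypothetical name _sketchThresholdEnsemble:
def _sketchThresholdEnsemble(pred_pY_ense, single_thres=0.5):
    import numpy as np
    #Label 1 where the accumulated weighted probability reaches the threshold, else 0.
    pred_pY_ense = np.asarray(pred_pY_ense)
    pred_Y_ense = (pred_pY_ense >= single_thres) * 1
    return pred_Y_ense

#Example: _sketchThresholdEnsemble([0.2, 0.5, 0.9], 0.5) gives array([0, 1, 1]).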
def testMulticlassBaggingModel(TestDataSet,VegeTypes,params,multiclassmethod,runtimes=300,bool_strclass=False,labelHeaderName="",\
                               bool_save=True,savedirbase=""):
    if not bool_save:
        print("Bagging Method has to save models!")
        return
    num_class = len(VegeTypes)
    evalweightsFileName = "Runtime_Model_Evaluation_Weights.csv"
    selectvarnamesfiledir = savedirbase + os.sep + "Runtime_Model_Select_Variables.csv"
    evalweightsFiledirto = savedirbase + os.sep + evalweightsFileName
    baggingweights = init.getListFromPandas(evalweightsFiledirto, 'weight')
    selrunvarspdData = init.readCSVasPandas(selectvarnamesfiledir)
    selectruntimesvarnames = []
    for runtime in range(runtimes):
        selectruntimesvarnames.append(
            init.getListFrompdDataSet(selrunvarspdData,
                                      "SelectVarName_run" + str(runtime)))
    del selrunvarspdData
    pred_pY_ense = np.zeros([len(TestDataSet), num_class])
    for runtime in range(runtimes):
        if baggingweights[runtime] == 0:
            print("Model not established!")
            continue
        print("Predicting runtime = %d" % runtime)
        savedir = savedirbase + os.sep + "runtime_" + str(runtime)
        if multiclassmethod == 'softmax':
            [pred_Y,pred_pY,test_Y]=mlc.testMulticlassSoftmaxModel([],TestDataSet,VegeTypes,selectruntimesvarnames[runtime],\
                                    params,runtime=runtime,bool_pandas=True,bool_strclass=bool_strclass,labelHeaderName=labelHeaderName,\
                                    bool_save=bool_save,savedir=savedir)
        elif multiclassmethod == 'category':
            [pred_Y,pred_pY,test_Y]=mlc.testMulticlassCategoryModel([],TestDataSet,VegeTypes,selectruntimesvarnames[runtime],\
                                    params,runtime=runtime,bool_pandas=True,bool_strclass=bool_strclass,labelHeaderName=labelHeaderName,\
                                    bool_save=bool_save,savedir=savedir)
        else:
            print("Invalid Multiclass Method Input!")
#        pred_pY_ense=pred_pY_ense+pred_pY*baggingweights[runtime]
        pred_Y_epd = init.expandCategories(pred_Y, num_class=num_class)
        pred_pY_ense = pred_pY_ense + baggingweights[
            runtime] * pred_Y_epd.astype(np.float32)
    pred_Y = np.argmax(pred_pY_ense, axis=1)
    return [pred_Y, pred_pY_ense, test_Y]
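#The hard-label voting above relies on init.expandCategories to turn class indexes into a
#one-hot matrix before weighting. The sketch below shows the assumed behaviour only; the real
#helper lives in the init module and may differ in detail.
def _sketchExpandCategories(pred_Y, num_class):
    import numpy as np
    #One row per sample, one column per class; a single 1 marks the predicted class.
    pred_Y = np.asarray(pred_Y, dtype=np.int64)
    onehot = np.zeros([len(pred_Y), num_class], dtype=np.float32)
    onehot[np.arange(len(pred_Y)), pred_Y] = 1.0
    return onehot

#Example: _sketchExpandCategories([2, 0], 3) gives [[0, 0, 1], [1, 0, 0]].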
colsample_bytree = 0.75
min_child_weight = 2
scale_pos_weight = 1
max_delta_step = 2
eta = 0.05
nthread = 1
threshold = 0.5

#%%
#Read datasets
trainDataSetFiledir = dirfrom + os.sep + trainDataSetName
testDataSetFiledir = dirfrom + os.sep + testDataSetName
selectVariableFiledir = dirfrom + os.sep + selectVariableName
vegetypeFiledir = dirfrom + os.sep + vegetypeNames

TrainDataSet = init.readCSVasPandas(trainDataSetFiledir)
TestDataSet = init.readCSVasPandas(testDataSetFiledir)
varnames = init.getListFromPandas(selectVariableFiledir, 'VariableName')
varmeanings = init.getListFromPandas(selectVariableFiledir, 'VariableMeaning')
VegeTypes = init.getListFromPandas(vegetypeFiledir, 'VegeName')
num_class = len(VegeTypes)

#Set XGBoost parameters
params=xgbf.setParams(bool_gpu,tree_method,num_class,eval_metric,max_depth,lamb,alpha,gamma,subsample,colsample_bytree,\
                      min_child_weight,scale_pos_weight,eta,nthread,max_delta_step=max_delta_step,gpu_id=0)

#%%
#SMOTE for a balanced dataset
#tar_ratio is max(num. of samples per class)/min(num. of samples per class). -1 represents full balance, recommended here.
if bool_smote:
    TrainDataSet=smote.createSMOTEDataSet(TrainDataSet,VegeTypes,varnames,method='regular',tar_ratio=-1,\
                                          bool_strclass=bool_strclass,labelHeaderName=labelHeaderName)

#%%
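#tar_ratio is interpreted as the allowed ratio between the largest and the smallest class
#after resampling, with -1 meaning "oversample every class up to the majority size". The
#sketch below only illustrates how that ratio is measured on a label list; the name
#_sketchClassImbalanceRatio is hypothetical and independent of smote.createSMOTEDataSet.
def _sketchClassImbalanceRatio(labels):
    from collections import Counter
    #Ratio of the largest to the smallest class count; 1.0 means a fully balanced dataset.
    counts = Counter(labels)
    return max(counts.values()) / min(counts.values())

#Example: _sketchClassImbalanceRatio(['a', 'a', 'a', 'b']) is 3.0; after full balancing
#(tar_ratio=-1) the same measure would be 1.0.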
def predictSingleclassBaggingModelMatrix_parallel(Models,MatX,vtname,varnames,params,n_gpus,n_parallels,\
                                                  single_thres=0.5,runtimes=300,filter_percent=0,bool_save=True,savedirbase=""):
    if not bool_save:
        print("Single Bagging Ensemble Only for bool_save=True!")
        return []
    #Read weights and features file
    evalweightsFileName = vtname + "_Runtime_Evaluation_Weight.csv"
    selectvarnamesfiledir = savedirbase + os.sep + vtname + "_Runtime_Model_Select_Variables.csv"
    evalweightsFiledirto = savedirbase + os.sep + evalweightsFileName
    baggingweights = init.getListFromPandas(evalweightsFiledirto, 'weight')
    selrunvarspdData = init.readCSVasPandas(selectvarnamesfiledir)
    selectruntimesvarnames = []
    for runtime in range(runtimes):
        selectruntimesvarnames.append(
            init.getListFrompdDataSet(selrunvarspdData,
                                      "SelectVarName_run" + str(runtime)))
    del selrunvarspdData
    matshape = MatX.shape
    bool_mask = init.getMask(MatX)
    pred_X = np.zeros([matshape[0] * matshape[1], matshape[2]],
                      dtype=np.float32)
    for i in range(matshape[2]):
        pred_X[:, i] = MatX[:, :, i].flatten()
    #Assign task to worker
    RuntimeLists = [[] for i in range(n_parallels)]
    for runtime in range(runtimes):
        worker_id = runtime % n_parallels
        RuntimeLists[worker_id].append(runtime)
    #Judge bool_gpu
    if 'gpu' in params.get('tree_method'):
        bool_gpu = True
    else:
        bool_gpu = False
    #Open multiprocessing parallel pools
    P = Pool(n_parallels)
    results_parallel = []
    manager = Manager()
    CPIDs = manager.list()
    for i in range(n_parallels):
        results_parallel.append(P.apply_async(_predictSingleclassBaggingModelMatrix,(CPIDs,RuntimeLists[i],vtname,pred_X,varnames,\
                                selectruntimesvarnames,params,matshape,baggingweights,single_thres,bool_gpu,n_gpus,n_parallels,bool_save,savedirbase)))
    P.close()
    P.join()
    del CPIDs
    #Collect the multiprocessing results
    pred_pY_ense = np.zeros(matshape[0] * matshape[1], dtype=np.float32)
    for i in range(n_parallels):
        temp = results_parallel[i]
        pred_pY_ense_para = temp.get()
        pred_pY_ense = pred_pY_ense + pred_pY_ense_para
    pred_Y_ense = (pred_pY_ense >= single_thres) * 1
    pred_pY_ense = pred_pY_ense.reshape(matshape[0], matshape[1])
    pred_Y_ense = pred_Y_ense.reshape(matshape[0], matshape[1])
    if filter_percent > 0:
        p_max = np.max(np.max(pred_pY_ense[bool_mask]))
        pred_pY_ense[pred_pY_ense < p_max * filter_percent] = 0
    return [pred_Y_ense, pred_pY_ense]
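#Prediction on a raster stack works on a flattened (nrow*ncol, n_variables) design matrix,
#built above by flattening each band of MatX, and the ensemble output is reshaped back to the
#map grid afterwards. A minimal numpy sketch of that step, under the assumption that MatX is
#an (nrow, ncol, n_variables) array; _sketchFlattenRaster is a hypothetical name.
def _sketchFlattenRaster(MatX):
    import numpy as np
    nrow, ncol, nvar = MatX.shape
    pred_X = np.zeros([nrow * ncol, nvar], dtype=np.float32)
    for i in range(nvar):
        #Each band becomes one column of the design matrix, pixel by pixel.
        pred_X[:, i] = MatX[:, :, i].flatten()
    return pred_X

#A per-pixel result vector of length nrow*ncol is restored to map shape with
#result.reshape(nrow, ncol), exactly as done for pred_pY_ense above.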
def predictSingleclassBaggingModelMatrix(Models,MatX,vtname,varnames,params,single_thres=0.5,runtimes=300,filter_percent=0,\
                                         bool_save=False,savedirbase=""):
    count = 0.0
    if bool_save:
        evalweightsFileName = vtname + "_Runtime_Evaluation_Weight.csv"
        selectvarnamesfiledir = savedirbase + os.sep + vtname + "_Runtime_Model_Select_Variables.csv"
        evalweightsFiledirto = savedirbase + os.sep + evalweightsFileName
        ense_weights = init.getListFromPandas(evalweightsFiledirto, 'weight')
        selrunvarspdData = init.readCSVasPandas(selectvarnamesfiledir)
        selectruntimesvarnames = []
        for runtime in range(runtimes):
            selectruntimesvarnames.append(
                init.getListFrompdDataSet(selrunvarspdData,
                                          "SelectVarName_run" + str(runtime)))
        del selrunvarspdData
    else:
        [ModelList, selectruntimesvarnames, ense_weights] = Models
    matshape = MatX.shape
    bool_mask = init.getMask(MatX)
    pred_X = np.zeros([matshape[0] * matshape[1], matshape[2]],
                      dtype=np.float32)
    for i in range(matshape[2]):
        pred_X[:, i] = MatX[:, :, i].flatten()
    pred_pY_ense = np.zeros(matshape[0] * matshape[1], dtype=np.float32)
    time_start = time.time()
    for runtime in range(runtimes):
        print("Predicting runtime = %d..." % (runtime))
        if bool_save:
            savedir = savedirbase + os.sep + "runtime_" + str(runtime)
            modelName = vtname + '_xgboost_singleclass_run' + str(
                runtime) + ".model"
            modeldir = savedir + os.sep + modelName
            model = xgbf.loadModel(modeldir, params)
        else:
            model = ModelList[runtime]
        selruntimevarstr = selectruntimesvarnames[runtime]
        selruntimevaridx = _findListSubsetIndexes(selruntimevarstr, varnames)
        pred_X_runtime = pred_X[:, selruntimevaridx]
        [pred_Y, pred_pY] = xgbf.Predict(model,
                                         pred_X_runtime,
                                         bool_binary=1,
                                         threshold=single_thres)
        pred_pY_ense = pred_pY_ense + ense_weights[runtime] * pred_pY
        time_stop = time.time()
        count = count + 1
        done = count / runtimes
        remain = (runtimes - count) / runtimes
        num_day, num_hour, num_min = _calDueTime(time_start, time_stop, done,
                                                 0.0)
        print(
            "Model: %d Calculating Finished! Done: %.2f%%, Remaining: %.2f%%"
            % (runtime, 100 * done, 100 * remain))
        print("Calculating will finish in %d Days %d Hours %d Minutes\n" %
              (num_day, num_hour, num_min))
    pred_Y_ense = (pred_pY_ense >= single_thres) * 1
    pred_pY_ense = pred_pY_ense.reshape(matshape[0], matshape[1])
    pred_Y_ense = pred_Y_ense.reshape(matshape[0], matshape[1])
    if filter_percent > 0:
        p_max = np.max(np.max(pred_pY_ense[bool_mask]))
        pred_pY_ense[pred_pY_ense < p_max * filter_percent] = 0
    return [pred_Y_ense, pred_pY_ense]
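#_calDueTime is defined elsewhere in this module; the progress messages above suggest it
#converts elapsed time and the completed fraction into a remaining-time estimate. The sketch
#below is one plausible implementation of that idea, not the actual helper.
def _sketchRemainingTime(time_start, time_stop, done):
    #Assume the remaining work proceeds at the average pace observed so far and split the
    #estimated remaining seconds into days, hours and minutes.
    elapsed = time_stop - time_start
    remaining = elapsed * (1.0 - done) / done if done > 0 else 0.0
    num_day = int(remaining // 86400)
    num_hour = int((remaining % 86400) // 3600)
    num_min = int((remaining % 3600) // 60)
    return num_day, num_hour, num_min

#Example: after 600 s of work with done=0.25, the estimate is 0 days, 0 hours, 30 minutes.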
postfix = '.tif'

#Set class headers
labelHeaderName_H = "type"  #upper system of HVCS
labelHeaderName_L = "sub"  #lower system of HVCS

#Set base map layer directories
baseMapLayerFolderdir = r"XXX"
baseMapLayerFileName = "VegeMap_XGB_BAG_softmax.tif"
baseMapLayerFiledir = baseMapLayerFolderdir + os.sep + baseMapLayerFileName
baseMapTestResultFileName = "Real_and_Predicted_Results.csv"
baseMapTestResultFiledir = baseMapLayerFolderdir + os.sep + baseMapTestResultFileName

#Read and format HVCS
HierRelationsFiledir = dirfrom + os.sep + hierRelationsName
baseMapTestResult = init.getListFromPandas(baseMapTestResultFiledir, 'predict')
realTestY = init.getListFromPandas(baseMapTestResultFiledir, 'real')
[VegeTypes1,VegeTypes2,HierRelations]=hmap.getHierRelation(init.getListFromPandas(HierRelationsFiledir,labelHeaderName_H),\
                                                           init.getListFromPandas(HierRelationsFiledir,labelHeaderName_L))

#%%
#Produce merged predicted test set
pred_Y = hmap.predictHierUpMapping(baseMapTestResult, VegeTypes1, VegeTypes2,
                                   HierRelations)
test_Y = hmap.predictHierUpMapping(realTestY, VegeTypes1, VegeTypes2,
                                   HierRelations)

#Evaluate
EvalueFolder = dirto
xgbf.mlcEvalAndWriteResult(EvalueFolder, pred_Y, np.zeros_like(pred_Y), test_Y)

#Plot confusion matrix
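#hmap.predictHierUpMapping maps predictions made in the lower HVCS level onto their parent
#classes in the upper level, so that the base map and the reference labels can be compared at
#the coarser level. The sketch below shows the assumed mapping logic with a plain dict; the
#actual helper takes the VegeTypes1/VegeTypes2/HierRelations structures returned by
#hmap.getHierRelation and may differ in detail.
def _sketchHierUpMapping(lower_labels, lower_to_upper):
    #lower_to_upper: dict from a lower-level class name/index to its upper-level class.
    return [lower_to_upper[label] for label in lower_labels]

#Example: _sketchHierUpMapping(['sub_a1', 'sub_b2'], {'sub_a1': 'type_a', 'sub_b2': 'type_b'})
#gives ['type_a', 'type_b'].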
varlabelweights = [-1]  #weights of the variable clusters; [-1] is the default, indicating no difference between clusters
baggingmetric = 'kappa'  #metric for ensembling the built base models (weighted voting)
baggingweightindex = 1  #weight index for ensembling the built base models (baggingmetric**baggingweightindex is the weight of each base model)
baggingmetricthres = 0.75  #base models with performance below baggingmetricthres are filtered out

#%%
#Read datasets
trainDataSetFiledir = dirfrom + os.sep + trainDataSetName
testDataSetFiledir = dirfrom + os.sep + testDataSetName
selectVariableFiledir = dirfrom + os.sep + selectVariableName
vegetypeFiledir = dirfrom + os.sep + vegetypeNames

TrainDataSet = init.readCSVasPandas(trainDataSetFiledir)
TestDataSet = init.readCSVasPandas(testDataSetFiledir)
varnames = init.getListFromPandas(selectVariableFiledir, 'VariableName')
varlabels = init.getListFromPandas(selectVariableFiledir, 'VariableClass')
# varmeanings=init.getListFromPandas(selectVariableFiledir,'VariableMeaning')
VegeTypes = init.getListFromPandas(vegetypeFiledir, 'VegeName')
num_class = len(VegeTypes)

#Set XGBoost parameters
params=xgbf.setParams(bool_gpu,tree_method,num_class,eval_metric,max_depth,lamb,alpha,gamma,subsample,colsample_bytree,\
                      min_child_weight,scale_pos_weight,eta,nthread,max_delta_step=max_delta_step)

#%%
#SMOTE for a balanced dataset
#tar_ratio is max(num. of samples per class)/min(num. of samples per class). -1 represents full balance, recommended here.
if bool_smote:
    TrainDataSet=smote.createSMOTEDataSet(TrainDataSet,VegeTypes,varnames,method='regular',tar_ratio=-1,\
                                          bool_strclass=bool_strclass,labelHeaderName=labelHeaderName)

#%%
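#Base-model weights are derived from the chosen baggingmetric: models scoring below
#baggingmetricthres are dropped (weight 0, later skipped as "Model not established!"), and the
#rest contribute metric**baggingweightindex to the weighted vote. A minimal sketch of that
#rule; _sketchBaggingWeight is a hypothetical name.
def _sketchBaggingWeight(metric_value, baggingweightindex=1, baggingmetricthres=0.75):
    #Zero weight removes the model from the ensemble; otherwise the metric (e.g. kappa)
    #raised to baggingweightindex is used as the voting weight.
    if metric_value < baggingmetricthres:
        return 0.0
    return metric_value ** baggingweightindex

#Example: with baggingweightindex=1 and baggingmetricthres=0.75, a kappa of 0.82 yields a
#weight of 0.82 and a kappa of 0.70 yields 0.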
min_evalue_gain = 0.0
# max_backtrack_times = 70  #max times the evalue gain may stay below 0
rm_itvl = 5  #remove trivial features from the selected set after every rm_itvl runtimes
             #(evaluated by XGBoost feature importance)
cv_num = 1  #repeat times of the k-fold cross validation
skf_split = 10  #k of the (stratified) k-fold
evalue_method = 'kappa'  #evaluation metric

#%%
#Read datasets
trainDataSetFiledir = dirfrom + os.sep + trainDataSetName
validDataSetFiledir = dirfrom + os.sep + validDataSetName
variableIDFiledir = dirfrom + os.sep + variableIDName
vegetypeFiledir = dirfrom + os.sep + vegetypeNames

TrainDataSet = init.readCSVasPandas(trainDataSetFiledir)
ValidDataSet = init.readCSVasPandas(validDataSetFiledir)
varnames = init.getListFromPandas(variableIDFiledir, 'VariableName')
VegeTypes = init.getListFromPandas(vegetypeFiledir, 'VegeName')
num_class = len(VegeTypes)

#Set XGBoost parameters
params=xgbf.setParams(bool_gpu,tree_method,num_class,eval_metric,max_depth,lamb,alpha,gamma,subsample,colsample_bytree,\
                      min_child_weight,scale_pos_weight,eta,nthread,max_delta_step=max_delta_step,gpu_id=1)

#%%
#Remove identical features
features_included = fs.removeIdenticalFeatures(TrainDataSet,
                                               varnames,
                                               rm_thres=rm_eq_thres)
print("%d features remained.\n" % len(features_included))

#%%
#SMOTE for balanced dataset
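#cv_num repetitions of a skf_split-fold stratified cross validation are used to score each
#candidate feature set. The sketch below only illustrates how such folds could be generated
#with scikit-learn; it is not the evaluation routine used by the selection code itself.
def _sketchStratifiedFolds(X, y, skf_split=10, cv_num=1):
    from sklearn.model_selection import StratifiedKFold
    folds = []
    for rep in range(cv_num):
        #A different shuffle per repetition; each repetition yields skf_split train/test splits.
        skf = StratifiedKFold(n_splits=skf_split, shuffle=True, random_state=rep)
        for train_idx, test_idx in skf.split(X, y):
            folds.append((train_idx, test_idx))
    return folds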
def predictMulticlassBaggingModel(MatX,
                                  nrow,
                                  ncol,
                                  varnames,
                                  num_class,
                                  params,
                                  multiclassmethod,
                                  runtimes=300,
                                  bool_save=True,
                                  savedirbase=""):
    count = 0.0
    if not bool_save:
        print("Bagging Method has to save models!")
        return
    evalweightsFileName = "Runtime_Model_Evaluation_Weights.csv"
    selectvarnamesfiledir = savedirbase + os.sep + "Runtime_Model_Select_Variables.csv"
    evalweightsFiledirto = savedirbase + os.sep + evalweightsFileName
    selrunvarspdData = init.readCSVasPandas(selectvarnamesfiledir)
    baggingweights = init.getListFromPandas(evalweightsFiledirto, 'weight')
    selectruntimesvarnames = []
    for runtime in range(runtimes):
        selectruntimesvarnames.append(
            init.getListFrompdDataSet(selrunvarspdData,
                                      "SelectVarName_run" + str(runtime)))
    del selrunvarspdData
    bool_mask = init.getMask(MatX)
    time_start = time.time()
    if multiclassmethod == 'softmax':
        pred_pY_ense = np.zeros([nrow * ncol, num_class], dtype=np.float32)
        pred_X = init.fomatMulticlassSoftmaxMatrix(MatX)
        for runtime in range(runtimes):
            if baggingweights[runtime] == 0:
                print("Model not established!")
                continue
            selruntimevarstr = selectruntimesvarnames[runtime]
            selruntimevaridx = _findListSubsetIndexes(selruntimevarstr,
                                                      varnames)
            pred_X_runtime = pred_X[:, selruntimevaridx]
            print("Predicting Bagging Model... runtime = %d" % runtime)
            savedir = savedirbase + os.sep + "runtime_" + str(runtime)
            pred_pY=mlc.predictMulticlassSoftmaxModelCvted([],pred_X_runtime,params,\
                                                           runtime=runtime,bool_save=bool_save,savedir=savedir)
            pred_Y = np.argmax(pred_pY, axis=1)
            pred_Y_epd = init.expandCategories(pred_Y, num_class=num_class)
            pred_pY_ense = pred_pY_ense + baggingweights[
                runtime] * pred_Y_epd.astype(np.float32)
            time_stop = time.time()
            count = count + 1
            done = count / runtimes
            remain = (runtimes - count) / runtimes
            num_day, num_hour, num_min = _calDueTime(time_start, time_stop,
                                                     done, 0.0)
            print(
                "Model: %d Calculating Finished! Done: %.2f%%, Remaining: %.2f%%"
                % (runtime, 100 * done, 100 * remain))
            print("Calculating will finish in %d Days %d Hours %d Minutes\n" %
                  (num_day, num_hour, num_min))
        [pred_Y, pred_pY] = init.reshapeMulticlassMatrix(pred_pY_ense,
                                                         nrow,
                                                         ncol,
                                                         num_class,
                                                         bool_onearray=False,
                                                         mask=bool_mask.flatten())
    elif multiclassmethod == 'category':
        pred_pY_ense = np.zeros([nrow * ncol, num_class], dtype=np.float32)
        pred_X = init.formatMulticlassCategoryMatrix(MatX, num_class)
        for runtime in range(runtimes):
            if baggingweights[runtime] == 0:
                print("Model not established!")
                continue
            selruntimevarstr = selectruntimesvarnames[runtime]
            selruntimevaridx = _findListSubsetIndexes(selruntimevarstr,
                                                      varnames)
            pred_X_runtime = pred_X[:, selruntimevaridx]
            print("Predicting Bagging Model... runtime = %d" % runtime)
            savedir = savedirbase + os.sep + "runtime_" + str(runtime)
            pred_Y=mlc.predictMulticlassCategoryModelCvted([],pred_X_runtime,params,runtime=runtime,bool_retlabel=True,num_instance=nrow*ncol,num_class=num_class,\
                                                           bool_save=bool_save,savedir=savedir)
            pred_Y_epd = init.expandCategories(pred_Y, num_class=num_class)
            pred_pY_ense = pred_pY_ense + baggingweights[
                runtime] * pred_Y_epd.astype(np.float32)
            time_stop = time.time()
            count = count + 1
            done = count / runtimes
            remain = (runtimes - count) / runtimes
            num_day, num_hour, num_min = _calDueTime(time_start, time_stop,
                                                     done, 0.0)
            print(
                "Model: %d Calculating Finished! Done: %.2f%%, Remaining: %.2f%%"
                % (runtime, 100 * done, 100 * remain))
            print("Calculating will finish in %d Days %d Hours %d Minutes\n" %
                  (num_day, num_hour, num_min))
        [pred_Y, pred_pY] = init.reshapeMulticlassMatrix(pred_pY_ense,
                                                         nrow,
                                                         ncol,
                                                         num_class,
                                                         bool_onearray=False,
                                                         mask=bool_mask.flatten())
    return [pred_Y, pred_pY]
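#_findListSubsetIndexes is used above to locate each runtime's selected variable names inside
#the full varnames list so the matching columns of pred_X can be sliced out. The sketch below
#shows the assumed behaviour; the real helper is defined elsewhere in this module and may
#handle edge cases differently.
def _sketchFindListSubsetIndexes(subset_names, all_names):
    #Return the position of every selected name within the full variable-name list,
    #preserving the order of the selected subset.
    return [all_names.index(name) for name in subset_names]

#Example: _sketchFindListSubsetIndexes(['slope', 'ndvi'], ['elev', 'slope', 'ndvi'])
#gives [1, 2].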