def _predictSingleclassBaggingModelMatrix(CPIDs,RuntimeList,vtname,pred_X,varnames,selectruntimesvarnames,params,matshape,baggingweights,\ single_thres,bool_gpu,n_gpus,n_parallels,bool_save,savedirbase): print("Predicting Singleclass Bagging Ensemble Models...") params_parallel = copy.deepcopy(params) process_pid = os.getpid() if len(CPIDs) < n_parallels: CPIDs.append(process_pid) process_pid_index = CPIDs.index(process_pid) print("Worker #%d: PID = %d" % (process_pid_index, process_pid)) if bool_gpu: params_parallel['gpu_id'] = process_pid_index % n_gpus #Execute tasks pred_pY_ense = np.zeros(matshape[0] * matshape[1], dtype=np.float32) for runtime in RuntimeList: print("Predicting Singleclass Model... runtime = %d" % runtime) savedir = savedirbase + os.sep + "runtime_" + str(runtime) modelName = vtname + '_xgboost_singleclass_run' + str( runtime) + ".model" modeldir = savedir + os.sep + modelName model = xgbf.loadModel(modeldir, params_parallel) selruntimevarstr = selectruntimesvarnames[runtime] selruntimevaridx = _findListSubsetIndexes(selruntimevarstr, varnames) pred_X_runtime = pred_X[:, selruntimevaridx] [pred_Y, pred_pY] = xgbf.Predict(model, pred_X_runtime, bool_binary=1, threshold=single_thres) pred_pY_ense = pred_pY_ense + baggingweights[runtime] * pred_pY print("Model: %d Calculating Finished!\n" % (runtime)) return pred_pY_ense
def testMulticlassSoftmaxModel(ModelList, TestDataSet, VegeTypes, varnames, params, runtime=-1, bool_pandas=True,
                               bool_strclass=False, labelHeaderName="", bool_save=False, savedir=""):
    """Run a softmax multiclass model on a labelled test set.

    The model is ``ModelList[0]`` when ``ModelList`` is non-empty; otherwise it is
    loaded from ``savedir`` (``softmax_multiclass.model`` for ``runtime == -1``,
    else ``softmax_multiclass_run<runtime>.model``). ``bool_save`` is not consulted.

    Args:
        ModelList: Sequence whose first element is the model, or an empty sequence
            to load the model from disk.
        TestDataSet: Input for ``xgbf.trainingDataSet`` when ``bool_pandas`` is truthy,
            otherwise a ``[test_Y, test_X]`` pair.
        VegeTypes: Class list; its length is the number of classes.
        varnames: Feature names; the length gives the row width for 1-D inputs.
        params: Parameters forwarded to ``xgbf.loadModel``.
        runtime: Run index used in the model file name, ``-1`` for the un-numbered file.
        bool_pandas: Selects how ``TestDataSet`` is interpreted (see above).
        bool_strclass: Forwarded to ``xgbf.trainingDataSet``; when falsy, labels with
            more than one dimension are passed through ``init.mergeCategories``.
        labelHeaderName: Forwarded to ``xgbf.trainingDataSet``.
        bool_save: Unused.
        savedir: Directory of the saved model.

    Returns:
        ``[pred_Y, pred_pY, test_Y]``: arg-max labels along axis 1, the raw output of
        ``xgbf.Predict`` and the (possibly merged) reference labels.
    """
    n_classes = len(VegeTypes)

    if len(ModelList) > 0:
        booster = ModelList[0]
    else:
        if runtime == -1:
            model_file = 'softmax_multiclass.model'
        else:
            model_file = 'softmax_multiclass_run' + str(runtime) + '.model'
        booster = xgbf.loadModel(savedir + os.sep + model_file, params)

    if bool_pandas:
        test_Y, test_X = xgbf.trainingDataSet(TestDataSet, VegeTypes, varnames,
                                              bool_strclass=bool_strclass,
                                              labelHeaderName=labelHeaderName)
    else:
        test_Y, test_X = TestDataSet

    if len(test_X.shape) == 1:
        # A single sample arrived as flat vectors: lift features and labels to one-row matrices.
        feature_row = np.zeros([1, len(varnames)])
        feature_row[0, :] = test_X
        test_X = feature_row
        label_row = np.zeros([1, n_classes])
        label_row[0, :] = test_Y
        test_Y = label_row

    if not bool_strclass and len(test_Y.shape) > 1:
        test_Y = init.mergeCategories(test_Y)

    class_scores = xgbf.Predict(booster, test_X, bool_binary=False)
    class_labels = np.argmax(class_scores, axis=1)
    return [class_labels, class_scores, test_Y]
def predictMulticlassCategoryModelCvted(ModelList, pred_X, params, runtime=-1, bool_retlabel=False,
                                        num_instance=-1, num_class=-1, bool_save=False, savedir=""):
    """Predict with a category-style multiclass model on already formatted input.

    The model is loaded from ``savedir`` when ``bool_save`` is truthy
    (``category_multiclass.model`` for ``runtime == -1``, otherwise
    ``category_multiclass_run<runtime>.model``); else ``ModelList[0]`` is used.

    Args:
        ModelList: Sequence whose first element is the model (used when ``bool_save``
            is falsy).
        pred_X: Input forwarded unchanged to ``xgbf.Predict``.
        params: Parameters forwarded to ``xgbf.loadModel``.
        runtime: Run index used in the model file name, ``-1`` for the un-numbered file.
        bool_retlabel: When truthy, return class labels instead of the raw prediction.
        num_instance: Number of instances. A negative value (the default) means
            "infer it from the prediction length and ``num_class``".
        num_class: Number of classes; must be positive when ``bool_retlabel`` is truthy.
        bool_save: Load the model from disk instead of taking it from ``ModelList``.
        savedir: Directory of the saved model.

    Returns:
        The raw output of ``xgbf.Predict`` when ``bool_retlabel`` is falsy; otherwise a
        1-D array with the arg-max class of each instance. The prediction is treated
        as a flat vector holding ``num_class`` consecutive scores per instance.

    Raises:
        ValueError: If ``bool_retlabel`` is truthy and ``num_class`` is not positive,
            or the prediction is too short for the requested shape.
    """
    if bool_save:
        if runtime == -1:
            modelName = 'category_multiclass.model'
        else:
            modelName = 'category_multiclass_run' + str(runtime) + '.model'
        modeldir = savedir + os.sep + modelName
        model = xgbf.loadModel(modeldir, params)
    else:
        model = ModelList[0]

    pred_pY = xgbf.Predict(model, pred_X, bool_binary=False)
    if not bool_retlabel:
        return pred_pY

    if num_class <= 0:
        raise ValueError("num_class must be a positive integer when bool_retlabel is True")
    flat_scores = np.asarray(pred_pY)
    if num_instance < 0:
        # The default sentinel cannot be used as an array dimension; derive the
        # instance count from the number of scores instead.
        num_instance = flat_scores.shape[0] // num_class

    # One row per instance, one column per class (same layout the per-row copy produced).
    score_table = flat_scores[:num_instance * num_class].reshape(num_instance, num_class)
    return np.argmax(score_table, axis=1)
def predictMulticlassCategoryModelMatrix(ModelList, MatX, num_class, params, bool_save=False, savedir=""):
    """Predict class labels and per-class scores for every cell of a 3-D feature matrix.

    ``MatX`` is indexed as ``[row, col, feature]``. Its cells are flattened row by row,
    expanded with ``init.formatMulticlassCategoryInput`` and scored with
    ``xgbf.Predict``; the flat score vector is then read as ``num_class`` consecutive
    values per cell.

    Args:
        ModelList: Sequence whose first element is the model (used when ``bool_save``
            is falsy).
        MatX: 3-D array of features.
        num_class: Number of classes.
        params: Parameters forwarded to ``xgbf.loadModel``.
        bool_save: When truthy, load ``category_multiclass.model`` from ``savedir``.
        savedir: Directory of the saved model.

    Returns:
        ``[pred_Y, pred_pY]`` where ``pred_Y`` has shape ``(rows, cols)`` and holds the
        arg-max class of each cell, and ``pred_pY`` is a float32 array of shape
        ``(rows, cols, num_class)`` with the scores.
    """
    matshape = MatX.shape
    n_cells = matshape[0] * matshape[1]

    # Flatten each feature layer into one column of the sample matrix.
    pred_X = np.zeros([n_cells, matshape[2]], dtype=np.float32)
    for i in range(matshape[2]):
        pred_X[:, i] = MatX[:, :, i].flatten()

    if bool_save:
        modelName = 'category_multiclass.model'
        modeldir = savedir + os.sep + modelName
        model = xgbf.loadModel(modeldir, params)
    else:
        model = ModelList[0]

    pred_X = init.formatMulticlassCategoryInput([], pred_X, num_class, 0)
    print("Predicting......")
    pred_pY = xgbf.Predict(model, pred_X, bool_binary=False)

    # A single C-order reshape replaces the per-cell Python loop: row k of the table
    # is the slice pred_pY[k * num_class:(k + 1) * num_class].
    score_table = np.asarray(pred_pY)[:n_cells * num_class].reshape(n_cells, num_class)
    pred_Y = np.argmax(score_table, axis=1).reshape(matshape[0], matshape[1])
    prob_Y = score_table.reshape(matshape[0], matshape[1], num_class).astype(np.float32)
    return [pred_Y, prob_Y]
def predictMulticlassSoftmaxModelCvted(ModelList, pred_X, params, runtime=-1, bool_save=False, savedir=""):
    """Return the raw softmax-model prediction for already prepared input.

    Args:
        ModelList: Sequence whose first element is the model (used when ``bool_save``
            is falsy).
        pred_X: Input forwarded unchanged to ``xgbf.Predict``.
        params: Parameters forwarded to ``xgbf.loadModel``.
        runtime: Run index used in the model file name; ``-1`` selects
            ``softmax_multiclass.model``.
        bool_save: When truthy, load the model from ``savedir``.
        savedir: Directory of the saved model.

    Returns:
        Whatever ``xgbf.Predict(model, pred_X, bool_binary=False)`` returns.
    """
    if not bool_save:
        booster = ModelList[0]
    else:
        if runtime == -1:
            model_file = 'softmax_multiclass.model'
        else:
            model_file = 'softmax_multiclass_run' + str(runtime) + '.model'
        booster = xgbf.loadModel(savedir + os.sep + model_file, params)
    return xgbf.Predict(booster, pred_X, bool_binary=False)
def testSingleclassBaggingModel(Models, TestDataSet, vtname, params, single_thres=0.5, runtimes=300,
                                bool_strclass=False, labelHeaderName="", bool_save=False, savedirbase=""):
    """Evaluate a single-class bagging ensemble on a labelled test set.

    The probabilities of runs ``0 .. runtimes - 1`` are summed, each scaled by its
    ensemble weight, and the sum is thresholded with ``single_thres``.

    Args:
        Models: ``[ModelList, selectruntimesvarnames, ense_weights]``; only read when
            ``bool_save`` is falsy.
        TestDataSet: Test data passed to ``xgbf.trainingDataSet``; its ``len`` sizes
            the accumulator.
        vtname: Target class name; also the prefix of the saved file names.
        params: Parameters forwarded to ``xgbf.loadModel``.
        single_thres: Threshold for ``xgbf.Predict`` and for the ensemble labels.
        runtimes: Number of bagging runs.
        bool_strclass: Forwarded to ``xgbf.trainingDataSet``.
        labelHeaderName: Forwarded to ``xgbf.trainingDataSet``.
        bool_save: When truthy, weights, per-run variable names and models are read
            from ``savedirbase``.
        savedirbase: Base directory of the saved ensemble.

    Returns:
        ``[pred_Y, pred_pY, test_Y]``: thresholded ensemble labels (0/1), the weighted
        probability sum, and the reference labels of the last run (first column only
        when they have more than one dimension).
    """
    ModelList = []
    if bool_save:
        weights_csv = savedirbase + os.sep + (vtname + "_Runtime_Evaluation_Weight.csv")
        selvars_csv = savedirbase + os.sep + vtname + "_Runtime_Model_Select_Variables.csv"
        ense_weights = init.getListFromPandas(weights_csv, 'weight')
        selvars_table = init.readCSVasPandas(selvars_csv)
        selectruntimesvarnames = [
            init.getListFrompdDataSet(selvars_table, "SelectVarName_run" + str(run_idx))
            for run_idx in range(runtimes)
        ]
        del selvars_table
    else:
        ModelList, selectruntimesvarnames, ense_weights = Models

    ensemble_prob = np.zeros(len(TestDataSet))
    for run_idx in range(runtimes):
        print("Predicting runtime = %d" % run_idx)
        if bool_save:
            run_dir = savedirbase + os.sep + "runtime_" + str(run_idx)
            model_file = vtname + '_xgboost_singleclass_run' + str(run_idx) + ".model"
            booster = xgbf.loadModel(run_dir + os.sep + model_file, params)
        else:
            booster = ModelList[run_idx]

        # Features are rebuilt per run because every run uses its own variable subset.
        run_varnames = selectruntimesvarnames[run_idx]
        test_Y, test_X = xgbf.trainingDataSet(TestDataSet, [vtname], run_varnames,
                                              bool_strclass=bool_strclass,
                                              labelHeaderName=labelHeaderName,
                                              bool_binary=True)
        _, run_prob = xgbf.Predict(booster, test_X, bool_binary=1, threshold=single_thres)
        ensemble_prob = ensemble_prob + run_prob * ense_weights[run_idx]

    ensemble_label = (ensemble_prob >= single_thres) * 1
    if len(test_Y.shape) > 1:
        test_Y = test_Y[:, 0]
    return [ensemble_label, ensemble_prob, test_Y]
def testMulticlassCategoryModel(ModelList, TestDataSet, VegeTypes, varnames, params, runtime=-1, bool_pandas=True,
                                bool_strclass=False, labelHeaderName="", bool_save=False, savedir=""):
    """Run a category-style multiclass model on a labelled test set.

    The model is ``ModelList[0]`` when ``ModelList`` is non-empty; otherwise it is
    loaded from ``savedir`` (``category_multiclass.model`` for ``runtime == -1``,
    else ``category_multiclass_run<runtime>.model``). ``bool_save`` is not consulted.

    Args:
        ModelList: Sequence whose first element is the model, or an empty sequence
            to load the model from disk.
        TestDataSet: Input for ``xgbf.trainingDataSet`` when ``bool_pandas`` is truthy,
            otherwise a ``[test_Y, test_X]`` pair.
        VegeTypes: Class list; its length is the number of classes.
        varnames: Feature names; the length gives the row width for 1-D inputs.
        params: Parameters forwarded to ``xgbf.loadModel``.
        runtime: Run index used in the model file name, ``-1`` for the un-numbered file.
        bool_pandas: Selects how ``TestDataSet`` is interpreted (see above).
        bool_strclass: Forwarded to ``xgbf.trainingDataSet``; when falsy, labels with
            more than one dimension are passed through ``init.mergeCategories``.
        labelHeaderName: Forwarded to ``xgbf.trainingDataSet``.
        bool_save: Unused.
        savedir: Directory of the saved model.

    Returns:
        ``[pred_Y, pred_pY, test_Y]``: arg-max labels, an ``(instances, classes)``
        score table and the (possibly merged) reference labels.
    """
    n_classes = len(VegeTypes)

    if len(ModelList) > 0:
        booster = ModelList[0]
    else:
        if runtime == -1:
            model_file = 'category_multiclass.model'
        else:
            model_file = 'category_multiclass_run' + str(runtime) + '.model'
        booster = xgbf.loadModel(savedir + os.sep + model_file, params)

    if bool_pandas:
        test_Y, test_X = xgbf.trainingDataSet(TestDataSet, VegeTypes, varnames,
                                              bool_strclass=bool_strclass,
                                              labelHeaderName=labelHeaderName)
    else:
        test_Y, test_X = TestDataSet

    # Remember whether the input was one flat sample before it gets lifted to 2-D.
    single_sample = len(test_X.shape) == 1
    if single_sample:
        feature_row = np.zeros([1, len(varnames)])
        feature_row[0, :] = test_X
        test_X = feature_row
        label_row = np.zeros([1, n_classes])
        label_row[0, :] = test_Y
        test_Y = label_row

    if not bool_strclass and len(test_Y.shape) > 1:
        test_Y = init.mergeCategories(test_Y)

    # Instance count must be taken before the input is reformatted for the model.
    n_instances = test_X.shape[0]
    test_X = init.formatMulticlassCategoryInput([], test_X, n_classes, 0)
    flat_scores = xgbf.Predict(booster, test_X, bool_binary=False)

    if single_sample:
        score_table = np.zeros([1, n_classes])
        score_table[0, :] = flat_scores
    else:
        # The flat prediction holds n_classes consecutive scores per instance.
        score_table = np.zeros([n_instances, n_classes])
        for row in range(n_instances):
            start = row * n_classes
            score_table[row, :] = flat_scores[start:start + n_classes]

    class_labels = np.argmax(score_table, axis=1)
    return [class_labels, score_table, test_Y]
def predictMulticlassSoftmaxModelMatrix(ModelList, MatX, params, bool_save=False, savedir=""):
    """Predict class labels and per-class scores for every cell of a feature matrix.

    Args:
        ModelList: Sequence whose first element is the model (used when ``bool_save``
            is falsy).
        MatX: Feature matrix passed to ``xgbf.predictMultiMatrix``; its first two
            dimensions define the shape of the outputs.
        params: Parameters forwarded to ``xgbf.loadModel``.
        bool_save: When truthy, load ``softmax_multiclass.model`` from ``savedir``.
        savedir: Directory of the saved model.

    Returns:
        ``[pred_Y, pred_pY]`` where ``pred_Y`` is the arg-max class per cell with shape
        ``(rows, cols)`` and ``pred_pY`` is a float32 array of shape
        ``(rows, cols, classes)``.
    """
    grid_shape = MatX.shape

    if not bool_save:
        booster = ModelList[0]
    else:
        model_file = 'softmax_multiclass.model'
        booster = xgbf.loadModel(savedir + os.sep + model_file, params)

    cell_scores = xgbf.predictMultiMatrix(booster, MatX, bool_binary=False)
    n_rows, n_cols = grid_shape[0], grid_shape[1]

    label_grid = np.argmax(cell_scores, axis=1).reshape(n_rows, n_cols)

    # Lay each class column out as its own (rows, cols) layer.
    n_classes = cell_scores.shape[1]
    score_grid = np.zeros([n_rows, n_cols, n_classes], dtype=np.float32)
    for class_idx in range(n_classes):
        score_grid[:, :, class_idx] = cell_scores[:, class_idx].reshape(n_rows, n_cols)

    return [label_grid, score_grid]
def _testSingleclassBaggingModel(CPIDs,RuntimeList,TestDataSet,vtname,runtime,params,ModelList,bool_gpu,n_gpus,n_parallels,\ selectruntimesvarnames,baggingweights,single_thres,bool_strclass,labelHeaderName,\ bool_save,savedirbase): print("Predicting Singleclass Bagging Ensemble Models...") params_parallel = copy.deepcopy(params) process_pid = os.getpid() if len(CPIDs) < n_parallels: CPIDs.append(process_pid) process_pid_index = CPIDs.index(process_pid) print("Worker #%d: PID = %d" % (process_pid_index, process_pid)) if bool_gpu: params_parallel['gpu_id'] = process_pid_index % n_gpus pred_pY_ense = np.zeros(len(TestDataSet)) for runtime in RuntimeList: print("Predicting runtime = %d" % runtime) if bool_save: savedir = savedirbase + os.sep + "runtime_" + str(runtime) modelName = vtname + '_xgboost_singleclass_run' + str( runtime) + ".model" modeldir = savedir + os.sep + modelName model = xgbf.loadModel(modeldir, params_parallel) else: model = ModelList[runtime] varnames = selectruntimesvarnames[runtime] [test_Y,test_X]=xgbf.trainingDataSet(TestDataSet,[vtname],varnames,\ bool_strclass=bool_strclass,labelHeaderName=labelHeaderName,bool_binary=True) [pred_Y, pred_pY] = xgbf.Predict(model, test_X, bool_binary=1, threshold=single_thres) pred_pY_ense = pred_pY_ense + pred_pY * baggingweights[runtime] pred_Y_ense = (pred_pY_ense >= single_thres) * 1 pred_Y = pred_Y_ense pred_pY = pred_pY_ense if len(test_Y.shape) > 1: test_Y = test_Y[:, 0] return [pred_Y, pred_pY, test_Y]
def predictSingleclassBaggingModelMatrix(Models, MatX, vtname, varnames, params, single_thres=0.5, runtimes=300,
                                         filter_percent=0, bool_save=False, savedirbase=""):
    """Predict a single-class bagging ensemble for every cell of a 3-D feature matrix.

    ``MatX`` is indexed as ``[row, col, feature]``. For runs ``0 .. runtimes - 1`` the
    model is evaluated on the feature columns selected for that run and its
    probabilities are summed, scaled by the run's ensemble weight. Progress and an
    estimate from ``_calDueTime`` are printed after every run.

    Args:
        Models: ``[ModelList, selectruntimesvarnames, ense_weights]``; only read when
            ``bool_save`` is falsy.
        MatX: 3-D array of features.
        vtname: Target class name; prefix of the saved file names.
        varnames: Full variable-name list that the per-run names are looked up in.
        params: Parameters forwarded to ``xgbf.loadModel``.
        single_thres: Threshold for ``xgbf.Predict`` and for the ensemble labels.
        runtimes: Number of bagging runs.
        filter_percent: When positive, probabilities below
            ``filter_percent * max(probabilities selected by the mask)`` are set to 0.
            The labels are computed before this step and are not affected by it.
        bool_save: When truthy, weights, per-run variable names and models are read
            from ``savedirbase``.
        savedirbase: Base directory of the saved ensemble.

    Returns:
        ``[pred_Y_ense, pred_pY_ense]``, both of shape ``(rows, cols)``: the
        thresholded labels (0/1) and the (optionally filtered) weighted probability sum.
    """
    if bool_save:
        evalweightsFileName = vtname + "_Runtime_Evaluation_Weight.csv"
        selectvarnamesfiledir = savedirbase + os.sep + vtname + "_Runtime_Model_Select_Variables.csv"
        evalweightsFiledirto = savedirbase + os.sep + evalweightsFileName
        ense_weights = init.getListFromPandas(evalweightsFiledirto, 'weight')
        selrunvarspdData = init.readCSVasPandas(selectvarnamesfiledir)
        selectruntimesvarnames = [
            init.getListFrompdDataSet(selrunvarspdData, "SelectVarName_run" + str(runtime))
            for runtime in range(runtimes)
        ]
        del selrunvarspdData
    else:
        [ModelList, selectruntimesvarnames, ense_weights] = Models

    matshape = MatX.shape
    n_cells = matshape[0] * matshape[1]
    bool_mask = init.getMask(MatX)

    # Flatten each feature layer into one column of the sample matrix.
    pred_X = np.zeros([n_cells, matshape[2]], dtype=np.float32)
    for i in range(matshape[2]):
        pred_X[:, i] = MatX[:, :, i].flatten()

    pred_pY_ense = np.zeros(n_cells, dtype=np.float32)
    time_start = time.time()
    for runtime in range(runtimes):
        print("Predicting runtime = %d..." % (runtime))
        if bool_save:
            savedir = savedirbase + os.sep + "runtime_" + str(runtime)
            modelName = vtname + '_xgboost_singleclass_run' + str(runtime) + ".model"
            modeldir = savedir + os.sep + modelName
            model = xgbf.loadModel(modeldir, params)
        else:
            model = ModelList[runtime]

        # Each run was trained on its own variable subset; pick the matching columns.
        selruntimevarstr = selectruntimesvarnames[runtime]
        selruntimevaridx = _findListSubsetIndexes(selruntimevarstr, varnames)
        pred_X_runtime = pred_X[:, selruntimevaridx]

        # Only the probabilities are needed; the per-run labels are discarded.
        _, pred_pY = xgbf.Predict(model, pred_X_runtime, bool_binary=1, threshold=single_thres)
        pred_pY_ense = pred_pY_ense + ense_weights[runtime] * pred_pY

        time_stop = time.time()
        # Runs are processed in order, so the number finished is runtime + 1
        # (kept as a float so the ratios below are true divisions).
        count = runtime + 1.0
        done = count / runtimes
        remain = (runtimes - count) / runtimes
        num_day, num_hour, num_min = _calDueTime(time_start, time_stop, done, 0.0)
        # Single-line literal: a raw line break inside a quoted string is not valid.
        print("Model: %d Calculating Finished! Done: %.2f%%, Remaining: %.2f%%"
              % (runtime, 100 * done, 100 * remain))
        print("Calculating will finish in %d Days %d Hours %d Minutes\n" % (num_day, num_hour, num_min))

    pred_Y_ense = (pred_pY_ense >= single_thres) * 1
    pred_pY_ense = pred_pY_ense.reshape(matshape[0], matshape[1])
    pred_Y_ense = pred_Y_ense.reshape(matshape[0], matshape[1])

    if filter_percent > 0:
        masked_probs = pred_pY_ense[bool_mask]
        # With nothing selected by the mask there is no maximum to scale by; skip filtering.
        if masked_probs.size > 0:
            p_max = np.max(masked_probs)
            pred_pY_ense[pred_pY_ense < p_max * filter_percent] = 0
    return [pred_Y_ense, pred_pY_ense]