def predict(self):
    """Predict with a stored statsmodels-style model (uses ``.params``/``.predict``).

    Values read from ``self.config``:
        algorithm, model: passed to ``self.load_model_by_database``.
        oneSample: truthy -> predict the single sample given in ``X``;
            falsy -> predict every row fetched from ``tableName``.
        X: feature values (one-sample) or the second argument handed to
            ``self.exec_sql`` (multi-sample).
        tableName: table to read for multi-sample prediction.

    Side effect: the loaded model is stored on ``self.model``.

    Returns:
        ``{"res": ..., "code": "200", "msg": "ok!"}`` on success, or
        ``{"data": "", "code": "500", "msg": ...}`` on any failure. Errors
        are logged and reported in the response, never propagated.
    """
    try:
        try:
            import statsmodels.api as sm
        except ImportError:
            # Only a genuinely missing/broken package is reworded; any other
            # error keeps its own message via the outer handler.
            raise ImportError("statsmodels.api cannot import")

        self.model = self.load_model_by_database(self.config["algorithm"],
                                                 self.config["model"])
        # Whether the fitted model carries an intercept term named "const".
        has_const = "const" in self.model.params
        res = {}

        if self.config['oneSample']:
            features = self.config['X']
            if not features or features[0] == "":
                raise ValueError(
                    "feature must not be empty when one-sample")
            X = [float(x) for x in features]
            if has_const:
                # The intercept column goes first, as the model expects.
                X = [1.] + X
            # NOTE(review): presumably expands the features to match the
            # model's polynomial terms - confirm against the helper.
            X = self.get_poly_data_from_model_params(X)
            # Filter by name instead of slicing [1:], so a model without an
            # intercept does not lose its first real feature.
            feature_names = [
                name for name in self.model.params.index if name != "const"
            ]
            res.update({
                "data": [[
                    ",".join(feature_names),
                    "{:.4f}".format(self.model.predict(X)[0])
                ]],
                "title": "单样本预测结果",
                "col": ["样本特征", "模型预测结果"],
            })
        else:
            # Multi-sample: pull the rows from the database.
            if not self.config['tableName']:
                raise ValueError(
                    "cannot find table data when multi-sample")
            data = self.exec_sql(self.config['tableName'], self.config['X'])
            log.info("输入数据大小:{}".format(len(data)))
            data = data.astype(float)
            if has_const:
                # has_constant="add": the default ("skip") silently omits
                # the intercept when a column is already constant, e.g. for
                # a single-row table.
                data = sm.add_constant(data, has_constant="add")
            data = self.get_poly_data_from_model_params(data)
            data["predict"] = self.model.predict(data)
            # The intercept is not part of the report. errors="ignore"
            # covers models fitted without one (no "const" column to drop).
            data = data.drop(["const"], axis=1, errors="ignore")
            res.update(
                transform_table_data_to_html({
                    "data": data.values.tolist(),
                    "title": "多样本预测结果",
                    "col": data.columns.tolist(),
                    "row": data.index.tolist()
                }))

        response_data = {"res": res, "code": "200", "msg": "ok!"}
        return response_data
    except Exception as e:
        log.exception("Exception Logged")
        return {"data": "", "code": "500", "msg": "{}".format(e.args)}
def predict(self):
    """Predict with a stored model for one sample or for ``self.table_data``.

    Values read from ``self.config``:
        algorithm, model: passed to ``self.load_model_by_database``.
        oneSample: truthy -> predict the single sample given in ``X``;
            falsy -> predict every row of ``self.table_data``.
        X: feature values of the single sample.
        tableName: must be non-empty for multi-sample prediction.

    Non-string predictions are rendered with zero decimals.

    Returns:
        ``{"res": ..., "code": "200", "msg": "ok!"}`` on success, or
        ``{"data": "", "code": "500", "msg": ...}`` on any failure. Errors
        are logged and reported in the response, never propagated.
    """
    try:
        model = self.load_model_by_database(self.config["algorithm"],
                                            self.config["model"])
        res = {}

        if self.config['oneSample']:
            features = self.config['X']
            if not features:
                raise ValueError(
                    "feature must not be empty when one-sample")
            sample = [[float(x) for x in features]]
            # Run the model once and reuse the result instead of calling
            # predict again for the type check and for the formatting.
            prediction = model.predict(sample)[0]
            if not isinstance(prediction, str):
                prediction = "{:.0f}".format(prediction)
            res.update({
                "data": [[",".join(str(s) for s in features), prediction]],
                "title": "单样本预测结果",
                "col": ["样本特征", "模型预测结果"],
            })
        else:
            # Multi-sample: the rows come from the already loaded table.
            if not self.config['tableName']:
                raise ValueError(
                    "cannot find table data when multi-sample")
            data = self.table_data
            log.info("输入数据大小:{}".format(len(data)))
            # astype returns a new frame, so self.table_data stays untouched
            # when the "predict" column is added below.
            data = data.astype(float)
            data["predict"] = model.predict(data.values)
            if data["predict"].dtypes != "object":
                # Numeric predictions are shown without decimals.
                data = format_dataframe(data, {"predict": ".0f"})
            res.update(
                transform_table_data_to_html({
                    "data": data.values.tolist(),
                    "title": "多样本预测结果",
                    "col": data.columns.tolist(),
                    "row": data.index.tolist()
                }))

        response_data = {"res": res, "code": "200", "msg": "ok!"}
        return response_data
    except Exception as e:
        log.exception("Exception Logged")
        return {"data": "", "code": "500", "msg": "{}".format(e.args)}
def visualization(self):
    """Build descriptive statistics and exploratory plots for the table.

    API request parameters (read from ``self.config``), with examples::

        "tableName": "advertising",  # str, database table name
        "X": ["TV", "radio", "newspaper"],  # list, independent variables;
                                     # for table orientation "h" these are
                                     # several variable names, for "v" the
                                     # categorical variable field
        "Y": ["sales"],              # list, dependent variable, used when
                                     # the table orientation is "v"
        "show_options": ["y_count", "pairs", "corr", "y_corr"],  # display
                                     # options
        "x_count": [],               # list, independent variables to show
                                     # a frequency histogram for
        "box": [],                   # list, independent variables to show
                                     # a box plot for

    Side effect: ``self.table_data`` is replaced by its float-cast version.

    :return: ``{"res": [...], "code": "200", "msg": "ok!"}`` where ``res``
        holds the describe() table followed by one ``{"title", "base64"}``
        entry per requested plot, or
        ``{"data": "", "code": "500", "msg": ...}`` on any failure.
    """
    try:
        def figure_entry(title):
            # Hand pyplot to the encoder helper and pair the stringified
            # result with the title shown to the user.
            return {
                "title": title,
                "base64": "{}".format(self.plot_and_output_base64_png(plt))
            }

        res = []
        self.table_data = self.table_data.astype("float")
        table = self.table_data

        summary = table.describe()
        res.append(
            transform_table_data_to_html({
                "data": summary.values.tolist(),
                "title": "描述性统计分析",
                "col": summary.columns.tolist(),
                "row": summary.index.tolist()
            }))

        # NOTE(review): sns.distplot is deprecated in seaborn >= 0.11;
        # confirm the pinned seaborn version before upgrading.
        x_count = self.config.get("x_count")
        if x_count and x_count[0]:
            for x in x_count:
                sns.distplot(table[x], kde=False)
                plt.ylabel("frequency")
                res.append(figure_entry("{} - 频率分布".format(x)))

        show_options = self.config["show_options"]

        if "y_count" in show_options:
            target = self.config["Y"][0]
            sns.distplot(table[target], kde=False)
            plt.xlabel("section")
            plt.ylabel("frequency")
            res.append(figure_entry("{} - 频率分布".format(target)))

        box = self.config.get("box")
        if box and box[0]:
            for x in box:
                sns.boxplot(table[x], palette="Set2", orient="v")
                res.append(figure_entry("{} - 箱型图".format(x)))

        if "pairs" in show_options:
            sns.pairplot(table)
            res.append(figure_entry("变量两两关系图"))

        if "corr" in show_options:
            corr = table.corr()
            sns.heatmap(corr,
                        xticklabels=corr.columns,
                        yticklabels=corr.columns,
                        linewidths=0.2,
                        cmap="YlGnBu",
                        annot=True)
            res.append(figure_entry("相关系数图"))

        if "y_corr" in show_options:
            # Correlation of every column with the dependent variable,
            # strongest positive first.
            y_corr = table.corr()[self.config["Y"][0]]
            y_corr.sort_values(ascending=False).plot(kind='bar')
            res.append(figure_entry("因变量和各自变量的相关系数图"))

        response_data = {"res": res, "code": "200", "msg": "ok!"}
        return response_data
    except Exception as e:
        # Log the traceback like the predict methods do; the caller only
        # receives e.args in the response.
        log.exception("Exception Logged")
        return {"data": "", "code": "500", "msg": "{}".format(e.args)}