def predict(self):
     """Predict with the regression model referenced by ``self.config``.

     The model is loaded through ``load_model_by_database`` using the
     ``algorithm`` and ``model`` config entries and stored on ``self.model``.

     * ``config['oneSample']`` truthy: ``config['X']`` holds the feature
       values of one sample; a leading ``1.`` is prepended when the model
       has a ``const`` parameter.
     * otherwise: rows are fetched with ``exec_sql`` from
       ``config['tableName']`` (columns ``config['X']``), cast to float and
       predicted in bulk; the prediction is added as a ``predict`` column.

     Returns:
         dict: ``{"res": ..., "code": "200", "msg": "ok!"}`` on success, or
         ``{"data": "", "code": "500", "msg": ...}`` if any exception is
         raised (the exception is logged and not propagated).
     """
     try:
         try:
             import statsmodels.api as sm
         except ImportError:
             raise ImportError("statsmodels.api cannot import")
         self.model = self.load_model_by_database(self.config["algorithm"],
                                                  self.config["model"])
         # Whether the fitted model carries an intercept term named "const".
         has_const = "const" in self.model.params
         res = {}
         if self.config['oneSample']:
             if not self.config['X'] or self.config['X'][0] == "":
                 raise ValueError(
                     "feature must not be empty when one-sample")
             X = [float(x) for x in self.config['X']]
             if has_const:
                 X = [1.] + X
             X = self.get_poly_data_from_model_params(X)
             # List every parameter except the intercept. Filtering by name
             # (rather than slicing off the first entry) keeps the first
             # feature when the model has no constant.
             feature_names = [
                 name for name in self.model.params.index if name != "const"
             ]
             res.update({
                 "data": [[
                     ",".join(feature_names),
                     "{:.4f}".format(self.model.predict(X)[0])
                 ]],
                 "title":
                 "单样本预测结果",
                 "col": ["样本特征", "模型预测结果"],
             })
         else:
             # Fetch the samples from the database.
             if not self.config['tableName']:
                 raise ValueError(
                     "cannot find table data when multi-sample")
             data = self.exec_sql(self.config['tableName'],
                                  self.config['X'])
             log.info("输入数据大小:{}".format(len(data)))
             data = data.astype(float)
             if has_const:
                 data = sm.add_constant(data)
             data = self.get_poly_data_from_model_params(data)
             data["predict"] = self.model.predict(data)
             # The helper column only exists when a constant was actually
             # added; dropping it unconditionally raised KeyError otherwise.
             if "const" in data.columns:
                 data.drop(["const"], axis=1, inplace=True)
             res.update(
                 transform_table_data_to_html({
                     "data": data.values.tolist(),
                     "title": "多样本预测结果",
                     "col": data.columns.tolist(),
                     "row": data.index.tolist()
                 }))
         response_data = {"res": res, "code": "200", "msg": "ok!"}
         return response_data
     except Exception as e:
         log.exception("Exception Logged")
         return {"data": "", "code": "500", "msg": "{}".format(e.args)}
# Example #2
# 0
 def predict(self):
     """Predict with the model referenced by ``self.config``.

     The model is loaded through ``load_model_by_database`` using the
     ``algorithm`` and ``model`` config entries.

     * ``config['oneSample']`` truthy: ``config['X']`` holds the feature
       values of one sample. A string prediction is returned as is; any
       other prediction is formatted with ``"{:.0f}"``.
     * otherwise: ``self.table_data`` is cast to float and predicted in
       bulk; the prediction is added as a ``predict`` column.

     Returns:
         dict: ``{"res": ..., "code": "200", "msg": "ok!"}`` on success, or
         ``{"data": "", "code": "500", "msg": ...}`` if any exception is
         raised (the exception is logged and not propagated).
     """
     try:
         model = self.load_model_by_database(self.config["algorithm"],
                                             self.config["model"])
         res = {}
         if self.config['oneSample']:
             if not self.config['X']:
                 raise ValueError(
                     "feature must not be empty when one-sample")
             X = [[float(x) for x in self.config['X']]]
             # Run the model once and reuse the result instead of calling
             # predict() again for the type check and the formatting.
             predicted = model.predict(X)[0]
             if not isinstance(predicted, str):
                 predicted = "{:.0f}".format(predicted)
             res.update({
                 "data":
                 [[",".join([str(s) for s in self.config['X']]), predicted]],
                 "title":
                 "单样本预测结果",
                 "col": ["样本特征", "模型预测结果"],
             })
         else:
             # The samples come from the table loaded into self.table_data;
             # a falsy tableName (including "") means no table was given.
             if not self.config['tableName']:
                 raise ValueError(
                     "cannot find table data when multi-sample")
             data = self.table_data
             log.info("输入数据大小:{}".format(len(data)))
             data = data.astype(float)
             data["predict"] = model.predict(data.values)
             # Only non-object predictions are reformatted with ".0f".
             if data["predict"].dtypes != "object":
                 data = format_dataframe(data, {"predict": ".0f"})
             res.update(
                 transform_table_data_to_html({
                     "data": data.values.tolist(),
                     "title": "多样本预测结果",
                     "col": data.columns.tolist(),
                     "row": data.index.tolist()
                 }))
         response_data = {"res": res, "code": "200", "msg": "ok!"}
         return response_data
     except Exception as e:
         log.exception("Exception Logged")
         return {"data": "", "code": "500", "msg": "{}".format(e.args)}
 def visualization(self):
     """
     Build the descriptive-statistics table and the requested plots.

     Request parameters read from ``self.config``:
         "tableName": "advertising",  # str, database table name
         "X": ["TV", "radio", "newspaper"],  # list, independent variables;
             # variable names when the table direction is "h", the
             # categorical variable field when it is "v"
         "Y": ["sales"],  # list, dependent variable, used when the table
             # direction is "v"
         "show_options": ["y_count", "pairs", "corr", "y_corr"],  # plots
             # to render; treated as empty when missing
         "x_count": [],  # list, independent variables to draw a frequency
             # histogram for
         "box": [],  # list, independent variables to draw a box plot for

     Side effect: ``self.table_data`` is replaced by its float-cast copy.

     :return: ``{"res": [...], "code": "200", "msg": "ok!"}`` on success,
         where ``res`` holds the statistics table followed by one
         ``{"title", "base64"}`` entry per plot; on any exception the
         error is logged and ``{"data": "", "code": "500", "msg": ...}``
         is returned.
     """
     try:
         res = []

         def add_figure(title):
             # Store the current pyplot figure as a base64 PNG entry.
             res.append({
                 "title": title,
                 "base64":
                 "{}".format(self.plot_and_output_base64_png(plt))
             })

         self.table_data = self.table_data.astype("float")
         data = self.table_data.describe()
         res.append(
             transform_table_data_to_html({
                 "data": data.values.tolist(),
                 "title": "描述性统计分析",
                 "col": data.columns.tolist(),
                 "row": data.index.tolist()
             }))
         # A missing "show_options" means no optional plot was requested.
         show_options = self.config.get("show_options") or []

         # Skipped when the list is absent, empty or starts with a falsy
         # entry such as "".
         x_count = self.config.get("x_count")
         if x_count and x_count[0]:
             for x in x_count:
                 sns.distplot(self.table_data[x], kde=False)
                 plt.ylabel("frequency")
                 add_figure("{} - 频率分布".format(x))
         if "y_count" in show_options:
             sns.distplot(self.table_data[self.config["Y"][0]], kde=False)
             plt.xlabel("section")
             plt.ylabel("frequency")
             add_figure("{} - 频率分布".format(self.config["Y"][0]))
         box = self.config.get("box")
         if box and box[0]:
             for x in box:
                 sns.boxplot(self.table_data[x], palette="Set2", orient="v")
                 add_figure("{} - 箱型图".format(x))
         if "pairs" in show_options:
             sns.pairplot(self.table_data)
             add_figure("变量两两关系图")
         if "corr" in show_options:
             corr = self.table_data.corr()
             sns.heatmap(corr,
                         xticklabels=corr.columns,
                         yticklabels=corr.columns,
                         linewidths=0.2,
                         cmap="YlGnBu",
                         annot=True)
             add_figure("相关系数图")
         if "y_corr" in show_options:
             # Correlation of every column with the dependent variable,
             # highest first, drawn as a bar chart.
             self.table_data.corr()[self.config["Y"][0]].sort_values(
                 ascending=False).plot(kind='bar')
             add_figure("因变量和各自变量的相关系数图")
         response_data = {"res": res, "code": "200", "msg": "ok!"}
         return response_data
     except Exception as e:
         # Log the traceback like the predict methods do, instead of
         # discarding it.
         log.exception("Exception Logged")
         return {"data": "", "code": "500", "msg": "{}".format(e.args)}