def analysis(fileindex, xselected, yselected, analytype, criterion, direction):
    """Return the cached regression model for ``fileindex`` with its F statistics.

    The fitted model is read from the ``est`` field of the Redis hash
    ``fileindex``. When that field is absent, ``setmodel`` is called once to
    fit and cache the model, and the field is read again.

    Returns:
        dict with the keys
        ``model`` (the unpickled fitted model),
        ``f1`` (the model's ``fvalue`` rounded to 3 decimals),
        ``f2`` (the 0.95 quantile of the F distribution with ``p`` and
        ``n - p - 1`` degrees of freedom, rounded to 3 decimals) and
        ``xselected_change`` (the cached value, or the string ``'None'`` when
        the field is absent).

    Raises:
        RuntimeError: if no model is cached even after calling ``setmodel``.
    """
    from scipy.stats import f

    conn = getconn()

    # Look at the raw value before unpickling: pickle.loads() cannot handle
    # the None returned for a missing field (assumes getconn() yields a
    # redis-py client), so the lazy fit has to be triggered here.
    raw_est = conn.hget(fileindex, 'est')
    if raw_est is None:
        setmodel(fileindex, xselected, yselected, analytype, criterion,
                 direction)
        raw_est = conn.hget(fileindex, 'est')

    est = pickle.loads(raw_est) if raw_est is not None else None
    if est is None:
        # setmodel stores nothing for an analytype it does not handle; fail
        # loudly instead of retrying forever.
        raise RuntimeError(
            "no regression model available for %r (analytype=%r)" %
            (fileindex, analytype))

    train = pickle.loads(conn.hget(fileindex, 'train'))

    raw_change = conn.hget(fileindex, 'xselected_change')
    # The string 'None' (not the None object) is the established placeholder
    # returned to callers when no stepwise selection was stored.
    xselected_change = pickle.loads(raw_change) if raw_change is not None else 'None'

    # Theoretical (critical) F value at the 5% level.
    p = est.df_model  # number of explanatory variables
    n = train.shape[0]  # number of observations
    f_theory = f.ppf(q=0.95, dfn=p, dfd=n - p - 1)

    return {
        'model': est,
        'f1': round(est.fvalue, 3),
        'f2': round(f_theory, 3),
        'xselected_change': xselected_change
    }
def setmodel(fileindex, xselected, yselected, analytype, criterion, direction):
    """Fit a regression model on the cached data set and cache the result.

    The data set is read from the ``Profit`` field of the Redis hash
    ``fileindex`` and split 80/20 (``random_state=22``).

    * ``analytype == "linear"``: ordinary least squares of ``yselected`` on
      ``xselected`` plus a constant; stores ``train``, ``test`` and ``est``.
    * ``analytype == "gradually"``: delegates to
      ``FeatureSelection().stepwise`` and additionally stores the selected
      features as ``xselected_change``.
    * any other value: nothing is stored.

    After storing, the hash expiry is reset to ``60 * 60 * 2``.
    """
    conn = getconn()
    dataset = pickle.loads(conn.hget(fileindex, 'Profit'))

    if analytype == "linear":
        train_part, test_part = model_selection.train_test_split(
            dataset, test_size=0.2, random_state=22)
        design = sm.add_constant(train_part[xselected])
        fitted = sm.OLS(train_part[yselected], design).fit()
        payload = {
            "train": pickle.dumps(train_part),
            "test": pickle.dumps(test_part),
            "est": pickle.dumps(fitted),
        }
    elif analytype == "gradually":
        train_part, test_part = model_selection.train_test_split(
            dataset, test_size=0.2, random_state=22)
        # Candidate predictors followed by the response column.
        columns = [*xselected, yselected]
        selector = FeatureSelection().stepwise(df=train_part[columns],
                                               response=yselected,
                                               max_iter=200,
                                               criterion=criterion,
                                               direction=direction)
        fitted = selector.stepwise_model
        chosen = selector.stepwise_feat_selected_
        payload = {
            "train": pickle.dumps(train_part),
            "test": pickle.dumps(test_part),
            "est": pickle.dumps(fitted),
            "xselected_change": pickle.dumps(chosen),
        }
    else:
        # Unknown analysis type: leave the cache untouched.
        return

    conn.hset(fileindex, mapping=payload)
    conn.expire(fileindex, 60 * 60 * 2)
def savefile(file, sheet, fid):
    """Parse an uploaded sheet and cache it in Redis under ``fid``.

    The value returned by ``returncloumns(file, sheet)`` is pickled into the
    ``Profit`` field of the hash ``fid`` and the hash expiry is set to
    ``60 * 60 * 2``.
    """
    from analysis.linear.regression import returncloumns

    parsed = returncloumns(file, sheet)

    conn = getconn()
    # hset(mapping=...) replaces the deprecated hmset() and matches how the
    # rest of this module writes hash fields.
    conn.hset(fid, mapping={"Profit": pickle.dumps(parsed)})
    conn.expire(fid, 60 * 60 * 2)
def uploadpre_file(request):
    """Predict the response for every row of an uploaded Excel file.

    Reads the upload from ``request.FILES["file"]`` and ``fileindex`` /
    ``yselected`` from ``request.POST``, applies the cached model (``est``
    field of the Redis hash ``fileindex``) to the rows, and returns a
    ``JsonResponse``:

    * success: ``{"result": 1, "mul_pre_result": {"mul_pre_values": [...],
      "src": <base64 PNG data URI of a scatter plot of the predictions>}}``
    * a model column is missing from the upload:
      ``{"result": 500, "except": "keyerror"}``
    * any other failure while predicting/plotting:
      ``{"result": 500, "except": "error"}``
    * no cached model: ``{"result": 404, "msg": ...}`` (same payload that
      ``sendselect`` returns for an expired upload).
    """
    import logging

    import matplotlib
    matplotlib.use('Agg')
    from matplotlib import pyplot as plt

    json_params = {'ensure_ascii': False}

    file = request.FILES.get("file")
    fileindex = request.POST.get("fileindex")
    yselected = request.POST.get("yselected")

    conn = getconn()
    raw_est = conn.hget(fileindex, 'est')
    if raw_est is None:
        # The hash expired or no model was fitted yet; unpickling None would
        # raise, so report it the way sendselect does.
        return JsonResponse({"result": 404, "msg": '上传的文件已过期,请重新上传'},
                            json_dumps_params=json_params)
    est = pickle.loads(raw_est)

    Profit = pd.read_excel(file)
    Profit.dropna(inplace=True)

    # Model columns without the intercept; the intercept is re-added below.
    feature_names = est.params.index.tolist()
    if 'const' in feature_names:
        feature_names.remove('const')

    try:
        Profit = Profit[feature_names]
        Profit = sm.add_constant(Profit)
        y_pred = est.predict(Profit)
        Profit[yselected + "(预测值)"] = y_pred
        if 'const' in Profit.columns.values.tolist():
            Profit = Profit.drop('const', axis=1)
        records = Profit.round(3).to_dict('records')

        # Scatter plot of the predictions against their row number.
        try:
            plt.scatter(range(1, len(y_pred) + 1),
                        y_pred,
                        alpha=0.4,
                        edgecolor='none')
            sio = BytesIO()
            plt.savefig(sio, format='png', bbox_inches='tight', pad_inches=0.0)
        finally:
            # Always close, otherwise the next plot is drawn on top of this
            # figure and the figure leaks when plotting fails.
            plt.close()
        src = 'data:image/png;base64,' + base64.encodebytes(
            sio.getvalue()).decode()

        mul_pre_result = {"mul_pre_values": records, "src": src}
        return JsonResponse({
            "result": 1,
            "mul_pre_result": mul_pre_result
        }, json_dumps_params=json_params)
    except KeyError:
        data = {"result": 500, "except": "keyerror"}
        return JsonResponse(data, json_dumps_params=json_params)
    except Exception:
        # Boundary handler: keep the JSON error contract but record the cause.
        logging.getLogger(__name__).exception("multi-value prediction failed")
        data = {"result": 500, "except": "error"}
        return JsonResponse(data, json_dumps_params=json_params)
def prediction(fileindex, xselected, yselected):
    """Plot predicted against actual values on the cached test split.

    Uses the ``test`` and ``est`` fields of the Redis hash ``fileindex``.
    When an ``xselected_change`` field is cached (stepwise regression) those
    columns are used as predictors, otherwise ``xselected`` (linear
    regression).

    Returns:
        str: the plot as a ``data:image/png;base64,...`` URI.
    """
    conn = getconn()
    test = pickle.loads(conn.hget(fileindex, 'test'))
    est = pickle.loads(conn.hget(fileindex, 'est'))

    xselected_change = None
    if conn.hexists(fileindex, 'xselected_change'):
        xselected_change = pickle.loads(
            conn.hget(fileindex, 'xselected_change'))

    # Identity check: "== None" is evaluated element-wise by array-likes.
    features = xselected if xselected_change is None else xselected_change

    y_test = test[yselected]
    y_pred = est.predict(sm.add_constant(test[features]))

    try:
        plt.scatter(y_test, y_pred)
        # Dashed red diagonal marks perfect predictions.
        plt.plot([y_test.min(), y_test.max()],
                 [y_test.min(), y_test.max()],
                 color='red',
                 linestyle='--')
        plt.xlabel('实际值')
        plt.ylabel('预测值')
        sio = BytesIO()
        plt.savefig(sio, format='png', bbox_inches='tight', pad_inches=0.0)
    finally:
        # Always close, otherwise later plots are drawn onto this figure.
        plt.close()

    return 'data:image/png;base64,' + base64.encodebytes(
        sio.getvalue()).decode()
def variance(fileindex, xselected, yselected, oselected_1, oselected_2):
    """Plot standardised residuals against two predictors (homoscedasticity check).

    Uses the ``est2`` and ``none_outliers`` fields of the Redis hash
    ``fileindex``. If either is missing, ``setcurmodel`` is called once to
    create them, as the other diagnostics in this module do.

    Args:
        oselected_1: column of ``none_outliers`` for the upper panel.
        oselected_2: column of ``none_outliers`` for the lower panel.

    Returns:
        str: the two-panel figure as a ``data:image/png;base64,...`` URI.

    Raises:
        RuntimeError: if the outlier-free model is still unavailable after
            calling ``setcurmodel``.
    """
    conn = getconn()

    raw_est2 = conn.hget(fileindex, 'est2')
    raw_clean = conn.hget(fileindex, 'none_outliers')
    if raw_est2 is None or raw_clean is None:
        setcurmodel(fileindex, xselected, yselected)
        raw_est2 = conn.hget(fileindex, 'est2')
        raw_clean = conn.hget(fileindex, 'none_outliers')
    if raw_est2 is None or raw_clean is None:
        raise RuntimeError(
            "outlier-free model is not available for %r" % (fileindex,))

    est2 = pickle.loads(raw_est2)
    none_outliers = pickle.loads(raw_clean)

    # Standardised residuals, shared by both panels.
    std_resid = (est2.resid - est2.resid.mean()) / est2.resid.std()

    panels = ((oselected_1, 'red'), (oselected_2, 'magenta'))
    try:
        for row, (column, line_color) in enumerate(panels):
            ax = plt.subplot2grid(shape=(2, 1), loc=(row, 0))
            ax.scatter(none_outliers[column], std_resid)
            # Horizontal reference line at zero.
            ax.hlines(y=0,
                      xmin=none_outliers[column].min(),
                      xmax=none_outliers[column].max(),
                      color=line_color,
                      linestyle='--')
            ax.set_xlabel(column)
            ax.set_ylabel('Std_Residual')

        # Spacing between the two panels.
        plt.subplots_adjust(hspace=0.6, wspace=0.3)
        sio = BytesIO()
        plt.savefig(sio, format='png', bbox_inches='tight', pad_inches=0.0)
    finally:
        # Always close, otherwise later plots are drawn onto this figure.
        plt.close()

    return 'data:image/png;base64,' + base64.encodebytes(
        sio.getvalue()).decode()
def residual(fileindex, xselected, yselected):
    """Return the Durbin-Watson statistic of the outlier-free model.

    Reads the ``est2`` field of the Redis hash ``fileindex``; if it is
    missing, ``setcurmodel`` is called once to create it.

    Returns:
        list[str]: a single element, the statistic of ``est2.wresid``
        formatted with ``"%#8.3f"``.

    Raises:
        RuntimeError: if ``est2`` is still unavailable after calling
            ``setcurmodel``.
    """
    from statsmodels.stats.stattools import durbin_watson

    conn = getconn()
    raw_est2 = conn.hget(fileindex, 'est2')
    if raw_est2 is None:
        # Build once instead of recursing: setcurmodel may store nothing, and
        # recursion would then never terminate.
        setcurmodel(fileindex, xselected, yselected)
        raw_est2 = conn.hget(fileindex, 'est2')
    if raw_est2 is None:
        raise RuntimeError(
            "outlier-free model is not available for %r" % (fileindex,))

    est2 = pickle.loads(raw_est2)
    return ["%#8.3f" % durbin_watson(est2.wresid)]
def varbp(fileindex, xselected, yselected):
    """Run the Breusch-Pagan heteroscedasticity test on the outlier-free model.

    Reads the ``est2`` field of the Redis hash ``fileindex``; if it is
    missing, ``setcurmodel`` is called once to create it.

    Returns:
        The result of ``sm.stats.diagnostic.het_breuschpagan`` for
        ``est2.resid`` with ``est2.model.exog`` as ``exog_het``.

    Raises:
        RuntimeError: if ``est2`` is still unavailable after calling
            ``setcurmodel``.
    """
    conn = getconn()
    raw_est2 = conn.hget(fileindex, 'est2')
    if raw_est2 is None:
        # Build once instead of recursing: setcurmodel may store nothing, and
        # recursion would then never terminate.
        setcurmodel(fileindex, xselected, yselected)
        raw_est2 = conn.hget(fileindex, 'est2')
    if raw_est2 is None:
        raise RuntimeError(
            "outlier-free model is not available for %r" % (fileindex,))

    est2 = pickle.loads(raw_est2)
    return sm.stats.diagnostic.het_breuschpagan(est2.resid,
                                                exog_het=est2.model.exog)
def norks(fileindex, yselected):
    """Test the response column of the training split for normality.

    With 5000 or more training rows a Kolmogorov-Smirnov test against a
    normal distribution parameterised by the sample mean and standard
    deviation is used; otherwise the Shapiro-Wilk test.

    Returns:
        dict: ``{'type': 'kstest' | 'shapiro', 'data': <scipy test result>}``.
    """
    conn = getconn()
    train = pickle.loads(conn.hget(fileindex, 'train'))
    sample = train[yselected]

    if len(train) >= 5000:
        result = stats.kstest(rvs=sample,
                              args=(sample.mean(), sample.std()),
                              cdf="norm")
        test_name = 'kstest'
    else:
        result = stats.shapiro(sample)
        test_name = 'shapiro'

    return {'type': test_name, 'data': result}
def getsin_pre_value(request):
    """Return the model prediction for a single observation.

    Expects a POST whose JSON body has ``fileindex`` and ``params`` (the
    predictor values, convertible to float, in the order of the model
    coefficients without the intercept).

    Returns a ``JsonResponse``:

    * success: ``{"result": 1, "sin_pre_value": <prediction, 3 decimals>}``
    * wrong number of values: ``{"result": 500, "except": "error"}``
    * no cached model: ``{"result": 404, "msg": ...}`` (same payload that
      ``sendselect`` returns for an expired upload)
    * non-POST request: ``{"result": 405, "msg": ...}`` with HTTP status 405.
    """
    json_params = {'ensure_ascii': False}

    if request.method != "POST":
        # A Django view must return a response for every method.
        return JsonResponse({"result": 405, "msg": '仅支持POST请求'},
                            status=405,
                            json_dumps_params=json_params)

    data = json.loads(request.body)
    fileindex = data["fileindex"]
    params = list(map(float, data["params"]))

    conn = getconn()
    raw_est = conn.hget(fileindex, 'est')
    if raw_est is None:
        return JsonResponse({"result": 404, "msg": '上传的文件已过期,请重新上传'},
                            json_dumps_params=json_params)
    est = pickle.loads(raw_est)

    if 'const' in est.params.index.tolist():
        # The intercept multiplies a constant 1.
        # NOTE(review): assumes 'const' is the first coefficient — confirm.
        params.insert(0, 1)

    coefficients = est.params.values
    values = np.array(params)
    if values.shape != coefficients.shape:
        # Without this check a single supplied value would silently broadcast
        # against all coefficients and yield a wrong prediction.
        return JsonResponse({"result": 500, "except": "error"},
                            json_dumps_params=json_params)

    sin_pre_value = (coefficients * values).sum()
    payload = {"result": 1, "sin_pre_value": round(sin_pre_value, 3)}
    return JsonResponse(payload, json_dumps_params=json_params)
def linear_correlation(fileindex, lineselected):
    """Compute the correlation matrix and pair plot of the selected columns.

    The data set is read from the ``Profit`` field of the Redis hash
    ``fileindex``.

    Returns:
        dict: ``src`` is the seaborn pair plot as a
        ``data:image/png;base64,...`` URI; ``lindata`` is the correlation
        matrix of ``lineselected`` rounded to 3 decimals, as nested lists.
    """
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib import pyplot as plt

    frame = pickle.loads(getconn().hget(fileindex, 'Profit'))
    corr_matrix = frame[lineselected].corr()

    sns.pairplot(frame.loc[:, lineselected])
    buffer = BytesIO()
    plt.savefig(buffer, format='png', bbox_inches='tight', pad_inches=0.0)
    encoded = base64.encodebytes(buffer.getvalue()).decode()

    # Close the figure, otherwise the next plot is drawn on top of it.
    plt.axis('off')
    plt.close()

    return {
        'src': 'data:image/png;base64,' + encoded,
        'lindata': round(corr_matrix, 3).values.tolist(),
    }
def multicol(fileindex, xselected):
    """Compute variance inflation factors for the model's predictors.

    The data set is read from the ``Profit`` field of the Redis hash
    ``fileindex``. When an ``xselected_change`` field is cached (stepwise
    regression) those columns are used, otherwise ``xselected`` (linear
    regression). A constant column is added before computing the VIFs, so it
    appears in the result as well.

    Returns:
        list[list]: one ``[feature, VIF]`` row per design-matrix column, with
        numeric values rounded to 3 decimals.
    """
    from statsmodels.stats.outliers_influence import variance_inflation_factor

    conn = getconn()
    dataset = pickle.loads(conn.hget(fileindex, 'Profit'))

    xselected_change = None
    if conn.hexists(fileindex, 'xselected_change'):
        xselected_change = pickle.loads(
            conn.hget(fileindex, 'xselected_change'))

    # Identity check: "== None" is evaluated element-wise by array-likes.
    features = xselected if xselected_change is None else xselected_change

    design = sm.add_constant(dataset[features])
    vif = pd.DataFrame()
    vif['features'] = design.columns
    vif["VIF Factor"] = [
        variance_inflation_factor(design.values, i)
        for i in range(design.shape[1])
    ]
    # Convert the underlying ndarray to plain nested lists.
    return vif.round(3).values.tolist()
def sendselect(request):
    """Run the regression analysis for the selection stored in the cookies.

    Reads the JSON-encoded cookies ``fileindex``, ``xselected``,
    ``yselected``, ``analytype``, ``criterion``, ``direction`` and ``xlist``.
    If the Redis key ``fileindex`` still exists its expiry is refreshed, a
    stale ``xselected_change`` field is removed for linear analyses, and the
    work is delegated to ``sendselecthelp`` (defined elsewhere).

    Returns:
        Whatever ``sendselecthelp`` returns on success; otherwise a
        ``JsonResponse`` with ``result`` 404 (upload expired), 400 (missing
        or malformed cookie, HTTP status 400) or 405 (non-POST request, HTTP
        status 405).
    """
    json_params = {'ensure_ascii': False}

    if request.method != "POST":
        # A Django view must return a response for every method.
        return JsonResponse({"result": 405, "msg": '仅支持POST请求'},
                            status=405,
                            json_dumps_params=json_params)

    cookie_names = ("fileindex", "xselected", "yselected", "analytype",
                    "criterion", "direction", "xlist")
    try:
        selection = {
            name: json.loads(request.COOKIES.get(name))
            for name in cookie_names
        }
    except (TypeError, ValueError):
        # TypeError: cookie missing (None); ValueError: cookie is not JSON.
        return JsonResponse({"result": 400, "msg": '请求参数缺失或格式错误'},
                            status=400,
                            json_dumps_params=json_params)

    fileindex = selection["fileindex"]
    analytype = selection["analytype"]

    conn = getconn()
    if not conn.exists(fileindex):
        responsedata = {"result": 404, "msg": '上传的文件已过期,请重新上传'}
        return JsonResponse(responsedata, json_dumps_params=json_params)

    conn.expire(fileindex, 60 * 60 * 2)
    # A feature list left over from an earlier stepwise run would otherwise
    # make later steps treat this linear analysis as stepwise.
    if analytype == "linear" and conn.hexists(fileindex, 'xselected_change'):
        conn.hdel(fileindex, 'xselected_change')

    return sendselecthelp(fileindex, selection["xselected"],
                          selection["yselected"], analytype,
                          selection["criterion"], selection["direction"],
                          selection["xlist"])
def normality(fileindex, yselected):
    """Draw the normality-check histogram of the response column.

    The column ``yselected`` of the cached data set (``Profit`` field of the
    Redis hash ``fileindex``) is plotted as a 10-bin normalised histogram
    together with a kernel density curve and a fitted normal density curve.

    Returns:
        str: the plot as a ``data:image/png;base64,...`` URI.
    """
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib import pyplot as plt
    import seaborn as sns

    dataset = pickle.loads(getconn().hget(fileindex, 'Profit'))
    response = dataset[yselected]

    # SimHei so the Chinese legend labels render; keep the minus sign intact.
    mpl.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    histogram_style = {
        'color': 'green',
        'edgecolor': 'black',
    }
    kde_style = {
        'color': 'black',
        'linestyle': '--',
        'label': '核密度曲线'
    }
    normal_fit_style = {
        'color': 'red',
        'linestyle': ':',
        'label': '正态密度曲线'
    }
    # NOTE(review): seaborn deprecated distplot in 0.11 — confirm the pinned
    # seaborn version still provides it.
    sns.distplot(a=response,
                 bins=10,
                 fit=stats.norm,
                 norm_hist=True,
                 hist_kws=histogram_style,
                 kde_kws=kde_style,
                 fit_kws=normal_fit_style)
    plt.legend()

    buffer = BytesIO()
    plt.savefig(buffer, format='png', bbox_inches='tight', pad_inches=0.0)
    encoded = base64.encodebytes(buffer.getvalue()).decode()

    # Close the figure, otherwise the next plot is drawn on top of it.
    plt.axis('off')
    plt.close()
    return 'data:image/png;base64,' + encoded
def setcurmodel(fileindex, xselected, yselected):
    """Detect outliers in the training split and refit the model without them.

    Reads ``train``, ``est`` and (if present) ``xselected_change`` from the
    Redis hash ``fileindex``. Rows whose externally studentized residual has
    an absolute value above 2 are treated as outliers; an OLS model with a
    constant is refitted on the remaining rows.

    Stores in the same hash and resets its expiry to ``60 * 60 * 2``:

    * ``est2``: the refitted model,
    * ``outlist``: the outlier rows as a list of dicts, values rounded to 3
      decimals,
    * ``none_outliers``: the frame of non-outlier rows.

    Nothing is stored when the cached ``est`` is ``None``.
    """
    conn = getconn()
    train = pickle.loads(conn.hget(fileindex, 'train'))
    est = pickle.loads(conn.hget(fileindex, 'est'))

    xselected_change = None
    if conn.hexists(fileindex, 'xselected_change'):
        xselected_change = pickle.loads(
            conn.hget(fileindex, 'xselected_change'))

    # No xselected_change means plain linear regression; otherwise use the
    # features chosen by stepwise regression.
    usedx = xselected if xselected_change is None else xselected_change

    if est is None:
        return

    # Positional index so predictors, response and residuals line up.
    x = train[usedx].reset_index(drop=True)
    y = train[yselected].reset_index(drop=True)
    resid_stu = pd.Series(
        np.asarray(est.get_influence().resid_studentized_external),
        name='resid_stu')
    profit_outliers = pd.concat([x, y, resid_stu], axis=1)

    # Outliers: |studentized residual| > 2. The rounded frame is kept; the
    # previous code computed the rounding but discarded its result.
    outdata = profit_outliers.loc[
        np.abs(profit_outliers.resid_stu) > 2, ].round(3)
    labels = list(profit_outliers.columns)
    outlist = [dict(zip(labels, row)) for row in outdata.values.tolist()]

    none_outliers = profit_outliers.loc[
        np.abs(profit_outliers.resid_stu) <= 2, ]
    x2 = none_outliers[usedx]
    y2 = none_outliers[yselected]
    est2 = sm.OLS(y2, sm.add_constant(x2)).fit()

    filedata = {
        "est2": pickle.dumps(est2),
        "outlist": pickle.dumps(outlist),
        "none_outliers": pickle.dumps(none_outliers),
    }
    conn.hset(fileindex, mapping=filedata)
    conn.expire(fileindex, 60 * 60 * 2)
def outliertest(fileindex, xselected, yselected):
    """Return the outlier-free model summary, the outliers and a fit plot.

    Reads ``est2`` and ``outlist`` from the Redis hash ``fileindex``; if
    ``est2`` is missing, ``setcurmodel`` is called once to create both. The
    plot shows predicted against actual values on the cached test split,
    using ``xselected_change`` as predictors when that field is cached
    (stepwise regression) and ``xselected`` otherwise (linear regression).

    Returns:
        dict: ``model`` (HTML of ``est2.summary()``), ``outdata`` (the cached
        outlier list, or ``None`` when absent) and ``src`` (the plot as a
        ``data:image/png;base64,...`` URI).

    Raises:
        RuntimeError: if ``est2`` is still unavailable after calling
            ``setcurmodel``.
    """
    conn = getconn()

    raw_est2 = conn.hget(fileindex, 'est2')
    if raw_est2 is None:
        # Build once instead of recursing: setcurmodel may store nothing, and
        # recursion would then never terminate.
        setcurmodel(fileindex, xselected, yselected)
        raw_est2 = conn.hget(fileindex, 'est2')
    if raw_est2 is None:
        raise RuntimeError(
            "outlier-free model is not available for %r" % (fileindex,))
    est2 = pickle.loads(raw_est2)

    raw_outlist = conn.hget(fileindex, 'outlist')
    outlist = pickle.loads(raw_outlist) if raw_outlist is not None else None

    test = pickle.loads(conn.hget(fileindex, 'test'))
    est = pickle.loads(conn.hget(fileindex, 'est'))

    xselected_change = None
    if conn.hexists(fileindex, 'xselected_change'):
        xselected_change = pickle.loads(
            conn.hget(fileindex, 'xselected_change'))

    # Identity check: "== None" is evaluated element-wise by array-likes.
    features = xselected if xselected_change is None else xselected_change

    y_test = test[yselected]
    # NOTE(review): predictions come from the original model `est`, while the
    # returned summary is for `est2` — confirm this is intended.
    y_pred = est.predict(sm.add_constant(test[features]))

    try:
        plt.scatter(y_test, y_pred)
        # Dashed red diagonal marks perfect predictions.
        plt.plot([y_test.min(), y_test.max()],
                 [y_test.min(), y_test.max()],
                 color='red',
                 linestyle='--')
        plt.xlabel('实际值')
        plt.ylabel('预测值')
        sio = BytesIO()
        plt.savefig(sio, format='png', bbox_inches='tight', pad_inches=0.0)
    finally:
        # Always close, otherwise later plots are drawn onto this figure.
        plt.close()

    src = 'data:image/png;base64,' + base64.encodebytes(
        sio.getvalue()).decode()
    return {
        'model': est2.summary().as_html(),
        'outdata': outlist,
        'src': src
    }