def run(self, df, x, y, extra_args=None):
    """Run the chi-square analysis on two columns of ``df``.

    Args:
        df: Container indexed by ``x`` and ``y``; each lookup must yield an
            object supporting ``reset_index`` and ``len``.
        x: Key of the column bound to ``tsy``.
        y: Key of the column bound to ``tsx``.
        extra_args: Optional dict of options. Only ``'bins'`` is read; it
            falls back to 10 when the dict or the key is missing.

    Returns:
        On a validation error: ``{'result': <empty DataFrame>, 'msg': msg}``
        with ``msg['error']`` set.
        On success: a 2-tuple ``(payload, tables)`` where ``payload`` holds
        the keys ``'tables'``, ``'conf'`` and ``'msg'`` and ``tables`` is a
        list with one ``{'table_df', 'label'}`` dict.
        NOTE(review): the two shapes differ (dict vs. tuple); kept as-is
        because callers may depend on it.
    """
    # NOTE(review): x feeds tsy and y feeds tsx, which looks swapped;
    # preserved as in the original -- confirm against callers.
    tsy = df[x]
    tsx = df[y]
    tsy = tsy.reset_index(drop=True)
    tsx = tsx.reset_index(drop=True)

    msg = {}
    xl = len(tsx)
    yl = len(tsy)
    if xl != yl:
        msg['error'] = '输入的tsx的长度为:%s 不等于输入的tsy的长度: %s !\n ' % (xl, yl)
        return {'result': pd.DataFrame(), 'msg': msg}

    # A missing dict, a missing key, or an explicit None all mean "use 10";
    # previously a missing key produced None and broke pd.cut and '%d'.
    bins = (extra_args or {}).get('bins')
    if bins is None:
        bins = 10
    self.bins = bins

    if not (isSeries(tsy) and isSeries(tsx)):
        msg['error'] = 'tsx或者tsy不是 pandas Series 数据类型!\n'
        return {'result': pd.DataFrame(), 'msg': msg}

    # Non-categorical inputs are binned so both sides are categorical.
    if not isCategory(tsy):
        tsy = pd.cut(tsy, bins=bins)
        msg['warning'] = '列tsy不是定类(category)数据, 将强制通过bins:%d为转化为定类型数据\n' % bins
    if not isCategory(tsx):
        tsx = pd.cut(tsx, bins=bins)
        # Append to any tsy warning; the tsx-only variant used to carry a
        # stray leading 't'.
        msg['warning'] = msg.get('warning', '') + (
            '列tsx不是定类(category)数据, 将强制通过bins:%d为转化为定类型数据\n' % bins)

    # NOTE(review): `method` is not defined in this function; it must be
    # resolvable from an enclosing/module scope or this raises NameError.
    dfres, msg1 = core(tsx, tsy, method)
    msg = {**msg, **msg1}
    return {
        'tables': [{
            'table_json': dfres.T.reset_index().to_json(orient='index'),
            'table_html': dfres.to_html(),
            'table_info': '卡方检验分析结果',
            'chart': ['heatmap', 'line', 'bar']
        }],
        'conf': self.get_info(),
        'msg': msg
    }, [{
        'table_df': dfres,
        'label': '卡方检验分析结果'
    }]
def run(self, tsx, tsy, bins=10):
    """Cross-tabulate two series and run a chi-square test on them.

    Args:
        tsx: First series; must support ``reset_index`` and ``len``.
        tsy: Second series; same requirements as ``tsx``.
        bins: Number of bins passed to ``pd.cut`` when a series is not
            already categorical. Also stored on ``self.bins``.

    Returns:
        On a validation error: ``{'result': <empty DataFrame>, 'msg': msg}``
        with ``msg['error']`` set (no ``'table'`` key).
        On success: ``{'result': results, 'table': table, 'msg': msg}`` as
        produced by ``crosstab``; ``msg`` may carry a ``'warning'``.
    """
    tsy = tsy.reset_index(drop=True)
    tsx = tsx.reset_index(drop=True)

    msg = {}
    xl = len(tsx)
    yl = len(tsy)
    if xl != yl:
        logging.error(
            'the length of input X:%s is not equal the length of Y: %s ! ',
            xl, yl)
        msg['error'] = '输入的tsx的长度为:%s 不等于输入的tsy的长度: %s ! ' % (xl, yl)
        return {'result': pd.DataFrame(), 'msg': msg}

    self.bins = bins
    if not (isSeries(tsy) and isSeries(tsx)):
        logging.error('X or y data are not a pandas Series type!')
        msg['error'] = 'tsx或者tsy不是 pandas Series 数据类型!'
        return {'result': pd.DataFrame(), 'msg': msg}

    # Non-categorical inputs are binned so both sides are categorical.
    if not isCategory(tsy):
        tsy = pd.cut(tsy, bins=bins)
        logging.warning(
            'the Series tsy is not category type, will be convert to category type by bins of %d',
            bins)
        msg['warning'] = '列tsy不是定类(category)数据, 将强制通过bins:%d为转化为定类型数据' % bins
    if not isCategory(tsx):
        tsx = pd.cut(tsx, bins=bins)
        logging.warning(
            'the Series tsx is not category type, will be convert to category type by bins of %d',
            bins)
        # Append to any tsy warning; the tsx-only variant used to carry a
        # stray leading 't'.
        msg['warning'] = msg.get('warning', '') + (
            '列tsx不是定类(category)数据, 将强制通过bins:%d为转化为定类型数据' % bins)

    table, results = crosstab(tsx, tsy, prop='col', test='chi-square')
    return {'result': results, 'table': table, 'msg': msg}
def run(self, dfx, tsy):
    """Run a one-way ANOVA of every numeric column of ``dfx`` against ``tsy``.

    Args:
        dfx: DataFrame of candidate variables. Columns reported as
            non-numeric by ``ParseDFtypes`` are ignored with a warning.
        tsy: Grouping series; must pass ``isSeries`` and ``isCategory``.
            Its ``name`` is used as the grouping column.

    Returns:
        ``{'result': DataFrame, 'msg': dict}``. On a validation failure the
        DataFrame is empty and ``msg['error']`` is set. On success each row
        is one numeric column with a "mean±std" string per group followed by
        the first row of that column's ANOVA table; ``'p-值'`` is formatted
        as a 5-decimal string.
    """
    tsy = tsy.reset_index(drop=True)
    dfx = dfx.reset_index(drop=True)

    msg = {}
    xl = len(dfx)
    yl = len(tsy)
    if xl != yl:
        logging.error(
            'the length of input X:%s is not equal the length of Y: %s ! ',
            xl, yl)
        msg['error'] = '输入的dfx的长度为:%s 不等于输入的tsy的长度: %s ' % (xl, yl)
        return {'result': pd.DataFrame(), 'msg': msg}

    if not isSeries(tsy) or not isCategory(tsy):
        logging.error(
            'input tsy is not a pandas Series or not a category data!')
        msg['error'] = '输入的tsy不是定类型数据或者Series类型'
        return {'result': pd.DataFrame(), 'msg': msg}

    x_numer_cols, x_cate_cols = ParseDFtypes(dfx)
    # len() is used instead of `== []` so any empty sequence is recognised.
    if len(x_numer_cols) == 0:
        logging.error(
            'All input dfx are no numeric columns, Please check your input dfx data!'
        )
        msg['error'] = '输入的dfx所有的列都不是数值型数据,请检查输入数据'
        return {'result': pd.DataFrame(), 'msg': msg}

    if len(x_cate_cols) != 0:
        logging.warning(
            'input dfx has non-numeric columns: %s, will ignore these columns!',
            x_cate_cols)
        msg['warning'] = '输入的dfx包含了非数值型的列: %s, 将会被自动忽略!' % (x_cate_cols,)

    name = tsy.name
    dfu = dfx[x_numer_cols].join(tsy)
    grouped = dfu.groupby(name)
    # After transposing: rows are numeric columns, columns are groups.
    means = grouped.mean().T
    stds = grouped.std().T

    def change(ts):
        """Format one group's means as 'mean±std' strings (2 decimals)."""
        return pd.Series(
            ['%s±%s' % (round(ts.loc[i], 2), round(stds[ts.name].loc[i], 2))
             for i in ts.index],
            index=ts.index)

    summary = means.apply(change)

    rows = []
    for col in x_numer_cols:
        # Column names are interpolated straight into the formula, so they
        # must be valid identifiers in the formula language.
        model = ols('%s ~ %s' % (col, name), dfu).fit()
        anovat = anova_lm(model)
        anovat.columns = ['自由度', '平方和', '均方和', 'F-值', 'p-值']
        # Keep only the first row of the table, labelled by the column.
        rows.append(anovat.iloc[0].to_frame(name=col).T)

    res = summary.join(pd.concat(rows))
    res['p-值'] = res['p-值'].map('{:.5f}'.format)
    return {'result': res.round(5), 'msg': msg}
def run(self, dfx, tsy):
    """Run ``levene`` across the groups of ``tsy`` for each numeric column.

    Args:
        dfx: DataFrame of candidate variables. Columns reported as
            non-numeric by ``ParseDFtypes`` are ignored with a warning.
        tsy: Grouping series; must pass ``isSeries`` and ``isCategory``.
            Its ``name`` is used as the grouping column.

    Returns:
        ``{'result': DataFrame, 'msg': dict}``. On a validation failure the
        DataFrame is empty and ``msg['error']`` is set. On success each row
        is one numeric column with a "mean±std" string per group plus the
        two values returned by ``levene`` under ``'F-值'`` and ``'p-值'``.
    """
    tsy = tsy.reset_index(drop=True)
    dfx = dfx.reset_index(drop=True)

    msg = {}
    xl = len(dfx)
    yl = len(tsy)
    if xl != yl:
        logging.error(
            'the length of input X:%s is not equal the length of Y: %s ! ',
            xl, yl)
        msg['error'] = 'the length of input X:%s is not equal the length of Y: %s ! ' % (
            xl, yl)
        return {'result': pd.DataFrame(), 'msg': msg}

    if not isSeries(tsy) or not isCategory(tsy):
        logging.error(
            'input tsy is not a pandas Series or not a category data!')
        msg['error'] = 'input tsy is not a pandas Series or not a category data!'
        return {'result': pd.DataFrame(), 'msg': msg}

    x_numer_cols, x_cate_cols = ParseDFtypes(dfx)
    # len() is used instead of `== []` so any empty sequence is recognised.
    if len(x_numer_cols) == 0:
        logging.error(
            'All input dfx are no numeric columns, Please check your input dfx data!'
        )
        msg['error'] = 'All input dfx are no numeric columns, Please check your input dfx data!'
        return {'result': pd.DataFrame(), 'msg': msg}

    if len(x_cate_cols) != 0:
        logging.warning(
            'input dfx has non-numeric columns: %s, will ignore these columns!',
            x_cate_cols)
        msg['warning'] = 'input dfx has non-numeric columns: %s, will ignore these columns!' % (
            x_cate_cols,)

    name = tsy.name
    dfu = dfx[x_numer_cols].join(tsy)
    grouped = dfu.groupby(name)
    # After transposing: rows are numeric columns, columns are groups.
    means = grouped.mean().T
    stds = grouped.std().T

    def change(ts):
        """Format one group's means as 'mean±std' strings (2 decimals)."""
        return pd.Series(
            ['%s±%s' % (round(ts.loc[i], 2), round(stds[ts.name].loc[i], 2))
             for i in ts.index],
            index=ts.index)

    summary = means.apply(change)

    # The group membership masks do not depend on the tested column, so they
    # are built once instead of once per column.
    masks = [dfu[name] == c for c in tsy.unique()]

    rows = []
    for col in x_numer_cols:
        samples = [dfu[col][mask] for mask in masks]
        F, p = levene(*samples)
        rows.append(
            pd.DataFrame([F, p], index=['F-值', 'p-值'], columns=[col]).T)

    res = summary.join(pd.concat(rows))
    return {'result': res, 'msg': msg}
def run(self, dfx, tsy):
    """Run ``ttest_ind`` between the two groups of ``tsy`` per numeric column.

    Args:
        dfx: DataFrame of candidate variables. Columns reported as
            non-numeric by ``ParseDFtypes`` are ignored with a warning.
        tsy: Grouping series; must pass ``isSeries`` and ``isCategory`` and
            contain exactly two unique values. Its ``name`` is used as the
            grouping column.

    Returns:
        ``{'result': DataFrame, 'msg': dict}``. On a validation failure the
        DataFrame is empty and ``msg['error']`` is set. On success each row
        is one numeric column with a "mean±std" string per group plus the
        two values returned by ``ttest_ind`` under ``'t-值'`` and ``'p-值'``,
        rounded to 5 decimals.
    """
    tsy = tsy.reset_index(drop=True)
    dfx = dfx.reset_index(drop=True)

    msg = {}
    # Same length guard as the sibling ANOVA/Levene runners: without it the
    # join below silently pads the shorter input with NaN.
    xl = len(dfx)
    yl = len(tsy)
    if xl != yl:
        logging.error(
            'the length of input X:%s is not equal the length of Y: %s ! ',
            xl, yl)
        msg['error'] = '输入的dfx的长度为:%s 不等于输入的tsy的长度: %s ' % (xl, yl)
        return {'result': pd.DataFrame(), 'msg': msg}

    if not isSeries(tsy) or not isCategory(tsy):
        logging.error('input tsy is not a pandas Series or not a category data!')
        msg['error'] = '输入的tsy不是定类型数据或者Series类型'
        return {'result': pd.DataFrame(), 'msg': msg}

    groups = tsy.unique()
    if len(groups) != 2:
        # The one-element tuple keeps '%' from unpacking the operand.
        msg['error'] = '输入的tsy不能被分成2组,请确保值tsy中的数unique后元素个数为2,目前的元素为%s' % (groups,)
        return {'result': pd.DataFrame(), 'msg': msg}

    x_numer_cols, x_cate_cols = ParseDFtypes(dfx)
    # len() is used instead of `== []` so any empty sequence is recognised.
    if len(x_numer_cols) == 0:
        logging.error('All input dfx are no numeric columns, Please check your input dfx data!')
        msg['error'] = 'dfx输入的每列都不是数值型数据,请检查输入数据'
        return {'result': pd.DataFrame(), 'msg': msg}

    if len(x_cate_cols) != 0:
        logging.warning(
            'input dfx has non-numeric columns: %s, will ignore these columns!',
            x_cate_cols)
        msg['warning'] = '输入的dfx包含了非数值型的列: %s, 将会被自动忽略!' % (x_cate_cols,)

    name = tsy.name
    dfu = dfx[x_numer_cols].join(tsy)
    grouped = dfu.groupby(name)
    # After transposing: rows are numeric columns, columns are groups.
    means = grouped.mean().T
    stds = grouped.std().T

    def change(ts):
        """Format one group's means as 'mean±std' strings (2 decimals)."""
        return pd.Series(
            ['%s±%s' % (round(ts.loc[i], 2), round(stds[ts.name].loc[i], 2))
             for i in ts.index],
            index=ts.index)

    summary = means.apply(change)

    # Group membership does not depend on the tested column; build the two
    # masks once instead of calling tsy.unique() twice per column.
    first_mask = dfu[name] == groups[0]
    second_mask = dfu[name] == groups[1]

    rows = []
    for col in x_numer_cols:
        t_value, p_value = ttest_ind(dfu[col][first_mask], dfu[col][second_mask])
        rows.append(
            pd.DataFrame([t_value, p_value], index=['t-值', 'p-值'], columns=[col]).T)

    res = summary.join(pd.concat(rows))
    return {'result': res.round(5), 'msg': msg}