def JOB_TTM_growth(codes, df, factor, db, measure):
    """Compute growth columns of ``factor`` for each stock and save them.

    For every code in ``codes`` the rows of ``df`` with that code are taken
    and the following columns are derived with ``FactorBase.cal_growth``
    (called as ``cal_growth(base, current)``; its exact formula is defined
    elsewhere):

    * ``<factor>_growthQ``: ``<factor>_last1Q`` -> ``<factor>``
    * ``<factor>_growthY``: ``<factor>_last4Q`` -> ``<factor>``
    * ``<factor>_growthQ_last{i}Q`` for i in 1..7:
      ``<factor>_last{i+1}Q`` -> ``<factor>_last{i}Q``

    Infinite results are treated as missing, rows where every growth column
    is missing are dropped, and the remainder is written with
    ``influxdbData.saveData``.

    Args:
        codes: iterable of stock codes to process.
        df: DataFrame with a ``code`` column, a ``report_period`` column,
            ``<factor>`` and ``<factor>_last1Q`` .. ``<factor>_last8Q``.
        factor: name of the base factor column.
        db: target database, passed through to ``saveData``.
        measure: target measurement, passed through to ``saveData``.

    Returns:
        list of error strings, one per failed save (empty when all saves
        reported ``'No error occurred...'``).
    """
    influx = influxdbData()
    save_res = []

    current_col = '{0}'.format(factor)
    growth_q_col = '{0}_growthQ'.format(factor)
    growth_y_col = '{0}_growthY'.format(factor)

    def growth(frame, base_col, target_col):
        # Row-wise growth from base_col to target_col.
        return frame.apply(
            lambda row: FactorBase.cal_growth(row[base_col], row[target_col]),
            axis=1)

    for code in codes:
        code_df = df.loc[df['code'] == code, :].copy()
        if code_df.empty:
            # A row-wise apply on an empty frame returns a DataFrame, not a
            # Series, which cannot be assigned to a single column.
            continue

        code_df[growth_q_col] = growth(
            code_df, '{0}_last1Q'.format(factor), current_col)
        code_df[growth_y_col] = growth(
            code_df, '{0}_last4Q'.format(factor), current_col)
        cols = [growth_q_col, growth_y_col]

        for i in range(1, 8):
            lagged_col = '{0}_growthQ_last{1}Q'.format(factor, i)
            code_df[lagged_col] = growth(
                code_df,
                '{0}_last{1}Q'.format(factor, i + 1),
                '{0}_last{1}Q'.format(factor, i))
            cols.append(lagged_col)

        code_df = code_df.loc[:, ['code', 'report_period'] + cols]
        # Both signs of infinity are invalid growth values; the original only
        # replaced +inf, letting -inf through to the database writer.
        code_df = code_df.replace([np.inf, -np.inf], np.nan)
        # Keep rows that have at least one usable growth value.
        code_df = code_df.loc[code_df[cols].notnull().any(axis=1), :]
        if code_df.empty:
            continue
        # The writer is given None rather than NaN for missing values.
        code_df = code_df.where(pd.notnull(code_df), None)

        print('code: %s' % code)
        r = influx.saveData(code_df, db, measure)
        if r != 'No error occurred...':
            save_res.append('%s_growth Error: %s' % (factor, r))
    return save_res
def JOB_factors(df, field, codes, calendar, start, save_db):
    """Build current and lagged values of ``field`` per stock and save them.

    ``df`` is expected to have ``code`` as its first column and one column
    per report period after it. For every code the frame is extended to all
    dates in ``calendar``, forward-filled, and cut to dates from ``start``
    on. For each remaining row the latest non-null column is taken as the
    row's ``report_period``, and the value of the report period returned by
    ``FactorBase.get_former_report_period(report_period, n)`` is looked up
    for n = 0 (``field``), n = 4 (``field + '_lastY'``) and n = 1..12
    (``field + '_last{n}Q'``). Rows whose target report period is not a
    column get NaN.

    Args:
        df: DataFrame indexed by date; first column ``code``, remaining
            columns are report periods (labels must support ``strftime``).
        field: name of the output value column and of the target
            measurement passed to ``saveData``.
        codes: iterable of stock codes to process.
        calendar: set of dates every stock should have a row for
            (``calendar - set(index)`` is evaluated).
        start: first date to keep; converted with ``str`` for label slicing.
        save_db: target database, passed through to ``saveData``.

    Returns:
        list of error strings, one per failed save (empty when all saves
        reported ``'No error occurred...'``).
    """
    columns = df.columns
    influx = influxdbData()
    save_res = []

    def select_offset(report_periods, rps, choices, n_quarters):
        # For each row pick the value stored under the report period that
        # get_former_report_period returns for n_quarters; NaN when no
        # column matches.
        target_rp = report_periods.apply(
            lambda x: FactorBase.get_former_report_period(x, n_quarters)
        ).values
        conditions = [target_rp == rp for rp in rps]
        return np.select(conditions, choices, default=np.nan)

    for code in codes:
        code_df = df.loc[df['code'] == code, :].copy()

        # Add an all-NaN row for every calendar date without a record, then
        # forward-fill. DataFrame.append was removed in pandas 2.0, so
        # pd.concat is used instead.
        insert_dates = calendar - set(code_df.index)
        insert_df = pd.DataFrame(
            np.nan, index=list(insert_dates), columns=columns)
        code_df = pd.concat([code_df, insert_df]).sort_index()
        code_df = code_df.ffill()
        code_df = code_df.dropna(subset=['code'])
        code_df = code_df.loc[str(start):, ]
        if code_df.empty:
            # Nothing to store; a row-wise apply on an empty frame would
            # return a DataFrame and break the assignment below.
            continue

        # All report periods are columns; drop the first column (code).
        rp_keys = np.flipud(code_df.columns[1:])
        rps = rp_keys.astype('datetime64[ns]')

        # Select the latest report_period: last non-null column in each row.
        code_df['report_period'] = code_df.apply(
            lambda row: row.dropna().index[-1], axis=1)
        choices = [code_df[rp].values for rp in rp_keys]
        report_periods = code_df['report_period']

        # Current period and same period of last year.
        code_df[field] = select_offset(report_periods, rps, choices, 0)
        code_df[field + '_lastY'] = select_offset(
            report_periods, rps, choices, 4)

        # Each of the past 12 quarters.
        res_flds = []
        for i in range(1, 13):
            res_field = field + '_last{0}Q'.format(str(i))
            res_flds.append(res_field)
            code_df[res_field] = select_offset(
                report_periods, rps, choices, i)

        # Prepare and store the data.
        code_df = code_df.loc[
            :, ['code', 'report_period', field, field + '_lastY'] + res_flds]
        code_df['report_period'] = code_df['report_period'].apply(
            lambda x: x.strftime('%Y%m%d'))
        # The writer is given None rather than NaN for missing values.
        code_df = code_df.where(pd.notnull(code_df), None)

        print('code: %s' % code)
        r = influx.saveData(code_df, save_db, field)
        if r != 'No error occurred...':
            save_res.append(
                'WindIndicator Field: %s Error: %s' % (field, r))
    return save_res
def get_former_data(series, n_Qs):
    """Look up the value stored under an earlier report period of a row.

    The label to look up is obtained from
    ``FactorBase.get_former_report_period(series['report_period'], n_Qs)``.

    Args:
        series: row (pandas Series) containing a ``report_period`` entry and
            entries labelled by report period.
        n_Qs: offset passed to ``get_former_report_period`` (presumably a
            number of quarters; confirm against that helper).

    Returns:
        ``series[label]`` when the computed label is in ``series.index``,
        otherwise ``np.nan``.
    """
    target_period = FactorBase.get_former_report_period(
        series['report_period'], n_Qs)
    if target_period in series.index:
        return series[target_period]
    return np.nan