def daily_compute(self, busi_date):
    """Compute daily short-side trade close/unclose data."""
    spark = self.spark
    yesterday = get_date(self.date_order, self.order_date, busi_date, -1)
    unclose = spark.sql("select * from %s.%s where busi_date='%s'"
                        % (self.fdata, self.unclose_cal, yesterday))
    trd = spark.sql("select * from %s.%s where busi_date='%s'"
                    % (self.odata, self.cash_flow_table, busi_date))\
        .filter("trd_type='short_related' and prd_no!='0.0'")\
        .filter("trd_qty!=0 or amortize_label!=0")\
        .withColumn("now_trd",
                    func.struct("timestamp", "trd_qty",
                                "trd_cash_flow", "amortize_label"))\
        .groupBy("trade_id", "secu_acc_id", "prd_no")\
        .agg(func.collect_list("now_trd").alias("now_trd"))\
        .persist(StorageLevel.DISK_ONLY)
    trd.count()
    debt = spark.sql("select * from %s.%s where busi_date='%s'"
                     % (self.odata, self.debt_table, busi_date))\
        .filter("prd_no!='0.0'")\
        .select("trade_id", "secu_acc_id", "prd_no", "liab_qty", "mkt_val")
    df = full_outer_join(unclose, trd, ["trade_id", "secu_acc_id", "prd_no"])
    df = full_outer_join(df, debt, ["trade_id", "secu_acc_id", "prd_no"])
    df.persist(StorageLevel.DISK_ONLY).count()
    data = df.rdd.map(
        lambda x: _compute(x.asDict(recursive=True), busi_date, yesterday))
    if data.count() > 0:
        data.persist(StorageLevel.DISK_ONLY).count()
        self.save_trade_close_data(data, spark, busi_date)
        self.save_trade_unclose_data(data, spark, busi_date,
                                     lambda t: int(t["unclose_qty"]) < 0)
    else:
        print("No short-opening trade data")
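# Hedged sketch: `full_outer_join` is defined elsewhere in the repo and is not
# shown in this section. Assuming it is a thin wrapper over DataFrame.join on
# the shared key columns, a minimal version could look like this:
def full_outer_join(left, right, keys):
    # Joining on a list of column names keeps a single copy of each key
    # column, so the result can be joined again on the same keys downstream.
    return left.join(right, keys, "full_outer")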
def _get_base_data(self, spark, table, enddate, odataTable, type):
    sqlTemp = "select * from {0}.{1} where busi_date='{2}'"
    startdate = get_date(self.date_order, self.order_date, enddate, -1)
    unCloseCalDf = spark.sql(sqlTemp.format(self.fdata, table, startdate))
    trd_type = "long_related" if type == 'long' else "short_related"
    cashDetailDf = spark.sql(
        sqlTemp.format(self.odata, self.cash_flow_detail, enddate))\
        .filter("trd_type='%s' and prd_no!='0.0'" % trd_type)\
        .filter("trd_qty!=0 or amortize_label!=0")\
        .persist(storageLevel=StorageLevel.DISK_ONLY)\
        .withColumn("trd_detail",
                    sqlf.struct("timestamp", "trd_qty",
                                "trd_cash_flow", "amortize_label"))\
        .groupBy("trade_id", "secu_acc_id", "prd_no")\
        .agg(sqlf.collect_list("trd_detail").alias("trd_detail_list"))
    cashDetailDf.persist(StorageLevel.DISK_ONLY).count()
    moneyDf = spark.sql(sqlTemp.format(self.odata, odataTable, enddate))\
        .filter("prd_no!='0.0'")\
        .select("trade_id", "secu_acc_id", "prd_no",
                "qty" if type == 'long' else "liab_qty", "mkt_val")
    tempDf = full_outer_join(unCloseCalDf, cashDetailDf,
                             ["trade_id", "secu_acc_id", "prd_no"])
    baseDf = full_outer_join(tempDf, moneyDf,
                             ["trade_id", "secu_acc_id", "prd_no"])
    baseDf.persist(StorageLevel.DISK_ONLY).count()
    return baseDf
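# Hedged sketch: `get_date` is not shown in this section. Judging from its
# call sites (get_date(self.date_order, self.order_date, some_date, offset)),
# it presumably steps a date along the trading calendar. A minimal version,
# assuming date_order maps date -> ordinal index and order_date maps the
# index back to a date:
def get_date(date_order, order_date, busi_date, offset):
    # e.g. offset=-1 returns the trading day immediately before busi_date
    return order_date[date_order[busi_date] + offset]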
def daily_compute(self, startdate=None, enddate=None):
    """
    daily_compute
    :param startdate:
    :param enddate:
    :return:
    """
    # Monthly computation must start from the 1st of the month
    yesterday = get_date(self.date_order, self.order_date, enddate, -1)
    diff_days = days_between(enddate, yesterday)
    LongBaseDf = self._get_base_data(
        self.sparkSession, self.stock_unclose_tradebyday_long_cal,
        enddate, self.asset_table, "long")
    longBaseRdd = LongBaseDf.rdd.mapPartitions(
        lambda rows: _travel_long_iter(rows, enddate, diff_days))
    longBaseRdd.persist(StorageLevel.DISK_ONLY)
    if longBaseRdd.count() > 0:
        self.save_all_data(longBaseRdd, enddate, "long")
    ShortBaseDf = self._get_base_data(
        self.sparkSession, self.stock_unclose_tradebyday_short_cal,
        enddate, self.debt_table, "short")
    shortBaseRdd = ShortBaseDf.rdd.mapPartitions(
        lambda rows: _travel_short_iter(rows, enddate, diff_days))
    shortBaseRdd.persist(StorageLevel.DISK_ONLY)
    if shortBaseRdd.count() > 0:
        self.save_all_data(shortBaseRdd, enddate, "short")
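# Hedged sketch: _travel_long_iter / _travel_short_iter are not shown in this
# section. Under mapPartitions they must accept an iterator of Rows and return
# an iterator. A minimal skeleton, with the actual per-position travel logic
# elided, might be:
def _travel_long_iter(rows, enddate, diff_days):
    for row in rows:
        rec = row.asDict(recursive=True)
        # ... per-position holding/return logic (not shown in this section) ...
        yield rec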
def _cal_asset_debt(self, startdate=None, enddate=None):
    """
    T-day asset, debt and net_asset
    :param startdate:
    :param enddate:
    :return:
    """
    sqlTmp1 = """
        select nvl(a.trade_id,b.trade_id) trade_id,
               nvl(a.mkt_val,0) asset,
               nvl(b.mkt_val,0) debt,
               nvl(a.mkt_val,0)+nvl(b.mkt_val,0) net_asset
        from (
            select trade_id, sum(mkt_val) mkt_val from {1}.{2}
            where busi_date='{0}' group by trade_id
        ) a
        full outer join (
            select trade_id, sum(mkt_val) mkt_val from {1}.{3}
            where busi_date='{0}' group by trade_id
        ) b
        on a.trade_id=b.trade_id
    """
    selectSql1 = sqlTmp1.format(enddate, self.odata,
                                self.stock_asset_holding,
                                self.stock_debt_holding)
    tempTable1 = "cust_daily_asset_debt_tmp"
    self._reg_table(selectSql1, tempTable1)
    startdate = get_date(self.date_order, self.order_date, enddate, -1)
    selectSql2 = sqlTmp1.format(startdate, self.odata,
                                self.stock_asset_holding,
                                self.stock_debt_holding)
    tempTable2 = "cust_daily_asset_debt_before_tmp"
    self._reg_table(selectSql2, tempTable2)
    return tempTable1, tempTable2
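# Hedged sketch: `_reg_table` is not shown in this section. Given that its
# results are later consumed by table name, it presumably runs the query and
# registers the result as a temporary table (registerTempTable matches the
# Spark 1.x-style API used elsewhere in this code):
def _reg_table(self, select_sql, table_name):
    df = self.sparkSession.sql(select_sql)
    df.registerTempTable(table_name)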
def __init__(self, spark, busi_date, mode):
    LeekSparkJob.__init__(self, spark)
    # T-1 date; if it is not a trading day, skip the computation entirely
    self.enddate = busi_date
    if not self.is_trading_day(self.enddate):
        print("task start error: day [%s] is not a trading day" % self.enddate)
        sys.exit(0)
    # T-2 date
    if self.logLevel != 'debug':
        self.startdate = get_date(self.date_order, self.order_date,
                                  self.enddate, -1)
    else:
        self.startdate = self.enddate
    self.mode = mode
def daily_compute(self, busi_date):
    """Compute daily long-side close/unclose data."""
    spark = self.spark
    yesterday = get_date(self.date_order, self.order_date, busi_date, -1)
    unclose = spark.sql("select * from %s.%s where busi_date='%s'"
                        % (self.fdata, self.unclose_cal, yesterday))
    trd = spark.sql("select * from %s.%s where busi_date='%s'"
                    % (self.odata, self.cash_flow_table, busi_date))
    trd = trd.filter("trd_type='long_related' and prd_no!='0.0'")
    # When trd_qty=0, amortize_label is guaranteed to be non-zero (==1)
    trd = trd.filter("trd_qty!=0 or amortize_label!=0")
    asset = spark.sql("select * from %s.%s where busi_date='%s'"
                      % (self.odata, self.asset_table, busi_date))
    asset = asset.filter("prd_no!='0.0'")
    asset = asset.select("trade_id", "secu_acc_id", "prd_no", "qty", "mkt_val")
    trd = trd.withColumn(
        "now_trd",
        func.struct("timestamp", "trd_qty", "trd_cash_flow", "amortize_label"))
    trd = trd.groupBy("trade_id", "secu_acc_id", "prd_no") \
        .agg(func.collect_list("now_trd").alias("now_trd"))
    trd.persist(StorageLevel.DISK_ONLY)
    trd.count()
    df = full_outer_join(unclose, trd, ["trade_id", "secu_acc_id", "prd_no"])
    df = full_outer_join(df, asset, ["trade_id", "secu_acc_id", "prd_no"])
    df.persist(StorageLevel.DISK_ONLY)
    df.count()
    data = df.rdd.map(
        lambda x: _compute(x.asDict(recursive=True), busi_date, yesterday))
    data.persist(StorageLevel.DISK_ONLY)
    if data.count() > 0:
        self.save_close_data(data, busi_date,
                             lambda t: int(t["unclose_qty"]) == 0,
                             sort_long_close_data)
        self.save_unclose_data(data, busi_date,
                               lambda t: int(t["unclose_qty"]) > 0,
                               sort_long_unclose_data)
        self.save_unclose_cal(data, busi_date,
                              lambda t: int(t["unclose_qty"]) > 0,
                              sort_long_unclose_cal)
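# Note on the persist-then-count pattern used throughout these jobs: Spark
# evaluates lazily, so persist() alone only schedules caching and computes
# nothing. The count() action forces one materialization, so the multiple
# save branches afterwards (close / unclose / unclose_cal) reuse the cached
# data instead of recomputing the joins, e.g.:
#
#   data.persist(StorageLevel.DISK_ONLY)
#   data.count()  # action: materializes `data` into the DISK_ONLY cache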
def daily_compute(self, startdate=None, enddate=None):
    startdate = get_date(self.date_order, self.order_date, enddate, -19)
    sql = """
        select busi_date,trade_id,long_return,short_return,total_return,
               nvl(total_return_rate,0.0) total_return_rate,
               exception_label ac_exception_label,
               case when total_return>=0 and total_return_rate>=0 then 0
                    when total_return<0 and total_return_rate<0 then 0
                    else 1 end pos_neg_exception,
               case when abs(long_return+short_return-total_return)<0.01 then 0
                    else 1 end detail_sum_exception,
               case when abs(total_return_rate)<0.223 then 0
                    else 1 end rr_outlier_exception
        from {1}.{2} where busi_date='{0}'
    """
    sqlCmd = sql.format(enddate, self.adata, self.stock_cust_daily_return)
    df = self.sparkSession.sql(sqlCmd)
    sqllast20 = """
        select trade_id,
               avg(total_return_rate)+3*stddev(total_return_rate) std_return_rate,
               count(busi_date) num_data
        from {2}.{3}
        where busi_date>='{0}' and busi_date<='{1}'
        group by trade_id
    """.format(startdate, enddate, self.adata, self.stock_cust_daily_return)
    dflast20 = self.sparkSession.sql(sqllast20)
    df = df.join(dflast20, "trade_id", "inner")\
        .select(df["*"],
                fn.when((dflast20.num_data == 20) &
                        (dflast20.std_return_rate <= df.total_return_rate), 0)
                .when(dflast20.num_data < 20, 0)
                .when(dflast20.std_return_rate == 0, 0)
                .otherwise(1).alias("rr_sp_exception"))\
        .where("""
            ac_exception_label!=0 or pos_neg_exception!=0 or
            detail_sum_exception!=0 or rr_outlier_exception!=0 or
            rr_sp_exception!=0
        """)\
        .persist(StorageLevel.DISK_ONLY).repartition(5)
    save_data(self.sparkSession, self.adata,
              self.stock_dr_check_exception_data, enddate, df)
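# Hedged illustration: the rr_sp_exception rule above compares today's
# total_return_rate against a trailing-20-day mean + 3*stddev threshold
# (std_return_rate). A small plain-Python sketch of the same threshold
# arithmetic, using sample stddev to match Spark's stddev default:
import statistics

def sp_threshold(last_20_rates):
    # avg(total_return_rate) + 3 * stddev(total_return_rate)
    return statistics.mean(last_20_rates) + 3 * statistics.stdev(last_20_rates)

rates = [0.001, -0.002, 0.003] * 6 + [0.002, -0.001]  # 20 daily return rates
print(sp_threshold(rates))  # the cutoff today's rate is compared against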
def daily_compute(self, busi_date):
    """Compute daily short-position close/unclose returns."""
    spark = self.spark
    yesterday = get_date(self.date_order, self.order_date, busi_date, -1)
    unclose = spark.sql("select * from %s.%s where busi_date='%s'"
                        % (self.fdata, self.unclose_table, yesterday))
    trd = spark.sql("select * from %s.%s where busi_date='%s'"
                    % (self.odata, self.cash_flow_table, busi_date))
    trd = trd.filter("trd_type='short_related' and prd_no!='0.0'")
    trd = trd.withColumn(
        "now_trd", func.struct("trd_qty", "trd_cash_flow", "timestamp"))
    trd = trd.groupBy("trade_id", "secu_acc_id", "prd_no") \
        .agg(func.collect_list("now_trd").alias("now_trd"))
    trd.persist(StorageLevel.DISK_ONLY)
    trd.count()
    debt = spark.sql("select * from %s.%s where busi_date='%s'"
                     % (self.odata, self.debt_table, busi_date))
    debt = debt.select("trade_id", "secu_acc_id", "prd_no",
                       "liab_qty", "mkt_val")
    debt = debt.filter("prd_no!='0.0'")
    df = full_outer_join(unclose, trd, ["trade_id", "secu_acc_id", "prd_no"])
    df = full_outer_join(df, debt, ["trade_id", "secu_acc_id", "prd_no"])
    df.persist(StorageLevel.DISK_ONLY)
    df.count()
    data = df.rdd.map(lambda x: Row(**short_compute(
        x.asDict(recursive=True), busi_date)))
    if data.count() > 0:
        data = data.toDF()
        close = data.filter("remain_liab_qty = 0")
        close = close.withColumn("close_date", close.busi_date)
        self.check_data(close)
        unclose = data.filter("remain_liab_qty != 0")
        self.save_close_data(close, busi_date)
        self.save_unclose_data(unclose, busi_date)
    else:
        print("No output data for closed short-position stock returns")
def daily_compute(self, startdate=None, enddate=None):
    # Compute T-day returns
    df_o, df_o_long, df_o_short, df_tbd, df_tbd_long, df_tbd_short = \
        self._cal_return(enddate)
    startdate = get_date(self.date_order, self.order_date, enddate, -1)
    df_o_1, df_o_long_1, df_o_short_1, df_tbd_1, df_tbd_long_1, df_tbd_short_1 = \
        self._cal_return(startdate, oreturn="o_return_1",
                         tbdreturn="tbd_return_1",
                         oept="o_exception_label_1",
                         tbdept="tbd_exception_label_1",
                         ounclose="o_unclose_amt_1",
                         tbdunclose="tbd_unclose_amt_1",
                         type='unclose')
    # cal: long/short o-returns
    df1 = full_outer_join(df_o, df_o_1, ["trade_id", "secu_acc_id", "prd_no"])
    df1.registerTempTable("temp_o_return_table")
    sqlOTemp = """
        select trade_id,secu_acc_id,prd_no,
               nvl(o_return,0.0)-nvl(o_return_1,0.0) {1},
               nvl(o_unclose_amt,0.0)-nvl(o_unclose_amt_1,0.0) {2},
               nvl(o_exception_label,o_exception_label_1) o_exception_label
        from {0}
    """
    dfOreturn = self.sparkSession.sql(
        sqlOTemp.format("temp_o_return_table", "o_return", "o_unclose_amt"))
    dfOreturn.show()
    dfOLong = full_outer_join(df_o_long, df_o_long_1,
                              ["trade_id", "secu_acc_id", "prd_no"])
    dfOLong.registerTempTable("temp_o_long_return_table")
    dfOlongreturn = self.sparkSession.sql(
        sqlOTemp.format("temp_o_long_return_table", "o_return",
                        "o_unclose_amt"))
    dfOlongreturn.show()
    dfOShort = full_outer_join(df_o_short, df_o_short_1,
                               ["trade_id", "secu_acc_id", "prd_no"])
    dfOShort.registerTempTable("temp_o_short_return_table")
    dfOShortreturn = self.sparkSession.sql(
        sqlOTemp.format("temp_o_short_return_table", "o_return",
                        "o_unclose_amt"))
    df2 = full_outer_join(df_tbd, df_tbd_1,
                          ["trade_id", "secu_acc_id", "prd_no"])
    df2.registerTempTable("temp_tbd_return_table")
    sqlTbdTemp = """
        select trade_id,secu_acc_id,prd_no,
               nvl(tbd_return,0.0)-nvl(tbd_return_1,0.0) {1},
               nvl(tbd_unclose_amt,0.0)-nvl(tbd_unclose_amt_1,0.0) {2},
               nvl(tbd_exception_label,tbd_exception_label_1) tbd_exception_label
        from {0}
    """
    dfTbdreturn = self.sparkSession.sql(
        sqlTbdTemp.format("temp_tbd_return_table", "tbd_return",
                          "tbd_unclose_amt"))
    dfTbdreturn.show()
    dfTbdLong = full_outer_join(df_tbd_long, df_tbd_long_1,
                                ["trade_id", "secu_acc_id", "prd_no"])
    dfTbdLong.registerTempTable("temp_tbd_long_return_table")
    dfTbdlongreturn = self.sparkSession.sql(
        sqlTbdTemp.format("temp_tbd_long_return_table", "tbd_return",
                          "tbd_unclose_amt"))
    dfTbdshort = full_outer_join(df_tbd_short, df_tbd_short_1,
                                 ["trade_id", "secu_acc_id", "prd_no"])
    dfTbdshort.registerTempTable("temp_tbd_short_return_table")
    dfTbdShortreturn = self.sparkSession.sql(
        sqlTbdTemp.format("temp_tbd_short_return_table", "tbd_return",
                          "tbd_unclose_amt"))
    dfall = full_outer_join(dfOreturn, dfTbdreturn,
                            ["trade_id", "secu_acc_id", "prd_no"])
    dfall.registerTempTable("temp_all_return")
    sql_all_return = """
        select trade_id,secu_acc_id,prd_no,
               nvl(o_return,0) o_return,
               nvl(tbd_return,0) tbd_return,
               nvl(o_exception_label,0) o_exception_label,
               nvl(tbd_exception_label,0) tbd_exception_label
        from {0}
    """.format("temp_all_return")
    dfall = self.sparkSession.sql(sql_all_return)
    print("dfall")
    dfall.show()
    dfLong = full_outer_join(dfOlongreturn, dfTbdlongreturn,
                             ["trade_id", "secu_acc_id", "prd_no"])
    dfLong.registerTempTable("temp_all_long_return")
    sqlOcTemp = """
        select trade_id,secu_acc_id,prd_no,
               case when abs(nvl(o_return,0.0)-nvl(tbd_return,0.0))<=0.01
                    then 0 else 1 end {1},
               case when abs(nvl(o_unclose_amt,0.0)-nvl(tbd_unclose_amt,0.0))<=0.01
                    then 0 else 1 end {2}
        from {0}
    """
    # note: the "lmv_euqal_exception" spelling is kept to match the existing
    # output schema used downstream
    sqlOcLong = sqlOcTemp.format("temp_all_long_return",
                                 "lr_equal_exception", "lmv_euqal_exception")
    df_oc_long = self.sparkSession.sql(sqlOcLong)
    dfShort = full_outer_join(dfOShortreturn, dfTbdShortreturn,
                              ["trade_id", "secu_acc_id", "prd_no"])
    dfShort.registerTempTable("temp_all_short_return")
    sqlOcShort = sqlOcTemp.format("temp_all_short_return",
                                  "sr_equal_exception", "smv_equal_exception")
    df_oc_short = self.sparkSession.sql(sqlOcShort)
    df2 = full_outer_join(df_oc_long, df_oc_short,
                          ["trade_id", "secu_acc_id", "prd_no"])
    df3 = full_outer_join(dfall, df2, ["trade_id", "secu_acc_id", "prd_no"])
    df3.registerTempTable("temp_stockotbdcheckexception")
    sql = """select trade_id,
                    secu_acc_id,
                    '{0}' busi_date,
                    prd_no,
                    nvl(tbd_return,0) tbd_return,
                    nvl(tbd_exception_label,0) tbd_exception_label,
                    nvl(o_return,0) o_return,
                    nvl(o_exception_label,0) o_exception_label,
                    nvl(lr_equal_exception,0) lr_equal_exception,
                    nvl(lmv_euqal_exception,0) lmv_euqal_exception,
                    nvl(sr_equal_exception,0) sr_equal_exception,
                    nvl(smv_equal_exception,0) smv_equal_exception
             from temp_stockotbdcheckexception
    """
    finalDf = self.sparkSession.sql(sql.format(enddate))
    self._cal_detail(finalDf, enddate)
    self._stat_report(finalDf, enddate)
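# Hedged note: the *_equal_exception columns above implement a tolerance
# check: two independently computed figures (o_* vs tbd_*) count as equal
# when they differ by at most 0.01. The same rule in plain Python, with
# `or 0.0` standing in for SQL's nvl:
def equal_exception(a, b, tol=0.01):
    # 0 = consistent, 1 = exception, mirroring the SQL case expression
    return 0 if abs((a or 0.0) - (b or 0.0)) <= tol else 1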
def daily_compute(self, startdate=None, enddate=None):
    # cal: o_return, o_exception_label, c_return, c_exception_label
    # Fetch T-day data
    df_o, df_o_long, df_o_short = self._cal_return(enddate)
    startdate = get_date(self.date_order, self.order_date, enddate, -1)
    df_o_1, df_o_long_1, df_o_short_1 = \
        self._cal_return(startdate, oreturn="o_return_1",
                         oept="o_exception_label_1",
                         ounclose="o_unclose_amt_1",
                         type='unclose')
    df_dc_long = self._cal_dc(enddate, 'long_related')
    df_dc_short = self._cal_dc(enddate, 'short_related')
    df_dc = df_dc_long.union(df_dc_short).groupBy(
        ["trade_id", "secu_acc_id", "prd_no"]).agg(
        fn.sum("dc_return").alias("dc_return"),
        fn.max("dc_exception_label").alias("dc_exception_label"))
    # cal: long/short o-returns
    df1 = full_outer_join(df_o, df_o_1, ["trade_id", "secu_acc_id", "prd_no"])
    df1.registerTempTable("temp_o_return_table")
    sqlOTemp = """
        select trade_id,secu_acc_id,prd_no,
               nvl(o_return,0.0)-nvl(o_return_1,0.0) {1},
               nvl(o_unclose_amt,0.0) {2},
               nvl(o_exception_label,o_exception_label_1) o_exception_label
        from {0}
    """
    dfOreturn = self.sparkSession.sql(
        sqlOTemp.format("temp_o_return_table", "o_return", "o_unclose_amt"))
    dfOLong = full_outer_join(df_o_long, df_o_long_1,
                              ["trade_id", "secu_acc_id", "prd_no"])
    dfOLong.registerTempTable("temp_o_long_return_table")
    dfOlongreturn = self.sparkSession.sql(
        sqlOTemp.format("temp_o_long_return_table", "o_return",
                        "o_unclose_amt"))
    dfOShort = full_outer_join(df_o_short, df_o_short_1,
                               ["trade_id", "secu_acc_id", "prd_no"])
    dfOShort.registerTempTable("temp_o_short_return_table")
    dfOShortreturn = self.sparkSession.sql(
        sqlOTemp.format("temp_o_short_return_table", "o_return",
                        "o_unclose_amt"))
    dfall = full_outer_join(dfOreturn, df_dc,
                            ["trade_id", "secu_acc_id", "prd_no"])
    dfall.registerTempTable("temp_all_return")
    sql_all_return = """
        select trade_id,secu_acc_id,prd_no,
               nvl(o_return,0) o_return,
               nvl(dc_return,0) dc_return,
               nvl(o_exception_label,0) o_exception_label,
               nvl(dc_exception_label,0) dc_exception_label
        from {0}
    """.format("temp_all_return")
    dfall = self.sparkSession.sql(sql_all_return)
    dfLong = full_outer_join(dfOlongreturn, df_dc_long,
                             ["trade_id", "secu_acc_id", "prd_no"])
    dfLong.registerTempTable("temp_all_long_return")
    sqlOcTemp = """
        select trade_id,secu_acc_id,prd_no,
               case when abs(nvl(o_return,0.0)-nvl(dc_return,0.0))<=0.01
                    then 0 else 1 end {1},
               case when abs(nvl(o_unclose_amt,0.0)-nvl(dc_unclose_amt,0.0))<=0.01
                    then 0 else 1 end {2}
        from {0}
    """
    sqlOcLong = sqlOcTemp.format("temp_all_long_return",
                                 "lr_equal_exception", "lmv_euqal_exception")
    df_oc_long = self.sparkSession.sql(sqlOcLong)
    dfShort = full_outer_join(dfOShortreturn, df_dc_short,
                              ["trade_id", "secu_acc_id", "prd_no"])
    dfShort.registerTempTable("temp_all_short_return")
    sqlOcShort = sqlOcTemp.format("temp_all_short_return",
                                  "sr_equal_exception", "smv_equal_exception")
    df_oc_short = self.sparkSession.sql(sqlOcShort)
    df2 = full_outer_join(df_oc_long, df_oc_short,
                          ["trade_id", "secu_acc_id", "prd_no"])
    df3 = full_outer_join(dfall, df2, ["trade_id", "secu_acc_id", "prd_no"])
    df3.registerTempTable("temp_stockodccheckexception")
    sql = """select trade_id,
                    secu_acc_id,
                    '{0}' busi_date,
                    prd_no,
                    nvl(o_return,0) o_return,
                    nvl(o_exception_label,0) o_exception_label,
                    nvl(dc_return,0) dc_return,
                    nvl(dc_exception_label,0) dc_exception_label,
                    nvl(lr_equal_exception,0) lr_equal_exception,
                    nvl(lmv_euqal_exception,0) lmv_euqal_exception,
                    nvl(sr_equal_exception,0) sr_equal_exception,
                    nvl(smv_equal_exception,0) smv_equal_exception
             from temp_stockodccheckexception
    """.format(enddate)
    finalDf = self.sparkSession.sql(sql)
    self._cal_detail(finalDf, enddate)
    self._stat_report(finalDf, enddate)