def daily_compute(self, busi_date):
    """Compute daily short-side trade close/unclose results."""
    spark = self.spark
    yesterday = get_date(self.date_order, self.order_date, busi_date, -1)
    unclose = spark.sql("select * from %s.%s where busi_date='%s'" %
                        (self.fdata, self.unclose_cal, yesterday))
    trd = spark.sql("select * from %s.%s where busi_date='%s'" %
                    (self.odata, self.cash_flow_table, busi_date))\
        .filter("trd_type='short_related' and prd_no!='0.0'")\
        .filter("trd_qty!=0 or amortize_label!=0")\
        .withColumn("now_trd",
                    func.struct("timestamp", "trd_qty",
                                "trd_cash_flow", "amortize_label"))\
        .groupBy("trade_id", "secu_acc_id", "prd_no")\
        .agg(func.collect_list("now_trd").alias("now_trd"))\
        .persist(StorageLevel.DISK_ONLY)
    trd.count()
    debt = spark.sql("select * from %s.%s where busi_date='%s'" %
                     (self.odata, self.debt_table, busi_date))\
        .filter("prd_no!='0.0'")\
        .select("trade_id", "secu_acc_id", "prd_no", "liab_qty", "mkt_val")
    df = full_outer_join(unclose, trd, ["trade_id", "secu_acc_id", "prd_no"])
    df = full_outer_join(df, debt, ["trade_id", "secu_acc_id", "prd_no"])
    df.persist(StorageLevel.DISK_ONLY).count()
    data = df.rdd.map(
        lambda x: _compute(x.asDict(recursive=True), busi_date, yesterday))
    if data.count() > 0:
        data.persist(StorageLevel.DISK_ONLY).count()
        self.save_trade_close_data(data, spark, busi_date)
        self.save_trade_unclose_data(data, spark, busi_date,
                                     lambda t: int(t["unclose_qty"]) < 0)
    else:
        print 'No short-side open-position trade data'
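# full_outer_join is a shared helper imported elsewhere in this module.
# A minimal sketch of the behaviour the code above assumes (hypothetical,
# not the project's actual implementation): a full outer join on a list of
# key column names, so Spark keeps a single, coalesced copy of each key
# column and the same key names stay usable in later joins and groupBys.
def full_outer_join(left, right, keys):
    # Joining on a name list (rather than a Column expression) deduplicates
    # the key columns in the result, even for outer joins.
    return left.join(right, keys, "full_outer")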
def _get_base_data(self, spark, table, enddate, odataTable, type):
    """Join yesterday's unclose data, today's cash-flow detail and
    today's position/debt snapshot into one base DataFrame."""
    sqlTemp = "select * from {0}.{1} where busi_date='{2}'"
    startdate = get_date(self.date_order, self.order_date, enddate, -1)
    unCloseCalDf = spark.sql(sqlTemp.format(self.fdata, table, startdate))
    trd_type = "long_related" if type == 'long' else "short_related"
    cashDetailDf = spark.sql(
        sqlTemp.format(self.odata, self.cash_flow_detail, enddate))\
        .filter("trd_type='%s' and prd_no!='0.0'" % trd_type)\
        .filter("trd_qty!=0 or amortize_label!=0")\
        .withColumn("trd_detail",
                    sqlf.struct("timestamp", "trd_qty",
                                "trd_cash_flow", "amortize_label"))\
        .groupBy("trade_id", "secu_acc_id", "prd_no")\
        .agg(sqlf.collect_list("trd_detail").alias("trd_detail_list"))
    cashDetailDf.persist(StorageLevel.DISK_ONLY).count()
    moneyDf = spark.sql(sqlTemp.format(self.odata, odataTable, enddate))\
        .filter("prd_no!='0.0'")\
        .select("trade_id", "secu_acc_id", "prd_no",
                "qty" if type == 'long' else "liab_qty", "mkt_val")
    tempDf = full_outer_join(unCloseCalDf, cashDetailDf,
                             ["trade_id", "secu_acc_id", "prd_no"])
    baseDf = full_outer_join(tempDf, moneyDf,
                             ["trade_id", "secu_acc_id", "prd_no"])
    baseDf.persist(StorageLevel.DISK_ONLY).count()
    return baseDf
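# Hypothetical call sites for _get_base_data, assuming the same table
# attributes used by the daily_compute variants in this module:
#   long side  -> position table carrying "qty":
#       base = self._get_base_data(spark, self.unclose_cal, busi_date,
#                                  self.asset_table, 'long')
#   short side -> debt table carrying "liab_qty":
#       base = self._get_base_data(spark, self.unclose_cal, busi_date,
#                                  self.debt_table, 'short')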
def daily_compute(self, busi_date):
    """Compute daily long-side trade close/unclose results."""
    spark = self.spark
    yesterday = get_date(self.date_order, self.order_date, busi_date, -1)
    unclose = spark.sql("select * from %s.%s where busi_date='%s'" %
                        (self.fdata, self.unclose_cal, yesterday))
    trd = spark.sql("select * from %s.%s where busi_date='%s'" %
                    (self.odata, self.cash_flow_table, busi_date))
    trd = trd.filter("trd_type='long_related' and prd_no!='0.0'")
    # When trd_qty == 0, the row always has amortize_label != 0 (== 1).
    trd = trd.filter("trd_qty!=0 or amortize_label!=0")
    asset = spark.sql("select * from %s.%s where busi_date='%s'" %
                      (self.odata, self.asset_table, busi_date))
    asset = asset.filter("prd_no!='0.0'")
    asset = asset.select("trade_id", "secu_acc_id", "prd_no",
                         "qty", "mkt_val")
    trd = trd.withColumn(
        "now_trd",
        func.struct("timestamp", "trd_qty", "trd_cash_flow",
                    "amortize_label"))
    trd = trd.groupBy("trade_id", "secu_acc_id", "prd_no") \
        .agg(func.collect_list("now_trd").alias("now_trd"))
    trd.persist(StorageLevel.DISK_ONLY)
    trd.count()
    df = full_outer_join(unclose, trd, ["trade_id", "secu_acc_id", "prd_no"])
    df = full_outer_join(df, asset, ["trade_id", "secu_acc_id", "prd_no"])
    df.persist(StorageLevel.DISK_ONLY)
    df.count()
    data = df.rdd.map(
        lambda x: _compute(x.asDict(recursive=True), busi_date, yesterday))
    data.persist(StorageLevel.DISK_ONLY)
    if data.count() > 0:
        self.save_close_data(data, busi_date,
                             lambda t: int(t["unclose_qty"]) == 0,
                             sort_long_close_data)
        self.save_unclose_data(data, busi_date,
                               lambda t: int(t["unclose_qty"]) > 0,
                               sort_long_unclose_data)
        self.save_unclose_cal(data, busi_date,
                              lambda t: int(t["unclose_qty"]) > 0,
                              sort_long_unclose_cal)
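# get_date is imported from a shared date utility. A sketch of the contract
# assumed throughout this module (inferred from usage, not the real code):
# date_order maps a busi_date to its index in the trading calendar,
# order_date maps an index back to a date, so offset=-1 yields the previous
# trading day rather than the previous calendar day.
def get_date(date_order, order_date, busi_date, offset):
    return order_date[date_order[busi_date] + offset]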
def daily_compute(self, busi_date):
    """Compute daily short-side close/unclose results per position."""
    spark = self.spark
    yesterday = get_date(self.date_order, self.order_date, busi_date, -1)
    unclose = spark.sql("select * from %s.%s where busi_date='%s'" %
                        (self.fdata, self.unclose_table, yesterday))
    trd = spark.sql("select * from %s.%s where busi_date='%s'" %
                    (self.odata, self.cash_flow_table, busi_date))
    trd = trd.filter("trd_type='short_related' and prd_no!='0.0'")
    trd = trd.withColumn(
        "now_trd", func.struct("trd_qty", "trd_cash_flow", "timestamp"))
    trd = trd.groupBy("trade_id", "secu_acc_id", "prd_no") \
        .agg(func.collect_list("now_trd").alias("now_trd"))
    trd.persist(StorageLevel.DISK_ONLY)
    trd.count()
    debt = spark.sql("select * from %s.%s where busi_date='%s'" %
                     (self.odata, self.debt_table, busi_date))
    debt = debt.select("trade_id", "secu_acc_id", "prd_no",
                       "liab_qty", "mkt_val")
    debt = debt.filter("prd_no!='0.0'")
    df = full_outer_join(unclose, trd, ["trade_id", "secu_acc_id", "prd_no"])
    df = full_outer_join(df, debt, ["trade_id", "secu_acc_id", "prd_no"])
    df.persist(StorageLevel.DISK_ONLY)
    df.count()
    data = df.rdd.map(lambda x: Row(**short_compute(
        x.asDict(recursive=True), busi_date)))
    if data.count() > 0:
        data = data.toDF()
        close = data.filter("remain_liab_qty = 0")
        close = close.withColumn("close_date", close.busi_date)
        self.check_data(close)
        unclose = data.filter("remain_liab_qty != 0")
        self.save_close_data(close, busi_date)
        self.save_unclose_data(unclose, busi_date)
    else:
        print "No short-side closed-position return data to output"
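# short_compute is defined elsewhere. The code above only relies on its
# return value being a flat dict (usable as Row(**result)) that carries at
# least busi_date and remain_liab_qty, since those drive the close/unclose
# split; this is the assumed contract, not its implementation.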
def daily_compute(self, startdate=None, enddate=None):
    df1 = self._cal_rr_sum_exception(enddate)
    df2 = self._cal_r_sum_exception(enddate)
    df3 = full_outer_join(df1, df2,
                          ["trade_id", "busi_date", "compute_term"])
    df3.registerTempTable("temp_stockinvplcheckexception")
    sql = """select trade_id,
                    busi_date,
                    compute_term,
                    nvl(r_sum_exception,0) pl_r_sum_exception,
                    nvl(trd_sum_exception,0) pl_trd_sum_exception,
                    nvl(rr_sum_exception,0) rr_sum_exception
             from temp_stockinvplcheckexception
          """
    finalDf = self.sparkSession.sql(sql)
    self._cal_detail(finalDf, enddate)
    self._stat_report(finalDf, enddate)
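# The nvl(...) projection above can also be written with the DataFrame API
# and no temp table; a sketch of the equivalent step (assumes
# `from pyspark.sql import functions as fn`):
#   finalDf = df3.select(
#       "trade_id", "busi_date", "compute_term",
#       fn.coalesce(fn.col("r_sum_exception"), fn.lit(0))
#         .alias("pl_r_sum_exception"),
#       fn.coalesce(fn.col("trd_sum_exception"), fn.lit(0))
#         .alias("pl_trd_sum_exception"),
#       fn.coalesce(fn.col("rr_sum_exception"), fn.lit(0))
#         .alias("rr_sum_exception"))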
def daily_compute(self, startdate=None, enddate=None):
    # cal: o_return, o_exception_label, c_return, c_exception_label
    df_o_long = self._cal_data(enddate, self.fdata,
                               self.stock_close_o_trade_long_data,
                               self.stock_unclose_o_trade_long_data,
                               'o_return', 'o_exception_label',
                               'o_unclose_amt')
    df_o_short = self._cal_data(enddate, self.fdata,
                                self.stock_close_o_trade_short_data,
                                self.stock_unclose_o_trade_short_data,
                                'o_return', 'o_exception_label',
                                'o_unclose_amt')
    df_o = df_o_long.union(df_o_short).groupBy(
        ["trade_id", "secu_acc_id", "busi_date", "prd_no"]).agg(
        fn.sum("o_return").alias("o_return"),
        fn.max("o_exception_label").alias("o_exception_label"))
    if LOG_LEVEL == 'debug':
        df_o.show()
    df_c_long = self._cal_data(enddate, self.fdata,
                               self.stock_close_c_trade_long_data,
                               self.stock_unclose_c_trade_long_data,
                               'c_return', 'c_exception_label',
                               'c_unclose_amt')
    df_c_short = self._cal_data(enddate, self.fdata,
                                self.stock_close_c_trade_short_data,
                                self.stock_unclose_c_trade_short_data,
                                'c_return', 'c_exception_label',
                                'c_unclose_amt')
    df_c = df_c_long.union(df_c_short).groupBy(
        ["trade_id", "secu_acc_id", "busi_date", "prd_no"]).agg(
        fn.sum("c_return").alias("c_return"),
        fn.max("c_exception_label").alias("c_exception_label"))
    # cal: long/short o-vs-c consistency flags
    ocLong = full_outer_join(
        df_o_long, df_c_long,
        ["trade_id", "secu_acc_id", "busi_date", "prd_no"])
    ocLong.registerTempTable("temp_oc_long_check_1")
    sqlOcTemp = """
        select trade_id, secu_acc_id, busi_date, prd_no,
               case when abs(nvl(c_return,0.0)-nvl(o_return,0.0))<=0.01
                    then 0 else 1 end {1},
               case when abs(nvl(c_unclose_amt,0.0)-nvl(o_unclose_amt,0.0))<=0.01
                    then 0 else 1 end {2}
        from {0}
    """
    sqlOcLong = sqlOcTemp.format("temp_oc_long_check_1",
                                 "lr_equal_exception",
                                 "lmv_equal_exception")
    df_oc_long = self.sparkSession.sql(sqlOcLong)
    ocShort = full_outer_join(
        df_o_short, df_c_short,
        ["trade_id", "secu_acc_id", "busi_date", "prd_no"])
    ocShort.registerTempTable("temp_oc_short_check_2")
    sqlOcShort = sqlOcTemp.format("temp_oc_short_check_2",
                                  "sr_equal_exception",
                                  "smv_equal_exception")
    df_oc_short = self.sparkSession.sql(sqlOcShort)
    df1 = full_outer_join(
        df_o, df_c, ["trade_id", "secu_acc_id", "busi_date", "prd_no"])
    df2 = full_outer_join(
        df_oc_long, df_oc_short,
        ["trade_id", "secu_acc_id", "busi_date", "prd_no"])
    df3 = full_outer_join(
        df1, df2, ["trade_id", "secu_acc_id", "busi_date", "prd_no"])
    df3.registerTempTable("temp_stockoccheckexception")
    sql = """
        select trade_id,
               secu_acc_id,
               busi_date,
               prd_no,
               nvl(o_return,0) o_return,
               nvl(o_exception_label,0) o_exception_label,
               nvl(c_return,0) c_return,
               nvl(c_exception_label,0) c_exception_label,
               nvl(lr_equal_exception,0) lr_equal_exception,
               nvl(lmv_equal_exception,0) lmv_equal_exception,
               nvl(sr_equal_exception,0) sr_equal_exception,
               nvl(smv_equal_exception,0) smv_equal_exception
        from temp_stockoccheckexception
    """
    finalDf = self.sparkSession.sql(sql)
    if LOG_LEVEL == 'debug':
        finalDf.show()
    self._cal_detail(finalDf, enddate)
    self._stat_report(finalDf, enddate)
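# The 0.01-tolerance case-when is repeated for every long/short pair in
# these checks; a hypothetical helper showing the same flag as a reusable
# DataFrame expression (assumes `from pyspark.sql import functions as fn`):
def equal_exception(a, b, tol=0.01):
    # 0 when the two columns agree within tol (nulls treated as 0.0), else 1.
    diff = fn.abs(fn.coalesce(fn.col(a), fn.lit(0.0)) -
                  fn.coalesce(fn.col(b), fn.lit(0.0)))
    return fn.when(diff <= tol, 0).otherwise(1)
# e.g. ocLong.select("trade_id", "secu_acc_id", "busi_date", "prd_no",
#                    equal_exception("c_return", "o_return")
#                    .alias("lr_equal_exception"))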
def daily_compute(self, startdate=None, enddate=None):
    # cal: c_return, c_exception_label, dc_return, dc_exception_label
    # Fetch close-position return data.
    df_c, df_c_long, df_c_short = self._cal_return(enddate)
    df_dc_long = self._cal_dc(enddate, 'long_related')
    df_dc_short = self._cal_dc(enddate, 'short_related')
    # Fetch daily_check_data.
    df_dc = df_dc_long.union(df_dc_short).groupBy(
        ["trade_id", "secu_acc_id", "prd_no"]).agg(
        fn.sum("dc_return").alias("dc_return"),
        fn.max("dc_exception_label").alias("dc_exception_label"))
    # cal: long/short c-vs-dc consistency flags
    dfall = full_outer_join(df_c, df_dc,
                            ["trade_id", "secu_acc_id", "prd_no"])
    dfall.registerTempTable("temp_all_return")
    sql_all_return = """
        select trade_id, secu_acc_id, prd_no,
               nvl(c_return,0) c_return,
               nvl(dc_return,0) dc_return,
               nvl(c_exception_label,0) c_exception_label,
               nvl(dc_exception_label,0) dc_exception_label
        from {0}
    """.format("temp_all_return")
    dfall = self.sparkSession.sql(sql_all_return)
    dfLong = full_outer_join(df_c_long, df_dc_long,
                             ["trade_id", "secu_acc_id", "prd_no"])
    dfLong.registerTempTable("temp_all_long_return")
    sqlOcTemp = """
        select trade_id, secu_acc_id, prd_no,
               case when abs(nvl(c_return,0.0)-nvl(dc_return,0.0))<=0.01
                    then 0 else 1 end {1},
               case when abs(nvl(c_unclose_amt,0.0)-nvl(dc_unclose_amt,0.0))<=0.01
                    then 0 else 1 end {2}
        from {0}
    """
    sqlOcLong = sqlOcTemp.format("temp_all_long_return",
                                 "lr_equal_exception",
                                 "lmv_equal_exception")
    df_cdc_long = self.sparkSession.sql(sqlOcLong)
    dfShort = full_outer_join(df_c_short, df_dc_short,
                              ["trade_id", "secu_acc_id", "prd_no"])
    dfShort.registerTempTable("temp_all_short_return")
    sqlOcShort = sqlOcTemp.format("temp_all_short_return",
                                  "sr_equal_exception",
                                  "smv_equal_exception")
    df_cdc_short = self.sparkSession.sql(sqlOcShort)
    df2 = full_outer_join(df_cdc_long, df_cdc_short,
                          ["trade_id", "secu_acc_id", "prd_no"])
    df3 = full_outer_join(dfall, df2,
                          ["trade_id", "secu_acc_id", "prd_no"])
    df3.registerTempTable("temp_stockodccheckexception")
    sql = """select trade_id,
                    secu_acc_id,
                    '{0}' busi_date,
                    prd_no,
                    nvl(c_return,0) c_return,
                    nvl(c_exception_label,0) c_exception_label,
                    nvl(dc_return,0) dc_return,
                    nvl(dc_exception_label,0) dc_exception_label,
                    nvl(lr_equal_exception,0) lr_equal_exception,
                    nvl(lmv_equal_exception,0) lmv_equal_exception,
                    nvl(sr_equal_exception,0) sr_equal_exception,
                    nvl(smv_equal_exception,0) smv_equal_exception
             from temp_stockodccheckexception
          """.format(enddate)
    finalDf = self.sparkSession.sql(sql)
    self._cal_detail(finalDf, enddate)
    self._stat_report(finalDf, enddate)
def daily_compute(self, startdate=None, enddate=None):
    # Compute day-T returns.
    df_o, df_o_long, df_o_short, df_tbd, df_tbd_long, df_tbd_short = \
        self._cal_return(enddate)
    startdate = get_date(self.date_order, self.order_date, enddate, -1)
    df_o_1, df_o_long_1, df_o_short_1, \
        df_tbd_1, df_tbd_long_1, df_tbd_short_1 = \
        self._cal_return(startdate, oreturn="o_return_1",
                         tbdreturn="tbd_return_1",
                         oept="o_exception_label_1",
                         tbdept="tbd_exception_label_1",
                         ounclose="o_unclose_amt_1",
                         tbdunclose="tbd_unclose_amt_1",
                         type='unclose')
    # cal: long/short o-vs-tbd consistency flags
    df1 = full_outer_join(df_o, df_o_1,
                          ["trade_id", "secu_acc_id", "prd_no"])
    df1.registerTempTable("temp_o_return_table")
    sqlOTemp = """
        select trade_id, secu_acc_id, prd_no,
               nvl(o_return,0.0)-nvl(o_return_1,0.0) {1},
               nvl(o_unclose_amt,0.0)-nvl(o_unclose_amt_1,0.0) {2},
               nvl(o_exception_label,o_exception_label_1) o_exception_label
        from {0}
    """
    dfOreturn = self.sparkSession.sql(
        sqlOTemp.format("temp_o_return_table", "o_return", "o_unclose_amt"))
    dfOreturn.show()
    dfOLong = full_outer_join(df_o_long, df_o_long_1,
                              ["trade_id", "secu_acc_id", "prd_no"])
    dfOLong.registerTempTable("temp_o_long_return_table")
    dfOlongreturn = self.sparkSession.sql(
        sqlOTemp.format("temp_o_long_return_table",
                        "o_return", "o_unclose_amt"))
    dfOlongreturn.show()
    dfOShort = full_outer_join(df_o_short, df_o_short_1,
                               ["trade_id", "secu_acc_id", "prd_no"])
    dfOShort.registerTempTable("temp_o_short_return_table")
    dfOShortreturn = self.sparkSession.sql(
        sqlOTemp.format("temp_o_short_return_table",
                        "o_return", "o_unclose_amt"))
    df2 = full_outer_join(df_tbd, df_tbd_1,
                          ["trade_id", "secu_acc_id", "prd_no"])
    df2.registerTempTable("temp_tbd_return_table")
    sqlTbdTemp = """
        select trade_id, secu_acc_id, prd_no,
               nvl(tbd_return,0.0)-nvl(tbd_return_1,0.0) {1},
               nvl(tbd_unclose_amt,0.0)-nvl(tbd_unclose_amt_1,0.0) {2},
               nvl(tbd_exception_label,tbd_exception_label_1) tbd_exception_label
        from {0}
    """
    dfTbdreturn = self.sparkSession.sql(
        sqlTbdTemp.format("temp_tbd_return_table",
                          "tbd_return", "tbd_unclose_amt"))
    dfTbdreturn.show()
    dfTbdLong = full_outer_join(df_tbd_long, df_tbd_long_1,
                                ["trade_id", "secu_acc_id", "prd_no"])
    dfTbdLong.registerTempTable("temp_tbd_long_return_table")
    dfTbdlongreturn = self.sparkSession.sql(
        sqlTbdTemp.format("temp_tbd_long_return_table",
                          "tbd_return", "tbd_unclose_amt"))
    dfTbdshort = full_outer_join(df_tbd_short, df_tbd_short_1,
                                 ["trade_id", "secu_acc_id", "prd_no"])
    dfTbdshort.registerTempTable("temp_tbd_short_return_table")
    dfTbdShortreturn = self.sparkSession.sql(
        sqlTbdTemp.format("temp_tbd_short_return_table",
                          "tbd_return", "tbd_unclose_amt"))
    dfall = full_outer_join(dfOreturn, dfTbdreturn,
                            ["trade_id", "secu_acc_id", "prd_no"])
    dfall.registerTempTable("temp_all_return")
    sql_all_return = """
        select trade_id, secu_acc_id, prd_no,
               nvl(o_return,0) o_return,
               nvl(tbd_return,0) tbd_return,
               nvl(o_exception_label,0) o_exception_label,
               nvl(tbd_exception_label,0) tbd_exception_label
        from {0}
    """.format("temp_all_return")
    dfall = self.sparkSession.sql(sql_all_return)
    print 'dfall'
    dfall.show()
    dfLong = full_outer_join(dfOlongreturn, dfTbdlongreturn,
                             ["trade_id", "secu_acc_id", "prd_no"])
    dfLong.registerTempTable("temp_all_long_return")
    sqlOcTemp = """
        select trade_id, secu_acc_id, prd_no,
               case when abs(nvl(o_return,0.0)-nvl(tbd_return,0.0))<=0.01
                    then 0 else 1 end {1},
               case when abs(nvl(o_unclose_amt,0.0)-nvl(tbd_unclose_amt,0.0))<=0.01
                    then 0 else 1 end {2}
        from {0}
    """
    sqlOcLong = sqlOcTemp.format("temp_all_long_return",
                                 "lr_equal_exception",
                                 "lmv_equal_exception")
    df_oc_long = self.sparkSession.sql(sqlOcLong)
    dfShort = full_outer_join(dfOShortreturn, dfTbdShortreturn,
                              ["trade_id", "secu_acc_id", "prd_no"])
    dfShort.registerTempTable("temp_all_short_return")
    sqlOcShort = sqlOcTemp.format("temp_all_short_return",
                                  "sr_equal_exception",
                                  "smv_equal_exception")
    df_oc_short = self.sparkSession.sql(sqlOcShort)
    df2 = full_outer_join(df_oc_long, df_oc_short,
                          ["trade_id", "secu_acc_id", "prd_no"])
    df3 = full_outer_join(dfall, df2,
                          ["trade_id", "secu_acc_id", "prd_no"])
    df3.registerTempTable("temp_stockotbdcheckexception")
    sql = """select trade_id,
                    secu_acc_id,
                    '{0}' busi_date,
                    prd_no,
                    nvl(tbd_return,0) tbd_return,
                    nvl(tbd_exception_label,0) tbd_exception_label,
                    nvl(o_return,0) o_return,
                    nvl(o_exception_label,0) o_exception_label,
                    nvl(lr_equal_exception,0) lr_equal_exception,
                    nvl(lmv_equal_exception,0) lmv_equal_exception,
                    nvl(sr_equal_exception,0) sr_equal_exception,
                    nvl(smv_equal_exception,0) smv_equal_exception
             from temp_stockotbdcheckexception
          """
    finalDf = self.sparkSession.sql(sql.format(enddate))
    self._cal_detail(finalDf, enddate)
    self._stat_report(finalDf, enddate)
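# Note on the subtractions above: the T-1 run is re-read with type='unclose'
# and "_1"-suffixed column names, so nvl(x,0.0)-nvl(x_1,0.0) isolates the
# day-T increment of each cumulative figure before the o-vs-tbd comparison.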
def daily_compute(self, startdate=None, enddate=None):
    # cal: o_return, o_exception_label, dc_return, dc_exception_label
    # Fetch day-T data.
    df_o, df_o_long, df_o_short = self._cal_return(enddate)
    startdate = get_date(self.date_order, self.order_date, enddate, -1)
    df_o_1, df_o_long_1, df_o_short_1 = \
        self._cal_return(startdate, oreturn="o_return_1",
                         oept="o_exception_label_1",
                         ounclose="o_unclose_amt_1",
                         type='unclose')
    df_dc_long = self._cal_dc(enddate, 'long_related')
    df_dc_short = self._cal_dc(enddate, 'short_related')
    df_dc = df_dc_long.union(df_dc_short).groupBy(
        ["trade_id", "secu_acc_id", "prd_no"]).agg(
        fn.sum("dc_return").alias("dc_return"),
        fn.max("dc_exception_label").alias("dc_exception_label"))
    # cal: long/short o-vs-dc consistency flags
    df1 = full_outer_join(df_o, df_o_1,
                          ["trade_id", "secu_acc_id", "prd_no"])
    df1.registerTempTable("temp_o_return_table")
    sqlOTemp = """
        select trade_id, secu_acc_id, prd_no,
               nvl(o_return,0.0)-nvl(o_return_1,0.0) {1},
               nvl(o_unclose_amt,0.0) {2},
               nvl(o_exception_label,o_exception_label_1) o_exception_label
        from {0}
    """
    dfOreturn = self.sparkSession.sql(
        sqlOTemp.format("temp_o_return_table", "o_return", "o_unclose_amt"))
    dfOLong = full_outer_join(df_o_long, df_o_long_1,
                              ["trade_id", "secu_acc_id", "prd_no"])
    dfOLong.registerTempTable("temp_o_long_return_table")
    dfOlongreturn = self.sparkSession.sql(
        sqlOTemp.format("temp_o_long_return_table",
                        "o_return", "o_unclose_amt"))
    dfOShort = full_outer_join(df_o_short, df_o_short_1,
                               ["trade_id", "secu_acc_id", "prd_no"])
    dfOShort.registerTempTable("temp_o_short_return_table")
    dfOShortreturn = self.sparkSession.sql(
        sqlOTemp.format("temp_o_short_return_table",
                        "o_return", "o_unclose_amt"))
    dfall = full_outer_join(dfOreturn, df_dc,
                            ["trade_id", "secu_acc_id", "prd_no"])
    dfall.registerTempTable("temp_all_return")
    sql_all_return = """
        select trade_id, secu_acc_id, prd_no,
               nvl(o_return,0) o_return,
               nvl(dc_return,0) dc_return,
               nvl(o_exception_label,0) o_exception_label,
               nvl(dc_exception_label,0) dc_exception_label
        from {0}
    """.format("temp_all_return")
    dfall = self.sparkSession.sql(sql_all_return)
    dfLong = full_outer_join(dfOlongreturn, df_dc_long,
                             ["trade_id", "secu_acc_id", "prd_no"])
    dfLong.registerTempTable("temp_all_long_return")
    sqlOcTemp = """
        select trade_id, secu_acc_id, prd_no,
               case when abs(nvl(o_return,0.0)-nvl(dc_return,0.0))<=0.01
                    then 0 else 1 end {1},
               case when abs(nvl(o_unclose_amt,0.0)-nvl(dc_unclose_amt,0.0))<=0.01
                    then 0 else 1 end {2}
        from {0}
    """
    sqlOcLong = sqlOcTemp.format("temp_all_long_return",
                                 "lr_equal_exception",
                                 "lmv_equal_exception")
    df_oc_long = self.sparkSession.sql(sqlOcLong)
    dfShort = full_outer_join(dfOShortreturn, df_dc_short,
                              ["trade_id", "secu_acc_id", "prd_no"])
    dfShort.registerTempTable("temp_all_short_return")
    sqlOcShort = sqlOcTemp.format("temp_all_short_return",
                                  "sr_equal_exception",
                                  "smv_equal_exception")
    df_oc_short = self.sparkSession.sql(sqlOcShort)
    df2 = full_outer_join(df_oc_long, df_oc_short,
                          ["trade_id", "secu_acc_id", "prd_no"])
    df3 = full_outer_join(dfall, df2,
                          ["trade_id", "secu_acc_id", "prd_no"])
    df3.registerTempTable("temp_stockodccheckexception")
    sql = """select trade_id,
                    secu_acc_id,
                    '{0}' busi_date,
                    prd_no,
                    nvl(o_return,0) o_return,
                    nvl(o_exception_label,0) o_exception_label,
                    nvl(dc_return,0) dc_return,
                    nvl(dc_exception_label,0) dc_exception_label,
                    nvl(lr_equal_exception,0) lr_equal_exception,
                    nvl(lmv_equal_exception,0) lmv_equal_exception,
                    nvl(sr_equal_exception,0) sr_equal_exception,
                    nvl(smv_equal_exception,0) smv_equal_exception
             from temp_stockodccheckexception
          """.format(enddate)
    finalDf = self.sparkSession.sql(sql)
    self._cal_detail(finalDf, enddate)
    self._stat_report(finalDf, enddate)
def daily_compute(self, startdate=None, enddate=None):
    df_o_close = self._cal_data(enddate, self.fdata,
                                self.stock_close_o_trade_long_data,
                                self.stock_close_o_trade_short_data,
                                'o_close_rr_max', 'o_close_rr_min',
                                'o_close_wt_max', 'o_close_wt_min')
    df_o_unclose = self._cal_data(enddate, self.fdata,
                                  self.stock_unclose_o_trade_long_data,
                                  self.stock_unclose_o_trade_short_data,
                                  'o_unclose_rr_max', 'o_unclose_rr_min',
                                  'o_unclose_wt_max', 'o_unclose_wt_min')
    df1 = full_outer_join(df_o_close, df_o_unclose,
                          ["busi_date", "exception_label", "trd_type"])\
        .persist(StorageLevel.DISK_ONLY)
    df_c_close = self._cal_data(enddate, self.fdata,
                                self.stock_close_c_trade_long_data,
                                self.stock_close_c_trade_short_data,
                                'c_close_rr_max', 'c_close_rr_min',
                                'c_close_wt_max', 'c_close_wt_min')
    df_c_unclose = self._cal_data(enddate, self.fdata,
                                  self.stock_unclose_c_trade_long_data,
                                  self.stock_unclose_c_trade_short_data,
                                  'c_unclose_rr_max', 'c_unclose_rr_min',
                                  'c_unclose_wt_max', 'c_unclose_wt_min')
    df2 = full_outer_join(df_c_close, df_c_unclose,
                          ["busi_date", "exception_label", "trd_type"])\
        .persist(StorageLevel.DISK_ONLY)
    df_day_close = self._cal_data(enddate, self.fdata,
                                  self.stock_close_tradebyday_long_data,
                                  self.stock_close_tradebyday_short_data,
                                  'tbd_close_rr_max', 'tbd_close_rr_min',
                                  'tbd_close_wt_max', 'tbd_close_wt_min')
    df_day_unclose = self._cal_data(enddate, self.fdata,
                                    self.stock_unclose_tradebyday_long_data,
                                    self.stock_unclose_tradebyday_short_data,
                                    'tbd_unclose_rr_max',
                                    'tbd_unclose_rr_min',
                                    'tbd_unclose_wt_max',
                                    'tbd_unclose_wt_min')
    df3 = full_outer_join(df_day_close, df_day_unclose,
                          ["busi_date", "exception_label", "trd_type"])\
        .persist(StorageLevel.DISK_ONLY)
    df_prd_close = self._cal_data(enddate, self.fdata,
                                  self.stock_close_prd_long_data,
                                  self.stock_close_prd_short_data,
                                  'prd_close_rr_max', 'prd_close_rr_min',
                                  'prd_close_wt_max', 'prd_close_wt_min',
                                  wt='holding_term')
    df_prd_unclose = self._cal_data(enddate, self.fdata,
                                    self.stock_unclose_prd_long_data,
                                    self.stock_unclose_prd_short_data,
                                    'prd_unclose_rr_max',
                                    'prd_unclose_rr_min',
                                    'prd_unclose_wt_max',
                                    'prd_unclose_wt_min',
                                    wt='holding_term')
    df4 = full_outer_join(df_prd_close, df_prd_unclose,
                          ["busi_date", "exception_label", "trd_type"])\
        .persist(StorageLevel.DISK_ONLY)
    df12 = full_outer_join(df1, df2,
                           ["busi_date", "exception_label", "trd_type"])
    df34 = full_outer_join(df3, df4,
                           ["busi_date", "exception_label", "trd_type"])
    df = full_outer_join(df12, df34,
                         ["busi_date", "exception_label", "trd_type"])\
        .persist(StorageLevel.DISK_ONLY)
    df.registerTempTable("tmp_stocktrdrrcheckexception")
    sql = """
        select exception_label,
               trd_type,
               busi_date,
               nvl(o_close_rr_max,0) o_close_rr_max,
               nvl(o_close_rr_min,0) o_close_rr_min,
               nvl(o_unclose_rr_max,0) o_unclose_rr_max,
               nvl(o_unclose_rr_min,0) o_unclose_rr_min,
               nvl(c_close_rr_max,0) c_close_rr_max,
               nvl(c_close_rr_min,0) c_close_rr_min,
               nvl(c_unclose_rr_max,0) c_unclose_rr_max,
               nvl(c_unclose_rr_min,0) c_unclose_rr_min,
               nvl(tbd_close_rr_max,0) tbd_close_rr_max,
               nvl(tbd_close_rr_min,0) tbd_close_rr_min,
               nvl(tbd_unclose_rr_max,0) tbd_unclose_rr_max,
               nvl(tbd_unclose_rr_min,0) tbd_unclose_rr_min,
               nvl(prd_close_rr_max,0) prd_close_rr_max,
               nvl(prd_close_rr_min,0) prd_close_rr_min,
               nvl(prd_unclose_rr_max,0) prd_unclose_rr_max,
               nvl(prd_unclose_rr_min,0) prd_unclose_rr_min,
               nvl(o_close_wt_max,0) o_close_wt_max,
               nvl(o_close_wt_min,0) o_close_wt_min,
               nvl(o_unclose_wt_max,0) o_unclose_wt_max,
               nvl(o_unclose_wt_min,0) o_unclose_wt_min,
               nvl(c_close_wt_max,0) c_close_wt_max,
               nvl(c_close_wt_min,0) c_close_wt_min,
               nvl(c_unclose_wt_max,0) c_unclose_wt_max,
               nvl(c_unclose_wt_min,0) c_unclose_wt_min,
               nvl(tbd_close_wt_max,0) tbd_close_wt_max,
               nvl(tbd_close_wt_min,0) tbd_close_wt_min,
               nvl(tbd_unclose_wt_max,0) tbd_unclose_wt_max,
               nvl(tbd_unclose_wt_min,0) tbd_unclose_wt_min,
               nvl(prd_close_wt_max,0) prd_close_wt_max,
               nvl(prd_close_wt_min,0) prd_close_wt_min,
               nvl(prd_unclose_wt_max,0) prd_unclose_wt_max,
               nvl(prd_unclose_wt_min,0) prd_unclose_wt_min
        from tmp_stocktrdrrcheckexception
    """
    finalDf = self.sparkSession.sql(sql).repartition(5)
    save_data(self.sparkSession, self.adata, self.taget_table, enddate,
              finalDf)
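# save_data is a shared persistence helper. The assumed contract (a sketch
# inferred from this call, not the real implementation): write finalDf into
# the busi_date=enddate partition of adata.<taget_table>, e.g. roughly:
#   def save_data(spark, db, table, busi_date, df):
#       df.write.insertInto("%s.%s" % (db, table), overwrite=True)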