def do_action(self, action, pre_portfolio_value, only_update):
    """Run one trading step for the single tracked code.

    The order prices are derived from ``action`` first. The split rate for
    ``self.code`` is then applied to the portfolio, orders are placed unless
    ``only_update`` is set, and finally the portfolio is refreshed with the
    day's close price.

    Args:
        action: Raw action passed straight to ``self.get_action_price``.
        pre_portfolio_value: Portfolio value before this step; forwarded to
            ``self.portfolio.update_after_trade``.
        only_update: When true, no orders are placed and the returned price
            lists are filled with zeros.

    Returns:
        ``(sell_prices, buy_prices)``: two single-element lists holding the
        order prices, or two lists of ``self.n`` zeros when ``only_update``
        is true.
    """
    sell_quote, buy_quote = self.get_action_price(action)
    if only_update:
        sell_prices, buy_prices = [0] * self.n, [0] * self.n
    else:
        sell_prices, buy_prices = [sell_quote], [buy_quote]

    split_rate = self.market.get_divide_rate(self.code, self.current_date)
    logger.debug("divide_rate: %.4f" % split_rate)
    self.portfolio.update_before_trade(split_rate)

    net_cash = 0
    if not only_update:
        # Sell the whole position first (target percent 0.0) ...
        sold_cash, _ = self.sell(0, sell_quote, 0.0)
        # ... then buy back with a full-position target (1.0).
        bought_cash, _ = self.buy(0, buy_quote, 1.0)
        net_cash = bought_cash + sold_cash

    message = "do_action: time_id: %d, %s, cash_change: %.1f" % (
        self.current_time_id, self.code, net_cash)
    logger.debug(message)

    closing = self.market.get_close_price(self.code, self.current_date)
    self.portfolio.update_after_trade(
        close_price=closing,
        cash_change=net_cash,
        pre_portfolio_value=pre_portfolio_value)
    return sell_prices, buy_prices
def sell(self, id, price, target_pct):
    """Try to sell the code at index ``id`` down to ``target_pct``.

    The bid is first validated by ``self.market.sell_check``. If it is
    accepted, the matching portfolio is asked to move to ``target_pct`` at
    the returned fill price, ``self.cash`` is adjusted by the resulting cash
    change, and a non-empty fill is recorded in ``self.info["orders"]``.

    Args:
        id: Index into ``self.codes`` / ``self.portfolios`` (the name shadows
            the builtin but is kept for interface compatibility).
        price: Bid price for the sell order.
        target_pct: Target percent passed to ``order_target_percent``.

    Returns:
        ``(cash_change, ok)``; ``cash_change`` is ``0`` when the market check
        rejects the order.
    """
    code = self.codes[id]
    logger.debug("sell %s, bid price: %.2f" % (code, price))
    ok, price = self.market.sell_check(code=code,
                                       datestr=self.current_date,
                                       bid_price=price)
    if not ok:
        return 0, ok

    cash_change, price, vol = self.portfolios[id].order_target_percent(
        percent=target_pct,
        price=price,
        pre_portfolio_value=self.portfolio_value,
        current_cash=self.cash)
    self.cash += cash_change
    if vol != 0:
        # Only orders that actually traded some volume are recorded.
        self.info["orders"].append([
            "sell", code,
            round(cash_change, 1),
            round(price, 2), vol
        ])
    # Log the target that was really requested; it used to be hard-coded to
    # 0 even when the caller asked for a different target.
    logger.debug("sell %s target_percent: %s, cash_change: %.3f" %
                 (code, target_pct, cash_change))
    return cash_change, ok
def buy(self, id, price, target_pct):
    """Try to buy the code at index ``id`` up to ``target_pct``.

    The bid is first validated by ``self.market.buy_check``. If it is
    accepted, the matching portfolio is asked to move to ``target_pct`` at
    the returned fill price, ``self.cash`` is adjusted by the resulting cash
    change, and a non-empty fill is recorded in ``self.info["orders"]``.

    Args:
        id: Index into ``self.codes`` / ``self.portfolios`` (the name shadows
            the builtin but is kept for interface compatibility).
        price: Bid price for the buy order.
        target_pct: Target percent passed to ``order_target_percent``.

    Returns:
        ``(cash_change, ok)``; ``cash_change`` is ``0`` when the market check
        rejects the order.
    """
    code = self.codes[id]
    logger.debug("buy %s, bid_price: %.2f" % (code, price))
    ok, price = self.market.buy_check(code=code,
                                      datestr=self.current_date,
                                      bid_price=price)
    # Remember the cash level before the order so the log can show it.
    pre_cash = self.cash
    if not ok:
        return 0, ok

    cash_change, price, vol = self.portfolios[id].order_target_percent(
        percent=target_pct,
        price=price,
        pre_portfolio_value=self.portfolio_value,
        current_cash=self.cash)
    self.cash += cash_change
    if vol != 0:
        # Only orders that actually traded some volume are recorded.
        self.info["orders"].append(
            ["buy", code,
             round(cash_change, 1),
             round(price, 2), vol])
    # "%.1f" (one decimal place); the previous "%1.f" spec meant width 1 and
    # precision 0, which dropped the fractional part of the cash change.
    logger.debug("buy %s cash: %.1f, cash_change: %.1f" %
                 (code, pre_cash, cash_change))
    return cash_change, ok
def get_action_price(self, v_price, code):
    """Turn a price signal into an order price for ``code``.

    The signal is multiplied by 0.1 (so [-1, 1] maps to [-0.1, 0.1]) and
    applied as a relative offset to the previous close returned by
    ``self.market.get_pre_close_price``.

    Args:
        v_price: Price signal; the scaling comment in the code expects it to
            lie in [-1, 1].
        code: Code whose previous close is looked up for the current date.

    Returns:
        The offset price rounded to 2 decimals.
    """
    base_price = self.market.get_pre_close_price(code, self.current_date)
    logger.debug("%s %s pre_close: %.2f" %
                 (self.current_date, code, base_price))
    # Scale [-1, 1] to [-0.1, 0.1] and apply it around the previous close.
    return round(base_price * (1 + v_price * 0.1), 2)
def get_action_price(self, action, code):
    """Turn a (sell, buy) signal pair into a pair of order prices.

    Each signal is multiplied by 0.1 (so [-1, 1] maps to [-0.1, 0.1]) and
    applied as a relative offset to the previous close returned by
    ``self.market.get_pre_close_price``.

    Args:
        action: Two-element sequence ``[v_sell, v_buy]``.
        code: Code whose previous close is looked up for the current date.

    Returns:
        ``(sell_price, buy_price)``, each rounded to 2 decimals.
    """
    base_price = self.market.get_pre_close_price(code, self.current_date)
    logger.debug("%s %s pre_close: %.2f" %
                 (self.current_date, code, base_price))
    v_sell, v_buy = action
    # Scale [-1, 1] to [-0.1, 0.1] and apply it around the previous close.
    sell_offset = v_sell * 0.1
    buy_offset = v_buy * 0.1
    return (round(base_price * (1 + sell_offset), 2),
            round(base_price * (1 + buy_offset), 2))
def do_action(self, action, pre_portfolio_value, only_update):
    """
    Run one trading step over every tracked code.

    ``action`` is read as a flat sequence with four values per code, in this
    order: sell price signal, sell target signal, buy price signal, buy
    target signal.

    only_update: only refresh the portfolios and place no orders,
        i.e. the buy_and_hold strategy.

    Returns ``(sell_prices, buy_prices)``: one order price per code, or two
    lists of ``self.n`` zeros when ``only_update`` is true.
    """
    total_cash_change = 0

    # Apply split information before any order is placed.
    for idx in range(self.n):
        symbol = self.codes[idx]
        split_rate = self.market.get_divide_rate(symbol, self.current_date)
        self.portfolios[idx].update_before_trade(split_rate)

    if only_update:
        sell_prices, buy_prices = [0] * self.n, [0] * self.n
    else:
        sell_prices, buy_prices = [], []

        # Sell pass: runs for every code before any buy is attempted.
        for idx in range(self.n):
            symbol = self.codes[idx]
            start = 4 * idx
            signals = action[start:start + 4]
            quote = self.get_action_price(signals[0], symbol)
            sell_prices.append(quote)
            pct = self.get_action_target_pct(signals[1])
            delta, _ = self.sell(idx, quote, pct)
            total_cash_change += delta

        # Buy pass.
        for idx in range(self.n):
            symbol = self.codes[idx]
            start = 4 * idx
            signals = action[start:start + 4]
            quote = self.get_action_price(signals[2], symbol)
            buy_prices.append(quote)
            pct = self.get_action_target_pct(signals[3])
            delta, _ = self.buy(idx, quote, pct)
            total_cash_change += delta

    logger.debug("do_action: time_id: %d, cash_change: %.1f" %
                 (self.current_time_id, total_cash_change))

    # Refresh every portfolio with its close price; each one receives the
    # same aggregate cash change.
    for idx in range(self.n):
        symbol = self.codes[idx]
        closing = self.market.get_close_price(symbol, self.current_date)
        self.portfolios[idx].update_after_trade(
            close_price=closing,
            cash_change=total_cash_change,
            pre_portfolio_value=pre_portfolio_value)
    return sell_prices, buy_prices
def do_action(self, action, pre_portfolio_value, only_update):
    """Run one trading step over every tracked code.

    ``action`` is read as a flat sequence with two values per code, which are
    handed to ``self.get_action_price`` to obtain that code's sell and buy
    prices. Every code is sold to a target of 0 first and then bought back to
    ``self.avg_percent``.

    Args:
        action: Flat sequence of per-code signal pairs.
        pre_portfolio_value: Portfolio value before this step; forwarded to
            each portfolio's ``update_after_trade``.
        only_update: When true, no orders are placed and the returned price
            lists are filled with zeros.

    Returns:
        ``(sell_prices, buy_prices)``: one order price per code, or two lists
        of ``self.n`` zeros when ``only_update`` is true.
    """
    total_cash_change = 0

    # Apply split information before any order is placed.
    for idx in range(self.n):
        symbol = self.codes[idx]
        split_rate = self.market.get_divide_rate(symbol, self.current_date)
        self.portfolios[idx].update_before_trade(split_rate)

    if only_update:
        sell_prices, buy_prices = [0] * self.n, [0] * self.n
    else:
        sell_prices, buy_prices = [], []

        # Sell pass: runs for every code before any buy is attempted.
        for idx in range(self.n):
            symbol = self.codes[idx]
            start = 2 * idx
            pair = action[start:start + 2]
            quote, _ = self.get_action_price(pair, symbol)
            sell_prices.append(quote)
            delta, _ = self.sell(idx, quote, 0)
            total_cash_change += delta

        # Buy pass.
        for idx in range(self.n):
            symbol = self.codes[idx]
            start = 2 * idx
            pair = action[start:start + 2]
            _, quote = self.get_action_price(pair, symbol)
            buy_prices.append(quote)
            delta, _ = self.buy(idx, quote, self.avg_percent)
            total_cash_change += delta

    logger.debug("do_action: time_id: %d, cash_change: %.1f" %
                 (self.current_time_id, total_cash_change))

    # Refresh every portfolio with its close price; each one receives the
    # same aggregate cash change.
    for idx in range(self.n):
        symbol = self.codes[idx]
        closing = self.market.get_close_price(symbol, self.current_date)
        self.portfolios[idx].update_after_trade(
            close_price=closing,
            cash_change=total_cash_change,
            pre_portfolio_value=pre_portfolio_value)
    return sell_prices, buy_prices
def buy_check(self, code='', datestr='', bid_price=None):
    """Decide whether a buy order at ``bid_price`` fills on ``datestr``.

    Args:
        code: Key into ``self.codes_history``.
        datestr: Row label of the trading day in that code's history.
        bid_price: Price offered by the buyer.

    Returns:
        ``(ok, fill_price)``. ``ok`` is false and the price is ``0`` when the
        code is suspended, when it is locked at the upper limit
        (``low == high`` with ``pct_chg`` above ``self.top_pct_change``), or
        when the bid is not at or above the day's low. Otherwise the order
        fills at the bid, capped at the day's high.
    """
    # Suspended: nothing can trade.
    if self.is_suspended(code, datestr):
        return False, 0

    # Fetch the day's bar for this code (the open price is not used).
    _open, high, low, pct_change = self.codes_history[code].loc[
        datestr, ["open", "high", "low", "pct_chg"]]

    # Locked at the upper price limit: cannot buy.
    if low == high and pct_change > self.top_pct_change:
        logger.debug(u"buy_check %s %s 涨停无法买进" % (code, datestr))
        return False, 0

    # A bid below the day's low cannot fill. Written as a negated ">=" so a
    # NaN low is also rejected, instead of falling off the end of the
    # function and returning None.
    if not bid_price >= low:
        return False, 0

    # A bid at or above the low fills, at no more than the day's high.
    return True, min(bid_price, high)
def step(self, action, only_update=False):
    """
    Advance the environment by one step.

    The action is executed through ``self.do_action``, the portfolio, value
    percentages and reward are refreshed, and the next observation is fetched
    with ``self._next``. ``self.done`` is set once the current date equals
    the last entry of ``self.dates``.

    When only_update is True the step represents the buy_and_hold strategy,
    which can serve as a baseline.

    Returns ``(obs, reward, done, info, rewards)``; ``info`` holds the
    recorded orders, the current date, the portfolio value relative to
    ``self.investment``, the daily pnl and the reward.
    """
    self.action = action
    self.info = {"orders": []}

    banner = "=" * 50
    logger.debug(banner + "%s" % self.current_date + banner)
    logger.debug("current_time_id: %d, portfolio: %.1f" %
                 (self.current_time_id, self.portfolio_value))
    logger.debug("step action: %s" % str(action))

    # Reached the last day.
    if self.current_date == self.dates[-1]:
        self.done = True

    value_before = self.portfolio_value
    sell_prices, buy_prices = self.do_action(action, value_before,
                                             only_update)
    self.update_portfolio()
    self.update_value_percent()
    self.update_reward(sell_prices, buy_prices)
    self.obs = self._next()

    # Built after _next(), so it reflects the state at that point.
    self.info = dict(
        orders=self.info["orders"],
        current_date=self.current_date,
        portfolio_value=round(self.portfolio_value / self.investment, 3),
        daily_pnl=round(self.daily_pnl, 1),
        reward=self.reward,
    )
    return self.obs, self.reward, self.done, self.info, self.rewards
def sell_check(self, code='', datestr='', bid_price=None):
    """Decide whether a sell order at ``bid_price`` fills on ``datestr``.

    Args:
        code: Key into ``self.codes_history``.
        datestr: Row label of the trading day in that code's history.
        bid_price: Price asked by the seller.

    Returns:
        ``(ok, fill_price)``. ``ok`` is false and the price is ``0`` when the
        code is suspended, when it is locked at the lower limit
        (``low == high`` with ``pct_chg`` below ``-self.top_pct_change``), or
        when the bid is not at or below the day's high. Otherwise the order
        fills at the bid, floored at the day's low.
    """
    # Suspended: nothing can trade.
    if self.is_suspended(code, datestr):
        return False, 0

    # Fetch the day's bar for this code (the open price is not used).
    _open, high, low, pct_change = self.codes_history[code].loc[
        datestr, ["open", "high", "low", "pct_chg"]]

    # Locked at the lower price limit: cannot sell.
    if low == high and pct_change < -self.top_pct_change:
        logger.debug(u"sell_check %s %s 跌停无法卖出" % (code, datestr))
        return False, 0

    # A bid above the day's high cannot fill. Written as a negated "<=" so a
    # NaN high is also rejected, instead of falling off the end of the
    # function and returning None.
    if not bid_price <= high:
        return False, 0

    # A bid between low and high fills at the bid.
    # NOTE: a bid below the day's low still fills, at the low.
    return True, max(bid_price, low)