async def sync_calendar():
    """Fetch all trade days from the upstream server, then derive the week
    frames and month frames from them.

    Returns:
        None if the trade days cannot be fetched.
    """
    trade_days = await aq.get_all_trade_days()
    if trade_days is None or len(trade_days) == 0:
        logger.warning("failed to fetch trade days.")
        return None

    tf.day_frames = [tf.date2int(x) for x in trade_days]

    weeks = []
    last = trade_days[0]
    for cur in trade_days:
        # a new week starts when the weekday rolls over, or after a gap of 7+
        # days; the previous `last` is then that week's closing trade day
        if cur.weekday() < last.weekday() or (cur - last).days >= 7:
            weeks.append(last)
        last = cur
    if weeks[-1] < last:
        weeks.append(last)

    tf.week_frames = [tf.date2int(x) for x in weeks]
    await cache.save_calendar("week_frames", map(tf.date2int, weeks))

    months = []
    last = trade_days[0]
    for cur in trade_days:
        # a new month starts when the day-of-month rolls over
        if cur.day < last.day:
            months.append(last)
        last = cur
    months.append(last)

    tf.month_frames = [tf.date2int(x) for x in months]
    await cache.save_calendar("month_frames", map(tf.date2int, months))

    logger.info("trade_days is updated to %s", trade_days[-1])
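# A minimal, self-contained sketch of the week-boundary logic above, using
# hand-picked datetime.date values in place of the real trade-day calendar:
import datetime

days = [datetime.date(2020, 7, d) for d in (1, 2, 3, 6, 7, 8, 9, 10, 13)]
weeks, last = [], days[0]
for cur in days:
    # Fri -> Mon makes weekday() drop, closing out the previous week
    if cur.weekday() < last.weekday() or (cur - last).days >= 7:
        weeks.append(last)
    last = cur
if weeks[-1] < last:
    weeks.append(last)
print(weeks)  # each week's last trade day: Jul 3, Jul 10, plus trailing Jul 13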
async def enter_stock_pool(self, code, frame, frame_type: FrameType, **kwargs):
    if frame_type in tf.day_level_frames:
        iframe = tf.date2int(frame)
    else:
        iframe = tf.time2int(frame)

    kwargs.update({"frame_type": frame_type.value})
    await cache.sys.hset(
        f"plots.{self.name}.pool", f"{iframe}:{code}", json.dumps(kwargs)
    )
    await mm.evaluate(
        "momentum",
        {"name": "frame", "frame_type": "30m"},
        code=code,
        frame_type=frame_type.value,
        flag="both",
        win=5,
    )
    kwargs.update(
        {
            "code": code,
            "frame": iframe,
            "plot": self.name,
            "plot_name": self.display_name,
        }
    )
    await emit.emit(Events.plot_pool, kwargs)
def get_next_fire_time(self, previous_fire_time, now):
    if previous_fire_time is not None:
        fire_time = previous_fire_time + self.interval
    else:
        fire_time = now

    # if fire_time falls on a non-trading day, fire at 9:30 on the next trade day
    if tf.date2int(fire_time.date()) not in tf.day_frames:
        ft = tf.day_shift(now, 1)
        fire_time = datetime.datetime(ft.year, ft.month, ft.day, 9, 30)
        return fire_time

    # clamp fire_time into the trading session, in minutes since midnight:
    # 570 = 9:30 (open), 690 = 11:30, 780 = 13:00 (lunch break), 900 = 15:00 (close)
    minutes = fire_time.hour * 60 + fire_time.minute
    if minutes < 570:
        fire_time = fire_time.replace(hour=9, minute=30, second=0, microsecond=0)
    elif 690 < minutes < 780:
        fire_time = fire_time.replace(hour=13, minute=0, second=0, microsecond=0)
    elif minutes > 900:
        ft = tf.day_shift(fire_time, 1)
        fire_time = datetime.datetime(ft.year, ft.month, ft.day, 9, 30)

    return fire_time
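# The session-boundary arithmetic above as a standalone helper. Unlike the
# trigger, this sketch rolls to the next calendar day instead of the next
# trade day (it has no tf.day_shift available):
import datetime

def clamp_to_session(t: datetime.datetime) -> datetime.datetime:
    minutes = t.hour * 60 + t.minute
    if minutes < 570:  # before the morning open
        return t.replace(hour=9, minute=30, second=0, microsecond=0)
    if 690 < minutes < 780:  # lunch break
        return t.replace(hour=13, minute=0, second=0, microsecond=0)
    if minutes > 900:  # after the close
        nxt = t + datetime.timedelta(days=1)
        return nxt.replace(hour=9, minute=30, second=0, microsecond=0)
    return t

print(clamp_to_session(datetime.datetime(2020, 8, 3, 12, 15)))  # 2020-08-03 13:00:00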
async def save_turnover(self, data: np.ndarray, date: datetime.date):
    frame = tf.date2int(date)
    pl = self.security.pipeline()
    for code, turnover in data:
        pl.hset(f"{code}:turnover", frame, turnover.item())
    await pl.execute()
async def _save_bars(
    self,
    code: str,
    bars: np.ndarray,
    frame_type: FrameType,
    head: Frame = None,
    tail: Frame = None,
):
    if frame_type not in [
        FrameType.MIN1,
        FrameType.MIN5,
        FrameType.MIN15,
        FrameType.MIN30,
        FrameType.MIN60,
    ]:
        head = tf.date2int(head or bars["frame"][0])
        tail = tf.date2int(tail or bars["frame"][-1])
        frame_convert_func = tf.date2int
    else:
        head = tf.time2int(head or bars["frame"][0])
        tail = tf.time2int(tail or bars["frame"][-1])
        frame_convert_func = tf.time2int

    pipeline = self.security.pipeline()
    # the cache is empty or an error occurred during syncing; save all bars
    key = f"{code}:{frame_type.value}"
    # docme: it takes 0.05 secs to save 1000 bars, compared to 0.19 secs with
    # the commented-out per-row version:
    # for row in bars:
    #     frame, o, h, l, c, v, a, fq = row
    #     frame = frame_convert_func(frame)
    #     value = f"{o:.2f} {h:.2f} {l:.2f} {c:.2f} {v} {a:.2f} {fq:.2f}"
    #     pipeline.hset(key, frame, value)
    hmset = {
        frame_convert_func(frame): f"{o:.2f} {h:.2f} {l:.2f} {c:.2f} {v} {a:.2f} {fq:.2f}"
        for frame, o, h, l, c, v, a, fq in bars
    }
    pipeline.hmset_dict(key, hmset)
    pipeline.hset(key, "head", head)
    pipeline.hset(key, "tail", tail)
    await pipeline.execute()
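# A hypothetical reader for the hash written above: each field stores a bar as
# the space-separated string "open high low close volume amount factor".
# decode_bar is not part of the codebase; it only illustrates the layout.
def decode_bar(value: str) -> dict:
    o, h, l, c, v, a, fq = value.split(" ")
    return {
        "open": float(o), "high": float(h), "low": float(l), "close": float(c),
        "volume": float(v), "amount": float(a), "factor": float(fq),
    }

print(decode_bar("10.00 10.50 9.80 10.20 100000 1020000.00 1.21"))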
async def _build_train_data(
    self, frame_type: FrameType, n: int, max_error: float = 0.01
):
    """Walk backwards from the latest qualifying date, iterating over the
    securities to extract features and labels, and build the dataset.

    Args:
        frame_type: the frame type to sample at
        n: the number of samples to collect
        max_error: the maximum fitting error accepted by feature extraction

    Returns:
        a list of [code, date, *features, label] records
    """
    watch_win = 5
    max_curve_len = 5
    max_ma_win = 20

    # y_stop = arrow.get('2020-7-24').date()
    y_stop = tf.floor(arrow.now(tz=cfg.tz), frame_type)
    y_start = tf.shift(y_stop, -watch_win + 1, frame_type)
    x_stop = tf.shift(y_start, -1, frame_type)
    x_start = tf.shift(x_stop, -(max_curve_len + max_ma_win - 1), frame_type)

    data = []
    while len(data) < n:
        for code in Securities().choose(['stock']):
            # for code in ['000601.XSHE']:
            try:
                sec = Security(code)
                x_bars = await sec.load_bars(x_start, x_stop, FrameType.DAY)
                y_bars = await sec.load_bars(y_start, y_stop, FrameType.DAY)

                # [a, b, axis] * 3
                x = self.extract_features(x_bars, max_error)
                if len(x) == 0:
                    continue
                # label: the best return over the watch window, relative to the
                # last close of the feature window
                y = np.max(y_bars['close']) / x_bars[-1]['close'] - 1
                if np.isnan(y):
                    continue

                feature = [code, tf.date2int(x_stop)]
                feature.extend(x)
                feature.append(y)  # keep the label as the last column
                data.append(feature)
            except Exception as e:
                logger.warning("Failed to extract features for %s (%s)", code, x_stop)
                logger.exception(e)

            if len(data) >= n:
                break
            if len(data) % 500 == 0:
                logger.info("got %s records.", len(data))

        # move the whole sampling window one trade day back
        y_stop = tf.day_shift(y_stop, -1)
        y_start = tf.day_shift(y_start, -1)
        x_stop = tf.day_shift(y_start, -1)
        x_start = tf.day_shift(x_start, -1)

    return data
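# The label above in isolation: the best forward return over the watch window,
# relative to the last close of the feature window (numbers are made up):
import numpy as np

x_close_last = 10.0
y_close = np.array([10.1, 10.6, 10.4, 10.2, 10.3])  # closes of the y-bars
y = np.max(y_close) / x_close_last - 1
print(round(y, 3))  # 0.06: a 6% best-case gain within the watch window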
async def get_turnover(
    self, code: Union[str, List[str]], date: datetime.date
) -> Union[List[float], float, None]:
    frame = tf.date2int(date)
    if isinstance(code, str):
        key = f"{code}:turnover"
        turnover = await self.security.hget(key, frame)
        if turnover is not None:
            return float(turnover)
    else:
        codes = code
        pl = self.security.pipeline()
        for code in codes:
            pl.hget(f"{code}:turnover", frame)
        recs = await pl.execute()
        return [float(rec) for rec in recs]

    return None
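# The turnover key layout shared by save_turnover and get_turnover, mocked
# with a plain dict: one hash per code, named "<code>:turnover", holding
# integer dates as fields. The store and helper here are illustrative only.
from typing import Optional

store = {"000001.XSHE:turnover": {20200803: "1.37", 20200804: "1.52"}}

def mock_get_turnover(code: str, frame: int) -> Optional[float]:
    value = store.get(f"{code}:turnover", {}).get(frame)
    return float(value) if value is not None else None

print(mock_get_turnover("000001.XSHE", 20200803))  # 1.37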
async def build_train_data(self, save_to: str, frame_type: str = '1d', n=10) -> List:
    await self.init()
    frame_type = FrameType(frame_type)
    data = await self._build_train_data(frame_type, n)

    date = tf.date2int(arrow.now().date())
    path = os.path.abspath(save_to)
    path = os.path.join(path, f"momemtum.{frame_type.value}.tsv")
    with open(path, "w") as f:
        cols = "code,date,a5,b5,err5,a10,b10,err10,a20,b20,err20,y".split(",")
        f.write("\t".join(cols))
        f.write("\n")
        for item in data:
            f.write("\t".join(map(str, item)))
            f.write("\n")

    return data
async def distribution(self):
    # counts of limit-up (zt) and limit-down (dt) stocks
    zt, dt = 0, 0
    codes = Securities().choose(['stock'])
    end = arrow.now(cfg.tz).floor('minute').datetime
    pct = []
    async for code, bars in Security.load_bars_batch(codes, end, 2, FrameType.DAY):
        c1, c0 = bars[-2:]['close']
        if (c0 + 0.01) / c1 - 1 > 0.1:
            zt += 1
        if (c0 - 0.01) / c1 - 1 < -0.1:
            dt += 1
        pct.append(c0 / c1 - 1)

    # distribution of daily price changes
    cuts = np.histogram(pct, bins=[-0.2, -0.1, -0.07, -0.03, 0, 0.03, 0.07, 0.1, 0.2])

    self.price_change_history.append((zt, dt, cuts))
    if len(self.price_change_history) == 8:
        self.price_change_history.pop(0)

    now = arrow.now(tz=cfg.tz)
    if now.hour >= 15:
        # use a separate name for the date key so it doesn't shadow the
        # limit-down counter `dt`
        idate = tf.date2int(now.date())
        await cache.sys.hset(
            f"glance{idate}",
            "distribution",
            # np.histogram returns ndarrays, which json can't serialize directly
            json.dumps({"zt": zt, "dt": dt, "cuts": cuts[0].tolist()}),
        )
    return zt, dt, cuts
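# np.histogram with explicit bin edges, as used above: nine edges make eight
# buckets of daily price change; counts[i] holds the changes falling in
# [edges[i], edges[i+1]). The sample data is synthetic.
import numpy as np

pct = [-0.11, -0.05, -0.01, 0.0, 0.02, 0.04, 0.08, 0.12]
counts, edges = np.histogram(
    pct, bins=[-0.2, -0.1, -0.07, -0.03, 0, 0.03, 0.07, 0.1, 0.2]
)
print(counts)  # [1 0 1 1 2 1 1 1]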
async def start_validation():
    """Partition the securities to validate evenly across CPUs and spawn one
    sub-process per CPU to run the validation. The validation range is taken
    from jobs.bars_validation.range.start and jobs.bars_validation.range.stop
    in the database; after each run, jobs.bars_validation.range.start is
    advanced to the last validated trade day. If the sub-processes report
    different stop days (e.g. one raised an exception), the smallest one wins.
    """
    global validation_errors, no_validation_error_days
    validation_errors = []

    secs = Securities()
    cpu_count = psutil.cpu_count()

    # to check if the range is right
    pl = cache.sys.pipeline()
    pl.get("jobs.bars_validation.range.start")
    pl.get("jobs.bars_validation.range.end")
    start, end = await pl.execute()

    if start is None:
        if cfg.omega.validation.start is None:
            logger.warning("start of validation is not specified, validation aborted.")
            return
        else:
            start = tf.date2int(arrow.get(cfg.omega.validation.start).date())
    else:
        start = int(start)

    if end is None:
        end = tf.date2int(tf.floor(arrow.now().date(), FrameType.DAY))
    else:
        end = int(end)

    assert start <= end

    no_validation_error_days = set(
        tf.day_frames[(tf.day_frames >= start) & (tf.day_frames <= end)]
    )

    # fixme: do validation per frame_type
    # fixme: test fail. Rewrite this before 0.6 releases
    codes = secs.choose(cfg.omega.sync)
    await cache.sys.delete("jobs.bars_validation.scope")
    await cache.sys.lpush("jobs.bars_validation.scope", *codes)

    logger.info("start validation %s secs from %s to %s.", len(codes), start, end)
    emit.register(Events.OMEGA_VALIDATION_ERROR, on_validation_error)

    t0 = time.time()
    code = (
        "from omega.core.sanity import do_validation_process_entry; "
        "do_validation_process_entry()"
    )
    procs = []
    for i in range(cpu_count):
        proc = subprocess.Popen([sys.executable, "-c", code], env=os.environ)
        procs.append(proc)

    timeout = 3600
    while timeout > 0:
        await asyncio.sleep(2)
        timeout -= 2
        for proc in procs:
            proc.poll()
        if all([proc.returncode is not None for proc in procs]):
            break

    if timeout <= 0:
        for proc in procs:
            try:
                os.kill(proc.pid, signal.SIGTERM)
            except Exception:
                pass

    # set next start point
    validation_days = set(
        tf.day_frames[(tf.day_frames >= start) & (tf.day_frames <= end)]
    )
    diff = validation_days - no_validation_error_days
    if len(diff):
        last_no_error_day = min(diff)
    else:
        last_no_error_day = end

    await cache.sys.set("jobs.bars_validation.range.start", last_no_error_day)
    elapsed = time.time() - t0
    logger.info(
        "Validation cost %s seconds, validation will start at %s next time",
        elapsed,
        last_no_error_day,
    )
async def do_validation(secs: List[str] = None, start: str = None, end: str = None):
    """Validate the bars of the securities listed in `secs` over the time range
    given by `start` and `end`.

    Args:
        secs: codes to validate; defaults to the queued scope in redis.
        start: the first trade day to validate; defaults to the stored range start.
        end: the last trade day to validate; defaults to today.
    """
    logger.info("start validation...")
    report = logging.getLogger("validation_report")

    cfg = cfg4py.init(get_config_dir(), False)
    await emit.start(engine=emit.Engine.REDIS, dsn=cfg.redis.dsn, start_server=True)
    await omicron.init()

    start = int(start or await cache.sys.get("jobs.bars_validation.range.start"))
    if end is None:
        end = tf.date2int(arrow.now().date())
    else:
        end = int(end)

    if secs is None:
        async def get_sec():
            return await cache.sys.lpop("jobs.bars_validation.scope")
    else:
        async def get_sec():
            return secs.pop() if len(secs) else None

    errors = 0
    while code := await get_sec():
        try:
            for day in tf.day_frames[(tf.day_frames >= start) & (tf.day_frames <= end)]:
                expected = await get_checksum(day)
                if expected and expected.get(code):
                    actual = await calc_checksums(tf.int2date(day), [code])
                    d1 = actual.get(code)
                    d2 = expected.get(code)

                    missing1 = d2.keys() - d1  # local has no checksum
                    missing2 = d1.keys() - d2  # remote has no checksum
                    mismatch = {k for k in d1.keys() & d2 if d1[k] != d2[k]}

                    for k in missing1:
                        info = (
                            ValidationError.LOCAL_MISS,
                            day,
                            code,
                            k,
                            d1.get(k),
                            d2.get(k),
                        )
                        report.info("%s,%s,%s,%s,%s,%s", *info)
                        await emit.emit(Events.OMEGA_VALIDATION_ERROR, info)
                    for k in missing2:
                        info = (
                            ValidationError.REMOTE_MISS,
                            day,
                            code,
                            k,
                            d1.get(k),
                            d2.get(k),
                        )
                        report.info("%s,%s,%s,%s,%s,%s", *info)
                        await emit.emit(Events.OMEGA_VALIDATION_ERROR, info)
                    for k in mismatch:
                        info = (
                            ValidationError.MISMATCH,
                            day,
                            code,
                            k,
                            d1.get(k),
                            d2.get(k),
                        )
                        report.info("%s,%s,%s,%s,%s,%s", *info)
                        await emit.emit(Events.OMEGA_VALIDATION_ERROR, info)
                else:
                    logger.error("checksum for %s not found.", day)
                    info = (ValidationError.NO_CHECKSUM, day, None, None, None, None)
                    report.info("%s,%s,%s,%s,%s,%s", *info)
                    await emit.emit(Events.OMEGA_VALIDATION_ERROR, info)
        except Exception as e:
            logger.exception(e)
            errors += 1
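# The three-way checksum comparison above, distilled to plain dicts: keys only
# in the remote checksum (LOCAL_MISS), keys only in the local one
# (REMOTE_MISS), and shared keys whose values disagree (MISMATCH).
local = {"1d": "abc", "30m": "x1"}
remote = {"1d": "abc", "30m": "x2", "60m": "zz"}

local_miss = remote.keys() - local    # we have no checksum for these
remote_miss = local.keys() - remote   # upstream has no checksum for these
mismatch = {k for k in local.keys() & remote.keys() if local[k] != remote[k]}
print(local_miss, remote_miss, mismatch)  # {'60m'} set() {'30m'}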
async def scan(
    self,
    end: Frame = None,
    frame_type: FrameType = FrameType.DAY,
    codes=None,
    adv_limit=0.3,
):
    """
    Args:
        end: the last frame to scan up to; defaults to the latest day frame.
        adv_limit: skip stocks that gained more than adv_limit within the
            last `win` frames.

    Returns:
        a list of [display_name, end, cross_day, faf, grl, ggl] records.
    """
    win = 20
    secs = Securities()
    end = end or tf.floor(arrow.now(), FrameType.DAY)

    results = []
    holdings = await cache.sys.smembers("holdings")
    for i, code in enumerate(secs.choose(['stock'])):
        try:
            if code in holdings:  # already holding; don't track or evaluate
                continue

            sec = Security(code)
            if sec.code.startswith('688') or sec.display_name.find('ST') != -1:
                continue

            start = tf.day_shift(end, -270)
            bars = await sec.load_bars(start, end, FrameType.DAY)
            close = bars['close']
            ma5 = signal.moving_average(close, 5)
            ma250 = signal.moving_average(close, 250)

            cross, idx = signal.cross(ma5[-win:], ma250[-win:])
            cross_day = bars[-win + idx]['frame']
            if cross != 1:
                continue

            ma20 = signal.moving_average(close, 20)
            ma120 = signal.moving_average(close, 120)

            # if the 20-day (monthly) MA or the 120-day MA is still overhead,
            # don't fire; e.g. 广州浪奇 2020-7-23, 泛海控股 2020-8-3
            if close[-1] < ma120[-1] or close[-1] < ma20[-1]:
                continue

            # count long bullish candles over the last 20 days; if there are
            # none, the rally hasn't started yet, so skip
            grl, ggl = features.count_long_body(bars[-20:])
            if grl == 0:
                continue

            # # net buy volume since the breakout (approximated by bullish
            # # minus bearish candle volume; dojis excluded)
            # bsc = bars[-10 + idx:]  # bars_since_open: included both side
            # ups = bsc[bsc['close'] > (bsc['open'] * 1.01)]
            # downs = bsc[bsc['open'] > (bsc['close'] * 0.99)]
            # balance = np.sum(ups['volume']) - np.sum(downs['volume'])

            # pc = await sec.price_change(cross_day, tf.day_shift(cross_day, 5),
            #                             FrameType.DAY, return_max=True)

            faf = int(win - idx)  # frames after fired
            adv = await sec.price_change(
                tf.day_shift(end, -win), end, FrameType.DAY, False
            )
            if adv > adv_limit:
                continue

            logger.info(f"{sec} crossed above the 250-day MA\t{cross_day}\t{faf}")
            await cache.sys.hmset_dict(
                "plots.crossyear",
                {
                    code: json.dumps(
                        {
                            "fired_at": tf.date2int(end),
                            "cross_day": tf.date2int(cross_day),
                            "faf": faf,
                            "grl": grl,
                            "ggl": ggl,
                            # 0 - generated by plots, 1 - disabled manually
                            "status": 0,
                        }
                    )
                },
            )
            results.append(
                [
                    sec.display_name,
                    tf.date2int(end),
                    tf.date2int(cross_day),
                    faf,
                    grl,
                    ggl,
                ]
            )
        except Exception as e:
            logger.exception(e)

    logger.info("done crossyear scan.")
    return results
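# A sketch of the golden-cross test assumed above. This is not omicron's
# signal.cross, just a plausible stand-in with the same contract: return
# (+1, idx) if the fast line ends above the slow one after crossing it inside
# the window, (-1, idx) for the opposite, and (0, -1) if there is no cross.
import numpy as np

def cross(fast: np.ndarray, slow: np.ndarray):
    sign = np.sign(fast - slow)
    flips = np.flatnonzero(np.diff(sign) != 0)  # indices where the sign flips
    if len(flips) == 0:
        return 0, -1
    return int(sign[-1]), int(flips[-1] + 1)

fast = np.array([0.90, 0.95, 1.02, 1.05, 1.10])  # e.g. ma5
slow = np.array([1.00, 1.00, 1.00, 1.00, 1.00])  # e.g. ma250
print(cross(fast, slow))  # (1, 2): golden cross at index 2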
async def train(self, save_to: str, dataset: str = None, n: int = 100):
    """Train the momentum model and dump it under `save_to`.

    Args:
        save_to: the directory to save the trained model into.
        dataset: path of a pre-built training dataset (TSV); if missing, the
            dataset is built on the fly.
        n: the number of samples to train on.
    """
    await self.init()

    save_to = os.path.abspath(save_to)
    if not os.path.exists(save_to):
        logger.warning("invalid path: %s", save_to)
        return

    date = tf.date2int(arrow.now().date())
    save_to = f"{save_to}/momemtum.{date}.svm"

    x_train, y_train = [], []
    x_test, y_test = [], []
    if dataset and os.path.exists(dataset):
        with open(dataset, 'r') as f:
            data = f.readlines()[1:n + 1]
        random.shuffle(data)
        n_train = int(len(data) * 0.8)
        for line in data[:n_train]:
            fields = line.strip("\n").split("\t")
            x_train.append([float(x) for x in fields[2:-1]])
            y_train.append(float(fields[-1]))
        for line in data[n_train:]:
            fields = line.strip("\n").split("\t")
            x_test.append([float(x) for x in fields[2:-1]])
            y_test.append(float(fields[-1]))
    else:
        data = await self._build_train_data(n)
        random.shuffle(data)
        n_train = int(len(data) * 0.8)
        for rec in data[:n_train]:
            x_train.append(rec[2:-1])
            y_train.append(rec[-1])
        for rec in data[n_train:]:
            x_test.append(rec[2:-1])
            y_test.append(rec[-1])

    assert len(x_train) == len(y_train)
    logger.info("train data loaded, %s records in total", len(x_train))

    params = {
        'C': [1e-3, 1e-2, 1e-1, 1, 10],
        'kernel': ('linear', 'poly'),
        'gamma': [0.001, 0.005, 0.1, 0.15, 0.20, 0.23, 0.27],
        'epsilon': [1e-4, 1e-3, 1e-2, 1e-1, 1, 10],
        'degree': [2, 3],
    }
    clf = GridSearchCV(svm.SVR(verbose=True), params, n_jobs=-1)
    clf.fit(x_train, y_train)
    logger.info(
        "Best: %s, %s, %s", clf.best_estimator_, clf.best_score_, clf.best_params_
    )

    self.model = clf.best_estimator_
    y_pred = self.model.predict(x_test)
    score = rmse(y_test, y_pred)
    logger.info("training score is: %s", score)

    print("y_true, y_pred")
    for yt, yp in zip(y_test[:20], y_pred[:20]):
        print(yt, yp)

    with open(save_to, "wb") as f:
        dump(self.model, f)

    await self.exit()
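# A self-contained miniature of the grid search above, run on synthetic data
# so it executes in seconds; the real grid is larger and the features come
# from the momentum dataset.
import numpy as np
from sklearn import svm
from sklearn.model_selection import GridSearchCV

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
y = X @ np.array([0.5, -0.2, 0.1]) + rng.normal(scale=0.05, size=100)

params = {"C": [0.1, 1, 10], "kernel": ["linear"], "epsilon": [0.01, 0.1]}
clf = GridSearchCV(svm.SVR(), params, n_jobs=-1)
clf.fit(X, y)
print(clf.best_params_, round(clf.best_score_, 3))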