def fix_time(self, stock_list: list or str, factors: list or f, trade_date: datetime.date or str):
    """Collect factor values for a set of stocks on one trade date.

    The requested factors are grouped by "mark" with ``dis_mark2factor_map``.
    Every mark that resolves to an object in ``self.mark2instance_map`` is
    asked for its own ``fix_time`` result, and the value columns of those
    partial results are appended, left to right, to a ``stock`` column.

    Args:
        stock_list: One stock code or a list of codes; normalised by
            ``convert_11code``.
        factors: A factor or a list of factors.
        trade_date: Date to query; normalised by ``convert2datetime``.

    Returns:
        A plain ``numpy.ndarray`` (structured) whose first field is ``stock``
        and whose remaining fields are the value columns returned by the
        delegates. Marks without a registered delegate contribute nothing.
    """
    started_at = time.time()

    # Normalise the arguments.
    stock_list = convert_11code(stock_list)
    factors_by_mark = dis_mark2factor_map(factors, self.factor2mark_map)
    trade_date = convert2datetime(trade_date)

    # Seed the result with a single 'stock' column built from the stock list.
    merged = numpy.array(list(zip(stock_list)), dtype=[('stock', 'U11')])
    field_names = ['stock']

    for mark in factors_by_mark:
        # The delegate is the per-mark data-source object ("trident" instance).
        delegate = self.mark2instance_map.get(mark, None)
        if not delegate:
            continue

        partial = delegate.fix_time(stock_list, factors_by_mark[mark], trade_date)
        # Drop the delegate's first (key) column and keep only value columns.
        value_fields = list(partial.dtype.names)[1:]
        field_names.extend(value_fields)
        # merge_arrays pairs rows by position, not by stock code.
        # NOTE(review): presumably delegates return rows in stock_list order - confirm.
        merged = rfn.merge_arrays(
            (merged, partial[value_fields]), asrecarray=True, flatten=True)

    merged.dtype.names = field_names

    finished_at = time.time()
    print('fix_time耗时:', finished_at - started_at)

    # Convert the numpy.recarray back into a plain numpy.ndarray.
    return numpy.array(merged)
def fix_symbol(self, stock: str, factors: list or f, start_date: datetime.date or str, end_date: datetime.date or str):
    """Collect factor values for one stock over a range of trading dates.

    The requested factors are grouped by "mark" with ``dis_mark2factor_map``.
    Every mark that resolves to an object in ``self.mark2instance_map`` is
    asked for its own ``fix_symbol`` result, and the value columns of those
    partial results are appended, left to right, to a ``date`` column.

    Args:
        stock: Stock code; the first element of ``convert_11code(stock)`` is
            used.
        factors: A factor or a list of factors.
        start_date: First date of the range; normalised by
            ``convert2datetime``.
        end_date: Last date of the range; normalised by ``convert2datetime``.

    Returns:
        A plain ``numpy.ndarray`` (structured) whose first field is ``date``
        (the calendar rows flagged as trading days) followed by the value
        columns returned by the delegates.
    """
    started_at = time.time()

    # Normalise the arguments.
    stock = convert_11code(stock)[0]
    start_date = convert2datetime(start_date)
    end_date = convert2datetime(end_date)
    factors_by_mark = dis_mark2factor_map(factors, self.factor2mark_map)

    # Seed the result with the trading dates inside the requested range.
    all_days = TradeCalendar().calendar(start_date, end_date)
    trading_dates = all_days[all_days['trade']]['date']
    merged = numpy.array(trading_dates, dtype=[('date', 'U10')])
    field_names = ['date']

    for mark in factors_by_mark:
        delegate = self.mark2instance_map.get(mark, None)
        if not delegate:
            continue

        partial = delegate.fix_symbol(stock, factors_by_mark[mark], start_date, end_date)
        # Drop the delegate's first (key) column and keep only value columns.
        value_fields = list(partial.dtype.names)[1:]
        field_names.extend(value_fields)
        # merge_arrays pairs rows by position, not by date.
        # NOTE(review): presumably delegates return one row per trading date - confirm.
        merged = rfn.merge_arrays(
            (merged, partial[value_fields]), asrecarray=True, flatten=True)

    merged.dtype.names = field_names

    finished_at = time.time()
    print('fix_symbol耗时:', finished_at - started_at)

    return numpy.array(merged)
def fix_factor(self, stock_list: list, factor: f or list, start_date: str or datetime.date, end_date: str or datetime.date):
    """Build a date x stock table of one factor over a date range.

    Only the first collection in the map returned by
    ``dis_collection2factor_map`` and the first factor of that collection are
    queried. Records are pivoted to one row per publication day and one column
    per stock (the last record wins when several share a day), forward-filled,
    and then restricted to the trading calendar.

    Args:
        stock_list: Stock codes; normalised by ``convert_11code``.
        factor: A factor or a list of factors (only the first one is used).
        start_date: First day of the range; normalised by
            ``convert2datetime``.
        end_date: Last day of the range; normalised by ``convert2datetime``
            and extended to 23:59:59 of that day.

    Returns:
        A plain ``numpy.ndarray`` (structured) whose first field is ``date``
        followed by one float field per stock. Stocks without any record get
        an all-NaN column.
    """
    # Normalise the arguments.
    stock_list = convert_11code(stock_list)
    collection2factor_map = dis_collection2factor_map(factor, self.factor2collection_map)
    start_date = convert2datetime(start_date)
    end_date = convert2datetime(end_date)
    # Make the upper bound inclusive of the whole end day.
    end_date = end_date + datetime.timedelta(hours=23, minutes=59, seconds=59)

    # Trading calendar: keep only rows flagged as trading days.
    calendar = TradeCalendar().calendar(start_date.strftime("%Y%m%d"), end_date.strftime("%Y%m%d"))
    trading_calendar = calendar['date'][calendar['trade']]
    trading_calendar_index = pandas.DataFrame(trading_calendar, columns=['index']).set_index('index')
    rett = trading_calendar_index.copy()

    # Decide which collection and which field to query.
    collection, field = list(collection2factor_map.items())[0]
    field = field[0].name
    snap = ['SecuCode', 'PubDate', field]
    doc_snap = {k: 1 for k in snap}
    doc_snap["_id"] = 0

    # Query.
    db_coll = connect_coll(collection, self._db)
    ret = db_coll.find(
        {'SecuCode': {'$in': stock_list},
         "PubDate": {"$gte": start_date, "$lte": end_date}},
        doc_snap)
    data = pandas.DataFrame(list(ret))

    # Normalise the query result.
    if not data.empty:
        # NOTE(review): the join below assumes calendar dates are 'YYYY-MM-DD' strings - confirm.
        data[snap[1]] = data[snap[1]].map(lambda x: x.strftime('%Y-%m-%d'))
        data = data[snap]
        data.columns = ['stock', 'time', field]
        data = pandas.crosstab(data['time'], data['stock'], values=data[field], aggfunc='last')
        # Outer join so publication days that are not trading days still
        # take part in the forward fill.
        rett = data.merge(rett, left_index=True, right_index=True, how='outer')
        # Carry the latest published value forward in time.
        rett = rett.ffill()
        # Then keep only trading days. `.loc` replaces the removed `.ix`
        # indexer; every calendar label exists thanks to the outer join.
        rett = rett.loc[trading_calendar_index.index]

    # Add a column for every requested stock by concatenating a dummy row
    # labelled '1' that has all stock columns, then dropping that row again.
    to_concat_ret = pandas.DataFrame(dict(zip(stock_list, [1] * len(stock_list))), index=['1'])
    rett = pandas.concat([rett, to_concat_ret])
    rett = rett.drop(['1'])

    # Final shaping: float values, structured array, first field named 'date'.
    rett = rett.astype(float)
    rett = rett.to_records()
    rett.dtype.names = ['date'] + list(rett.dtype.names)[1:]
    return numpy.array(rett)
def fix_time(self, stock_list: list or str, factors: list or f, trade_date: datetime.date or str):
    """Return factor values for a set of stocks on one trade date.

    For every collection in the map returned by ``dis_collection2factor_map``
    the records of the requested stocks published on ``trade_date`` are read
    in ascending ``PubDate`` order and written into the result, so the latest
    record of a stock wins.

    Args:
        stock_list: One stock code or a list of codes; normalised by
            ``convert_11code``.
        factors: Iterable of factor objects exposing ``.name``.
        trade_date: Day to query; normalised by ``convert2datetime``.

    Returns:
        A structured ``numpy.ndarray`` with a ``stock`` field followed by one
        ``<f8`` field per factor; values without a record stay NaN.
    """
    stock_list = convert_11code(stock_list)
    collection2factor_map = dis_collection2factor_map(
        factors, self.factor2collection_map)

    # Query the day the caller asked for (the window used to be pinned to a
    # fixed debugging date, which made `trade_date` a no-op).
    # NOTE(review): only records published on trade_date itself match; the
    # pandas-based fix_time in this file looks back to 1 Jan of the previous
    # year - confirm which window is intended here.
    start_date = convert2datetime(trade_date)
    end_date = start_date + datetime.timedelta(
        hours=23, minutes=59, seconds=59)

    dtypes = [("stock", "U11")]
    dtypes.extend((ft.name, "<f8") for ft in factors)

    # One row per stock, every factor initialised to NaN.
    result = numpy.full((len(stock_list), ), numpy.nan, dtype=dtypes)
    result["stock"] = stock_list

    # Cache: stock code -> row index in `result`.
    indexes = {}
    for collection in collection2factor_map:
        factor_name_list = gen_factor_name_list(
            collection2factor_map[collection])
        snap = ['SecuCode', 'PubDate']
        snap.extend(factor_name_list)
        doc_snap = {k: 1 for k in snap}
        doc_snap["_id"] = 0

        db_coll = connect_coll(collection, self._db)
        data = db_coll.find(
            {
                'SecuCode': {'$in': stock_list},
                "PubDate": {"$gte": start_date, "$lte": end_date},
            },
            doc_snap).sort("PubDate", pymongo.ASCENDING)

        for d in data:
            code = d['SecuCode']
            idx = indexes.get(code)
            if idx is None:
                idx = find_date_in_array(code, result["stock"])
                if idx == -1:
                    system_log.warning(
                        f"[Finance.fix_time] code index not found. record={d}"
                    )
                    continue
                indexes[code] = idx
            # Later (more recent) records overwrite earlier ones.
            for ft in factor_name_list:
                result[idx][ft] = d[ft]
    return result
def fix_factor(self, stock_list: list, factor: f or list, time: str or datetime.date, frequency: (1, 2, 3, 4)):
    """Build a date x stock table of one factor.

    Only the first collection in the map returned by
    ``dis_collection2factor_map`` and the first factor of that collection are
    queried. Records are read in ascending ``PubDate`` order and written into
    the row whose date matches the record's publication date.

    Args:
        stock_list: Stock codes; normalised by ``convert_11code``.
        factor: A factor or a list of factors (only the first one is used).
        time: Currently unused.
        frequency: Currently unused.

    Returns:
        A structured ``numpy.ndarray`` with a ``uint32`` ``date`` field and
        one ``<f8`` field per stock code; cells without a record stay NaN.
    """
    # Normalise the arguments.
    stock_list = convert_11code(stock_list)
    collection2factor_map = dis_collection2factor_map(
        factor, self.factor2collection_map)
    collection, field = list(collection2factor_map.items())[0]
    field = field[0].name
    snap = ['SecuCode', 'PubDate', field]
    doc_snap = {k: 1 for k in snap}
    doc_snap["_id"] = 0

    # Fixed test window.
    # NOTE(review): `time` and `frequency` are ignored; the intended mapping
    # to a date window is not visible here - confirm and wire them in.
    start_date = datetime.datetime(2015, 1, 1)
    end_date = datetime.datetime(2015, 3, 31)

    # Query.
    db_coll = connect_coll(collection, self._db)
    ret = db_coll.find(
        {
            'SecuCode': {'$in': stock_list},
            "PubDate": {"$gte": start_date, "$lte": end_date},
        },
        doc_snap).sort('PubDate', pymongo.ASCENDING)

    # One float column per stock code next to the date column.
    dtypes = [("date", "uint32")]
    dtypes.extend((code, "<f8") for code in stock_list)

    trade_days = TradeCalendar().calendar(start_date, end_date)
    # `numpy.nan` is the canonical spelling; the `NaN` alias was removed in
    # NumPy 2.0.
    result = numpy.full((trade_days.shape[0], ), numpy.nan, dtype=dtypes)
    result["date"] = trade_days

    # Cache: yyyymmdd date -> row index in `result`.
    indexes = {}
    for r in ret:
        t = yyyymmdd_date(r['PubDate'])
        idx = indexes.get(t)
        # Compare against None: row index 0 is a valid cached hit and must
        # not trigger another search.
        if idx is None:
            idx = find_date_in_array(t, result["date"])
            if idx == -1:
                # Not a trading day.
                system_log.warning(
                    f"[Finance.fix_factor] date index not found. record={r}"
                )
                continue
            indexes[t] = idx
        result[r.get('SecuCode')][idx] = r[field]
    return result
def fix_symbol(self, stock: str, factors: list or f, start_date: datetime.date or str, end_date: datetime.date or str):
    """Build a date x factor table for one stock over a date range.

    For every collection in the map returned by ``dis_collection2factor_map``
    the stock's records are fetched starting from 1 January of the year before
    ``start_date`` (so values published earlier can be carried forward),
    outer-joined onto the running table by publication day, forward-filled,
    and then restricted to the trading calendar of the requested range.

    Args:
        stock: Stock code; the first element of ``convert_11code(stock)`` is
            used.
        factors: A factor or a list of factors.
        start_date: First day of the range; normalised by
            ``convert2datetime``.
        end_date: Last day of the range; normalised by ``convert2datetime``
            and extended to 23:59:59 of that day.

    Returns:
        A plain ``numpy.ndarray`` (structured) whose first field is ``date``
        followed by one float field per factor column that was merged in.
        A collection that returns no records contributes no columns.
    """
    # Normalise the arguments.
    stock = convert_11code(stock)[0]
    collection2factor_map = dis_collection2factor_map(factors, self.factor2collection_map)
    start_date = convert2datetime(start_date)
    end_date = convert2datetime(end_date)
    # Make the upper bound inclusive of the whole end day.
    end_date = end_date + datetime.timedelta(hours=23, minutes=59, seconds=59)

    # Trading calendar for the requested range: keep only trading days.
    calendar = TradeCalendar().calendar(start_date.strftime("%Y%m%d"), end_date.strftime("%Y%m%d"))
    trading_calendar = calendar['date'][calendar['trade']]
    trading_calendar_index = pandas.DataFrame(trading_calendar, columns=['index']).set_index('index')
    rett = trading_calendar_index.copy()

    # Widen the query window back to 1 January of the previous year; the
    # calendar above was already built from the original start date.
    _year = int(start_date.strftime("%Y")) - 1
    start_date = datetime.datetime(_year, 1, 1, 0, 0, 0)

    for collection in collection2factor_map:
        factor_name_list = gen_factor_name_list(collection2factor_map[collection])
        snap = ['PubDate']
        snap.extend(factor_name_list)
        doc_snap = {k: 1 for k in snap}
        doc_snap["_id"] = 0

        # Query.
        db_coll = connect_coll(collection, self._db)
        data = db_coll.find(
            {'SecuCode': stock, "PubDate": {"$gte": start_date, "$lte": end_date}},
            doc_snap)
        data = pandas.DataFrame(list(data))

        # Normalise the query result.
        if not data.empty:
            # NOTE(review): the join below assumes calendar dates are 'YYYY-MM-DD' strings - confirm.
            data[snap[0]] = data[snap[0]].map(lambda x: x.strftime('%Y-%m-%d'))
            data = data[snap]
            data = data.set_index(snap[0])

            # Fold this collection's columns into the running table.
            rett = rett.merge(data, left_index=True, right_index=True, how='outer')
            # Carry the latest published value forward in time.
            rett[factor_name_list] = rett[factor_name_list].ffill()
            # Then keep only trading days. `.loc` replaces the removed `.ix`
            # indexer; every calendar label exists thanks to the outer join.
            rett = rett.loc[trading_calendar_index.index]

    # Final shaping: float values, structured array, first field named 'date'.
    rett = rett.astype(float).to_records()
    rett.dtype.names = ['date'] + list(rett.dtype.names)[1:]
    return numpy.array(rett)
def fix_time(self, stock_list: list or str, factors: list or f, trade_date: datetime.date or str):
    """Build a stock x factor table as of one trade date.

    For every collection in the map returned by ``dis_collection2factor_map``
    the records of the requested stocks published between 1 January of the
    previous year and the end of ``trade_date`` are fetched, and the most
    recently published record of each stock is kept.

    Args:
        stock_list: One stock code or a list of codes; normalised by
            ``convert_11code``.
        factors: A factor or a list of factors.
        trade_date: Day to query; normalised by ``convert2datetime``.

    Returns:
        A plain ``numpy.ndarray`` (structured) whose first field is ``stock``
        followed by one float field per factor column that was merged in.
        Stocks without any record get NaN values.
    """
    # Normalise the arguments.
    stock_list = convert_11code(stock_list)
    collection2factor_map = dis_collection2factor_map(factors, self.factor2collection_map)
    start_date = convert2datetime(trade_date)
    # The upper bound is the last second of the trade date.
    end_date = start_date + datetime.timedelta(hours=23, minutes=59, seconds=59)

    # Start from the full stock list so stocks without data still get a row.
    rett = pandas.DataFrame(stock_list, columns=['stock']).set_index('stock')

    # Widen the query window back to 1 January of the previous year.
    _year = int(start_date.strftime("%Y")) - 1
    start_date = datetime.datetime(_year, 1, 1, 0, 0, 0)

    for collection in collection2factor_map:
        factor_name_list = gen_factor_name_list(collection2factor_map[collection])
        snap = ['SecuCode', 'PubDate']
        snap.extend(factor_name_list)
        doc_snap = {k: 1 for k in snap}
        doc_snap["_id"] = 0

        # Query.
        db_coll = connect_coll(collection, self._db)
        data = db_coll.find(
            {'SecuCode': {'$in': stock_list},
             "PubDate": {"$gte": start_date, "$lte": end_date}},
            doc_snap)
        data = pandas.DataFrame(list(data))

        # Normalise the query result.
        if not data.empty:
            # The cursor is unsorted, so order by publication date first
            # (stable sort keeps cursor order among ties); otherwise the
            # "last row per stock" below is whatever the database returned
            # last rather than the latest publication.
            data = data[snap].sort_values('PubDate', kind='mergesort').set_index('PubDate')
            data.columns = ['stock'] + factor_name_list
            # Keep the last (most recent) row of every stock.
            latest_rows = [rows.iloc[-1] for _, rows in data.groupby('stock')]
            data = pandas.DataFrame(latest_rows).set_index('stock')

            # Fold this collection's columns into the running table.
            rett = rett.merge(data, left_index=True, right_index=True, how='outer')

    # Final shaping: float values, structured array, first field named 'stock'.
    rett = rett.astype(float).to_records()
    rett.dtype.names = ['stock'] + list(rett.dtype.names)[1:]
    return numpy.array(rett)
def fix_factor(self, stock_list: list or str, factor: f, start_date: datetime.date or str, end_date: datetime.date or str):
    """Fetch one factor for many stocks over a date range via its delegate.

    The factor is mapped to a "mark" with ``dis_mark2factor_map``; the first
    mark is resolved through ``self.mark2instance_map`` and that object's
    ``fix_factor`` does the actual work.

    Args:
        stock_list: One stock code or a list of codes; normalised by
            ``convert_11code``.
        factor: The factor to fetch.
        start_date: First date of the range; normalised by
            ``convert2datetime``.
        end_date: Last date of the range; normalised by ``convert2datetime``.

    Returns:
        Whatever the delegate's ``fix_factor`` returns, unmodified. ``None``
        when the factor maps to no mark (a message is printed) or when no
        delegate is registered for the mark.
    """
    started_at = time.time()

    # Normalise the arguments.
    stock_list = convert_11code(stock_list)
    start_date = convert2datetime(start_date)
    end_date = convert2datetime(end_date)
    factors_by_mark = dis_mark2factor_map(factor, self.factor2mark_map)

    if not factors_by_mark:
        # Unknown factor, or the caller lacks permission for it.
        print('未收录因子' + factor.name + ',或您没有获取该因子权限,请联系管理员。')
        return None

    first_mark = list(factors_by_mark.keys())[0]
    delegate = self.mark2instance_map.get(first_mark, None)
    if delegate:
        ret = delegate.fix_factor(stock_list, factor, start_date, end_date)
    else:
        ret = None

    finished_at = time.time()
    # NOTE(review): `factor` is read as `factor.name` above but indexed as
    # `factor[0].name` here - confirm which shape callers actually pass.
    print(factor[0].name)
    print('fix_factor耗时:', finished_at - started_at)

    # This entry point needs neither column slicing nor type conversion.
    return ret
def fix_symbol(self, stock: str, factors: list or f, time: str or datetime.date, frequency: (1, 2, 3, 4)):
    """Build a date x factor table for one stock.

    For every collection in the map returned by ``dis_collection2factor_map``
    the stock's records are read in ascending ``PubDate`` order and written
    into the row whose date matches the record's publication date. All
    collections write into the same result array.

    Args:
        stock: Stock code; the first element of ``convert_11code(stock)`` is
            used.
        factors: Iterable of factor objects exposing ``.name``.
        time: Currently unused.
        frequency: Currently unused.

    Returns:
        A structured ``numpy.ndarray`` with a ``uint32`` ``date`` field and
        one ``<f8`` field per factor; cells without a record stay NaN.
    """
    stock = convert_11code(stock)[0]
    collection2factor_map = dis_collection2factor_map(
        factors, self.factor2collection_map)

    # Fixed test window.
    # NOTE(review): `time` and `frequency` are ignored; the intended mapping
    # to a date window is not visible here - confirm and wire them in.
    start_date = datetime.datetime(2010, 1, 1)
    end_date = datetime.datetime(2015, 3, 31)
    trade_days = TradeCalendar().calendar(start_date, end_date)

    dtypes = [("date", "uint32")]
    dtypes.extend((ft.name, "<f8") for ft in factors)

    # Allocate the result once, before visiting any collection, so values
    # from every collection accumulate in the same array and the function
    # still returns an all-NaN table when there is nothing to query.
    result = numpy.full((trade_days.shape[0], ), numpy.nan, dtype=dtypes)
    result["date"] = trade_days

    # Cache: yyyymmdd date -> row index in `result`.
    indexes = {}
    for collection in collection2factor_map:
        factor_name_list = gen_factor_name_list(
            collection2factor_map[collection])
        snap = ['PubDate']
        snap.extend(factor_name_list)
        doc_snap = {k: 1 for k in snap}
        doc_snap["_id"] = 0

        db_coll = connect_coll(collection, self._db)
        data = db_coll.find(
            {
                'SecuCode': stock,
                "PubDate": {"$gte": start_date, "$lte": end_date},
            },
            doc_snap).sort("PubDate", pymongo.ASCENDING)

        for d in data:
            t = yyyymmdd_date(d["PubDate"])
            idx = indexes.get(t)
            # Compare against None: row index 0 is a valid cached hit and
            # must not trigger another search.
            if idx is None:
                idx = find_date_in_array(t, result["date"])
                if idx == -1:
                    system_log.warning(
                        f"[Finance.fix_symbol] date index not found. record={d}"
                    )
                    continue
                indexes[t] = idx
            for ft in factor_name_list:
                result[idx][ft] = d[ft]
    return result