def get_fg_index_df(self, cdate):
    df = BlockReader().get_df(ct.TONG_DA_XIN_FG_INDEX_PATH)
    df = df.loc[df.blockname == self.name]
    df = df[['code']]
    df['date'] = cdate
    df = df.reset_index(drop=True)
    return df
def block(self, group=False, custom=False):
    '''
    Fetch block (sector) data.

    :param group:
    :param custom:
    :return: pd.DataFrame or None
    '''
    reader = BlockReader()
    symbol = os.path.join(self.tdxdir, 'block_zs.dat')

    if symbol is not None:
        return reader.get_df(symbol, group)

    return None
def index(self, symbol='incon.dat', group=False):
    '''
    Fetch index data.

    :param symbol:
    :param group:
    :return: pd.DataFrame or None
    '''
    reader = BlockReader()
    symbol = os.path.join(self.tdxdir, symbol)

    if symbol is not None:
        return reader.get_df(symbol, group)

    return None
def main(input, output, datatype):
    """Read a TDX (通达信) data file."""
    if datatype == 'daily':
        reader = TdxDailyBarReader()
    elif datatype == 'ex_daily':
        reader = TdxExHqDailyBarReader()
    elif datatype == 'lc':
        reader = TdxLCMinBarReader()
    elif datatype == 'gbbq':
        reader = GbbqReader()
    elif datatype == 'block':
        reader = BlockReader()
    else:
        reader = TdxMinBarReader()

    try:
        df = reader.get_df(input)
        if output:
            click.echo("写入到文件 : " + output)
            df.to_csv(output)
        else:
            print(df)
    except Exception as e:
        print(str(e))
def block(self, symbol='block', custom=False, group=False):
    '''
    Fetch block (sector) data.

    Reference: http://blog.sina.com.cn/s/blog_623d2d280102vt8y.html

    :param custom:
    :param symbol:
    :param group:
    :return: pd.DataFrame or None
    '''
    if custom:
        reader = CustomerBlockReader()
        vipdoc = os.path.join(self.tdxdir, 'T0002', 'blocknew', '{}'.format(symbol))
    else:
        reader = BlockReader()
        vipdoc = os.path.join(self.tdxdir, 'T0002', 'hq_cache', '{}.dat'.format(symbol))

    fmt = TYPE_GROUP if group else None

    if os.path.exists(vipdoc):
        return reader.get_df(vipdoc, fmt)
    else:
        logger.error('未找到所需的文件: {}'.format(vipdoc))

    return None
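# A minimal sketch of flat vs. grouped output from a standard block file,
# assuming a local TDX installation at the illustrative path below; the
# second argument 1 is the group format (BlockReader_TYPE_GROUP = 1, as
# used by the helper class further down in this section).
from pytdx.reader import BlockReader

path = 'C:/new_tdx/T0002/hq_cache/block_zs.dat'   # illustrative path
flat = BlockReader().get_df(path)                 # flat: one row per (block, code) pair
grouped = BlockReader().get_df(path, 1)           # grouped: one row per block
print(flat.head())
print(grouped.head())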
def main(input, output, datatype):
    """Read a TDX (通达信) data file."""
    if datatype == 'daily':
        reader = TdxDailyBarReader()
    elif datatype == 'ex_daily':
        reader = TdxExHqDailyBarReader()
    elif datatype == 'lc':
        reader = TdxLCMinBarReader()
    elif datatype == 'gbbq':
        reader = GbbqReader()
    elif datatype == 'block':
        reader = BlockReader()
    elif datatype == 'customblock':
        reader = CustomerBlockReader()
    elif datatype in ('history_financial', 'hf'):
        reader = HistoryFinancialReader()
    else:
        reader = TdxMinBarReader()

    try:
        df = reader.get_df(input)
        if output:
            click.echo("写入到文件 : " + output)
            df.to_csv(output)
        else:
            print(df)
    except Exception as e:
        print(str(e))
def get_style_block():
    """
    Return the style-block membership for each stock.

    block_zs.dat  corresponds to TDX index blocks
    block_gn.dat  corresponds to TDX concept blocks
    block_fg.dat  corresponds to TDX style blocks (e.g. 融资融券, 已高送转, 近期弱势)

    The result is indexed by code, with block names as columns; a value of 2
    means the stock is a constituent of that block.

    :return:
    """
    filename = '{}{}{}'.format(TDX_DIR, os.sep, 'T0002\\hq_cache\\block_fg.dat')
    return BlockReader().get_df(filename).pivot(index='code', columns='blockname', values='block_type')
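# Usage sketch for get_style_block(): the pivoted frame is indexed by stock
# code with one column per style-block name, so a membership lookup is a
# one-liner. Assumes TDX_DIR points at a local TDX installation; the stock
# code below is purely illustrative.
style = get_style_block()

code = '600300'   # hypothetical example code
if code in style.index:
    # Non-NaN cells (value 2) mark the style blocks this stock belongs to.
    member_of = style.loc[code].dropna().index.tolist()
    print(code, member_of)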
def _load_instruments(self, market, freq) -> Dict[str, List[Tuple[str, str]]]:
    """
    Return the instruments belonging to a block (index).

    block_zs.dat  corresponds to TDX index blocks
    block_gn.dat  corresponds to TDX concept blocks
    block_fg.dat  corresponds to TDX style blocks (e.g. 融资融券, 已高送转, 近期弱势)

    Columns of a block file:
        blockname   block name
        block_type  2 means the code belongs to the block
        code_index  counter starting from 0
        code        stock code; codes starting with 6 or 9 are SH, the rest SZ

    :param market: block name
    :param freq: not needed here
    :return -> dict: {instrument: [(begin, end), (begin, end), (begin, end)]}
    """
    freq = str(Freq(freq))
    logger.info(f"get instruments {market} begin......")

    # "czce": 28, "dce": 29, "shfe": 30, "cffex": 47
    if market in ["future", "commodity", "czce", "dce", "shfe", "cffex"]:
        if market == "future":
            exchanges = ["czce", "dce", "shfe", "cffex"]
        elif market == "commodity":
            exchanges = ["czce", "dce", "shfe"]
        else:
            exchanges = market
        instruments_list, files_list = self.get_instrument_by_exchange(exchanges, freq)
    else:
        file_path = self.block_dir.joinpath("block_zs.dat")
        df = BlockReader().get_df(file_path)
        if market in df["blockname"].unique():
            instruments_series = df[df["blockname"] == market]["code"]
        else:
            instruments_series = df["code"]
        instruments_list = instruments_series \
            .apply(lambda x: "SH" + x if x.startswith("9") or x.startswith("6") else "SZ" + x) \
            .to_list()
        files_list = [
            get_file_path_from_instrument(self.tdx_path, instrument, freq=freq)[1]
            for instrument in instruments_list
        ]

    workers = max(min(C.get_kernels(freq), len(instruments_list)), 1)

    with tqdm(total=len(instruments_list)) as p_bar:
        with ProcessPoolExecutor(max_workers=workers) as executor:
            futures = {}
            for instrument, file_path in zip(instruments_list, files_list):
                futures[executor.submit(get_begin_and_end, file_path, False)] = instrument
                p_bar.update()

    error_code = {}
    _instruments = {}
    with tqdm(total=len(futures)) as p_bar:
        for _future in as_completed(futures):
            try:
                _begin_time, _end_time = _future.result()
                _instruments.setdefault(futures[_future], []).append((_begin_time, _end_time))
            except Exception:
                error_code[futures[_future]] = traceback.format_exc()
            p_bar.update()

    if error_code:
        logger.info(f"instruments errors: {error_code}")
    logger.info(f"get instruments {market} end......")
    return _instruments
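# The `market` argument above is matched against the blockname column of
# block_zs.dat, so the set of valid block names can be listed directly with
# BlockReader. A minimal sketch; the path is illustrative.
from pytdx.reader import BlockReader

df = BlockReader().get_df("C:/new_tdx/T0002/hq_cache/block_zs.dat")
print(sorted(df["blockname"].unique()))   # block names usable as `market`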
class TdxLocalHelper:
    BlockReader_TYPE_FLAT = 0
    BlockReader_TYPE_GROUP = 1

    def __init__(self):
        self.day_reader = TdxDailyBarReader()
        self.minline_reader = TdxLCMinBarReader()
        self.block_reader = BlockReader()
        # pandas display settings
        pd.set_option('display.max_columns', None)  # show all columns
        # pd.set_option('display.max_rows', None)  # show all rows

    # Parse a daily bar file
    # Returns: date: open, high, low, close, amount, volume
    def read_tdx_local_day(self):
        # df = reader.get_df(config.tdx_local_sz_day + "sz000001.day") sh000001.day
        df = self.day_reader.get_df(config.tdx_local_sh_day + "sh000001.day")
        print(df)

    # Format date fields in batch, stripping the separators
    def format_date(self, date):
        return ((str(date))[0:10]).replace('-', '')

    # Parse 1-minute and 5-minute data
    # Returns: date: open, high, low, close, amount, volume
    # CSV format: code, ts_code, trade_date (short form), trade_time, time_index, open, high, low, close, amount, volume
    def read_tdx_local_minline_all(self, full_path, filename=""):
        # df = reader.get_df(config.tdx_local_sz_minline1 + "sz399001.lc1")
        # df = reader.get_df(config.tdx_local_sz_minline5 + "sz399001.lc5")
        # df = self.minline_reader.get_df(config.tdx_local_sh_minline1 + "sh600300.lc1")
        code = filename[2:8]
        ts_code = code + "." + filename[0:2].upper()
        df = self.minline_reader.get_df(full_path)
        df.insert(0, 'trade_time', df.index)
        df.insert(0, 'trade_date', df.index.floor('D'))
        df.insert(0, 'ts_code', ts_code)
        df.insert(0, 'code', code)
        df.reset_index(drop=True, inplace=True)
        # Reference: https://zhuanlan.zhihu.com/p/110819220?from_voters_page=true
        df.insert(4, 'time_index', df.index)
        # df['trade_time'] = pd.to_datetime(df['trade_time'], infer_datetime_format=True).dt.normalize()  # strftime('%m/%d/%Y')  # format='%m/%d/%Y').dt.date
        # df['trade_time'] = df['trade_time'].apply(lambda x: x.strftime('%H:%M:%S'))
        df['trade_time'] = pd.to_datetime(df['trade_time'], format='%H:%M:%S').dt.strftime('%H:%M:%S')
        df['time_index'] = df['trade_time'].apply(lambda x: datatime_util.stockTradeTime2Index(x))
        df['trade_date'] = df['trade_date'].apply(lambda x: self.format_date(x))
        df['open'] = df['open'].apply(lambda x: round(x, 2))
        df['high'] = df['high'].apply(lambda x: round(x, 2))
        df['low'] = df['low'].apply(lambda x: round(x, 2))
        df['close'] = df['close'].apply(lambda x: round(x, 2))
        csv_filename = config.tdx_csv_minline1_all + ts_code + ".csv"
        if os.path.isfile(csv_filename):
            os.remove(csv_filename)
            df.to_csv(csv_filename, index=False, mode='w', header=True, sep=',', encoding="utf_8_sig")
        else:
            df.to_csv(csv_filename, index=False, mode='w', header=True, sep=',', encoding="utf_8_sig")

    # Parse 1-minute and 5-minute data in a simplified form: only price and volume
    # Returns: date: open, high, low, close, amount, volume
    # CSV format: code, ts_code, trade_date (short form), trade_time, time_index, price, volume
    def read_tdx_local_minline_simple(self, full_path, filename=""):
        # df = reader.get_df(config.tdx_local_sz_minline1 + "sz399001.lc1")
        # df = reader.get_df(config.tdx_local_sz_minline5 + "sz399001.lc5")
        # df = self.minline_reader.get_df(config.tdx_local_sh_minline1 + "sh600300.lc1")
        code = filename[2:8]
        ts_code = code + "." + filename[0:2].upper()
        df = self.minline_reader.get_df(full_path)
        df.insert(0, 'trade_time', df.index)
        df.insert(0, 'trade_date', df.index.floor('D'))
        df.insert(0, 'ts_code', ts_code)
        df.insert(0, 'code', code)
        df.reset_index(drop=True, inplace=True)
        # Reference: https://zhuanlan.zhihu.com/p/110819220?from_voters_page=true
        df.insert(4, 'time_index', df.index)
        # df['trade_time'] = pd.to_datetime(df['trade_time'], infer_datetime_format=True).dt.normalize()  # strftime('%m/%d/%Y')  # format='%m/%d/%Y').dt.date
        # df['trade_time'] = df['trade_time'].apply(lambda x: x.strftime('%H:%M:%S'))
        df['trade_time'] = pd.to_datetime(df['trade_time'], format='%H:%M:%S').dt.strftime('%H:%M:%S')
        df['time_index'] = df['trade_time'].apply(lambda x: datatime_util.stockTradeTime2Index(x))
        df['trade_date'] = df['trade_date'].apply(lambda x: self.format_date(x))
        # df['open'] = df['open'].apply(lambda x: round(x, 2))
        # df['high'] = df['high'].apply(lambda x: round(x, 2))
        # df['low'] = df['low'].apply(lambda x: round(x, 2))
        df['close'] = df['close'].apply(lambda x: round(x, 2))
        df.rename(columns={'close': 'price'}, inplace=True)
        df.drop(['open', 'high', 'low', 'amount'], axis=1, inplace=True)
        csv_filename = config.tdx_csv_minline1_simple + ts_code + ".csv"
        if os.path.isfile(csv_filename):
            os.remove(csv_filename)
            df.to_csv(csv_filename, index=False, mode='w', header=True, sep=',', encoding="utf_8_sig")
        else:
            df.to_csv(csv_filename, index=False, mode='w', header=True, sep=',', encoding="utf_8_sig")

    # Parse block (sector) data
    # Flat-format columns: blockname, block_type, code_index, code
    def read_tdx_local_block(self):
        # index blocks, style blocks, concept blocks, general blocks
        block_filename = ["block_zs.dat", "block_fg.dat", "block_gn.dat", "block.dat"]
        for block in block_filename:
            df = self.block_reader.get_df(config.tdx_local_block + block)  # flat format by default
            df_group = self.block_reader.get_df(config.tdx_local_block + block, self.BlockReader_TYPE_GROUP)  # grouped format
            filename = config.tdx_csv_block + block[0:-4] + ".csv"
            filename_group = config.tdx_csv_block + block[0:-4] + "_group" + ".csv"
            if os.path.isfile(filename):
                df.to_csv(filename, index=False, mode='a', header=False, sep=',', encoding="utf_8_sig")
                df_group.to_csv(filename_group, index=False, mode='a', header=False, sep=',', encoding="utf_8_sig")
            else:
                df.to_csv(filename, index=False, mode='w', header=True, sep=',', encoding="utf_8_sig")
                df_group.to_csv(filename_group, index=False, mode='w', header=True, sep=',', encoding="utf_8_sig")

    # Parse intraday (fenshi) chart data
    def read_tdx_local_fst(self):
        reader = TdxMinBarReader()  # this reader cannot parse intraday chart files
        # df = reader.get_df(config.tdx_local_sz_minline + "sz399001.lc1")
        df = reader.get_df(config.tdx_local_fst + "sh20200417.tfz")
        print(df)

    # Parse the intraday chart file directly (not fully working yet)
    """
    In TDX zst files, each day occupies 6508 bytes and each minute record is
    26 bytes, laid out as follows:
      - 2 bytes: time; a decimal value of 570 means 9:30 (570 / 60 = 9.5)
      - 4 bytes: current price
      - 4 bytes: average price
      - 2 bytes: volume for that minute (possibly widened to 4 bytes nowadays)
      - remaining 14 bytes: reserved
    """
    def parse_fst_file(self):
        full_path = config.tdx_local_fst + "sh20200417.tfz"
        filesize = os.path.getsize(full_path)  # file size in bytes
        print("filesize为: %s" % (filesize))
        if filesize == 0:
            return
        # print(chardet.detect(open(full_path, mode='rb').read()))  # inspect the file encoding
        file = open(full_path, "rb")
        try:
            i = 0
            while True:
                print("游标位置:", file.tell())
                stock_date = file.read(2)
                cur_price = file.read(4)
                arr_price = file.read(4)
                vol = file.read(4)
                stock_reservation = file.read(12)
                stock_date = unpack("h", stock_date)
                cur_price = unpack("l", cur_price)
                arr_price = unpack("l", arr_price)
                vol = unpack("l", vol)
                # stock_reservation = unpack("s", stock_reservation)
                print(stock_date)
                print(cur_price)
                print(arr_price)
                print(vol)
                print(stock_reservation)
                i = i + 1
                if i == 2:
                    break

            for line in file:
                result = chardet.detect(line)
                print("code: ", result)
                buf_size = len(line)
                rec_count = buf_size // 32
                begin = 0
                end = 32
                print("行内容:", line)
                print("buf_size:", buf_size)
                print("rec_count:", rec_count)
                a = unpack('IIIIIfII', line[begin:end])
                print("解码后的数据0: %s" % (str(a[0])))
                print("解码后的数据1: %s" % (str(a[1])))
                print("解码后的数据2: %s" % (str(a[2])))
                break
        finally:
            file.close()
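# A minimal sketch for decoding one 26-byte minute record of the intraday
# (.tfz/zst) file, following the layout described in the comment block above:
# 2-byte time, 4-byte current price, 4-byte average price, 4-byte volume and
# 12 reserved bytes. Little-endian byte order and these field widths are
# assumptions taken from that comment and the read sizes in parse_fst_file(),
# not a verified specification.
from struct import unpack, calcsize

RECORD_FMT = '<hiii12s'
RECORD_SIZE = calcsize(RECORD_FMT)   # 26 bytes

def parse_minute_record(buf):
    minutes, cur_price, avg_price, vol, _reserved = unpack(RECORD_FMT, buf)
    # 570 minutes after midnight -> 09:30, per the comment above.
    return {
        'time': '%02d:%02d' % divmod(minutes, 60),
        'cur_price': cur_price,
        'avg_price': avg_price,
        'volume': vol,
    }

# Hypothetical usage against one of the .tfz files referenced above:
# with open(config.tdx_local_fst + "sh20200417.tfz", "rb") as f:
#     print(parse_minute_record(f.read(RECORD_SIZE)))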
def get_block_info2():
    # flat format by default
    df = BlockReader().get_df(".../T0002/hq_cache/block_zs.dat")
    print(df)
def tdx_block_reader():
    # stock_data = ts.get_stock_basics()
    # stock_data.to_csv('stock.csv', columns=['name'])
    stock_edges = pd.DataFrame(columns=['fromID', 'toID', 'type'])
    block_nodes = {}
    begin_code = int(900001)

    # index blocks
    zs_df = BlockReader().get_df("C:/new_tdx/T0002/hq_cache/block_zs.dat")
    for row in zs_df.iterrows():
        if row[1]['blockname'] not in block_nodes:
            block_nodes[row[1]['blockname']] = begin_code
            begin_code += 1
        stock_edges = stock_edges.append(
            {
                'fromID': row[1]['code'],
                'toID': str(block_nodes[row[1]['blockname']]),
                'type': '属于'
            },
            ignore_index=True)

    # style blocks
    fg_df = BlockReader().get_df("C:/new_tdx/T0002/hq_cache/block_fg.dat")
    for row in fg_df.iterrows():
        if row[1]['blockname'] not in block_nodes:
            block_nodes[row[1]['blockname']] = begin_code
            begin_code += 1
        stock_edges = stock_edges.append(
            {
                'fromID': row[1]['code'],
                'toID': str(block_nodes[row[1]['blockname']]),
                'type': '属于'
            },
            ignore_index=True)

    # concept blocks
    gn_df = BlockReader().get_df("C:/new_tdx/T0002/hq_cache/block_gn.dat")
    for row in gn_df.iterrows():
        if row[1]['blockname'] not in block_nodes:
            block_nodes[row[1]['blockname']] = begin_code
            begin_code += 1
        stock_edges = stock_edges.append(
            {
                'fromID': row[1]['code'],
                'toID': str(block_nodes[row[1]['blockname']]),
                'type': '属于'
            },
            ignore_index=True)

    # region
    block_area = ts.get_area_classified()
    for row in block_area.iterrows():
        if row[1]['area'] not in block_nodes:
            block_nodes[row[1]['area']] = begin_code
            begin_code += 1
        stock_edges = stock_edges.append(
            {
                'fromID': row[1]['code'],
                'toID': str(block_nodes[row[1]['area']]),
                'type': '属于'
            },
            ignore_index=True)

    # industry
    block_industry = ts.get_industry_classified()
    for row in block_industry.iterrows():
        if row[1]['c_name'] not in block_nodes:
            block_nodes[row[1]['c_name']] = begin_code
            begin_code += 1
        stock_edges = stock_edges.append(
            {
                'fromID': row[1]['code'],
                'toID': str(block_nodes[row[1]['c_name']]),
                'type': '属于'
            },
            ignore_index=True)

    # edges to csv
    stock_edges.to_csv('stock_edges.csv', index=None, encoding="utf-8")

    # nodes to csv
    stock_data = ts.get_stock_basics()
    nodes_df = pd.DataFrame(columns=['code', 'name'])
    for k, v in block_nodes.items():
        # print(k)
        nodes_df = nodes_df.append({'code': str(v), 'name': k}, ignore_index=True)
    nodes_df = nodes_df.set_index('code')
    nodes_df = nodes_df.append(stock_data)
    nodes_df.to_csv('stock_nodes.csv', columns=['name'], encoding="utf-8")