def scrawl_single_tick(i, path, ex, tdates):
    """Download one trading day's tick data for a single contract and store it gzipped.

    Args:
        i: tuple of (file stem, contract symbol, trade date) — presumably built
           as (date_str + "-" + symbol, symbol, datetime); TODO confirm against caller.
        path: root directory for tick storage.
        ex: exchange code; upper-cased both for the directory name and the symbol prefix.
        tdates: mapping trade date -> previous trade date, used to anchor the
            session start at the previous day 17:00.

    Side effects: creates <path>/<EX>/<year>/<stem>.csv.gz; does nothing if the
    .gz file already exists.
    """
    year_dir = os.path.join(path, ex.upper(), str(i[2].year))
    if not os.path.exists(year_dir):
        os.makedirs(year_dir)
    gz_path = os.path.join(year_dir, i[0] + ".csv.gz")
    csv_path = os.path.join(year_dir, i[0] + ".csv")
    if os.path.exists(gz_path):
        return
    print(gz_path)
    print(i)
    print(tdates[i[2]])
    print(i[2])
    api = TqApi(account=TqSim())
    # api = TqApi(account=TqSim(),url="ws://192.168.56.1:7777")
    try:
        td = DataDownloader(api,
                            symbol_list=[ex.upper() + "." + i[1]],
                            dur_sec=0,
                            # session window: previous trade day 17:00 -> trade day 16:00
                            start_dt=tdates[i[2]] + timedelta(hours=17),
                            end_dt=i[2] + timedelta(hours=16),
                            csv_file_name=csv_path)
        while not td.is_finished():
            api.wait_update()
            # print("progress: tick:%.2f%%" % td.get_progress())
        print("done:" + gz_path)
    finally:
        # bug fix: close the websocket connection even when the download raises;
        # the original only closed on the success path.
        api.close()
    # bug fix: compress in chunks instead of reading the entire CSV into memory.
    with open(csv_path, 'rb') as f:
        with gzip.GzipFile(filename=csv_path + ".gz", mode='w', compresslevel=9) as gf:
            while True:
                chunk = f.read(1 << 20)
                if not chunk:
                    break
                gf.write(chunk)
    os.remove(csv_path)
    del td
    del api
    gc.collect()
def download_bar(self, symbol: str, exchange: Exchange, interval: Interval, start_dt: datetime, end_dt: datetime):
    """Download bar (K-line) data for one contract into a CSV file via tqsdk.

    Args:
        symbol: contract code without the exchange prefix.
        exchange: Exchange enum; its .value is joined with symbol as "EX.symbol".
        interval: bar interval; mapped to seconds through INTERVAL_2_SEC_MAP.
        start_dt / end_dt: download window.

    Any pre-existing CSV with the same generated name is deleted first.
    """
    csv_file_name = self.make_csvfile_name(symbol=symbol, exchange=exchange, interval=interval, start_dt=start_dt, end_dt=end_dt)
    if os.path.exists(csv_file_name):
        print(csv_file_name + "已存在,删除")
        os.remove(csv_file_name)
    # closing() guarantees the connection is released once the download ends
    # (the original wrapped the api in two nested context managers).
    with closing(TqApi(TqSim())) as api:
        download_task = DataDownloader(
            api,
            symbol_list=[exchange.value + '.' + symbol],
            dur_sec=INTERVAL_2_SEC_MAP[interval],
            start_dt=start_dt,
            end_dt=end_dt,
            csv_file_name=csv_file_name)
        while not download_task.is_finished():
            # bug fix: drive the LOCAL api created above — the original called
            # self.api.wait_update(), which never advances this download.
            api.wait_update()
            print("tq download progress: ", "%.2f%%" % download_task.get_progress())
def download_history(self, start, end, symbol, interval):
    """Download history into a temporary CSV via tqsdk and load it as a DataFrame.

    Returns the DataFrame with "datetime" parsed and, for bar data
    (interval > 0), "<symbol>.<field>" columns renamed to plain field names;
    returns None when no CSV file was produced.
    """
    tmp_csv = "tqdata.csv"
    task = DataDownloader(api=self.api,
                          symbol_list=[symbol],
                          start_dt=start,
                          end_dt=end,
                          dur_sec=interval,
                          csv_file_name=tmp_csv)
    while not task.is_finished():
        self.api.wait_update()
    if not os.path.exists(tmp_csv):
        return None
    frame = pd.read_csv(tmp_csv)
    frame["datetime"] = pd.to_datetime(frame["datetime"])
    if interval > 0:
        # one rename call instead of six chained ones
        fields = ("open", "high", "low", "close", "volume", "open_oi")
        frame = frame.rename(columns={f"{symbol}.{name}": name for name in fields})
    os.unlink(tmp_csv)
    return frame
def download(api):
    """Download 1-minute bars of SHFE.cu2012 for 2020-06-01 into kline.csv."""
    task = DataDownloader(api,
                          symbol_list="SHFE.cu2012",
                          dur_sec=60,
                          start_dt=datetime(2020, 6, 1, 6, 0, 0),
                          end_dt=datetime(2020, 6, 1, 16, 0, 0),
                          csv_file_name="kline.csv")
    while not task.is_finished():
        api.wait_update()
        print(f"progress: kline: {task.get_progress():8.2f} ")
def get_1min(df_zhuli, day2idx, idx2day):
    """Download 1-minute K-line data for every contract row in df_zhuli.

    For each row the window runs from the previous trading day 20:00 (or 08:00
    of the first day when there is no previous trading day) up to 16:00 of the
    contract's last date. Files are written to data/1minute/<code>.csv.
    Relies on module-level `api` and the `myfun2` helper.
    """
    for row in tqdm(range(len(df_zhuli))):
        last_day = df_zhuli['date_max'][row]
        end_ts = datetime.datetime(last_day.year, last_day.month, last_day.day, 16)
        # Intraday data must start from the previous trading day's night session.
        first_day = df_zhuli['date_min'][row]
        ymd = first_day.year * 10000 + first_day.month * 100 + first_day.day
        pos = day2idx[ymd]
        if pos == 0:
            # no earlier trading day recorded: start the same morning at 08:00
            begin_ts = datetime.datetime(ymd // 10000, ymd % 10000 // 100, ymd % 10000 % 100, 8)
        else:
            prev = idx2day[pos - 1]
            begin_ts = datetime.datetime(prev // 10000, prev % 10000 // 100, prev % 10000 % 100, 20)
        num = myfun2(df_zhuli['code'][row])
        symbol = df_zhuli['symbol'][row]
        exchange = df_zhuli['exchange'][row]
        # CZCE symbols keep their case and drop the leading digit of the number part.
        if exchange != 'CZCE':
            code = exchange + '.' + symbol.lower() + num
        else:
            code = exchange + '.' + symbol + num[1:]
        if code == 'CZCE.JR003':
            continue  # this particular file is broken
        save_path = os.path.join('data/1minute', code + ".csv")
        kd = DataDownloader(api,
                            symbol_list=code,
                            dur_sec=60,
                            start_dt=begin_ts,
                            end_dt=end_ts,
                            csv_file_name=save_path)
        try:
            while not kd.is_finished():
                api.wait_update()
                kd.get_progress()
        except Exception as e:
            # best-effort: report the failing contract and move on
            print(code)
            print(e)
def scrawl_day_tick(date, ex):
    """Download tick data for every symbol traded on `date` on exchange `ex`.

    Builds the list of (file stem, symbol, trade date) for the given date,
    maps each trading day to its previous trading day, then downloads each
    contract's session (previous day 17:00 -> trade day 15:00) into
    TICK_PATH/<EX>/<year>/<stem>.csv, skipping "sc" contracts and days whose
    .csv.gz already exists. NOTE(review): unlike scrawl_single_tick, this
    writes the plain .csv only — compression presumably happens elsewhere.
    """
    agg = agg_future_dayk()
    logging.info("start filter existed symbols")
    path = TICK_PATH
    logging.info("start getting tick data")
    api = TqApi(account=TqSim())
    try:
        logging.info(ex + ": start getting tick")
        currentYearData = agg.getCurrentYearData(ex)
        currentYearData = currentYearData[currentYearData['date'] == date]
        pathpair = list(
            map(
                lambda x: (x[1].strftime('%Y%m%d') + "-" + x[0], x[0],
                           datetime.utcfromtimestamp(x[1].timestamp())),
                currentYearData[['symbol', 'date']].values))
        trading_dates = get_trading_calendar(security_type="future", exchange="shfe")
        # map each trading day to the previous one (anchors the session start)
        tdates = {}
        for idx in range(1, len(trading_dates)):
            tdates[datetime.strptime(trading_dates[idx], '%Y%m%d')] = \
                datetime.strptime(trading_dates[idx - 1], '%Y%m%d')
        for item in pathpair:
            if item[1].startswith("sc"):
                continue
            year_dir = os.path.join(path, ex.upper(), str(item[2].year))
            if not os.path.exists(year_dir):
                os.makedirs(year_dir)
            gz_path = os.path.join(year_dir, item[0] + ".csv.gz")
            csv_path = os.path.join(year_dir, item[0] + ".csv")
            if not os.path.exists(gz_path):
                td = DataDownloader(api,
                                    symbol_list=[ex.upper() + "." + item[1]],
                                    dur_sec=0,
                                    start_dt=tdates[item[2]] + timedelta(hours=17),
                                    end_dt=item[2] + timedelta(hours=15),
                                    csv_file_name=csv_path)
                while not td.is_finished():
                    api.wait_update()
                    # print("progress: tick:%.2f%%" % td.get_progress())
                print("done:" + gz_path)
        logging.info(ex + ": complete getting tick")
    finally:
        # bug fix: the api connection was never closed, leaking the websocket
        api.close()
def run(instrumentid, period, exchangeid='SHFE'):
    """Download K-lines for `instrumentid` from 2016-01-01 up to now into a CSV.

    NOTE(review): the `exchangeid` argument is immediately overwritten with the
    exchange looked up via get_inst_info(), so its default is effectively dead;
    kept for interface compatibility.

    Returns the generated CSV file name ("<inst>_<period>.csv").
    """
    api = TqApi(TqSim())
    info = get_inst_info(instrumentid)
    exchangeid = info['ExchangeID']
    # default to 780-second bars when no period is given
    bar_seconds = 780 if period is None else int(period)
    full_symbol = ''.join([exchangeid, '.', instrumentid])
    datafile = instrumentid + '_' + str(bar_seconds) + '.csv'
    kd = DataDownloader(api,
                        symbol_list=[full_symbol],
                        dur_sec=bar_seconds,
                        start_dt=datetime(2016, 1, 1),
                        end_dt=datetime.now(),
                        csv_file_name=datafile)
    with closing(api):
        while not kd.is_finished():
            api.wait_update()
            print("progress: kline: %.2f%%" % kd.get_progress())
    return datafile
def main():
    """Download the full tick history of the module-level `instid` into <inst>tick.csv."""
    api = TqApi(TqSim())
    contract = instid.split('.')[1]
    tick_csv = contract + 'tick.csv'
    begin = datetime(2016, 1, 1)
    end = datetime.now()
    # dur_sec=0 selects tick-level data in tqsdk's DataDownloader.
    downloader = DataDownloader(api,
                                symbol_list=[instid],
                                dur_sec=0,
                                start_dt=begin,
                                end_dt=end,
                                csv_file_name=tick_csv)
    while not downloader.is_finished():
        api.wait_update()
        print("progress: tick:%.2f%%" % downloader.get_progress())
def use_large_df():
    """Download a long K-line history, normalize it, and build a KlineAnalyze.

    Downloads `freq` bars for `symbol` (with a frequency-dependent warm-up
    window prepended before start_dt), strips the symbol prefix from the CSV
    columns, and returns a KlineAnalyze over
    ['symbol', 'dt', 'open', 'close', 'high', 'low', 'vol'].
    """
    symbol = "*****@*****.**"
    freq = '5min'
    file_csv = f"{symbol}_kline_{freq}.csv"
    start_dt = datetime(2017, 1, 1, 6, 0, 0)
    end_dt = datetime(2020, 5, 1, 6, 0, 0)
    # bar duration in seconds and warm-up lookback per frequency
    freq_dur_sec = {"1min": 60, '5min': 300, '30min': 1800, 'D': 3600 * 24}
    freq_delta = {
        "1min": timedelta(days=20),
        '5min': timedelta(days=100),
        '30min': timedelta(days=300),
        'D': timedelta(days=3000),
    }
    api = TqApi()
    downloader = DataDownloader(api,
                                symbol_list=symbol,
                                dur_sec=freq_dur_sec[freq],
                                start_dt=start_dt - freq_delta[freq],
                                end_dt=end_dt,
                                csv_file_name=file_csv)
    with closing(api):
        while not downloader.is_finished():
            api.wait_update()
            print("download progress: %.2f%%" % downloader.get_progress())
    kline = pd.read_csv(file_csv)
    # drop the "<symbol>." prefix tqsdk puts on every column
    kline.columns = [c.replace(symbol + ".", "") for c in kline.columns]
    kline.rename({"volume": "vol"}, axis=1, inplace=True)
    kline.loc[:, "symbol"] = symbol
    # keep only the datetime part before the fractional seconds
    kline.loc[:, "dt"] = kline['datetime'].apply(lambda x: x.split(".")[0])
    kline = kline[['symbol', 'dt', 'open', 'close', 'high', 'low', 'vol']]
    print(kline.shape)
    return KlineAnalyze(kline)
from tqsdk.tools import DataDownloader
import getopt, os, sys, re
import json

# Download the complete tick history of rb1905 into rb1905tick.csv.
api = TqApi(TqSim())
inst = 'rb1905'
exchangeid = 'SHFE'
instid = f'{exchangeid}.{inst}'
tickfile = inst + 'tick.csv'
# interval = 60
stdt = datetime(2016, 1, 1)
eddt = datetime.now()
# dur_sec=0 requests tick-level data from tqsdk's DataDownloader.
td = DataDownloader(api,
                    symbol_list=[instid],
                    dur_sec=0,
                    start_dt=stdt,
                    end_dt=eddt,
                    csv_file_name=tickfile)
while not td.is_finished():
    api.wait_update()
    print("progress: tick:%.2f%%" % td.get_progress())
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'chengzhi'

from datetime import datetime
from contextlib import closing
from tqsdk import TqApi, TqAuth
from tqsdk.tools import DataDownloader

api = TqApi(auth=TqAuth("信易账户", "账户密码"))

# cu1805 1-minute bars from 2018-01-01 06:00 to 2018-06-01 16:00.
kd = DataDownloader(api, symbol_list="SHFE.cu1805", dur_sec=60,
                    start_dt=datetime(2018, 1, 1, 6, 0, 0),
                    end_dt=datetime(2018, 6, 1, 16, 0, 0),
                    csv_file_name="kline.csv")
# T1809 order-book ticks from 2018-05-01 00:00 to 2018-07-01 00:00.
td = DataDownloader(api, symbol_list="CFFEX.T1809", dur_sec=0,
                    start_dt=datetime(2018, 5, 1),
                    end_dt=datetime(2018, 7, 1),
                    csv_file_name="tick.csv")
# closing() guarantees the api releases its resources once both downloads end.
with closing(api):
    while not kd.is_finished() or not td.is_finished():
        api.wait_update()
        print("progress: kline: %.2f%% tick:%.2f%%" % (kd.get_progress(), td.get_progress()))
with closing(tq_api):
    for request in download_request_list:
        task_name = request['symbol']
        file_name = os.path.join(
            application_path, 'data_downloaded',
            f'{request["symbol"]}_{period(request["period"])}.csv')
        # Clamp the end date: if the request reaches into the future, stop at yesterday.
        end_dt = request['end'] if today > request['end'] else today - timedelta(days=1)
        task = DataDownloader(
            tq_api,
            symbol_list=request['symbol'],
            dur_sec=request['period'],
            start_dt=request['start'],
            end_dt=end_dt,
            csv_file_name=file_name
        )
        while not task.is_finished():
            tq_api.wait_update()
            print(f'正在下载 [{task_name}] 的 {period(request["period"])} 数据,已完成: {task.get_progress():,.3f}%。')
        # Normalize the downloaded CSV header: strip the "<symbol>." prefix
        # so the DataFrame columns are the plain field names.
        if task.is_finished():
            df = pd.read_csv(file_name)
            column_list = tick_column_list if period(request['period']) == 'tick' else quote_column_list
            for column in column_list:
                prefixed = ''.join([request['symbol'], '.', column])
                if prefixed in df.columns:
                    df.rename(columns={prefixed: column}, inplace=True)
            df.to_csv(file_name, index=False)