Example #1
0
def scrawl_single_tick(i, path, ex, tdates):
    """Download one contract-day of tick data via TqSdk and store it gzipped.

    Args:
        i: triple of (file stem, symbol, trade date as datetime) — assumed to
           come from the pairing built by the caller; TODO confirm.
        path: root output directory.
        ex: exchange code; upper-cased for paths and symbol construction.
        tdates: mapping trade date -> previous trade date, used to start the
            download at the prior day's night session.
    """
    import shutil  # local import: used only for the streaming gzip copy below

    the_dir1 = os.path.join(path, ex.upper(), str(i[2].year))
    if not os.path.exists(the_dir1):
        os.makedirs(the_dir1)
    the_dir = os.path.join(path, ex.upper(), str(i[2].year), i[0] + ".csv.gz")
    the_dir2 = os.path.join(path, ex.upper(), str(i[2].year), i[0] + ".csv")
    if not os.path.exists(the_dir):
        print(the_dir)
        print(i)
        print(tdates[i[2]])
        print(i[2])
        api = TqApi(account=TqSim())
        # api = TqApi(account=TqSim(),url="ws://192.168.56.1:7777")
        td = DataDownloader(api,
                            symbol_list=[ex.upper() + "." + i[1]],
                            dur_sec=0,  # 0 = raw tick data
                            # previous trade day 17:00 -> trade day 16:00
                            start_dt=tdates[i[2]] + timedelta(hours=17),
                            end_dt=i[2] + timedelta(hours=16),
                            csv_file_name=the_dir2)
        while not td.is_finished():
            api.wait_update()
        print("done:" + the_dir)
        api.close()
        # BUG FIX: stream the CSV into the gzip file instead of loading the
        # whole file into memory (tick CSVs can be very large).
        with open(the_dir2, 'rb') as f:
            with gzip.open(the_dir2 + ".gz", 'wb', compresslevel=9) as gf:
                shutil.copyfileobj(f, gf)
        os.remove(the_dir2)
        del td
        del api
        gc.collect()
Example #2
0
    def download_bar(self, symbol: str, exchange: Exchange, interval: Interval,
                     start_dt: datetime, end_dt: datetime):
        """Download bar (K-line) data for one contract via TqSdk into a CSV.

        Deletes any pre-existing CSV with the same generated name, then blocks
        until the download task finishes. The TqApi connection is released by
        the ``with`` block on exit.

        Args:
            symbol: contract symbol, e.g. "cu1805".
            exchange: exchange enum; its ``value`` is joined with the symbol.
            interval: bar interval enum, mapped to seconds via
                INTERVAL_2_SEC_MAP.
            start_dt / end_dt: download range.
        """
        csv_file_name = self.make_csvfile_name(symbol=symbol,
                                               exchange=exchange,
                                               interval=interval,
                                               start_dt=start_dt,
                                               end_dt=end_dt)
        if os.path.exists(csv_file_name):
            print(csv_file_name + "已存在,删除")
            os.remove(csv_file_name)

        # TqApi supports the context-manager protocol and closes itself on
        # exit; the original's extra `with closing(api)` was redundant.
        with TqApi(TqSim()) as api:
            download_task = DataDownloader(
                api,
                symbol_list=[exchange.value + '.' + symbol],
                dur_sec=INTERVAL_2_SEC_MAP[interval],
                start_dt=start_dt,
                end_dt=end_dt,
                csv_file_name=csv_file_name)
            while not download_task.is_finished():
                # BUG FIX: the original called self.api.wait_update(), pumping
                # a different connection than the one running this download.
                api.wait_update()
                print("tq download progress: ",
                      "%.2f%%" % download_task.get_progress())
Example #3
0
    def download_history(self, start, end, symbol, interval):
        """Download history to a temporary CSV and return it as a DataFrame.

        Args:
            start / end: download range passed straight to DataDownloader.
            symbol: full symbol, used both for the request and as the column
                prefix to strip from the resulting CSV.
            interval: bar duration in seconds; 0 requests tick data, in which
                case the columns are left untouched.

        Returns:
            pandas.DataFrame with ``datetime`` parsed, or None when the
            download produced no file.
        """
        csv_file = "tqdata.csv"
        status = DataDownloader(api=self.api,
                                symbol_list=[symbol],
                                start_dt=start,
                                end_dt=end,
                                dur_sec=interval,
                                csv_file_name=csv_file)
        while not status.is_finished():
            self.api.wait_update()

        # Guard clause: nothing downloaded.
        if not os.path.exists(csv_file):
            return None

        df = pd.read_csv(csv_file)
        df["datetime"] = pd.to_datetime(df["datetime"])

        if interval > 0:
            # Strip the "<symbol>." prefix with a single rename call instead
            # of six chained renames.
            fields = ("open", "high", "low", "close", "volume", "open_oi")
            df = df.rename(columns={f"{symbol}.{f}": f for f in fields})

        os.unlink(csv_file)
        return df
Example #4
0
def download(api):
    """Fetch SHFE.cu2012 1-minute bars for 2020-06-01 (06:00-16:00) into
    kline.csv, blocking until the download finishes.
    """
    task = DataDownloader(api,
                          symbol_list="SHFE.cu2012",
                          dur_sec=60,
                          start_dt=datetime(2020, 6, 1, 6, 0, 0),
                          end_dt=datetime(2020, 6, 1, 16, 0, 0),
                          csv_file_name="kline.csv")
    while not task.is_finished():
        api.wait_update()
        print(f"progress: kline: {task.get_progress():8.2f} ")
Example #5
0
def get_1min(df_zhuli, day2idx, idx2day):
    """Download 1-minute K-line data for each dominant contract in df_zhuli.

    Relies on module-level names `api`, `myfun2`, `tqdm` and `DataDownloader`
    being in scope.

    Args:
        df_zhuli: DataFrame with columns 'date_max', 'date_min', 'code',
            'symbol', 'exchange' — schema inferred from usage; TODO confirm.
        day2idx: mapping from yyyymmdd int to trading-day index.
        idx2day: mapping from trading-day index back to yyyymmdd int.
    """
    # # download 1-minute K-line data
    for i in tqdm(range(len(df_zhuli))):
        # if i >= 173:
        #     continue
        # break
        day1 = df_zhuli['date_max'][i]
        # end at 16:00 on the contract's last active day
        et = datetime.datetime(day1.year, day1.month, day1.day, 16)

        # when fetching intraday data, start from the previous trading day's
        # night session
        day2 = df_zhuli['date_min'][i]
        st = day2.year * 10000 + day2.month * 100 + day2.day
        idx = day2idx[st]

        if idx == 0:
            # no previous trading day: start at 08:00 of the same day
            st = datetime.datetime(st // 10000, st % 10000 // 100,
                                   st % 10000 % 100, 8)
        else:
            # start at 20:00 of the previous trading day
            st = idx2day[idx - 1]
            st = datetime.datetime(st // 10000, st % 10000 // 100,
                                   st % 10000 % 100, 20)

        num = myfun2(df_zhuli['code'][i])

        symbol = df_zhuli['symbol'][i]
        exchange = df_zhuli['exchange'][i]

        # CZCE keeps the upper-case symbol and drops the first digit of the
        # contract number; other exchanges use lower-case symbols.
        if exchange != 'CZCE':
            code = exchange + '.' + symbol.lower() + num
        else:
            code = exchange + '.' + symbol + num[1:]
        if code == 'CZCE.JR003': continue  # this file is known to be broken
        # print(code, st, et)
        save_path = os.path.join('data/1minute', code + ".csv")

        # if code not in ['CZCE.JR009', 'SHFE.cu1902', 'SHFE.wr2005', 'SHFE.wr2101']:
        #     continue

        kd = DataDownloader(api,
                            symbol_list=code,
                            dur_sec=60,
                            start_dt=st,
                            end_dt=et,
                            csv_file_name=save_path)

        try:
            while not kd.is_finished():
                api.wait_update()
                # print("progress: kline: %.2f" % (kd.get_progress()))
                kd.get_progress()
        except Exception as e:
            # best-effort: log the failing contract and move on
            print(code)
            print(e)
Example #6
0
def scrawl_day_tick(date, ex):
    """Download tick CSVs for every symbol of exchange `ex` traded on `date`.

    Skips symbols starting with "sc" and files whose gzipped output already
    exists. Files land as plain .csv here; compression is presumably done by
    a separate step — TODO confirm.
    """
    agg = agg_future_dayk()
    logging.info("start filter existed symbols")
    path = TICK_PATH
    logging.info("start getting tick data")
    api = TqApi(account=TqSim())
    try:
        logging.info(ex + ": start getting tick")
        currentYearData = agg.getCurrentYearData(ex)
        currentYearData = currentYearData[currentYearData['date'] == date]
        # (file stem, symbol, trade date as naive datetime) triples
        pathpair = list(
            map(
                lambda x: (x[1].strftime('%Y%m%d') + "-" + x[0], x[0],
                           datetime.utcfromtimestamp(x[1].timestamp())),
                currentYearData[['symbol', 'date']].values))
        trading_dates = get_trading_calendar(security_type="future",
                                             exchange="shfe")
        # Map each trading day to the previous one so downloads can start at
        # the prior day's 17:00 night session.
        tdates = {}
        for k in range(1, len(trading_dates)):
            tdates[datetime.strptime(trading_dates[k], '%Y%m%d')] = \
                datetime.strptime(trading_dates[k - 1], '%Y%m%d')
        for i in pathpair:
            if i[1].startswith("sc"):
                continue
            the_dir1 = os.path.join(path, ex.upper(), str(i[2].year))
            if not os.path.exists(the_dir1):
                os.makedirs(the_dir1)
            the_dir = os.path.join(path, ex.upper(), str(i[2].year),
                                   i[0] + ".csv.gz")
            the_dir2 = os.path.join(path, ex.upper(), str(i[2].year),
                                    i[0] + ".csv")
            if not os.path.exists(the_dir):
                td = DataDownloader(api,
                                    symbol_list=[ex.upper() + "." + i[1]],
                                    dur_sec=0,  # 0 = raw tick data
                                    start_dt=tdates[i[2]] + timedelta(hours=17),
                                    end_dt=i[2] + timedelta(hours=15),
                                    csv_file_name=the_dir2)
                while not td.is_finished():
                    api.wait_update()
                print("done:" + the_dir)
        logging.info(ex + ": complete getting tick")
    finally:
        # BUG FIX: the original never closed the TqApi connection (leak).
        api.close()
Example #7
0
def run(instrumentid, period, exchangeid='SHFE'):
    """Download K-lines for `instrumentid` since 2016-01-01 and return the
    CSV filename.

    Args:
        instrumentid: contract id, e.g. 'rb1905'.
        period: bar duration in seconds; defaults to 780 when None.
        exchangeid: fallback exchange id, used only when the instrument info
            does not provide one.
    """
    api = TqApi(TqSim())
    inst = instrumentid
    instinfo = get_inst_info(inst)
    # BUG FIX: the original unconditionally overwrote the `exchangeid`
    # parameter, making it dead; use it as a fallback instead.
    exchangeid = instinfo.get('ExchangeID', exchangeid)
    period = int(period) if period is not None else 780

    instid = ''.join([exchangeid, '.', inst])
    datafile = inst + '_' + str(period) + '.csv'
    enddt = datetime.now()
    kd = DataDownloader(api, symbol_list=[instid], dur_sec=period,
                        start_dt=datetime(2016, 1, 1), end_dt=enddt,
                        csv_file_name=datafile)

    # closing() guarantees the connection is released after the download.
    with closing(api):
        while not kd.is_finished():
            api.wait_update()
            print(("progress: kline: %.2f%%" % kd.get_progress()))
        return datafile
Example #8
0
def main():
    """Download the full tick history of the module-level `instid` to a CSV
    named '<inst>tick.csv', blocking until finished.
    """
    api = TqApi(TqSim())
    inst = instid.split('.')[1]
    tickfile = inst + 'tick.csv'
    stdt = datetime(2016, 1, 1)
    eddt = datetime.now()
    # eddt = datetime(2018, 8, 30)
    # dur_sec=0 requests raw order-book tick data rather than K-lines.
    td = DataDownloader(api,
                        symbol_list=[instid],
                        dur_sec=0,
                        start_dt=stdt,
                        end_dt=eddt,
                        csv_file_name=tickfile)

    try:
        while not td.is_finished():
            api.wait_update()
            print(("progress:  tick:%.2f%%" % td.get_progress()))
    finally:
        # BUG FIX: the original never closed the TqApi connection (leak).
        api.close()
Example #9
0
def use_large_df():
    """Download a long 5-minute K-line history for `symbol` and wrap the
    resulting frame in a KlineAnalyze instance.
    """
    symbol = "*****@*****.**"
    freq = '5min'

    file_csv = f"{symbol}_kline_{freq}.csv"
    start_dt = datetime(2017, 1, 1, 6, 0, 0)
    end_dt = datetime(2020, 5, 1, 6, 0, 0)

    # Bar duration (seconds) and warm-up lookback for each supported freq.
    freq_dur_sec = {"1min": 60, '5min': 300, '30min': 1800, 'D': 3600 * 24}
    freq_delta = {
        "1min": timedelta(days=20),
        '5min': timedelta(days=100),
        '30min': timedelta(days=300),
        'D': timedelta(days=3000)
    }

    api = TqApi()
    downloader = DataDownloader(api,
                                symbol_list=symbol,
                                dur_sec=freq_dur_sec[freq],
                                start_dt=start_dt - freq_delta[freq],
                                end_dt=end_dt,
                                csv_file_name=file_csv)

    # Release the connection once the download loop completes.
    with closing(api):
        while not downloader.is_finished():
            api.wait_update()
            print("download progress: %.2f%%" % downloader.get_progress())

    kline = pd.read_csv(file_csv)
    prefix = symbol + "."
    kline.columns = [col.replace(prefix, "") for col in kline.columns]
    kline.rename({"volume": "vol"}, axis=1, inplace=True)
    kline.loc[:, "symbol"] = symbol
    # Drop the fractional-second suffix from the datetime strings.
    kline.loc[:, "dt"] = kline['datetime'].apply(lambda ts: ts.split(".")[0])
    kline = kline[['symbol', 'dt', 'open', 'close', 'high', 'low', 'vol']]
    print(kline.shape)
    return KlineAnalyze(kline)
Example #10
0
from tqsdk.tools import DataDownloader

import getopt, os, sys, re
import json

# Script: download the full tick history of one contract to '<inst>tick.csv'.
api = TqApi(TqSim())

inst = 'rb1905'
exchangeid = 'SHFE'
instid = ''.join([exchangeid, '.', inst])

tickfile = inst + 'tick.csv'
# interval = 60
stdt = datetime(2016, 1, 1)
eddt = datetime.now()

# Download minute data for cu1805, cu1807, IC1803 from 2018-01-01 to
# 2018-06-01, all aligned to cu1805's timestamps.
# E.g. during cu1805's night session, IC1803's fields are N/A.
# E.g. cu1805 does not trade 13:00-13:30, so IC1803's bars in that window
# are skipped.
# Download T1809 order-book tick data from 2018-05-01 to 2018-07-01.
# dur_sec=0 requests raw ticks.
td = DataDownloader(api,
                    symbol_list=[instid],
                    dur_sec=0,
                    start_dt=stdt,
                    end_dt=eddt,
                    csv_file_name=tickfile)

while not td.is_finished():
    api.wait_update()
    print("progress:  tick:%.2f%%" % td.get_progress())
Example #11
0
#!/usr/bin/env python
#  -*- coding: utf-8 -*-
__author__ = 'chengzhi'

from datetime import datetime
from contextlib import closing
from tqsdk import TqApi, TqAuth
from tqsdk.tools import DataDownloader

api = TqApi(auth=TqAuth("信易账户", "账户密码"))
# Download cu1805 minute bars from 2018-01-01 06:00 to 2018-06-01 16:00.
kd = DataDownloader(api,
                    symbol_list="SHFE.cu1805",
                    dur_sec=60,
                    start_dt=datetime(2018, 1, 1, 6, 0, 0),
                    end_dt=datetime(2018, 6, 1, 16, 0, 0),
                    csv_file_name="kline.csv")
# Download T1809 order-book tick data (dur_sec=0) from 2018-05-01 00:00 to
# 2018-07-01 00:00.
td = DataDownloader(api,
                    symbol_list="CFFEX.T1809",
                    dur_sec=0,
                    start_dt=datetime(2018, 5, 1),
                    end_dt=datetime(2018, 7, 1),
                    csv_file_name="tick.csv")
# `with closing(...)` guarantees the connection is released once both
# downloads have finished.
with closing(api):
    while not kd.is_finished() or not td.is_finished():
        api.wait_update()
        print("progress: kline: %.2f%% tick:%.2f%%" %
              (kd.get_progress(), td.get_progress()))
    # NOTE(review): this fragment starts mid-scope; `tq_api`,
    # `download_request_list`, `period`, `application_path`, `today`,
    # `tick_column_list` and `quote_column_list` are defined elsewhere.
    with closing(tq_api):
        for request in download_request_list:
            task_name = request['symbol']
            file_name = os.path.join(application_path,
                                     'data_downloaded',
                                     f'{request["symbol"]}_{period(request["period"])}.csv')
            task = DataDownloader(
                tq_api,
                symbol_list=request['symbol'],
                dur_sec=request['period'],
                start_dt=request['start'],
                # cap the end date at yesterday when the requested end is not
                # yet in the past — TODO confirm intent
                end_dt=request['end'] if today > request['end'] else today - timedelta(days=1),
                csv_file_name=file_name
            )

            while not task.is_finished():
                tq_api.wait_update()
                print(f'正在下载 [{task_name}] 的 {period(request["period"])} 数据,已完成: {task.get_progress():,.3f}%。')

            # Fix up the downloaded csv's header, i.e. the pandas.DataFrame
            # columns: strip the "<symbol>." prefix from known column names.
            if task.is_finished():
                df = pd.read_csv(file_name)
                if period(request['period']) == 'tick':
                    column_list = tick_column_list
                else:
                    column_list = quote_column_list
                for column in column_list:
                    column_x = ''.join([request['symbol'], '.', column])
                    if column_x in df.columns:
                        df.rename(columns={column_x: column}, inplace=True)
                df.to_csv(file_name, index=False)