def parse(year):
    """Split one year of CZCE history prices into per-contract CSV records.

    Reads ``<tmp_work_path>/future_price_origin/history/czce_<year>.txt``
    (tried as gbk first, then utf-8 when gbk decoding fails), and for every
    row whose text starts with ``year`` rewrites the contract name and hands
    the selected columns to ``insert_one_day_future_price_data_to_csv``.

    Source row layout ('|' separated; spaces, commas and tabs are removed):
        trade date | variety+month | previous settle | open | high | low |
        close | settle | change 1 | change 2 | volume | open interest |
        open-interest change | turnover (10k yuan) | delivery settle price
    Example row:
        2010-01-04|CF003|16260|16305|16540|16295|16370|16455|110|195|1734|9614|768|14264.61|0

    :param year: year as a string (it is concatenated into log messages and
        used as the row prefix); the source files cover 2010 through 2021.
    :return: None
    """
    file_path = os.path.join(tmp_work_path, 'future_price_origin', 'history',
                             'czce_%s.txt' % year)
    logger.info(year + ' price file path: ' + file_path)
    try:
        with open(file_path, 'r', encoding='gbk') as f:
            raw_text = f.read()
    except UnicodeDecodeError as e:
        # Some yearly files are not gbk; retry the whole file as utf-8.
        logger.warn(str(e))
        with open(file_path, 'r', encoding='utf-8') as f:
            raw_text = f.read()

    # Columns copied to the output after date and contract name: everything
    # except the two "change" columns (indexes 8 and 9).
    kept_columns = (2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14)

    for raw_line in raw_text.strip().split('\n'):
        row_text = raw_line.strip()
        if not row_text.startswith(year):
            # Header, footer or blank line: only data rows begin with the year.
            continue
        for noise in (' ', ',', '\t'):
            row_text = row_text.replace(noise, '')
        items = row_text.split('|')
        logger.info(str(items))

        # Start parsing.
        trade_date = items[0].replace('-', '')
        czce_future_contract_name = items[1]

        # Extract the variety code: exactly one known code must occur in the
        # exchange's contract name.
        matched_codes = [
            code for code in FUTURE_VARIETIES.keys()
            if code in czce_future_contract_name
        ]
        future_variety_code = matched_codes[-1] if matched_codes else ''
        if len(matched_codes) != 1:
            logger.error('合约代码对不上, czce_future_contract_name: ' +
                         czce_future_contract_name)
            exit_now()

        # Rename the contract.
        future_contract_name = rename_czce_future_contract_name(
            trade_date, czce_future_contract_name, future_variety_code)
        logger.info(czce_future_contract_name + ' >>> ' + future_contract_name)

        # Output record: trade date, contract name, previous settle, open,
        # high, low, close, settle, volume, open interest, open-interest
        # change, turnover (10k yuan), delivery settle price.
        new_line = [trade_date, future_contract_name]
        new_line.extend(items[index] for index in kept_columns)
        logger.info('new_line=' + str(new_line))

        # Insert the record into the contract's file.
        future_price_contract_file_path = get_future_price_contract_file_path(
            future_variety_code, future_contract_name)
        logger.info('future_price_contract_file_path=' +
                    future_price_contract_file_path)
        insert_one_day_future_price_data_to_csv(
            future_price_contract_file_path, new_line)
    logger.info('parse_%s success!' % year)
예제 #2
0
 def gen_new_cookie(self, reason):
     '''Log why the cookie was rejected, then start the credential proxy.

     :param reason: value interpolated into the warning message.
     '''
     warning_text = "Cookie invalid - reason: {} - loading proxy to regenerate".format(
         reason)
     logger.warn(warning_text)

     # Instructions shown to the user before the proxy is started.
     user_hints = (
         "In order to get a new token, we need to intercept it from the real NSO app. Please make sure you have a smartphone or Android emulator to continue.",
         "If your smartphone runs Android 7.0 or higher, you will need to use an Android emulator or an iOS device to continue.",
     )
     for hint in user_hints:
         logger.info(hint)

     start_credential_proxy()
예제 #3
0
def _parse_holding_section(section_lines, label, rank_key, total_key,
                           future_holding_dict, source):
    """Parse one ranking table of a holding report into future_holding_dict.

    :param section_lines: lines of the table. The first line must contain
        ``label`` and the last line must contain '总计' (the total row).
    :param label: column title identifying the table ('成交量', '持买单量' or
        '持卖单量'); also used as the prefix of every error message.
    :param rank_key: key of the per-rank mapping to fill; each entry becomes
        ``{rank: [member_name, value, change]}``.
    :param total_key: key that receives ``[value_sum, change_sum]``.
    :param future_holding_dict: record being populated; mutated in place.
    :param source: archive path plus member name, appended to error messages.
    :return: True when the table is consistent (a table made only of header
        and total row is accepted and leaves the record untouched), False
        when its layout or its totals do not match.
    """
    location = ', future_holding_downloaded_origin_file_path=' + source
    if label not in section_lines[0] or '总计' not in section_lines[-1]:
        logger.error(label + '数据格式表头有变动请检查' + location)
        return False
    if len(section_lines) <= 2:
        # Header and total row only: nothing to record.
        return True

    # Total row: "总计 <value_total> [<change_total>]".
    totals = section_lines[-1].replace('总计', '').strip().replace(',', '').split()
    if len(totals) == 1:
        total_v, total_i_a_d = int(totals[0]), 0
    elif len(totals) == 2:
        total_v, total_i_a_d = int(totals[0]), int(totals[1])
    else:
        logger.error(label + '数据格式表尾有变动请检查' + location)
        return False

    sum_v, sum_i_a_d = 0, 0
    for row in section_lines[1:-1]:
        fields = row.split()
        # Expected columns: rank, member name, value, change.
        if len(fields) != 4:
            logger.error(label + '数据格式有变动请检查' + location)
            return False
        v = int(fields[2].replace(',', ''))
        i_a_d = int(fields[3].replace(',', ''))
        future_holding_dict[rank_key][int(fields[0])] = [fields[1], v, i_a_d]
        sum_v += v
        sum_i_a_d += i_a_d
        logger.info(fields)

    # Cross-check the per-member rows against the reported totals.
    if total_v != sum_v:
        logger.error(label + '各期货会员总持仓核对不上' + location)
        return False
    # The change total is only verified when it is reported as positive.
    if total_i_a_d > 0 and total_i_a_d != sum_i_a_d:
        logger.error(label + '各期货会员总持仓核对不上' + location)
        return False
    future_holding_dict[total_key] = [sum_v, sum_i_a_d]
    return True


def parse_one(trade_date):
    """Parse the DCE member-holding archive downloaded for one trade date.

    Every member of the zip archive is one contract's report holding three
    ranking tables (volume, long positions, short positions), each introduced
    by '名次' and closed by a '总计' total row. Each report is validated,
    converted into a holding dict and stored through
    ``insert_one_day_future_holding_data_to_js``.

    Note on garbled member file names when extracting with Python 3 zipfile
    (translated from the original author's note): converting the encoding in
    calling code kept failing, so the workaround used was to patch the
    standard library's zipfile.py in two places.

    First place:
        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            # fname_str = fname.decode("cp437")
            fname_str = fname.decode("gbk")
    Second place:
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            # filename = filename.decode('cp437')
            filename = filename.decode('gbk')

    NOTE(review): on Python 3.11+ ``zipfile.ZipFile(..., metadata_encoding='gbk')``
    looks like a supported alternative to patching the stdlib; confirm before
    switching.

    :param trade_date: trade date as a 'YYYYMMDD' string.
    :return: True when every member was parsed and stored (empty reports are
        skipped), False as soon as the archive is missing or a member fails
        validation.
    """
    future_holding_downloaded_origin_file_path = \
        get_future_holding_downloaded_origin_file_path(trade_date, FuturesExchange.DCE)
    if not os.path.exists(future_holding_downloaded_origin_file_path):
        logger.error('%s 文件不存在!' % future_holding_downloaded_origin_file_path)
        return False
    # The context manager closes the archive on every exit path, including
    # the early "return False" branches and unexpected exceptions.
    with zipfile.ZipFile(future_holding_downloaded_origin_file_path, "r") as zip_file:
        for file_name in zip_file.namelist():
            source = future_holding_downloaded_origin_file_path + file_name
            raw = zip_file.read(file_name)
            try:
                content = raw.decode('utf-8')
            except UnicodeDecodeError:
                logger.info('数据编码有变动请检查, future_holding_downloaded_origin_file_path=' +
                            source)
                content = raw.decode('gbk')
            content = content.strip().replace('\r', '').replace('\n\n', '\n')

            # Check the span from the first '名次' to the last '总计'.
            check_content_one = re.findall(r'名次.*总计', content, re.S)
            if not check_content_one or len(check_content_one) != 1:
                logger.error('数据名次到总计的间隔有变动请检查, future_holding_downloaded_origin_file_path=' +
                             source)
                return False
            if len(check_content_one[0].split('\n')) == 6:
                # Three tables of header + total row only: an empty report.
                logger.info(content)
                logger.info('名次到总计的间隔为6,空!!!')
                continue
            lines = content.split('\n')
            if len(lines) < 8:
                logger.error('数据不足8行有变动请检查, future_holding_downloaded_origin_file_path=' +
                             source)
                return False

            # Check the first two lines to identify contract and date.
            repaired_trade_date = None
            if '大连商品交易所' not in lines[0] or '合约代码:' not in lines[1] or 'Date:' not in lines[1]:
                # No recognisable title lines: fall back to the member file
                # name, which must be '<trade_date>_<contract>_..' (5 parts).
                tmp_title = file_name.split('_')
                if len(tmp_title) != 5 or len(tmp_title[1]) <= 4 or tmp_title[0] != trade_date:
                    logger.error('数据格式有变动或日期对不上请检查, future_holding_downloaded_origin_file_path=' +
                                 source)
                    return False
                future_variety_code = tmp_title[1][:-4].upper()
                _tmp_delivery_month = tmp_title[1][-4:]
            else:
                tmp_title = lines[1].replace('合约代码:', '').replace('Date:', '').replace('-', '').strip().split()
                # Validate the shape first so tmp_title[1] below cannot raise
                # IndexError on a malformed title line.
                if len(tmp_title) != 2 or len(tmp_title[0]) <= 4:
                    logger.error('数据格式有变动请检查, future_holding_downloaded_origin_file_path=' +
                                 source)
                    return False
                if trade_date == '20161115' and tmp_title[1] == '20161114':
                    # Known exchange-side glitch in the DCE ranking data: the
                    # report for 20161115 carries the date 20161114; the
                    # record is stored under the date found in the report.
                    logger.warn('repair_trade_date, future_holding_downloaded_origin_file_path=' +
                                source)
                    repaired_trade_date = tmp_title[1]
                elif tmp_title[1] != trade_date:
                    logger.info(str(tmp_title)+' | '+trade_date)
                    logger.error('数据日期对不上请检查, future_holding_downloaded_origin_file_path=' +
                                 source)
                    return False
                future_variety_code = tmp_title[0][:-4].upper()
                if future_variety_code not in FUTURE_VARIETIES:
                    logger.error('期货品种代码有问题请检查, future_holding_downloaded_origin_file_path=' +
                                 source)
                    return False
                _tmp_delivery_month = tmp_title[0][-4:]

            # Rename the contract: variety code + first two digits of the
            # trade date's year + the 4-character delivery month.
            future_contract_name = '%s%s%s' % (future_variety_code, trade_date[:2], _tmp_delivery_month)

            # Check completeness: a preamble followed by exactly three tables.
            items = content.split('名次')
            if len(items) != 4:
                logger.error('数据格式有变动三个不全请检查, future_holding_downloaded_origin_file_path=' +
                             source)
                return False
            logger.info('%s, %s, %s | %s, %s' % (tmp_title, future_variety_code, future_contract_name,
                                                 file_name, future_holding_downloaded_origin_file_path))

            # Start parsing.
            future_holding_dict = get_future_holding_dict()
            future_holding_dict['code'] = future_contract_name
            future_holding_dict['date'] = trade_date
            if repaired_trade_date is not None:
                future_holding_dict['date'] = repaired_trade_date

            # The last table may be followed by trailing text, so it is cut
            # at its first total row.
            lines_sell = []
            for tmp_line in items[3].strip().split('\n'):
                lines_sell.append(tmp_line)
                if '总计' in tmp_line:
                    break

            sections = (
                (items[1].strip().split('\n'), '成交量', 'volume', 'total_volume'),
                (items[2].strip().split('\n'), '持买单量', 'buy', 'total_buy'),
                (lines_sell, '持卖单量', 'sell', 'total_sell'),
            )
            for section_lines, label, rank_key, total_key in sections:
                if not _parse_holding_section(section_lines, label, rank_key, total_key,
                                              future_holding_dict, source):
                    return False

            # Write the record to the contract's file.
            future_holding_contract_file_path = \
                get_future_holding_contract_file_path(future_variety_code, future_contract_name)
            insert_one_day_future_holding_data_to_js(future_holding_contract_file_path, future_holding_dict)
    return True