def parse(year):
    """Split one year of CZCE daily price history into per-contract CSV rows.

    Reads ``czce_<year>.txt`` from the ``future_price_origin/history``
    directory under ``tmp_work_path`` (decoded as GBK, falling back to UTF-8
    when GBK decoding fails). Every line whose first field starts with
    ``year`` is treated as a data row: it is split on ``|``, the contract is
    matched against ``FUTURE_VARIETIES`` and renamed, and the reordered row is
    handed to ``insert_one_day_future_price_data_to_csv``.

    Source file layout, as recorded by the original author (columns are
    separated by ``|``)::

        trade date | contract (variety + month) | previous settlement | open |
        high | low | close | settlement | change 1 | change 2 | volume |
        open interest | open-interest change | turnover |
        delivery settlement price

        2010-01-04|CF003 |16260 |16305 |16540 |16295 |16370 |16455 |110 |195 |1734 |9614 |768 |14264.61 |0

    The original notes list four header variants. They differ in the wording
    of the contract and open-interest column titles, in whether units are
    appended to the volume and turnover titles, and in whether a trailing
    ``|`` is present. All of them show the same column order.

    Original author's note: ``2010 >>> 2021`` (presumably the range of years
    this layout was checked against -- confirm).

    :param year: year as a string (it is concatenated into log messages and
        used as a line prefix), e.g. ``'2010'``.
    :return: None.
    """
    file_path = os.path.join(
        tmp_work_path, 'future_price_origin', 'history', 'czce_%s.txt' % year)
    logger.info(year + ' price file path: ' + file_path)

    try:
        with open(file_path, 'r', encoding='gbk') as f:
            lines = f.read().strip().split('\n')
    except UnicodeDecodeError as e:
        # Logger.warn is a deprecated alias; warning() is the supported name.
        logger.warning(str(e))
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.read().strip().split('\n')

    # Spaces, thousands separators and tabs are removed before splitting.
    padding = str.maketrans('', '', ' ,\t')

    for line in lines:
        row = line.strip()
        if not row.startswith(year):
            # Headers, blank lines and anything else that is not a data row.
            continue
        items = row.translate(padding).split('|')
        logger.info(str(items))

        # Start parsing.
        trade_date = items[0].replace('-', '')
        czce_future_contract_name = items[1]

        # Extract the variety code: exactly one known code must occur in the
        # contract name.
        matches = [
            code for code in FUTURE_VARIETIES
            if code in czce_future_contract_name
        ]
        future_variety_code = matches[-1] if matches else ''
        if len(matches) != 1:
            logger.error('合约代码对不上, czce_future_contract_name: ' + czce_future_contract_name)
            exit_now()

        # Rename the contract.
        future_contract_name = rename_czce_future_contract_name(
            trade_date, czce_future_contract_name, future_variety_code)
        logger.info(czce_future_contract_name + ' >>> ' + future_contract_name)

        # Output columns: trade date, contract name, previous settlement,
        # open, high, low, close, settlement, volume, open interest,
        # open-interest change, turnover (10k CNY), delivery settlement price.
        # Source columns 8 and 9 (the two price-change columns) are dropped.
        new_line = [
            trade_date, future_contract_name,
            items[2], items[3], items[4], items[5], items[6], items[7],
            items[10], items[11], items[12], items[13], items[14],
        ]
        logger.info('new_line=' + str(new_line))

        # Insert this row into the contract's file.
        future_price_contract_file_path = get_future_price_contract_file_path(
            future_variety_code, future_contract_name)
        logger.info('future_price_contract_file_path=' + future_price_contract_file_path)
        insert_one_day_future_price_data_to_csv(
            future_price_contract_file_path, new_line)

    logger.info('parse_%s success!' % year)
def gen_new_cookie(self, reason):
    '''Starts proxy to get new cookie from a user

    Logs why the current cookie was rejected, tells the user what device is
    needed to capture a fresh token, and then calls
    ``start_credential_proxy()``.

    :param reason: why the cookie is considered invalid; interpolated into
        the warning message.
    '''
    # Logger.warn is a deprecated alias; warning() is the supported name.
    logger.warning(
        "Cookie invalid - reason: {} - loading proxy to regenerate".format(
            reason))
    logger.info(
        "In order to get a new token, we need to intercept it from the real NSO app. Please make sure you have a smartphone or Android emulator to continue."
    )
    logger.info(
        "If your smartphone runs Android 7.0 or higher, you will need to use an Android emulator or an iOS device to continue."
    )
    start_credential_proxy()
def _parse_dce_holding_section(section_lines, label, key, future_holding_dict, path_note):
    """Parse one ranking table of a DCE holding file into ``future_holding_dict``.

    The first line must contain ``label`` and the last line must contain the
    total marker. When there are rows in between, each must split into four
    whitespace-separated fields (rank, member name, amount, change); they are
    stored under ``future_holding_dict[key][rank]`` and their sums are
    reconciled against the total row before being stored under
    ``future_holding_dict['total_' + key]``.

    :param section_lines: lines of one table, header first, total row last.
    :param label: table title expected in the header; also used as the prefix
        of every error message so the log names the table that failed.
    :param key: ``future_holding_dict`` key for the per-rank rows.
    :param future_holding_dict: dict that is updated in place.
    :param path_note: suffix appended to error messages to identify the file.
    :return: True when the table is empty or parsed and reconciled, False
        after logging an error.
    """
    if label not in section_lines[0] or '总计' not in section_lines[-1]:
        logger.error(label + '数据格式表头有变动请检查' + path_note)
        return False
    if len(section_lines) <= 2:
        # Only a header and a total row: nothing to record.
        return True

    # The total row carries the amount and, optionally, the change.
    totals = section_lines[-1].replace('总计', '').strip().replace(',', '').split()
    if len(totals) == 1:
        expected_amount, expected_change = int(totals[0]), 0
    elif len(totals) == 2:
        expected_amount, expected_change = int(totals[0]), int(totals[1])
    else:
        logger.error(label + '数据格式表尾有变动请检查' + path_note)
        return False

    amount_sum = 0
    change_sum = 0
    for raw_row in section_lines[1:-1]:
        fields = raw_row.split()
        if len(fields) != 4:
            logger.error(label + '数据格式有变动请检查' + path_note)
            return False
        amount = int(fields[2].replace(',', ''))
        change = int(fields[3].replace(',', ''))
        future_holding_dict[key][int(fields[0])] = [fields[1], amount, change]
        amount_sum += amount
        change_sum += change
        logger.info(fields)

    if expected_amount != amount_sum:
        logger.error(label + '各期货会员总持仓核对不上' + path_note)
        return False
    # The change total is only reconciled when it is positive, as before.
    if expected_change > 0 and expected_change != change_sum:
        logger.error(label + '各期货会员总持仓核对不上' + path_note)
        return False

    future_holding_dict['total_' + key] = [amount_sum, change_sum]
    return True


def parse_one(trade_date):
    """Parse the downloaded DCE member-holding archive for one trade date.

    Opens the zip returned by
    ``get_future_holding_downloaded_origin_file_path(trade_date,
    FuturesExchange.DCE)`` and, for every member file, validates the layout,
    works out the contract name, parses the volume / long-position /
    short-position ranking tables and passes the result to
    ``insert_one_day_future_holding_data_to_js``. Member files whose ranking
    block spans exactly six lines are logged as empty and skipped.

    Note carried over from the original author, on garbled file names when
    extracting archives with Python 3 ``zipfile``: converting the encoding in
    their own code kept failing, so the shortcut they describe is to edit
    ``zipfile.py`` directly in two places.

    First place::

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            # fname_str = fname.decode("cp437")
            fname_str = fname.decode("gbk")

    Second place::

        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            # filename = filename.decode('cp437')
            filename = filename.decode('gbk')

    :param trade_date: trade date as a string such as ``'20161115'``.
    :return: True when every member file was parsed or skipped as empty;
        False as soon as the archive is missing or a member fails a check
        (data already written for earlier members is left in place).
    """
    origin_path = get_future_holding_downloaded_origin_file_path(
        trade_date, FuturesExchange.DCE)
    if not os.path.exists(origin_path):
        logger.error('%s 文件不存在!' % origin_path)
        return False

    # The context manager closes the archive on every exit path, including
    # the early ``return False`` branches.
    with zipfile.ZipFile(origin_path, "r") as zip_file:
        for file_name in zip_file.namelist():
            path_note = (', future_holding_downloaded_origin_file_path='
                         + origin_path + file_name)

            # Read the member once; try UTF-8 and fall back to GBK.
            raw = zip_file.read(file_name)
            try:
                text = raw.decode('utf-8')
            except UnicodeDecodeError:
                logger.info('数据编码有变动请检查' + path_note)
                text = raw.decode('gbk')
            content = text.strip().replace('\r', '').replace('\n\n', '\n')

            # Check the span from the first rank header to the last total row.
            ranking_spans = re.findall(r'名次.*总计', content, re.S)
            if len(ranking_spans) != 1:
                logger.error('数据名次到总计的间隔有变动请检查' + path_note)
                return False
            if len(ranking_spans[0].split('\n')) == 6:
                # A six-line span means the tables hold no member rows.
                logger.info(content)
                logger.info('名次到总计的间隔为6,空!!!')
                continue

            lines = content.split('\n')
            if len(lines) < 8:
                logger.error('数据不足8行有变动请检查' + path_note)
                return False

            # Check the first two lines to identify the contract.
            repair_trade_date = None
            has_title = ('大连商品交易所' in lines[0]
                         and '合约代码:' in lines[1]
                         and 'Date:' in lines[1])
            if not has_title:
                # No usable title lines: fall back to the member file name,
                # which must split into five '_'-separated parts with the
                # trade date first and the contract code second.
                tmp_title = file_name.split('_')
                if len(tmp_title) != 5 or len(tmp_title[1]) <= 4 or tmp_title[0] != trade_date:
                    logger.error('数据格式有变动或日期对不上请检查' + path_note)
                    return False
                future_variety_code = tmp_title[1][:-4].upper()
                delivery_month = tmp_title[1][-4:]
            else:
                tmp_title = lines[1].replace('合约代码:', '').replace('Date:', '') \
                    .replace('-', '').strip().split()
                # Validate the shape before indexing, so a malformed title
                # line is reported instead of raising IndexError.
                if len(tmp_title) != 2 or len(tmp_title[0]) <= 4:
                    logger.error('数据格式有变动请检查' + path_note)
                    return False
                if trade_date == '20161115' and tmp_title[1] == '20161114':
                    # Repair for the exchange's ranking data: the file for
                    # 20161115 is dated 20161114, and that date is kept.
                    logger.warning('repair_trade_date' + path_note)
                    repair_trade_date = tmp_title[1]
                elif tmp_title[1] != trade_date:
                    logger.info(str(tmp_title) + ' | ' + trade_date)
                    logger.error('数据日期对不上请检查' + path_note)
                    return False
                future_variety_code = tmp_title[0][:-4].upper()
                if future_variety_code not in FUTURE_VARIETIES:
                    logger.error('期货品种代码有问题请检查' + path_note)
                    return False
                delivery_month = tmp_title[0][-4:]

            # Rename the contract: variety code + first two characters of the
            # trade date + the last four characters of the contract code.
            future_contract_name = '%s%s%s' % (
                future_variety_code, trade_date[:2], delivery_month)

            # Check the data is complete: splitting on the rank header must
            # give a preamble plus exactly three tables.
            items = '\n'.join(lines).strip().split('名次')
            if len(items) != 4:
                logger.error('数据格式有变动三个不全请检查' + path_note)
                return False
            content_volume = items[1].strip()
            content_buy = items[2].strip()
            content_sell = items[3].strip()
            logger.info('%s, %s, %s | %s, %s' % (
                tmp_title, future_variety_code, future_contract_name,
                file_name, origin_path))

            # Start parsing.
            future_holding_dict = get_future_holding_dict()
            future_holding_dict['code'] = future_contract_name
            future_holding_dict['date'] = trade_date
            if repair_trade_date is not None:
                future_holding_dict['date'] = repair_trade_date

            # The short-position table is cut at its first total row; any
            # text after that row is ignored.
            sell_lines = []
            for sell_line in content_sell.split('\n'):
                sell_lines.append(sell_line)
                if '总计' in sell_line:
                    break

            # Volume, long positions, short positions -- in file order.
            sections = (
                ('成交量', 'volume', content_volume.split('\n')),
                ('持买单量', 'buy', content_buy.split('\n')),
                ('持卖单量', 'sell', sell_lines),
            )
            for label, key, section_lines in sections:
                if not _parse_dce_holding_section(
                        section_lines, label, key, future_holding_dict, path_note):
                    return False

            # Write to file.
            future_holding_contract_file_path = get_future_holding_contract_file_path(
                future_variety_code, future_contract_name)
            insert_one_day_future_holding_data_to_js(
                future_holding_contract_file_path, future_holding_dict)

    return True