def download(trade_date, timeout=30):
    """Download the CZCE daily futures quote file for one trading day.

    The file is fetched from the exchange's static-file URL and stored at the
    path returned by ``get_future_price_downloaded_origin_file_path``.  If that
    file already exists nothing is downloaded.

    Args:
        trade_date: Trading day as a ``YYYYMMDD`` string.  Only dates from
            ``20210104`` onwards are accepted; anything earlier is logged as
            an error and ``exit_now()`` is called.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        True if the file is already present or was downloaded and written,
        False if the server answered with a non-200 status code.

    Raises:
        requests.exceptions.RequestException: on connection problems or when
            the timeout expires (propagated unchanged from ``requests``).
    """
    # Example:
    # http://www.czce.com.cn/cn/DFSStaticFiles/Future/2021/20210507/FutureDataDaily.txt
    url_model = 'http://www.czce.com.cn/cn/DFSStaticFiles/Future/%s/%s/FutureDataDaily.txt'

    # Build the download link for the futures quote data.
    request_url = ''
    year = trade_date[:4]
    logger.info('year=%s trade_date=%s' % (year, trade_date))
    if trade_date >= '20210104':  # 20201231
        request_url = url_model % (year, trade_date)
    else:
        logger.error('期货行情数据下载日期有误请检查!')
        exit_now()
    logger.info('request_url = ' + request_url)

    # Download the futures quote data, unless it is already on disk.
    future_price_downloaded_origin_file_path = \
        get_future_price_downloaded_origin_file_path(trade_date, FuturesExchange.CZCE)
    if os.path.exists(future_price_downloaded_origin_file_path):
        logger.info('%s %s期货行情数据已下载:' %
                    (future_price_downloaded_origin_file_path, trade_date))
        return True

    res = requests.get(request_url, timeout=timeout)
    if res.status_code != 200:
        logger.error('response.status_code=%s, download %s failed' %
                     (str(res.status_code), request_url))
        return False

    with open(future_price_downloaded_origin_file_path, 'w', encoding='utf-8') as f:
        f.write(res.text)
    return True
def get_future_price_contract_data_by_future_contract_name(
        future_variety_code, future_contract_name):
    """Load one contract's price file into a dict keyed by trade date.

    The file path comes from ``get_future_price_contract_file_path``.  The
    file is read as comma-separated text; the first row is skipped as a
    header and the first field of every other row is used as the key.

    Args:
        future_variety_code: Variety code forwarded to the path helper.
        future_contract_name: Contract name forwarded to the path helper.

    Returns:
        dict mapping the first field of each row (the trade date) to the
        full list of string fields of that row.

    A trade date that appears twice is logged as an error and ``exit_now()``
    is called.
    """
    contract_file = get_future_price_contract_file_path(
        future_variety_code, future_contract_name)
    with open(contract_file, 'r', encoding='utf-8') as f:
        raw_rows = f.read().strip().split('\n')

    rows_by_date = {}
    # raw_rows[0] is the header row.
    for raw_row in raw_rows[1:]:
        fields = raw_row.split(',')
        date_key = fields[0]
        if date_key in rows_by_date:
            logger.error('期货合约价格数据中有重复,请检查!' + date_key + ' | ' +
                         contract_file)
            exit_now()
        rows_by_date[date_key] = fields
    return rows_by_date
def parse_one(trade_date):
    """Parse the downloaded CZCE holding file of a single trading day.

    The parser is chosen by date: files from ``20150921`` onwards and files
    between ``20100818`` and ``20150918`` (inclusive) are handled by different
    functions.  Any other date is logged as an error and ``exit_now()`` is
    called.

    Args:
        trade_date: Trading day as a ``YYYYMMDD`` string.

    Returns:
        False if the downloaded file does not exist, True otherwise.
    """
    origin_file = get_future_holding_downloaded_origin_file_path(
        trade_date, FuturesExchange.CZCE)
    if not os.path.exists(origin_file):
        logger.error('future_holding_downloaded_origin_file_path not exists, ' +
                     origin_file)
        return False

    # Pick the parser that matches the file layout of this date.
    parser = None
    if trade_date >= '20150921':
        parser = parse_downloaded_origin_file_from_20150921
    elif '20100818' <= trade_date <= '20150918':
        parser = parse_downloaded_origin_file_from_20100818_to_20150918
    else:
        logger.error('parse_future_holding.parse(), 日期有误请检查!')
        exit_now()

    if parser is not None:
        parser(trade_date, origin_file)
    return True
def parse_video_id():
    """Extract the video ids from the saved ``Andy_Lee.html`` page.

    Every ``<a id="video-title">`` element is inspected.  Its ``href`` must
    start with ``/watch?v=``; the 11 characters that follow are taken as the
    video id.  The distinct ids, in first-seen order, are written one per
    line to ``andy_lee_all_video_ids_file_path``.

    Returns:
        False if the page contains no matching anchor.  On success the
        function returns None after writing the id file.

    An ``href`` with an unexpected prefix is logged as an error and
    ``exit_now()`` is called.
    """
    html_path = os.path.join(andy_lee_work_path, 'Andy_Lee.html')
    with open(html_path, 'r', encoding='utf-8') as f:
        content_html = f.read()

    bs_obj = BeautifulSoup(content_html, "lxml")
    items_list = bs_obj.find_all("a", {"id": "video-title"})
    if not items_list:
        logger.error('Andy_Lee.html没有解析出数据!')
        return False

    video_ids = list()
    # A set gives O(1) duplicate checks; the list keeps first-seen order.
    seen_ids = set()
    for item in items_list:
        href = item['href']
        if not href.startswith('/watch?v='):
            logger.error('youtube video url error, %s' % href)
            exit_now()
        video_id = href.replace('/watch?v=', '')[:11]
        if video_id not in seen_ids:
            seen_ids.add(video_id)
            video_ids.append(video_id)
        logger.info(video_id)
    logger.info('items length = %s' % len(video_ids))

    with open(andy_lee_all_video_ids_file_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(video_ids))
def read_data(self):
    """Fill the per-day series of this object from price and holding data.

    For every trade date in the contract's price data the method appends to
    ``trade_date_list``, ``price_list`` (None when the close field is 0),
    ``total_holding_list`` and ``cordon_list`` (constant 0.5).  When holding
    data exists for that date it also appends the buy/sell totals as a
    fraction of the total holding, plus their complements, and records each
    member's buy and sell value at that day's index.  Dates without holding
    data get None in the four ratio lists.

    Finally, members whose recorded values sum to a non-zero number are
    copied into ``new_future_members_buy_holding_dict`` and
    ``new_future_members_sell_holding_dict``.

    A trade date missing from ``self.trade_calendar`` is logged as an error
    and ``exit_now()`` is called.
    """

    def has_nonzero_total(series):
        # None marks days without a value for this member.
        return sum(v for v in series if v is not None) != 0

    price_by_date = self.get_future_price_contract_data_by_future_contract_name(
        self.future_variety_code, self.future_contract_name)
    holding_by_date = self.get_future_holding_contract_data_by_future_contract_name(
        self.future_variety_code, self.future_contract_name)
    day_count = len(price_by_date)
    member_buy_series = self.get_future_members_holding_dict(day_count)
    member_sell_series = self.get_future_members_holding_dict(day_count)

    for day_idx, (trade_date, row) in enumerate(price_by_date.items()):
        if trade_date not in self.trade_calendar:
            logger.error('交易日历有问题,请检查!')
            exit_now()
        self.trade_date_list.append(trade_date)

        # Closing price: a value of 0 is stored as None.
        close_price = float(row[6])
        self.price_list.append(None if close_price == 0 else close_price)

        # Total holding.
        total_holding = float(row[9])
        self.total_holding_list.append(total_holding)

        # Warning line.
        self.cordon_list.append(0.5)

        if trade_date not in holding_by_date:
            for ratio_series in (self.buy_list, self.sell_list,
                                 self.individual_buy_list,
                                 self.individual_sell_list):
                ratio_series.append(None)
            continue

        day_holding = holding_by_date[trade_date]
        buy_holding = float(day_holding['total_buy'][0])
        sell_holding = float(day_holding['total_sell'][0])
        # Same arithmetic as before so the float results stay bit-identical.
        buy_ratio = (buy_holding / total_holding) * 100 / 100.0
        sell_ratio = (sell_holding / total_holding) * 100 / 100.0
        self.buy_list.append(buy_ratio)
        self.sell_list.append(sell_ratio)
        self.individual_buy_list.append(1 - buy_ratio)
        self.individual_sell_list.append(1 - sell_ratio)

        # Buy-side holdings: entry[0] is the member name, entry[1] the value.
        for entry in day_holding['buy'].values():
            member_buy_series[entry[0]][day_idx] = entry[1]
        # Sell-side holdings.
        for entry in day_holding['sell'].values():
            member_sell_series[entry[0]][day_idx] = entry[1]

    for member_name, series in member_buy_series.items():
        if has_nonzero_total(series):
            self.new_future_members_buy_holding_dict[member_name] = series
    for member_name, series in member_sell_series.items():
        if has_nonzero_total(series):
            self.new_future_members_sell_holding_dict[member_name] = series
# @Author : 胡远
# @Github : https://github.com/QuixoteHY
# @Email : [email protected]
# @Describe :
from config.logger import logger
from config.utils import exit_now
from downloader.video.youtube import video_youtube_download_path, download_youtube_video_by_youtube_dl_in_cmd
import os
from bs4 import BeautifulSoup

# Working directory for this channel; it must already exist.
andy_lee_work_path = os.path.join(video_youtube_download_path, 'Andy_Lee')
if not os.path.exists(andy_lee_work_path):
    logger.error('工作目录不存在!%s' % andy_lee_work_path)
    exit_now()

# Directory holding the downloaded video files; created on first use.
andy_lee_video_path = os.path.join(andy_lee_work_path, 'video')
if not os.path.exists(andy_lee_video_path):
    os.makedirs(andy_lee_video_path)

# Text files kept inside the working directory.
andy_lee_all_video_ids_file_path = os.path.join(andy_lee_work_path, 'all_video_ids.txt')
andy_lee_downloaded_record_file_path = os.path.join(andy_lee_work_path, 'downloaded_video_id.txt')


def read_downloaded_video_ids():
    """Return the distinct video ids found in the video directory.

    Characters 9 to 19 of every entry name in ``andy_lee_video_path`` are
    taken as the id (presumably the names carry a fixed 9-character prefix
    before the 11-character id -- TODO confirm).  Ids are returned in the
    order they are first seen.
    """
    id_slices = (entry_name[9:20] for entry_name in os.listdir(andy_lee_video_path))
    # dict.fromkeys drops repeats while keeping first-seen order.
    return list(dict.fromkeys(id_slices))
def parse_downloaded_origin_file_from_20100818_to_20150918(
        trade_date, future_holding_downloaded_origin_file_path):
    """Parse a downloaded holding file in the comma-separated layout.

    The text is cut into sections at every '品种' / '合约' marker.  Each
    section must end with a '合计' (total) row; only the very first chunk may
    lack it, in which case it is skipped.  For every section the variety code
    and contract name are derived from the header row, the date in the header
    is checked against ``trade_date``, the rank rows and the total row are
    converted to integers, and the result is handed to
    ``insert_one_day_future_holding_data_to_js``.

    Args:
        trade_date: Trading day as a ``YYYYMMDD`` string.
        future_holding_downloaded_origin_file_path: Path of the file to parse.

    Any inconsistency (missing total row, unmatched variety, wrong date,
    number without a '.0' suffix) is logged and ``exit_now()`` is called.
    """
    # Chinese names that may appear in a header row, keyed by variety code.
    variety_name_by_code = {
        'CF': '棉花',
        'RI': '早籼',
        'OI': '菜油',
        'SR': '白糖',
        'TA': 'PTA',
        'WS': '强麦',
        'WT': '硬麦',
        'MA': '甲醇',
        'PM': '普麦',
        'FG': '玻璃',
        'RM': '菜粕',
        'RS': '菜籽',
        # 'ZC' (thermal coal) is left commented out.
        'TC': '煤',
        'JR': '粳稻',
        'LR': '晚籼',
        'SF': '硅铁',
        'SM': '锰硅',
    }

    def split_row(raw_row):
        # Every data row is expected to carry numbers written as 'N.0'.
        if '.0' not in raw_row:
            logger.error('格式不对请检查, line=' + raw_row)
            exit_now()
        return raw_row.replace('.0', '').strip().split(',')

    with open(future_holding_downloaded_origin_file_path, 'r', encoding='utf-8') as f:
        whole_text = f.read()
    sections = whole_text.replace('合约', '品种').split('品种')

    for section_no, section in enumerate(sections):
        lines = section.strip().replace('\n\n', '\n').replace(' ', '').replace('\t', '').split('\n')
        logger.info('>' * 10 + trade_date + '\t' + '\n'.join(lines))

        # Check the last row: it must be the total row.
        if '合计' not in lines[-1]:
            if section_no == 0:
                continue
            logger.error('缺少合计, ' + str(lines[-1]))
            exit_now()

        # Check the first row, e.g. ":棉花日期:2010-08-18" or ":ER105日期:2010-08-18".
        matched_codes = [code for code in FUTURE_VARIETIES.keys() if code in lines[0]]
        matched_codes += [code for code, name in variety_name_by_code.items()
                          if name in lines[0]]
        check_res = len(matched_codes)
        future_variety_code = matched_codes[-1] if matched_codes else ''
        # 'TA' is exempt from the exactly-one-match rule (a 'PTA' header
        # presumably matches both the code and the name).
        if check_res != 1 and future_variety_code != 'TA':
            logger.error('合约代码对不上, lines[0]: ' + str(lines[0]) +
                         '|future_variety_code=' + future_variety_code)
            exit_now()

        # Rename the contract (":棉花日期:2010-08-18", ":ER105日期:2010-08-18").
        future_contract_name = None
        czce_future_contract_name, future_date_str = \
            lines[0].strip().replace('-', '').replace(':', '').replace(':', '').split('日期')
        variety_full_name = FUTURE_VARIETIES[future_variety_code]['品种全称']
        if czce_future_contract_name in variety_full_name:
            # Header names the variety itself: this section is the aggregate.
            future_contract_name = 'total'
        elif future_variety_code in czce_future_contract_name:
            future_contract_name = rename_czce_future_contract_name(
                trade_date, czce_future_contract_name, future_variety_code)
            logger.info(czce_future_contract_name + ' >>> ' + future_contract_name)
        else:
            logger.error('合约代码有问题, lines[0]: ' + str(lines[0]) + ' | 品种全称=' +
                         FUTURE_VARIETIES[future_variety_code]['品种全称'])
            exit_now()

        # Check the date.
        if future_date_str != trade_date or len(future_date_str) != 8:
            logger.error('日期对不上, lines[0]: ' + str(lines[0]))
            exit_now()

        # Parse the data rows.
        future_holding_dict = get_future_holding_dict()
        future_holding_dict['code'] = future_contract_name
        future_holding_dict['date'] = trade_date
        for raw_row in lines[1:-1]:
            cells = split_row(raw_row)
            logger.info('line=' + str(cells))
            # cells[0] is the rank; each group is a name plus two integers.
            for group_key, col in (('volume', 1), ('buy', 4), ('sell', 7)):
                if cells[col] != '':
                    future_holding_dict[group_key][int(cells[0])] = \
                        [cells[col], int(cells[col + 1]), int(cells[col + 2])]

        total_cells = split_row(lines[-1])
        if total_cells[1] != '':
            # Pad the total row so its columns line up with the rank rows.
            total_cells.insert(1, '')
        logger.info('合计 line=' + str(total_cells))
        for total_key, col in (('total_volume', 2), ('total_buy', 5), ('total_sell', 8)):
            future_holding_dict[total_key] = [int(total_cells[col]), int(total_cells[col + 1])]

        # Write to file.
        future_holding_contract_file_path = get_future_holding_contract_file_path(
            future_variety_code, future_contract_name)
        insert_one_day_future_holding_data_to_js(
            future_holding_contract_file_path, future_holding_dict)
def parse_downloaded_origin_file_from_20150921(
        trade_date, future_holding_downloaded_origin_file_path):
    """Parse a downloaded holding file in the '|'-separated layout.

    The text is cut into sections at every '品种' / '合约' marker.  Each
    section must end with a '合计' (total) row (only the very first chunk may
    lack it and is then skipped) and must have a table header containing
    '名次' on its second row.  The variety code is found by matching the
    header name against the '品种全称' entries of ``FUTURE_VARIETIES``; if
    nothing matches, the name must end in three digits and the code is the
    name without them.  Rank rows and the total row are converted to integers
    and passed to ``insert_one_day_future_holding_data_to_js``.

    Args:
        trade_date: Trading day as a ``YYYYMMDD`` string.
        future_holding_downloaded_origin_file_path: Path of the file to parse.

    Any inconsistency is logged and ``exit_now()`` is called.
    """
    with open(future_holding_downloaded_origin_file_path, 'r', encoding='utf-8') as f:
        whole_text = f.read()
    sections = whole_text.replace('合约', '品种').split('品种')

    for section_no, section in enumerate(sections):
        # Commas are removed as well, so numbers lose any thousands separator.
        lines = section.strip().replace('\n\n', '\n').replace(' ', '').replace(',', '').split('\n')
        logger.info('>' * 10 + trade_date + '\t' + '\n'.join(lines))

        # Check the last row: it must be the total row.
        if '合计' not in lines[-1]:
            if section_no == 0:
                continue
            logger.error('缺少合计, ' + str(lines[-1]))
            exit_now()

        # Check the table header on the second row.
        if '名次' not in lines[1]:
            logger.error('缺少表头, ' + str(lines[1]))
            exit_now()

        # Extract the variety code from the first row, e.g. "棉花CF日期::2015-09-21".
        czce_future_contract_name = lines[0].replace(':', '').split('日期')[0].strip()
        full_name_hits = [code for code, info in FUTURE_VARIETIES.items()
                          if czce_future_contract_name == info['品种全称']]
        future_variety_code = full_name_hits[-1] if full_name_hits else None
        if len(full_name_hits) > 1:
            logger.error('合约代码与品种全称匹配数大于1, czce_future_contract_name: %s, %s' %
                         (czce_future_contract_name, lines[0]))
            exit_now()
        if future_variety_code is None:
            # Not a variety name: expect a contract name ending in three digits.
            too_short = len(czce_future_contract_name) < 4
            if too_short or any(
                    ch not in __digit
                    for ch in (czce_future_contract_name[-1],
                               czce_future_contract_name[-2],
                               czce_future_contract_name[-3])):
                logger.error('合约代码有问题, czce_future_contract_name: %s, %s' %
                             (czce_future_contract_name, lines[0]))
                exit_now()
            future_variety_code = czce_future_contract_name[:-3]

        # Rename the contract.
        future_contract_name = None
        czce_future_contract_name, future_date_str = \
            lines[0].strip().replace('-', '').replace(':', '').replace(':', '').split('日期')
        if czce_future_contract_name == FUTURE_VARIETIES[future_variety_code]['品种全称']:
            # Header names the variety itself: this section is the aggregate.
            future_contract_name = 'total'
        elif future_variety_code in czce_future_contract_name:
            future_contract_name = rename_czce_future_contract_name(
                trade_date, czce_future_contract_name, future_variety_code)
            logger.info(czce_future_contract_name + ' >>> ' + future_contract_name)
        else:
            logger.error('合约代码有问题, lines[0]: ' + str(lines[0]))
            exit_now()

        # Check the date.
        if future_date_str != trade_date or len(future_date_str) != 8:
            logger.error('日期对不上, lines[0]: ' + str(lines[0]))
            exit_now()

        # Parse the data rows.
        future_holding_dict = get_future_holding_dict()
        future_holding_dict['code'] = future_contract_name
        future_holding_dict['date'] = trade_date
        for raw_row in lines[2:-1]:
            cells = raw_row.strip().split('|')
            # cells[0] is the rank; '-' marks an empty group.
            for group_key, col in (('volume', 1), ('buy', 4), ('sell', 7)):
                if cells[col] != '-':
                    future_holding_dict[group_key][int(cells[0])] = \
                        [cells[col], int(cells[col + 1]), int(cells[col + 2])]

        total_cells = lines[-1].strip().split('|')
        for total_key, col in (('total_volume', 2), ('total_buy', 5), ('total_sell', 8)):
            future_holding_dict[total_key] = [int(total_cells[col]), int(total_cells[col + 1])]

        # Write to file.
        future_holding_contract_file_path = get_future_holding_contract_file_path(
            future_variety_code, future_contract_name)
        insert_one_day_future_holding_data_to_js(
            future_holding_contract_file_path, future_holding_dict)