def set_process_data_market_stock_to_redis(self):
    """Store the daily K-line data of every process_data_market_day stock into redis."""
    # note: logger.info does not accept print-style end=; keep messages plain
    logger.info('=====set_process_data_market_stock_to_redis start=====')
    logger.info('Start time: %s' % datetime.now())
    if not self.debug:
        stock_list = pd.read_csv(const.ORIGIN_DATA_STOCK_BASIC)
    else:
        stock_list = pd.read_csv(const.DEBUG_DATA_STOCK_BASIC)
    sdr = StockDataRepo()
    count = 1
    length = len(stock_list)
    for index, row in stock_list.iterrows():
        sdr.set_process_data_market_day_data(row['ts_code'])
        percent = round(1.00 * count / length * 100, 2)
        logger.info('Progress: %s [%d/%d]' % (str(percent) + '%', count, length))
        count = count + 1
    logger.info('End time: %s' % datetime.now())
    logger.info('=====set_process_data_market_stock_to_redis done!=====')
def user_login_operator(self, info: dict):
    user_name = info.get('user_name')
    user_pw = info.get('user_pw')
    info_time = info.get('time')
    if not all([user_name, user_pw, info_time]):
        logger.error('incomplete params')
        return 'incomplete params', 421
    user_check = self.user_collection.find_one({
        "user_name": user_name,
        "user_pw": user_pw,
        "activate": 1
    })
    if user_check:
        self.user_log_collection.insert_one({
            'id': {
                'user_id': user_check['_id']
            },
            'operator': user_name,
            'time': info_time,
            'action': 'operator %s login' % user_name
        })
        logger.info("login %s" % user_name)
        return user_check['type'], 200
    else:
        logger.error("user: %s doesn't exist" % user_name)
        return 'login failed', 421
def delete_comments(user):
    token = user.token
    headers = {'Authorization': f'Bearer {token}', 'User-Agent': USER_AGENT}
    resp = requests.get(f'https://oauth.reddit.com/user/{user.name}/comments',
                        headers=headers)
    if resp.status_code >= 401:
        return None
    comments = resp.json().get('data').get('children')
    comments = [(c.get('data').get('id'), c.get('data').get('score'))
                for c in comments]
    # only comments scored below -1 are deleted
    comments = [c for c in comments if c[1] < -1]
    for comment_id, score in comments:
        logger.info(f'deleting post {comment_id} with score {score}')
        # comment fullnames carry the t1_ prefix; the original string was
        # missing the f-prefix and a closing brace
        data = {'id': f't1_{comment_id}'}
        resp = requests.post('https://oauth.reddit.com/api/del',
                             headers=headers, json=data)
        logger.info(resp.status_code)
        try:
            logger.info(resp.json())
        except ValueError:
            # body was not JSON; log it raw
            logger.info(resp.content)
    user.deleted_count = user.deleted_count + len(comments)
    db.session.commit()
    logger.info(f'[{user.name}] deleted {len(comments)} comments')
    return True
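# The listing request above only sees the first page of results (Reddit
# returns at most 100 items per response). Below is a hedged sketch of
# walking the full history with cursor pagination, using the standard
# `limit`/`after` listing parameters; fetch_all_comments is a hypothetical
# helper, not part of the original module.
def fetch_all_comments(user, headers):
    children, after = [], None
    while True:
        params = {'limit': 100}
        if after:
            params['after'] = after
        resp = requests.get(
            f'https://oauth.reddit.com/user/{user.name}/comments',
            headers=headers, params=params)
        if resp.status_code >= 400:
            break
        data = resp.json().get('data', {})
        children.extend(data.get('children', []))
        after = data.get('after')  # cursor for the next page, None at the end
        if not after:
            break
    return children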
def run_only_once(self):
    """
    Download the initial data set. Keep the order intact; initialization
    takes last_update_time as the reference date.
    Run on first execution, and only once.
    """
    logger.info('=====TushareFetch run_only_once start=====')
    logger.info('Start time: %s' % datetime.now())
    # if the SSE index file is missing from the origin directory,
    # start the initial download
    filename = os.path.join(const.origin_data_index_day_path,
                            const.CODE_INDEX_SH + '.csv')
    if not os.path.exists(filename):
        if not self.debug:
            # download basic stock profiles
            self.get_stock_basic()
            # download the trading calendar
            self.get_stock_trade_cal(self.last_update_time)
            # download daily K-lines for all stocks
            self.only_once_all_stock_data(self.last_update_time)
            # download Stock Connect (HSGT) data
            self.only_once_hsgt_data(self.last_update_time)
            # index data must be downloaded last
            self.only_once_stock_index_day(self.last_update_time)
    else:
        logger.info('Files already exist; skip initialization and run the daily update job instead!')
    logger.info('End time: %s' % datetime.now())
    logger.info('=====TushareFetch run_only_once done!=====')
def trans_all_week(self):
    """Convert the daily data of every stock into weekly data."""
    stock_list = pd.read_csv(const.ORIGIN_DATA_STOCK_BASIC)
    for index, row in stock_list.iterrows():
        # logger.info takes one message, not print-style varargs
        logger.info('%s %s %s %s' % (row["ts_code"], row['symbol'],
                                     row['name'], row["list_date"]))
        self.trans_day2week(row['symbol'])
def check_folder(self):
    """Check the directory structure; create anything that is missing."""
    # select_data directory
    if not os.path.exists(const.select_data_root_path):
        os.makedirs(const.select_data_root_path)
        logger.info('mkdir %s' % const.select_data_root_path)
    logger.debug('select_data directory check complete!')
def gui_config_check(self, info: dict):
    gui_no = info.get('gui_no')
    if not gui_no:
        logger.error('incomplete params')
        return 'incomplete params', 421, {'Content-Type': 'application/json'}
    gui_setting_check = self.gui_setting_collection.find_one(
        {'gui_no': gui_no}, {'_id': 0})
    if not gui_setting_check:
        return 'null', 400, {'Content-Type': 'application/json'}
    logger.info('gui_config_check')
    return json.dumps(gui_setting_check), 200, {'Content-Type': 'application/json'}
def el_panel_config_check(self, info: dict):
    el_no = info.get('el_no')
    if not el_no:
        logger.error('incomplete params')
        return 'incomplete params', 421, {'Content-Type': 'application/json'}
    el_check = self.el_config_collection.find_one(
        {'el_no': el_no}, {'_id': 0})
    if not el_check:
        return 'null', 400, {'Content-Type': 'application/json'}
    logger.info('el_panel_config_check')
    return json.dumps(el_check), 200, {'Content-Type': 'application/json'}
def run_only_once(self):
    """
    Initialization pipeline: download origin files, compute all indicators
    into process files, then compute the emotion indicators into emotion files.
    """
    logger.info('run_only_once')
    t = TushareFetch()
    t.run_only_once()
    p = ProcessStockData()
    p.run_only_once()
    e = EmotionIndex()
    e.run_only_once()
def only_once_stock_daily_by_code(self, ts_code, start_date, end_date):
    """Fetch the daily quotes of a single stock; used when initializing stock data."""
    df = self.pro.daily(
        ts_code=ts_code, start_date=start_date, end_date=end_date)
    name = '%s.csv' % ts_code
    filename = os.path.join(const.origin_data_market_day_path, name)
    df.to_csv(filename, index=False, columns=const.COLUMNS)
    logger.info('%s done' % name)
    logger.info('File: %s' % filename)
def get_hsgt_data_by_10jqka(self, trade_date):
    """
    Download the Stock Connect daily data for trade_date from 10jqka (同花顺).
    Note: trade_date must be within the last 10 days, because the page
    shows at most 10 rows.
    """
    urls = {
        # Shanghai-HK Connect (northbound)
        'hgt': 'http://data.10jqka.com.cn/hgt/hgtb/',
        # Shenzhen-HK Connect (northbound)
        'sgt': 'http://data.10jqka.com.cn/hgt/sgtb/',
        # # HK Connect (Shanghai, southbound)
        # 'ggt_ss': 'http://data.10jqka.com.cn/hgt/ggtb/',
        # # HK Connect (Shenzhen, southbound)
        # 'ggt_sz': 'http://data.10jqka.com.cn/hgt/ggtbs/'
    }
    money_data = {
        'hgt': 0,
        'sgt': 0,
        # 'ggt_ss': 0,
        # 'ggt_sz': 0
    }
    # northbound money
    # north_money = 0
    # southbound money
    # south_money = 0
    headers = {
        'Accept': 'text/html, */*; q=0.01',
        'Accept-Language': 'zh-cn',
        'Host': 'data.10jqka.com.cn',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Safari/605.1.15',
        'Referer': 'http://data.10jqka.com.cn/hgt/hgtb/',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
    }
    for key in urls.keys():
        r = requests.get(urls[key], headers=headers, timeout=5)
        # logger.info(r.text)
        html = etree.HTML(r.text)
        trs = html.xpath('//*[@id="table1"]/table/tbody/tr')
        for tr in trs:
            if tr[0].text == trade_date:
                # strip the unit 亿 (hundred million) and convert to millions
                s = tr[1].text.replace('亿', '')
                money_data[key] = round(float(s) * 100, 2)
                break
    logger.info(money_data)
    return money_data
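# Hypothetical usage sketch for the method above. The class name is an
# assumption (the method sits alongside the other download helpers); the
# date string must match the first table column on the 10jqka page, and
# after the x100 conversion from 亿 the values are in millions of CNY.
fetcher = TushareFetch()  # assumption: method lives on TushareFetch
flows = fetcher.get_hsgt_data_by_10jqka('2020-07-01')
# e.g. {'hgt': 1234.56, 'sgt': 789.01}
total_northbound = flows['hgt'] + flows['sgt']
logger.info('northbound total: %.2f million CNY' % total_northbound)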
def user_delete(self, info: dict):
    t = time.time()
    user_name = info.get('user_name')
    admin_name = info.get('admin_name')
    info_time = info.get('time')
    if not all([user_name, admin_name, info_time]):
        logger.error('incomplete params')
        return update(), 400, {'Content-Type': 'application/json'}
    admin_check = self.user_collection.find_one({
        'user_name': admin_name,
        'activate': 1
    })
    if not admin_check:
        logger.error("admin user: %s doesn't exist" % admin_name)
        return "admin user didn't exist", 400, {
            'Content-Type': 'application/json'
        }
    if admin_check['type'] != 'super_admin' and admin_check['type'] != 'yc_admin':
        logger.error("permission denied %s" % admin_name)
        return update(), 423, {'Content-Type': 'application/json'}
    user_check = self.user_collection.find_one({
        'user_name': user_name,
        'activate': 1
    })
    if not user_check:
        # guard added: the original indexed user_check without checking
        # whether the target user exists
        logger.error("user: %s doesn't exist" % user_name)
        return "user didn't exist", 400, {'Content-Type': 'application/json'}
    if user_check['type'] == 'super_admin' and admin_check['type'] == 'super_admin':
        logger.error("permission denied %s" % admin_name)
        return update(), 423, {'Content-Type': 'application/json'}
    # soft delete: stamp 'activate' with the deletion time instead of 1
    user_check['activate'] = t
    user_check['update_time'] = t
    self.user_collection.replace_one({
        'user_name': user_name,
        'activate': 1
    }, user_check)
    self.user_log_collection.insert_one({
        'id': {
            'user_id': user_check['_id'],
            'admin_id': admin_check['_id']
        },
        'operator': admin_name,
        'user_name': user_name,
        'time': info_time,
        'action': "%s delete user %s" % (admin_name, user_name)
    })
    logger.info("user_del_%s" % user_name)
    return update(), 200, {'Content-Type': 'application/json'}
def user_login_admin(self, info: dict):
    res = dict()
    user_name = info.get('user_name')
    user_pw = info.get('user_pw')
    info_time = info.get('time')
    admin_url = info.get('admin_url')
    if not all([user_name, user_pw, info_time, admin_url]):
        logger.error('incomplete params')
        return 'incomplete params', 421, {
            'Content-Type': 'application/json'
        }
    user_check = self.user_collection.find_one({
        "user_name": user_name,
        "user_pw": user_pw,
        "activate": 1
    })
    if not user_check:
        logger.error("user: %s doesn't exist" % user_name)
        return "user didn't exist", 421, {
            'Content-Type': 'application/json'
        }
    if user_check['type'] == 'operator':
        return "not admin", 421, {'Content-Type': 'application/json'}
    self.user_log_collection.insert_one({
        'id': {
            'user_id': user_check['_id']
        },
        'operator': user_name,
        'time': info_time,
        'action': "admin %s login" % user_name
    })
    res['type'] = user_check['type']
    # pre_url = url.get(admin_url)
    # res['previous_url'] = user_check.get("previous_url") if user_check.get('previous_url') != pre_url else ''
    # user_check['previous_url'] = pre_url
    # self.user_collection.replace_one({'user_name': user_name, 'activate': 1}, user_check)  # this line is nonsense
    res['permission_mng'] = list(
        self.permission_collection.find({}, {'_id': 0}))
    res['line_setting'] = list(
        self.el_config_collection.find({}, {'_id': 0}))
    res['string_setting'] = list(
        self.el_string_collection.find({}, {'_id': 0}))
    res['gui_setting'] = list(
        self.gui_setting_collection.find({}, {'_id': 0}))
    logger.info("admin_login_%s" % user_name)
    return json.dumps(res), 200, {'Content-Type': 'application/json'}
def user_add(self, info: dict):
    t = time.time()
    user_name = info.get('user_name')
    user_pw = info.get('user_pw')
    admin_name = info.get('admin_name')
    user_type = info.get('type')
    info_time = info.get('time')
    if not all([user_name, user_pw, admin_name, user_type, info_time]):
        logger.error('incomplete params')
        return update(), 400, {'Content-Type': 'application/json'}
    admin_check = self.user_collection.find_one({
        'user_name': admin_name,
        'activate': 1
    })
    if not admin_check:
        logger.error("admin user: %s doesn't exist" % admin_name)
        return "admin user didn't exist", 400, {
            'Content-Type': 'application/json'
        }
    if admin_check['type'] != 'super_admin' and admin_check['type'] != 'yc_admin':
        logger.error("permission denied %s" % admin_name)
        return update(), 423, {'Content-Type': 'application/json'}
    user_check = self.user_collection.find_one({
        'user_name': user_name,
        'activate': 1
    })
    if user_check:
        return 'user exists', 413, {'Content-Type': 'application/json'}
    res = self.user_collection.insert_one({
        "user_name": user_name,
        "user_pw": user_pw,
        "activate": 1,
        "type": user_type,
        "update_time": t
    })
    self.user_log_collection.insert_one({
        'id': {
            'admin_id': admin_check["_id"],
            'user_id': ObjectId(res.inserted_id)
        },
        'operator': admin_name,
        'user_name': user_name,
        'time': info_time,
        'action': "%s add user %s" % (admin_name, user_name)
    })
    logger.info("user_add{%s}" % user_name)
    return update(), 200, {'Content-Type': 'application/json'}
def get_cal_end_date(self):
    """Get the end date of the trading-calendar file."""
    if self.debug:
        df = pd.read_csv(const.DEBUG_DATA_STOCK_TRADE_CAL)
    else:
        df = pd.read_csv(const.ORIGIN_DATA_STOCK_TRADE_CAL)
    # keep trading days only; assumes the calendar is sorted latest-first
    df = df[df['is_open'] > 0]
    end_date = df['cal_date'].values[0]
    logger.info(end_date)
    return str(end_date)
def run_daily_job(self):
    """Daily job: process all data."""
    # compute the list of dates to update, from last_update_time through today
    date_list = self.get_update_duration()
    if len(date_list) > 0:
        # compute the regular indicators
        self.compute_stock_indicators()
        # index data must be processed last
        self.compute_index_indicators()
        # store every stock data file into redis; needed because the daily
        # K-line data has just been updated
        # self.set_process_data_market_stock_to_redis()
        # generate the per-trade_date stock data files
        count = 1
        length = len(date_list)
        logger.info('=====generate_trade_date_day_file start=====')
        for date in date_list:
            self.generate_trade_date_day_file(date)
            percent = round(1.00 * count / length * 100, 2)
            logger.info('Date: %s, progress: %s [%d/%d]' %
                        (date, str(percent) + '%', count, length))
            count = count + 1
        logger.info('=====generate_trade_date_day_file end=====')
        return True
    else:
        logger.info('Nothing to process')
        return False
def get_last_update_time(self, filename):
    """Determine the last update date from the source file."""
    if os.path.exists(filename):
        logger.info('File: %s' % filename)
        df = pd.read_csv(filename)
        logger.info(df)
        # sort by trade date, newest first
        df = df.sort_values(by=['trade_date'], ascending=False)
        df = df[0:1]  # keep the first row
        return df['trade_date'].values[0]  # value of the trade_date column
    else:
        return self.last_update_time
def show_compute_index_indicators(self, o_filename, p_filename, code,
                                  count, length, is_index):
    """Compute indicators for one code, write the process file, and log progress."""
    if os.path.exists(o_filename):
        stock_data = pd.read_csv(o_filename)
        stock_data = self.compute_indicators(stock_data, is_index)
        # list.extend returns None (and would mutate const.COLUMNS in place),
        # so build the column list by concatenation instead
        columns = const.COLUMNS + const.INDICATOR_COLUMNS
        stock_data.to_csv(p_filename, index=False, columns=columns)
        percent = round(1.00 * count / length * 100, 2)
        logger.info('Progress: %s [%d/%d], code: %s' %
                    (str(percent) + '%', count, length, code))
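# Why the column fix above matters: extend() mutates the list in place and
# returns None, so the original `columns = const.COLUMNS.extend(...)` set
# columns to None (and grew const.COLUMNS as a side effect). A minimal
# standalone illustration with made-up column names:
base_cols = ['trade_date', 'close']
assert base_cols.extend(['ma5']) is None  # extend returns None
combined = ['trade_date', 'close'] + ['ma5', 'ma10']  # concatenation returns a new list
print(combined)  # ['trade_date', 'close', 'ma5', 'ma10']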
def refresh_token(user):
    req_data = {
        'grant_type': 'refresh_token',
        'refresh_token': user.refresh_token
    }
    resp = requests.post('https://www.reddit.com/api/v1/access_token',
                         data=req_data,
                         auth=(CLIENT_ID, CLIENT_SECRET),
                         headers={'User-Agent': USER_AGENT})
    resp = resp.json()
    token = resp.get('access_token')
    logger.info(
        f'[user {user.name}] requested a refreshed token from the server and got: {token}'
    )
    return token
def timeit(func):
    """Decorator: log a call's arguments and its wall-clock run time.
    (The original snippet showed only the inner function; this enclosing
    decorator is the assumed wrapper that supplies `func`.)
    """
    def timed(*args):
        start_time = time.time()
        result = func(*args)
        # split the elapsed whole seconds into hours, minutes, seconds
        seconds = int(time.time() - start_time)
        hours = seconds // 3600
        minutes = (seconds - hours * 3600) // 60
        seconds = seconds - hours * 3600 - minutes * 60
        name = func.__name__
        arg_str = ', '.join(repr(arg) for arg in args)
        logger.info("{n}({a})".format(n=name, a=arg_str))
        logger.info("elapsed time: {:>02d}:{:>02d}:{:>02d}".format(
            hours, minutes, seconds))
        return result
    return timed
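# Hypothetical usage of the timing decorator sketched above; slow_add is a
# made-up example function.
@timeit
def slow_add(a, b):
    time.sleep(2)
    return a + b

slow_add(1, 2)
# logs: slow_add(1, 2)
# logs: elapsed time: 00:00:02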
def hsgt_day_drop_duplicates(self):
    """Deduplicate the Stock Connect (HSGT) files."""
    logger.info('=====Stock Connect file dedup=====')
    logger.info('Start time: %s' % datetime.now())
    self.drop_duplicates(const.origin_data_ref_path)
    logger.info('End time: %s' % datetime.now())
    logger.info('=====Stock Connect file dedup done!=====')
def index_day_drop_duplicates(self):
    """Deduplicate the index files."""
    logger.info('=====index file dedup=====')
    logger.info('Start time: %s' % datetime.now())
    self.drop_duplicates(const.origin_data_index_day_path)
    logger.info('End time: %s' % datetime.now())
    logger.info('=====index file dedup done!=====')
def gui_config_modify(self, info: dict):
    t = time.time()
    gui_no = info.get('gui_no')
    admin_name = info.get('admin_name')
    changed_items = info.get('changed_items')
    info_time = info.get('time')
    if not all([gui_no, admin_name, changed_items, info_time]):
        logger.error('incomplete params')
        return update(), 400, {'Content-Type': 'application/json'}
    gui_check = self.gui_setting_collection.find_one({'gui_no': gui_no})
    admin_check = self.user_collection.find_one({
        'user_name': admin_name,
        'activate': 1
    })
    if not gui_check:
        logger.error("gui_no: %s doesn't exist" % gui_no)
        return update(), 422, {'Content-Type': 'application/json'}
    try:
        # optimistic locking: reject the change if the record has moved on
        if gui_check["update_time"] != changed_items["update_time"]:
            return update(), 422, {'Content-Type': 'application/json'}
        limit = list(self.el_config_collection.aggregate([
            {'$match': {'gui_no': gui_no}},
            {'$group': {'_id': '$gui_no', 'limit': {'$sum': 1}}}
        ]))
        if limit[0]['limit'] > int(changed_items['el_limit']):
            return update(), 412, {'Content-Type': 'application/json'}
        changed_before = dict()
        changed_after = dict()
        for key, value in changed_items.items():
            if (pre_data := gui_check.get(key)) != value:
                changed_before[key] = pre_data
                changed_after[key] = value
                gui_check[key] = value
        gui_check['update_time'] = t
        self.gui_setting_collection.replace_one({"gui_no": gui_no}, gui_check)
    except KeyError:
        # the original try block had no handler (a syntax error as written);
        # assume malformed changed_items should be rejected as a bad request
        logger.error('malformed changed_items')
        return update(), 400, {'Content-Type': 'application/json'}
    self.user_log_collection.insert_one({
        'id': {
            'admin_id': admin_check['_id'],
            'gui_id': gui_check['_id']
        },
        'operator': admin_name,
        'gui_no': gui_no,
        'time': info_time,
        'action': "change gui_config",
        'changed_before': changed_before,
        'changed_after': changed_after
    })
    logger.info('gui_config_modify')
    return update(), 200, {'Content-Type': 'application/json'}
def stock_day_drop_duplicates(self):
    """Deduplicate the stock files."""
    logger.info('=====stock file dedup=====')
    logger.info('Start time: %s' % datetime.now())
    self.drop_duplicates(const.origin_data_market_day_path)
    logger.info('End time: %s' % datetime.now())
    logger.info('=====stock file dedup done!=====')
def select_rise_limit_times(self, duration_days=31, times=5):
    """Find stocks with a consecutive limit-up count >= times within the past duration_days days."""
    logger.info('=====select_rise_limit_times start=====')
    logger.info('Start time: %s' % datetime.now())
    if not self.debug:
        stock_list = pd.read_csv(const.ORIGIN_DATA_STOCK_BASIC)
    else:
        stock_list = pd.read_csv(const.DEBUG_DATA_STOCK_BASIC)
    count = 1
    length = len(stock_list)
    # collect matching codes in a list; DataFrame.append is deprecated
    selected = []
    p_filename = 'select_rise_limit_times_%s_%s.csv' % (duration_days, times)
    p_filename = os.path.join(const.select_data_root_path, p_filename)
    for index, row in stock_list.iterrows():
        code = row["ts_code"]
        o_filename = os.path.join(const.process_data_market_day_path,
                                  code + '.csv')
        if os.path.exists(o_filename):
            stock_data = pd.read_csv(o_filename)
            # keep the most recent duration_days rows
            stock_data = stock_data[0:duration_days]
            tmp = np.where(stock_data['rise_limit_count'] >= times)
            if len(tmp[0]) > 0:
                selected.append(code)
            percent = round(1.00 * count / length * 100, 2)
            logger.info('Progress: %s [%d/%d], code: %s' %
                        (str(percent) + '%', count, length, code))
        count = count + 1
    df = pd.DataFrame({'ts_code': selected})
    df.to_csv(p_filename, index=False)
    logger.info('End time: %s' % datetime.now())
    logger.info('=====select_rise_limit_times done!=====')
def get_stock_basic(self):
    """Fetch the list of listed stocks; keep six fields only."""
    logger.info('=====fetch listed stock list start=====')
    logger.info('Start time: %s' % datetime.now())
    df = self.pro.stock_basic(
        list_status='L',
        fields='ts_code,symbol,name,area,industry,list_date')
    df.to_csv(
        const.ORIGIN_DATA_STOCK_BASIC,
        header=True,
        index=False,
        columns=[
            'ts_code', 'symbol', 'name', 'area', 'industry', 'list_date'
        ])
    logger.info('File: %s' % const.ORIGIN_DATA_STOCK_BASIC)
    logger.info('End time: %s' % datetime.now())
    logger.info('=====fetch listed stock list done!=====')
def request_token(code):
    req_data = {
        'grant_type': 'authorization_code',
        'code': code,
        'redirect_uri': REDIRECT_URI
    }
    resp = requests.post('https://www.reddit.com/api/v1/access_token',
                         data=req_data,
                         auth=(CLIENT_ID, CLIENT_SECRET),
                         headers={'User-Agent': USER_AGENT})
    resp = resp.json()
    token = resp.get('access_token')
    refresh = resp.get('refresh_token')
    if token is None or refresh is None:
        logger.warning(f'COULD NOT GET TOKEN FOR CODE {code}: {resp}')
    else:
        logger.info(
            f'requested token from server and got {token} (refresh token: {refresh})'
        )
    return (token, refresh)
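# A sketch of building the authorization URL whose redirect carries the
# `code` consumed by request_token above. The endpoint and parameter names
# follow Reddit's OAuth2 code flow; the scope list and the helper name are
# assumptions for illustration, not part of the original module.
from urllib.parse import urlencode

def build_authorize_url(state: str) -> str:
    params = {
        'client_id': CLIENT_ID,
        'response_type': 'code',
        'state': state,                # random string, echoed back on redirect
        'redirect_uri': REDIRECT_URI,  # must match the app's registered URI
        'duration': 'permanent',       # 'permanent' yields a refresh_token
        'scope': 'identity history edit',  # assumed scopes for this app
    }
    return 'https://www.reddit.com/api/v1/authorize?' + urlencode(params)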
def ths_hsgt(self):
    headers = {
        'Accept': 'text/html, */*; q=0.01',
        'Accept-Language': 'zh-cn',
        'Host': 'data.10jqka.com.cn',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Safari/605.1.15',
        'Referer': 'http://data.10jqka.com.cn/hgt/sgtb/',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
        'X-Requested-With': 'XMLHttpRequest',
        # 'hexin-v': 'ApHQejnwqQQd9MUQ6094oau_pp8q_gTkr3CplHMmj-51NbvJu04VQD_CuVcA'
    }
    r = requests.get(
        'http://data.10jqka.com.cn/hgt/sgtb/', headers=headers, timeout=3)
    # logger.info(r.text)
    html = etree.HTML(r.text)
    trs = html.xpath('//*[@id="table1"]/table/tbody/tr')
    for tr in trs:
        logger.info(tr[0].text)
def only_once_stock_index_day(self, end_date=None):
    """Fetch daily index quotes."""
    logger.info('=====fetch daily index quotes=====')
    logger.info('Start time: %s' % datetime.now())
    if end_date is None:
        end_date = self.today_date
    start_date = self.get_cal_start_date()
    for index in const.CODE_INDEX_LIST:
        df = self.pro.index_daily(
            ts_code=index, start_date=start_date, end_date=end_date)
        # logger.info(df)
        filename = os.path.join(const.origin_data_index_day_path,
                                index + '.csv')
        df.to_csv(filename, header=True, index=False, columns=const.COLUMNS)
        logger.info('File: %s' % filename)
    logger.info('End time: %s' % datetime.now())
    logger.info('=====fetch daily index quotes done!=====')
def drop_duplicates(self, path):
    """Deduplicate the data files under `path`, keyed on the trade_date column."""
    for root, dirs, files in os.walk(path):
        files.sort()
        count = 1
        for file in files:
            if os.path.splitext(file)[1] == '.csv':
                # join against root (the directory being walked), not path,
                # so files in subdirectories resolve correctly
                filename = os.path.join(root, file)
                percent = round(1.00 * count / len(files) * 100, 2)
                logger.info('Progress: %s [%d/%d], file: %s' %
                            (str(percent) + '%', count, len(files), file))
                stock_data = pd.read_csv(filename)
                # drop rows that share a trade_date
                stock_data.drop_duplicates('trade_date', inplace=True)
                stock_data.to_csv(filename, index=False, columns=const.COLUMNS)
                count = count + 1