Beispiel #1
0
 def set_process_data_market_stock_to_redis(self):
     """
     Store every process_data_market_day stock daily K-line file in redis.

     Reads the stock list (the debug list when ``self.debug`` is set) and
     pushes each stock's processed day data via StockDataRepo, logging the
     overall progress as it goes.
     """
     # BUG FIX: logging.Logger.info() does not accept an ``end`` keyword
     # (that belongs to print()); passing it raises TypeError at runtime.
     logger.info(
         '\n=====set_process_data_market_stock_to_redis start=====')
     logger.info('开始时间:%s' % datetime.now())
     if not self.debug:
         stock_list = pd.read_csv(const.ORIGIN_DATA_STOCK_BASIC)
     else:
         stock_list = pd.read_csv(const.DEBUG_DATA_STOCK_BASIC)
     sdr = StockDataRepo()
     count = 1
     length = len(stock_list)
     for index, row in stock_list.iterrows():
         sdr.set_process_data_market_day_data(row['ts_code'])
         percent = round(1.00 * count / length * 100, 2)
         logger.info(
             '进度 : %s [%d/%d]' % (str(percent) + '%', count, length))
         count = count + 1
     logger.info('结束时间:%s' % datetime.now())
     logger.info(
         '=====set_process_data_market_stock_to_redis done!=====')
    def user_login_operator(self, info: dict):
        """Validate an operator login and record it in the user log.

        Expects ``user_name``, ``user_pw`` and ``time`` in ``info``.
        Returns a (body, status) tuple: the user's type with 200 on
        success, or an error message with 421 on failure.
        """
        name = info.get('user_name')
        password = info.get('user_pw')
        when = info.get('time')
        if not (name and password and when):
            logger.error('incomplete params')
            return 'incomplete params', 421

        record = self.user_collection.find_one({
            "user_name": name,
            "user_pw": password,
            "activate": 1
        })
        if not record:
            logger.error("user:%s didn't exist" % name)
            return 'login failed', 421

        # audit trail: every successful operator login is journaled
        self.user_log_collection.insert_one({
            'id': {'user_id': record['_id']},
            'operator': name,
            'time': when,
            'action': 'operator %s login' % name
        })
        logger.info("login %s" % name)
        return record['type'], 200
Beispiel #3
0
def delete_comments(user):
    """Delete the user's recent reddit comments scoring below -1.

    Fetches the user's latest comments through the OAuth API, deletes every
    comment whose score is < -1, bumps ``user.deleted_count`` and commits
    the session.  Returns True on success, or None when the API rejects
    the request (status >= 401).
    """
    token = user.token
    headers = {'Authorization': f'Bearer {token}', 'User-Agent': USER_AGENT}
    resp = requests.get(f'https://oauth.reddit.com/user/{user.name}/comments',
                        headers=headers)
    if resp.status_code >= 401:
        return None
    children = resp.json().get('data').get('children')
    comments = [(c.get('data').get('id'), c.get('data').get('score'))
                for c in children]
    comments = [c for c in comments if c[1] < -1]
    for comment_id, score in comments:
        logger.info(f'deleting post {comment_id} with score {score}')
        # BUG FIX: the original payload was the literal string 't1_{id'
        # (missing f-prefix and closing brace), so nothing was ever deleted.
        data = {'id': f't1_{comment_id}'}
        resp = requests.post('https://oauth.reddit.com/api/del',
                             headers=headers,
                             json=data)
        logger.info(resp.status_code)
        try:
            logger.info(resp.json())
        except ValueError:
            # response body is not JSON; log it raw instead
            logger.info(resp.content)
    user.deleted_count = user.deleted_count + len(comments)
    db.session.commit()
    logger.info(f'[{user.name}] deleted {len(comments)} comments')

    return True
Beispiel #4
0
 def run_only_once(self):
     """
     Download the initialisation data set.  The download order matters,
     and ``last_update_time`` is the baseline date.  Intended to run
     exactly once, on first use.
     """
     logger.info('=====TushareFetch run_only_once start=====')
     logger.info('开始时间:%s' % datetime.now())
     # The SH composite index csv under origin acts as a sentinel: when it
     # is missing, nothing has been initialised yet.
     marker = os.path.join(const.origin_data_index_day_path,
                           const.CODE_INDEX_SH + '.csv')
     if os.path.exists(marker):
         logger.info('文件已经存在,无需在初始化下载,请直接运行每日更新任务!')
     else:
         if not self.debug:
             # stock master data
             self.get_stock_basic()
             # trading calendar
             self.get_stock_trade_cal(self.last_update_time)
         # daily K-lines for every stock
         self.only_once_all_stock_data(self.last_update_time)
         # HK/mainland Stock Connect data
         self.only_once_hsgt_data(self.last_update_time)
         # index data must always be downloaded last
         self.only_once_stock_index_day(self.last_update_time)
     logger.info('结束时间:%s' % datetime.now())
     logger.info('=====TushareFetch run_only_once done!=====')
Beispiel #5
0
 def trans_all_week(self):
     """
     Convert the daily lines of every listed stock into weekly lines.

     Iterates the full stock list and calls ``trans_day2week`` per symbol.
     """
     stock_list = pd.read_csv(const.ORIGIN_DATA_STOCK_BASIC)
     for index, row in stock_list.iterrows():
         # BUG FIX: logger.info(msg, *args) lazily %-formats args into msg;
         # the original passed four bare values with no placeholders, which
         # produces a logging formatting error. Use an explicit format.
         logger.info('%s %s %s %s', row["ts_code"], row['symbol'],
                     row['name'], row["list_date"])
         self.trans_day2week(row['symbol'])
Beispiel #6
0
 def check_folder(self):
     """
     Make sure the expected directory layout exists, creating any missing
     directory on the fly.
     """
     # select_data directory
     select_root = const.select_data_root_path
     if not os.path.exists(select_root):
         os.makedirs(select_root)
         logger.info('mkdir %s' % select_root)
     logger.debug('select_data 目录检测完毕!')
 def gui_config_check(self, info: dict):
     """Look up the GUI settings for ``gui_no`` and return them as JSON.

     Returns (body, status, headers); 421 for missing params, 400 when no
     matching setting exists.
     """
     json_headers = {'Content-Type': 'application/json'}
     gui_no = info.get('gui_no')
     if not gui_no:
         logger.error('incomplete params')
         return 'incomplete params', 421, json_headers
     setting = self.gui_setting_collection.find_one({'gui_no': gui_no}, {'_id': 0})
     if not setting:
         return 'null', 400, json_headers
     logger.info('gui_config_check')
     return json.dumps(setting), 200, json_headers
    def el_panel_config_check(self, info: dict):
        """Fetch the element-panel configuration for ``el_no`` as JSON.

        Returns (body, status, headers); 421 for missing params, 400 when
        no matching configuration exists.
        """
        json_headers = {'Content-Type': 'application/json'}
        el_no = info.get('el_no')
        if not el_no:
            logger.error('incomplete params')
            return 'incomplete params', 421, json_headers

        config = self.el_config_collection.find_one({'el_no': el_no}, {'_id': 0})
        if not config:
            return 'null', 400, json_headers
        logger.info('el_panel_config_check')
        return json.dumps(config), 200, json_headers
Beispiel #9
0
 def run_only_once(self):
     """
     One-shot initialisation pipeline: download the origin files, compute
     the indicator (process) files, then compute the emotion-index files.
     """
     logger.info('run_only_once')
     # order matters: fetch, then process, then emotion
     for job in (TushareFetch(), ProcessStockData(), EmotionIndex()):
         job.run_only_once()
Beispiel #10
0
 def only_once_stock_daily_by_code(self, ts_code, start_date, end_date):
     """
     Fetch the daily bars of a single stock and write them to the origin
     market-day directory; used while initialising the data set.
     """
     bars = self.pro.daily(ts_code=ts_code,
                           start_date=start_date,
                           end_date=end_date)
     name = '%s.csv' % (ts_code)
     filename = os.path.join(const.origin_data_market_day_path, name)
     bars.to_csv(filename, index=False, columns=const.COLUMNS)
     logger.info('%s 成功' % name)
     logger.info('文件:%s' % filename)
Beispiel #11
0
 def get_hsgt_data_by_10jqka(self, trade_date):
     """
     Scrape the Stock Connect daily net-flow figures for ``trade_date``
     from 10jqka (同花顺).

     NOTE: the site's tables only list roughly the most recent 10 trading
     days, so ``trade_date`` must fall inside that window; otherwise the
     returned values remain 0.

     Returns a dict {'hgt': ..., 'sgt': ...}. Values are parsed from a
     "...亿" cell and multiplied by 100 — presumably converting 亿 to
     百万/millions; confirm the intended unit with callers.
     """
     urls = {
         # Shanghai Connect
         'hgt': 'http://data.10jqka.com.cn/hgt/hgtb/',
         # Shenzhen Connect
         'sgt': 'http://data.10jqka.com.cn/hgt/sgtb/',
         # # HK Connect (via Shanghai)
         # 'ggt_ss': 'http://data.10jqka.com.cn/hgt/ggtb/',
         # # HK Connect (via Shenzhen)
         # 'ggt_sz': 'http://data.10jqka.com.cn/hgt/ggtbs/'
     }
     # result accumulator, one entry per enabled market
     money_data = {
         # Shanghai Connect
         'hgt': 0,
         # Shenzhen Connect
         'sgt': 0,
         # # HK Connect (via Shanghai)
         # 'ggt_ss': 0,
         # # HK Connect (via Shenzhen)
         # 'ggt_sz': 0
     }
     logger.info(money_data)
     # northbound money
     # north_money = 0
     # southbound money
     # south_money = 0
     # plain-browser headers so the site serves the regular HTML table
     headers = {
         'Accept': 'text/html, */*; q=0.01',
         'Accept-Language': 'zh-cn',
         'Host': 'data.10jqka.com.cn',
         'User-Agent':
         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Safari/605.1.15',
         'Referer': 'http://data.10jqka.com.cn/hgt/hgtb/',
         'Accept-Encoding': 'gzip, deflate',
         'Connection': 'keep-alive',
     }
     for key in urls.keys():
         r = requests.get(urls[key], headers=headers, timeout=5)
         # logger.info(r.text)
         html = etree.HTML(r.text)
         trs = html.xpath('//*[@id="table1"]/table/tbody/tr')
         for tr in trs:
             # first cell holds the row's date; match the requested day
             if (tr[0].text == trade_date):
                 # logger.info(tr[1].text)
                 # second cell holds the net flow, e.g. "12.34亿"
                 s = tr[1].text.replace('亿', '')
                 money_data[key] = round(float(s) * 100, 2)
                 break
     logger.info(money_data)
     return money_data
 def user_delete(self, info: dict):
     """
     Soft-delete a user on behalf of an admin.

     Requires ``user_name``, ``admin_name`` and ``time`` in ``info``.
     Only an active super_admin / yc_admin may delete, and a super_admin
     cannot delete another super_admin.  The delete is soft: ``activate``
     is overwritten with a timestamp and the action is journaled.
     Returns (body, status, headers).
     """
     t = time.time()
     user_name = info.get('user_name')
     admin_name = info.get('admin_name')
     info_time = info.get('time')
     json_headers = {'Content-Type': 'application/json'}
     if not all([user_name, admin_name, info_time]):
         logger.error('incomplete params')
         return update(), 400, json_headers
     admin_check = self.user_collection.find_one({
         'user_name': admin_name,
         'activate': 1
     })
     if not admin_check:
         logger.error("admin user:%s didn't exist" % admin_name)
         return "admin user didn't exist", 400, json_headers
     if admin_check['type'] != 'super_admin' and admin_check[
             'type'] != 'yc_admin':
         logger.error("permission denied %s" % admin_name)
         return update(), 423, json_headers
     user_check = self.user_collection.find_one({
         'user_name': user_name,
         'activate': 1
     })
     # BUG FIX: the original dereferenced user_check['type'] without
     # checking find_one() succeeded, raising TypeError for an unknown
     # or already-deactivated user.
     if not user_check:
         logger.error("user:%s didn't exist" % user_name)
         return "user didn't exist", 400, json_headers
     if user_check['type'] == 'super_admin' and admin_check[
             'type'] == 'super_admin':
         logger.error("permission denied %s" % admin_name)
         return update(), 423, json_headers
     # soft delete: a non-1 ``activate`` timestamp marks the row removed
     user_check['activate'] = time.time()
     user_check['update_time'] = t
     self.user_collection.replace_one(
         {
             'user_name': user_name,
             'activate': 1
         }, user_check)
     self.user_log_collection.insert_one({
         'id': {
             'user_id': user_check['_id'],
             'admin_id': admin_check['_id']
         },
         'operator': admin_name,
         'user_name': user_name,
         'time': info_time,
         'action': "%s delete user %s" % (admin_name, user_name)
     })
     logger.info("user_del_%s" % user_name)
     return update(), 200, json_headers
    def user_login_admin(self, info: dict):
        """Authenticate an admin user and return the admin console payload.

        Requires ``user_name``, ``user_pw``, ``time`` and ``admin_url`` in
        ``info``.  On success returns a JSON body carrying the user's type
        plus the permission/line/string/gui settings with status 200; any
        validation failure returns an error body with status 421.
        """
        json_headers = {'Content-Type': 'application/json'}
        name = info.get('user_name')
        password = info.get('user_pw')
        when = info.get('time')
        admin_url = info.get('admin_url')
        if not all([name, password, when, admin_url]):
            logger.error('incomplete params')
            return 'incomplete params', 421, json_headers

        account = self.user_collection.find_one({
            "user_name": name,
            "user_pw": password,
            "activate": 1
        })
        if not account:
            logger.error("user:%s didn't exist" % name)
            return "user didn't exist", 421, json_headers
        if account['type'] == 'operator':
            return "not admin", 421, json_headers

        # audit trail for every successful admin login
        self.user_log_collection.insert_one({
            'id': {'user_id': account['_id']},
            'operator': name,
            'time': when,
            'action': "admin %s login" % name
        })

        res = {
            'type': account['type'],
            'permission_mng':
            list(self.permission_collection.find({}, {'_id': 0})),
            'line_setting':
            list(self.el_config_collection.find({}, {'_id': 0})),
            'string_setting':
            list(self.el_string_collection.find({}, {'_id': 0})),
            'gui_setting':
            list(self.gui_setting_collection.find({}, {'_id': 0})),
        }
        logger.info("admin_login_%s" % name)
        return json.dumps(res), 200, json_headers
 def user_add(self, info: dict):
     """Create a new user on behalf of an admin and journal the action.

     Requires ``user_name``, ``user_pw``, ``admin_name``, ``type`` and
     ``time`` in ``info``.  Only an active super_admin / yc_admin may add
     users, and the user name must not already be active.  Returns
     (body, status, headers).
     """
     now = time.time()
     json_headers = {'Content-Type': 'application/json'}
     user_name = info.get('user_name')
     user_pw = info.get('user_pw')
     admin_name = info.get('admin_name')
     user_type = info.get('type')
     info_time = info.get('time')
     if not all([user_name, user_pw, admin_name, user_type, info_time]):
         logger.error('incomplete params')
         return update(), 400, json_headers
     admin_check = self.user_collection.find_one({
         'user_name': admin_name,
         'activate': 1
     })
     if not admin_check:
         logger.error("admin user:%s didn't exist" % admin_name)
         return "admin user didn't exist", 400, json_headers
     if admin_check['type'] not in ('super_admin', 'yc_admin'):
         logger.error("permission denied %s" % admin_name)
         return update(), 423, json_headers
     if self.user_collection.find_one({
             'user_name': user_name,
             'activate': 1
     }):
         return 'user exists', 413, json_headers
     created = self.user_collection.insert_one({
         "user_name": user_name,
         "user_pw": user_pw,
         "activate": 1,
         "type": user_type,
         "update_time": now
     })
     self.user_log_collection.insert_one({
         'id': {
             'admin_id': admin_check["_id"],
             'user_id': ObjectId(created.inserted_id)
         },
         'operator': admin_name,
         'user_name': user_name,
         'time': info_time,
         'action': "%s add user %s" % (admin_name, user_name)
     })
     logger.info("user_add{%s}" % user_name)
     return update(), 200, json_headers
Beispiel #15
0
 def get_cal_end_date(self):
     """
     Return the end date of the trading-calendar file as a string: the
     cal_date of the first row whose is_open flag is positive.
     """
     source = (const.DEBUG_DATA_STOCK_TRADE_CAL
               if self.debug else const.ORIGIN_DATA_STOCK_TRADE_CAL)
     cal = pd.read_csv(source)
     open_days = cal[(cal['is_open'] > 0)]
     end_date = open_days['cal_date'].values[0]
     logger.info(end_date)
     return str(end_date)
Beispiel #16
0
 def run_daily_job(self):
     """
     Daily task: process all data for every date that needs updating.

     Returns True when at least one date was processed, False otherwise.
     """
     # dates between last_update_time and today that still need work
     date_list = self.get_update_duration()
     if len(date_list) == 0:
         logger.info('没有需要处理的数据')
         return False
     # regular stock indicators first ...
     self.compute_stock_indicators()
     # ... index indicators always last
     self.compute_index_indicators()
     # redis refresh intentionally disabled here:
     # self.set_process_data_market_stock_to_redis()
     # build the per-trade-date stock files
     length = len(date_list)
     logger.info('=====generate_trade_date_day_file start=====')
     for count, date in enumerate(date_list, start=1):
         self.generate_trade_date_day_file(date)
         percent = round(1.00 * count / length * 100, 2)
         logger.info('计算日期:%s, 进度 : %s [%d/%d]' %
                     (date, str(percent) + '%', count, length))
     logger.info('=====generate_trade_date_day_file end=====')
     return True
Beispiel #17
0
 def get_last_update_time(self, filename):
     """
     Derive the most recent update date from ``filename``'s trade_date
     column; fall back to ``self.last_update_time`` when the file does
     not exist.
     """
     if not os.path.exists(filename):
         return self.last_update_time
     logger.info('文件:%s' % filename)
     df = pd.read_csv(filename)
     logger.info(df)
     # newest trade date first, then read it off the top row
     newest = df.sort_values(by=['trade_date'], ascending=False)[0:1]
     return newest['trade_date'].values[0]
Beispiel #18
0
 def show_compute_index_indicators(self, o_filename, p_filename, code,
                                   count, length, is_index):
     """
     Compute indicators for one origin file, write the processed csv and
     log the overall progress.

     o_filename: origin csv path; p_filename: processed csv target;
     code: stock/index code for the log line; count/length: progress
     counters; is_index: whether the data is an index series.
     """
     if os.path.exists(o_filename):
         stock_data = pd.read_csv(o_filename)
         stock_data = self.compute_indicators(stock_data, is_index)
         # BUG FIX: list.extend() mutates in place and returns None, so the
         # original passed columns=None AND grew const.COLUMNS on every
         # call. Build a fresh combined list instead.
         columns = const.COLUMNS + const.INDICATOR_COLUMNS
         stock_data.to_csv(p_filename, index=False, columns=columns)
     percent = round(1.00 * count / length * 100, 2)
     logger.info(
         '进度 : %s [%d/%d],code:%s' % (
             (str(percent) + '%', count, length, code)),
         )
Beispiel #19
0
def refresh_token(user):
    """Exchange the user's refresh token for a fresh access token.

    Posts to reddit's OAuth endpoint and returns the new access token
    (None when the response carries no 'access_token').
    """
    payload = {
        'grant_type': 'refresh_token',
        'refresh_token': user.refresh_token
    }
    response = requests.post('https://www.reddit.com/api/v1/access_token',
                             data=payload,
                             auth=(CLIENT_ID, CLIENT_SECRET),
                             headers={'User-Agent': USER_AGENT})
    body = response.json()
    token = body.get('access_token')
    logger.info(
        f'[user {user.name}] requested a refreshed token from the server and got: {token}'
    )
    return token
Beispiel #20
0
    def timed(*args, **kwargs):
        """Call the wrapped ``func``, logging its call and wall-clock time.

        Generalised to forward keyword arguments as well — the original
        wrapper silently rejected them. Positional-only calls behave
        exactly as before.
        """
        start_time = time.time()
        result = func(*args, **kwargs)
        # split elapsed whole seconds into h:m:s
        seconds = int(time.time() - start_time)
        hours, seconds = divmod(seconds, 3600)
        minutes, seconds = divmod(seconds, 60)

        name = func.__name__
        arg_str = ', '.join(
            [repr(arg) for arg in args] +
            ['%s=%r' % (k, v) for k, v in kwargs.items()])
        logger.info("{n}({a})".format(n=name, a=arg_str))
        logger.info("elapsed time: {:>02d}:{:>02d}:{:>02d}".format(
            hours, minutes, seconds))
        return result
Beispiel #21
0
 def hsgt_day_drop_duplicates(self):
     """Deduplicate the Stock Connect (沪深港通) daily data files.

     Delegates to ``drop_duplicates`` on the origin ref directory and
     logs start/end timestamps around it.
     """
     logger.info('=====沪深港通文件去重=====')
     logger.info('开始时间:%s' % datetime.now())
     self.drop_duplicates(const.origin_data_ref_path)
     logger.info('结束时间:%s' % datetime.now())
     logger.info('=====沪深港通文件去重 done!=====')
Beispiel #22
0
 def index_day_drop_duplicates(self):
     """Deduplicate the index daily data files.

     Delegates to ``drop_duplicates`` on the origin index-day directory
     and logs start/end timestamps around it.
     """
     logger.info('=====指数文件去重=====')
     logger.info('开始时间:%s' % datetime.now())
     self.drop_duplicates(const.origin_data_index_day_path)
     logger.info('结束时间:%s' % datetime.now())
     logger.info('=====指数文件去重 done!=====')
    def gui_config_modify(self, info: dict):
        """Apply ``changed_items`` to the GUI config ``gui_no`` and log a diff.

        Validates the optimistic-lock ``update_time`` and checks the new
        ``el_limit`` against the number of el configs already attached to
        the gui before saving.  Returns (body, status, headers).
        """
        t = time.time()
        gui_no = info.get('gui_no')
        admin_name = info.get('admin_name')
        changed_items = info.get('changed_items')
        info_time = info.get('time')
        if not all([gui_no, admin_name, changed_items, info_time]):
            logger.error('incomplete params')
            return update(), 400, {'Content-Type': 'application/json'}
        gui_check = self.gui_setting_collection.find_one({'gui_no': gui_no})
        admin_check = self.user_collection.find_one({"user_name": info["admin_name"], "activate": 1})
        if not gui_check:
            logger.error("gui_no:%s didn't exist" % (info["admin_name"]))
            return update(), 422, {'Content-Type': 'application/json'}
        try:
            # optimistic lock: reject a stale client copy
            if gui_check["update_time"] != changed_items["update_time"]:
                return update(), 422, {'Content-Type': 'application/json'}

            # the new el_limit must cover the el configs already attached
            limit = list(self.el_config_collection.aggregate([
                {'$match': {'gui_no': gui_no}},
                {'$group': {'_id': '$gui_no', 'limit': {'$sum': 1}}}
            ]))
            if limit[0]['limit'] > int(changed_items['el_limit']):
                return update(), 412, {'Content-Type': 'application/json'}
            # record a before/after diff of every field that changed
            changed_before = dict()
            changed_after = dict()
            for key, value in changed_items.items():
                if (pre_data := gui_check.get(key)) != value:
                    changed_before[key] = pre_data
                    changed_after[key] = value
                    gui_check[key] = value
            gui_check['update_time'] = t
            self.gui_setting_collection.replace_one({"gui_no": gui_no}, gui_check)
            self.user_log_collection.insert_one({
                'id': {
                    'admin_id': admin_check['_id'],
                    'gui_id': gui_check['_id']
                },
                'operator': admin_name,
                'gui_no': gui_no,
                'time': info_time,
                'action': "change gui_config",
                'changed_before': changed_before,
                'changed_after': changed_after
            })
            logger.info('gui_config_modify')
            return update(), 200, {'Content-Type': 'application/json'}
        # BUG FIX: the original ``try`` had no except/finally clause, which
        # is a SyntaxError. Catch malformed payloads/lookup failures here.
        except (KeyError, IndexError, TypeError, ValueError) as exc:
            logger.error('gui_config_modify failed: %s' % exc)
            return update(), 400, {'Content-Type': 'application/json'}
Beispiel #24
0
 def stock_day_drop_duplicates(self):
     """Deduplicate the per-stock daily data files.

     Delegates to ``drop_duplicates`` on the origin market-day directory
     and logs start/end timestamps around it.
     """
     logger.info('=====股票文件去重=====')
     logger.info('开始时间:%s' % datetime.now())
     self.drop_duplicates(const.origin_data_market_day_path)
     logger.info('结束时间:%s' % datetime.now())
     logger.info('=====股票文件去重 done!=====')
Beispiel #25
0
    def select_rise_limit_times(self, duration_days=31, times=5):
        """
        Find stocks whose consecutive limit-up count reached ``times`` or
        more at least once within the last ``duration_days`` rows and write
        their ts_codes to a csv under the select_data directory.
        """
        logger.info('=====找出过去duration_days天内连板数大于等于times次的股票=====')
        logger.info('开始时间:%s' % datetime.now())
        if not self.debug:
            stock_list = pd.read_csv(const.ORIGIN_DATA_STOCK_BASIC)
        else:
            stock_list = pd.read_csv(const.DEBUG_DATA_STOCK_BASIC)
        count = 1
        # hoisted out of the loop: the list length never changes
        length = len(stock_list)
        selected = []
        p_filename = 'select_rise_limit_times_%s_%s.csv' % (duration_days,
                                                            times)
        p_filename = os.path.join(const.select_data_root_path, p_filename)
        for index, row in stock_list.iterrows():
            code = row["ts_code"]
            o_filename = os.path.join(const.process_data_market_day_path,
                                      code + '.csv')
            if os.path.exists(o_filename):
                stock_data = pd.read_csv(o_filename)
                # only the most recent duration_days rows
                stock_data = stock_data[0:duration_days]
                hits = np.where(stock_data['rise_limit_count'] >= times)
                if len(hits[0]) > 0:
                    selected.append(code)
            percent = round(1.00 * count / length * 100, 2)
            logger.info(
                '进度 : %s [%d/%d],code:%s' %
                ((str(percent) + '%', count, length, code)), )

            count = count + 1
        # BUG FIX: DataFrame.append was deprecated and removed in pandas
        # 2.x (and was quadratic); collect matching codes in a list and
        # build the frame once instead.
        df = pd.DataFrame({'ts_code': selected}, columns=['ts_code'])
        df.to_csv(p_filename, index=False)
        logger.info('结束时间:%s' % datetime.now())
        logger.info('=====找出过去duration_days天内连板数大于等于times次的股票 done!=====')
Beispiel #26
0
 def get_stock_basic(self):
     """
     Download the list of currently listed ('L') stocks, keeping six basic
     fields, and save it to the origin stock-basic csv.
     """
     logger.info('=====获得上市状态的股票列表开始=====')
     logger.info('开始时间:%s' % datetime.now())
     fields = ['ts_code', 'symbol', 'name', 'area', 'industry', 'list_date']
     df = self.pro.stock_basic(list_status='L', fields=','.join(fields))
     df.to_csv(const.ORIGIN_DATA_STOCK_BASIC,
               header=True,
               index=False,
               columns=fields)
     logger.info('文件:%s' % const.ORIGIN_DATA_STOCK_BASIC)
     logger.info('结束时间:%s' % datetime.now())
     logger.info('=====获得上市状态的股票列表开始 done!=====')
Beispiel #27
0
def request_token(code):
    """Trade an OAuth authorization code for an access/refresh token pair.

    Returns a (token, refresh) tuple; either element may be None when the
    exchange fails, in which case a warning is logged.
    """
    payload = {
        'grant_type': 'authorization_code',
        'code': code,
        'redirect_uri': REDIRECT_URI
    }
    response = requests.post('https://www.reddit.com/api/v1/access_token',
                             data=payload,
                             auth=(CLIENT_ID, CLIENT_SECRET),
                             headers={'User-Agent': USER_AGENT})
    resp = response.json()
    token = resp.get('access_token')
    refresh = resp.get('refresh_token')
    if token is None or refresh is None:
        logger.warning(f'COULD NOT GET TOKEN FOR CODE {code}: {resp}')
    else:
        logger.info(
            f'requested token from server and got {token} (refresh token: {refresh})'
        )
    return (token, refresh)
Beispiel #28
0
 def ths_hsgt(self):
     """Probe the 10jqka Shenzhen-Connect page and log each table row's date."""
     # plain-browser headers so the site serves the regular HTML table
     headers = {
         'Accept': 'text/html, */*; q=0.01',
         'Accept-Language': 'zh-cn',
         'Host': 'data.10jqka.com.cn',
         'User-Agent':
         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Safari/605.1.15',
         'Referer': 'http://data.10jqka.com.cn/hgt/sgtb/',
         'Accept-Encoding': 'gzip, deflate',
         'Connection': 'keep-alive',
         'X-Requested-With': 'XMLHttpRequest',
     }
     page = requests.get('http://data.10jqka.com.cn/hgt/sgtb/',
                         headers=headers, timeout=3)
     rows = etree.HTML(page.text).xpath('//*[@id="table1"]/table/tbody/tr')
     for row in rows:
         # first cell of each row holds the trade date
         logger.info(row[0].text)
Beispiel #29
0
 def only_once_stock_index_day(self, end_date=None):
     """
     Download the daily bars for every index in CODE_INDEX_LIST, writing
     one csv per index. ``end_date`` defaults to today's date.
     """
     logger.info('=====获得指数日线行情=====')
     logger.info('开始时间:%s' % datetime.now())
     if end_date is None:
         end_date = self.today_date
     start_date = self.get_cal_start_date()
     for code in const.CODE_INDEX_LIST:
         bars = self.pro.index_daily(ts_code=code,
                                     start_date=start_date,
                                     end_date=end_date)
         filename = os.path.join(const.origin_data_index_day_path,
                                 code + '.csv')
         bars.to_csv(filename, header=True, index=False,
                     columns=const.COLUMNS)
         logger.info('文件:%s' % filename)
     logger.info('结束时间:%s' % datetime.now())
     logger.info('=====获得指数日线行情 done!=====')
Beispiel #30
0
 def drop_duplicates(self, path):
     """
     Deduplicate every csv under ``path`` in place, keyed on the
     trade_date column, logging progress per directory.
     """
     for root, dirs, files in os.walk(path):
         files.sort()
         count = 1
         for file in files:
             if os.path.splitext(file)[1] == '.csv':
                 # BUG FIX: join against the directory actually being
                 # walked (root), not the top-level path — the original
                 # produced wrong paths for csv files in subdirectories.
                 filename = os.path.join(root, file)
                 percent = round(1.00 * count / len(files) * 100, 2)
                 logger.info(
                     '进度 : %s [%d/%d],file:%s' %
                     ((str(percent) + '%', count, len(files), file)), )
                 stock_data = pd.read_csv(filename)
                 # drop repeated trading days, rewrite the file in place
                 stock_data.drop_duplicates('trade_date', inplace=True)
                 stock_data.to_csv(filename,
                                   index=False,
                                   columns=const.COLUMNS)
             count = count + 1