Example #1
0
def crawl_index_quote():
    """Crawl daily k-line data for every index, plus market-summary data for selected ones.

    For each row returned by ``get_security_list(security_type='index')``:

    1. If the latest downloaded trading date (source ``'163'``) is not after
       today, run ``StockKdataSpider163`` over the ``start_date``..``end_date`` range.
    2. If the index is one of the four market-summary indices, collect the
       dates whose ``turnoverRate``, ``tCap``, ``mCap`` or ``pe`` value is
       missing and run ``StockSummarySpider`` for those dates.
    """
    # Indices that carry market overview data [Shanghai, Shenzhen, SME board, ChiNext]
    summary_index_ids = ['index_sh_000001', 'index_sz_399106', 'index_sz_399005', 'index_sz_399006']

    index_list = get_security_list(security_type='index')
    for _, security_item in index_list.iterrows():
        code = security_item['code']

        # Fetch daily k-line data
        logger.info("{} get index kdata start".format(code))

        start_date = get_latest_download_trading_date(security_item, source='163')
        end_date = pd.Timestamp.today()
        if start_date > end_date:
            logger.info("{} kdata is ok".format(code))
        else:
            kdata_args = {
                "security_item": security_item,
                "start_date": start_date,
                "end_date": end_date,
            }
            process_crawl(StockKdataSpider163, kdata_args)

        logger.info("{} get index kdata from 163 end".format(code))

        # Fetch market overview data only for the summary indices
        if security_item['id'] not in summary_index_ids:
            continue

        kdata = get_kdata(security_item=security_item)
        has_gap = (kdata['turnoverRate'].isna() | kdata['tCap'].isna()
                   | kdata['mCap'].isna() | kdata['pe'].isna())
        incomplete = kdata[has_gap]
        if incomplete.empty:
            continue

        dates = incomplete.index.strftime('%Y-%m-%d').tolist()
        if dates:
            summary_args = {"security_item": security_item, "the_dates": dates}
            process_crawl(StockSummarySpider, summary_args)
Example #2
0
def fetch_kdata(exchange_str='bitstamp'):
    """Fetch daily OHLCV k-line data for the tracked pairs of one ccxt exchange.

    For every cryptocurrency security of ``exchange_str`` whose name is in
    ``CRYPTOCURRENCY_PAIR``, the daily candles are fetched through ccxt,
    today's (still incomplete) candle is skipped, the remaining rows are
    appended to the frame returned by ``get_latest_download_trading_date``
    and the result is saved with ``kdata_df_save``.

    Failures for a single security are logged and do not stop the loop.

    :param exchange_str: ccxt exchange id, i.e. the name of the exchange
        class on the ``ccxt`` module (default ``'bitstamp'``).
    """
    # Look the exchange class up by attribute instead of eval(): same result
    # for a valid id, but a caller-supplied string is never executed as code.
    ccxt_exchange = getattr(ccxt, exchange_str)()

    if not ccxt_exchange.has['fetchOHLCV']:
        logger.warning("exchange:{} not support fetchOHLCV".format(exchange_str))
        return

    for _, security_item in get_security_list(security_type='cryptocurrency', exchanges=[exchange_str]).iterrows():
        try:
            if security_item['name'] not in CRYPTOCURRENCY_PAIR:
                continue

            # NOTE(review): df is presumably the already-downloaded kdata
            # frame for this security - confirm against the helper.
            start_date, df = get_latest_download_trading_date(security_item)
            # Daily k-lines are only fetched up to yesterday
            end_date = pd.Timestamp.today() - pd.DateOffset(1)

            if start_date and (start_date > end_date):
                logger.info("{} kdata is ok".format(security_item['code']))
                continue

            try:
                kdatas = ccxt_exchange.fetch_ohlcv(security_item['name'], timeframe='1d')
                # for rateLimit
                time.sleep(5)
            except Exception:
                # logger.exception attaches the active traceback itself; the
                # exception must not be passed as a stray format argument.
                logger.exception("fetch_kdata for {} {} failed".format(exchange_str, security_item['name']))
                continue

            today = pd.Timestamp.today()
            rows = []
            for kdata in kdatas:
                # kdata[0] is divided by 1000 to get seconds for fromtimestamp
                timestamp = pd.Timestamp.fromtimestamp(int(kdata[0] / 1000))
                if is_same_date(timestamp, today):
                    continue
                rows.append({
                    'timestamp': to_time_str(timestamp),
                    'code': security_item['code'],
                    'name': security_item['name'],
                    'open': kdata[1],
                    'high': kdata[2],
                    'low': kdata[3],
                    'close': kdata[4],
                    'volume': kdata[5],
                    'securityId': security_item['id'],
                    'preClose': None,
                    'change': None,
                    'changePct': None
                })

            if rows:
                # One concat instead of DataFrame.append per row (quadratic,
                # and removed in pandas 2.0).
                df = pd.concat([df, pd.DataFrame(rows)], ignore_index=True)

            if not df.empty:
                df = df.loc[:, KDATA_COMMON_COL]
                kdata_df_save(df, get_kdata_path(security_item), calculate_change=True)
                logger.info(
                    "fetch_kdata for exchange:{} security:{} success".format(exchange_str, security_item['name']))
        except Exception:
            # Best-effort per security: record the failure with its traceback
            # and move on to the next one.
            logger.exception(
                "fetch_kdata for exchange:{} security:{} failed".format(exchange_str, security_item['name']))
Example #3
0
def crawl_stock_quote(start_code=STOCK_START_CODE, end_code=STOCK_END_CODE, crawl_tick=False):
    """Crawl k-line data (and optionally tick data) for a range of stocks.

    For each security returned by ``get_security_list(start=start_code, end=end_code)``:

    1. Run ``StockKdataSpider163`` unless the latest downloaded trading date
       (source ``'163'``) is already after today.
    2. For each of ``'bfq'`` and ``'hfq'``, run ``StockKDataSpider`` for the
       trading dates present in the 163 data but absent from the sina data.
    3. When ``crawl_tick`` is true, run ``StockTickSpider`` for the trading
       dates on or after ``settings.START_TICK_DATE`` that have no tick data.

    :param start_code: passed to ``get_security_list`` as ``start``.
    :param end_code: passed to ``get_security_list`` as ``end``.
    :param crawl_tick: whether to crawl tick data as well.
    """

    def announce(message):
        # K-line progress is written both to the log and to stdout.
        logger.info(message)
        print(message)

    securities = get_security_list(start=start_code, end=end_code)
    for _, security_item in securities.iterrows():
        code = security_item['code']

        # Daily k-line data from 163
        announce("{} get stock kdata start".format(code))

        start_date = get_latest_download_trading_date(security_item, source='163')
        end_date = pd.Timestamp.today()
        if start_date > end_date:
            announce("{} stock kdata is ok".format(code))
        else:
            kdata_args = {
                "security_item": security_item,
                "start_date": start_date,
                "end_date": end_date,
            }
            process_crawl(StockKdataSpider163, kdata_args)

        announce("{} get stock kdata from 163 end".format(code))

        # Fill in the dates sina is missing relative to 163, per fuquan type
        base_dates = set(get_trading_dates(security_item, source='163'))
        for fuquan in ('bfq', 'hfq'):
            sina_dates = set(get_trading_dates(security_item, source='sina', fuquan=fuquan))
            missing_dates = base_dates - sina_dates
            if not missing_dates:
                announce("{} {} kdata from sina is ok".format(code, fuquan))
                continue

            announce("{} get {} kdata from sina start".format(code, fuquan))
            sina_args = {
                "security_item": security_item,
                "trading_dates": missing_dates,
                "fuquan": fuquan,
            }
            process_crawl(StockKDataSpider, sina_args)
            announce("{} get {} kdata from sina end".format(code, fuquan))

        # Tick data (only logged, not printed)
        if not crawl_tick:
            continue

        tick_dates = {x for x in base_dates if x >= settings.START_TICK_DATE}
        missing_ticks = tick_dates - set(get_available_tick_dates(security_item))
        if not missing_ticks:
            logger.info("{} tick is ok".format(code))
            continue

        logger.info("{} get tick start".format(code))
        tick_args = {"security_item": security_item, "trading_dates": missing_ticks}
        process_crawl(StockTickSpider, tick_args)
        logger.info("{} get tick end".format(code))