Ejemplo n.º 1
0
def test_get_china_stock_list():
    """Check ``technical.get_security_list`` for Shanghai/Shenzhen stocks.

    Covers filtering by exchange, by code range, by explicit codes, and the
    ``mode='es'`` variant in which ``sinaArea`` is expected to be a list.
    """
    print(settings.FOOLTRADER_STORE_PATH)

    # Both exchanges.
    all_stocks = technical.get_security_list('stock', exchanges=['sh', 'sz'])
    assert '000001' in all_stocks.index
    assert all_stocks.loc['000001', 'sinaIndustry'] == '金融行业'

    # Shanghai only.
    sh_stocks = technical.get_security_list('stock', exchanges=['sh'])
    assert '600000' in sh_stocks.index
    assert sh_stocks.loc['600000', 'sinaIndustry'] == '金融行业'

    # Code range: both end points are included, the next code is not.
    ranged = technical.get_security_list('stock',
                                         exchanges=['sh', 'sz'],
                                         start_code='000338',
                                         end_code='600388')
    for expected_code in ('000338', '600388'):
        assert expected_code in ranged.index
    assert '600389' not in ranged.index

    # Explicit code selection.
    picked = technical.get_security_list('stock',
                                         exchanges=['sh', 'sz'],
                                         codes=['300027', '000002'])
    assert len(picked.index) == 2

    # 'es' mode: sinaArea is a list of area names.
    es_stocks = technical.get_security_list('stock',
                                            exchanges=['sh', 'sz'],
                                            mode='es')
    areas = es_stocks.loc['600004', 'sinaArea']
    assert type(areas) is list
    assert '广州' in areas
    assert '广东' in areas
Ejemplo n.º 2
0
def init_env():
    """Create the working directories under ``FOOLTRADER_STORE_PATH``.

    If the store path does not exist, a hint is printed and nothing is
    created. Otherwise directories are created for every stock, for the
    sh/sz/nasdaq indices, and for the futures exchanges.
    """
    if not os.path.exists(FOOLTRADER_STORE_PATH):
        # The placeholder used to be printed literally because the message
        # was never formatted with the offending path.
        print("{} is a wrong path".format(FOOLTRADER_STORE_PATH))
        print("please set env FOOLTRADER_STORE_PATH to working path or set it in settings.py")
        return

    # Initialize the stock folders.
    for _, item in get_security_list(exchanges=EXCHANGE_LIST_COL).iterrows():
        mkdir_for_stock(item)

    # Initialize the index folders.
    for _, item in get_security_list(security_type='index', exchanges=['sh', 'sz', 'nasdaq']).iterrows():
        kdata_dir = get_kdata_dir(item)
        if not os.path.exists(kdata_dir):
            os.makedirs(kdata_dir)

    # Initialize the futures folders.
    for exchange in ['shfe', 'dce', 'zce']:
        exchange_cache_dir = get_exchange_cache_dir(security_type='future', exchange=exchange)
        if not os.path.exists(exchange_cache_dir):
            os.makedirs(exchange_cache_dir)

        # NOTE(review): the yearly day_kdata cache is always created for
        # 'shfe', regardless of the loop variable. Kept as-is; confirm
        # whether `exchange=exchange` was intended.
        exchange_cache_dir = get_exchange_cache_dir(security_type='future', exchange='shfe',
                                                    the_year=datetime.datetime.today().year,
                                                    data_type="day_kdata")
        if not os.path.exists(exchange_cache_dir):
            os.makedirs(exchange_cache_dir)

        exchange_dir = get_exchange_dir(security_type='future', exchange=exchange)
        if not os.path.exists(exchange_dir):
            os.makedirs(exchange_dir)
Ejemplo n.º 3
0
def crawl_index_quote():
    """Crawl daily K-line data for every index, then fill in market summaries.

    For the four summary indices (Shanghai, Shenzhen, SME board, ChiNext),
    rows whose turnoverRate/tCap/mCap/pe is missing are re-crawled by date
    through ``StockSummarySpider``.
    """
    summary_index_ids = ['index_sh_000001', 'index_sz_399106', 'index_sz_399005', 'index_sz_399006']

    for _, security_item in get_security_list(security_type='index').iterrows():
        code = security_item['code']

        # Fetch the daily K-line.
        logger.info("{} get index kdata start".format(code))

        start_date, _ = get_latest_download_trading_date(security_item, source='163')
        end_date = pd.Timestamp.today()
        if not start_date > end_date:
            crawl_settings = {"security_item": security_item,
                              "start_date": start_date,
                              "end_date": end_date}
            process_crawl(StockKdata163Spider, crawl_settings)
        else:
            logger.info("{} kdata is ok".format(code))

        logger.info("{} get index kdata from 163 end".format(code))

        # Market summary data [Shanghai, Shenzhen, SME board, ChiNext].
        if security_item['id'] not in summary_index_ids:
            continue

        kdata = get_kdata(security_item=security_item)
        incomplete = kdata[kdata['turnoverRate'].isna() | kdata['tCap'].isna() | kdata['mCap'].isna() | kdata[
            'pe'].isna()]
        if incomplete.empty:
            continue

        dates = incomplete.index.strftime('%Y-%m-%d').tolist()
        if dates:
            process_crawl(StockSummarySpider, {"security_item": security_item,
                                               "the_dates": dates})
Ejemplo n.º 4
0
def legacy_kdata_to_csv():
    """Convert legacy per-file JSON K-data into CSV files.

    For every security and both ``fuquan`` flags, each JSON file in the old
    K-data directory (excluding names containing 'all') is converted unless
    the target CSV already exists. The first two '_'-separated parts of the
    file name are forwarded to ``get_kdata_path``.
    """
    fq_source_columns = ['timestamp', 'code', 'low', 'open', 'close', 'high', 'volume', 'turnover',
                         'securityId',
                         'fuquan']

    for _, security_item in get_security_list().iterrows():
        for fuquan in (True, False):
            old_dir = get_kdata_dir_old(security_item, fuquan)
            if not os.path.exists(old_dir):
                continue

            json_files = []
            for name in os.listdir(old_dir):
                candidate = os.path.join(old_dir, name)
                if 'all' not in name and 'json' in name and os.path.isfile(candidate):
                    json_files.append(candidate)

            fuquan_label = 'hfq' if fuquan else 'bfq'
            for json_path in json_files:
                name_parts = os.path.basename(json_path).split('_')
                csv_path = get_kdata_path(security_item, name_parts[0], name_parts[1], fuquan_label)
                if os.path.exists(csv_path):
                    continue

                df = pd.read_json(json_path, dtype={'code': str})
                logger.info("{} to {}".format(json_path, csv_path))

                if fuquan:
                    # Select the legacy columns, then rename them to the
                    # fuquan column contract.
                    df = df.loc[:, fq_source_columns]
                    df.columns = KDATA_COLUMN_SINA_FQ
                else:
                    df = df.loc[:, KDATA_COLUMN_SINA]

                df.to_csv(csv_path, index=False)
Ejemplo n.º 5
0
def kdata_to_es(security_type='stock',
                start_code=None,
                end_code=None,
                force=False):
    """Index the K-data of every matching security via ``df_to_es``.

    :param security_type: 'stock', 'index' or 'cryptocurrency'.
    :param start_code: forwarded to ``get_security_list``.
    :param end_code: forwarded to ``get_security_list``.
    :param force: forwarded to ``df_to_es``.
    :raises ValueError: if ``security_type`` is not one of the supported
        values (previously this surfaced as an ``UnboundLocalError`` once
        the first security was processed).
    """
    codes = None
    if security_type == 'stock':
        doc_type = StockKData
    elif security_type == 'index':
        doc_type = IndexKData
    elif security_type == 'cryptocurrency':
        doc_type = CryptoCurrencyKData
        # Cryptocurrencies are restricted to the configured code list.
        codes = CRYPTOCURRENCY_CODE
    else:
        raise ValueError("unsupported security_type: {!r}".format(security_type))

    for _, security_item in get_security_list(security_type=security_type,
                                              start_code=start_code,
                                              end_code=end_code,
                                              codes=codes).iterrows():
        index_name = get_es_kdata_index(security_item['type'],
                                        security_item['exchange'])

        df = get_kdata(security_item, generate_id=True)

        df_to_es(df,
                 doc_type=doc_type,
                 index_name=index_name,
                 security_item=security_item,
                 force=force)
Ejemplo n.º 6
0
def kdata_to_es(security_type='stock',
                start_code=None,
                end_code=None,
                force=False):
    """Index the K-data of every matching security via ``df_to_es``.

    Unless ``force`` is set, a term query on the security id is passed along
    to ``df_to_es``.

    :param security_type: 'stock', 'index' or 'cryptocurrency'.
    :param start_code: forwarded to ``get_security_list``.
    :param end_code: forwarded to ``get_security_list``.
    :param force: forwarded to ``df_to_es``; also disables the term query.
    :raises ValueError: if ``security_type`` is not one of the supported
        values (previously this surfaced as an ``UnboundLocalError`` once
        the first security was processed).
    """
    if security_type == 'stock':
        doc_type = StockKData
    elif security_type == 'index':
        doc_type = IndexKData
    elif security_type == 'cryptocurrency':
        doc_type = CryptoCurrencyKData
    else:
        raise ValueError("unsupported security_type: {!r}".format(security_type))

    for _, security_item in get_security_list(security_type=security_type,
                                              start_code=start_code,
                                              end_code=end_code).iterrows():
        index_name = get_es_kdata_index(security_item['type'],
                                        security_item['exchange'])

        query = None
        if not force:
            query = {"term": {"securityId": security_item["id"]}}

        df = get_kdata(security_item, generate_id=True)

        df_to_es(df,
                 doc_type=doc_type,
                 index_name=index_name,
                 query=query,
                 force=force)
    def start_requests(self):
        """Yield one finance-CSV download request per target security.

        The target is the ``security_item`` from the spider settings when
        present; otherwise every security listed for the 'nasdaq' exchange.
        """
        configured_item = self.settings.get("security_item")

        if configured_item is None:
            targets = (row for _, row in get_security_list(exchanges=['nasdaq']).iterrows())
        else:
            targets = (configured_item,)

        for item in targets:
            data_url = self.get_finance_url(item['code'])
            request_meta = {
                'path': get_finance_path(item),
                'item': item
            }
            yield Request(url=data_url,
                          meta=request_meta,
                          callback=self.download_finance_csv)
Ejemplo n.º 8
0
def remove_old_kdata():
    """Delete the legacy K-data directory of every security.

    Both ``fuquan`` flags are probed, but only the directory obtained with
    ``fuquan=True`` is actually removed.
    """
    for _, security_item in get_security_list().iterrows():
        for fuquan in (True, False):
            old_dir = get_kdata_dir_old(security_item, fuquan)
            if not (os.path.exists(old_dir) and fuquan):
                continue
            logger.info("remove {}".format(old_dir))
            shutil.rmtree(old_dir)
Ejemplo n.º 9
0
def finance_report_event_to_csv():
    """Rewrite each finance-report event CSV with the current column contract.

    ``reportEventDate`` becomes ``timestamp`` and ``reportDate`` becomes
    ``reportPeriod``; only ``EVENT_STOCK_FINANCE_REPORT_COL`` columns are kept
    and the file is overwritten in place.
    """
    column_renames = {'reportEventDate': 'timestamp', 'reportDate': 'reportPeriod'}

    for _, security_item in get_security_list().iterrows():
        the_path = get_finance_report_event_path(security_item)
        if not os.path.exists(the_path):
            continue

        events = pd.read_csv(the_path).rename(columns=column_renames)
        events = events.loc[:, EVENT_STOCK_FINANCE_REPORT_COL]
        events.to_csv(get_finance_report_event_path(security_item), index=False)
        logger.info("transform {} report event".format(security_item['code']))
Ejemplo n.º 10
0
 def start_requests(self):
     """Yield one forecast download request for every listed security."""
     securities = get_security_list()
     for _, item in securities.iterrows():
         forecast_url = self.get_forecast_url(item['code'])
         request_meta = {
             'item': item,
         }
         yield Request(url=forecast_url,
                       headers=DEFAULT_KDATA_HEADER,
                       meta=request_meta,
                       callback=self.download_forecast_data)
Ejemplo n.º 11
0
def remove_old_tick():
    """Delete legacy 'xls' tick files from every security's tick directory.

    Files whose name contains 'lock' or 'error' are left alone.
    """
    for _, security_item in get_security_list().iterrows():
        tick_dir = get_tick_dir(security_item)
        if not os.path.exists(tick_dir):
            continue

        # Collect the full list first, then delete.
        stale_files = []
        for name in os.listdir(tick_dir):
            full_path = os.path.join(tick_dir, name)
            if 'xls' in name and 'lock' not in name and 'error' not in name and os.path.isfile(full_path):
                stale_files.append(full_path)

        for stale in stale_files:
            logger.info("remove {}".format(stale))
            os.remove(stale)
 def start_requests(self):
     """Yield the requests for the configured security, or for all of them.

     When the settings carry no ``security_item``, every row returned by
     ``get_security_list()`` is processed instead.
     """
     configured_item = self.settings.get("security_item")
     if configured_item is None:
         targets = (row for _, row in get_security_list().iterrows())
     else:
         targets = (configured_item,)

     for target in targets:
         for request in self.yield_request(target):
             yield request
Ejemplo n.º 13
0
def forecast_event_to_csv():
    """Convert each security's forecast-event JSON file to CSV.

    ``reportDate`` is renamed to ``timestamp``, only the
    ``EVENT_STOCK_FINANCE_FORECAST_COL`` columns are kept, and the source
    JSON file is removed after the CSV has been written.
    """
    for _, security_item in get_security_list().iterrows():
        the_path = get_forecast_event_path(security_item)
        if not os.path.exists(the_path):
            continue

        events = pd.read_json(get_forecast_event_path(security_item))
        events = events.rename(columns={'reportDate': 'timestamp'})
        events = events.loc[:, EVENT_STOCK_FINANCE_FORECAST_COL]

        csv_path = get_finance_forecast_event_path(security_item)
        events.to_csv(csv_path, index=False)
        logger.info("transform {} forecast event".format(security_item['code']))
        os.remove(the_path)
Ejemplo n.º 14
0
def _crawl_finance_sheet_if_stale(security_item, report_type, sheet_path, load_items,
                                  current_report_period):
    """Crawl one finance sheet unless its last stored row is the current period."""
    if os.path.exists(sheet_path):
        current_items = load_items(security_item)
        # An existing but empty sheet used to raise IndexError here and abort
        # the remaining sheets of this security; treat it as "not crawled yet".
        if len(current_items) > 0 and current_report_period == current_items[-1]['reportPeriod']:
            return

    process_crawl(StockFinanceSpider, {"security_item": security_item,
                                       "report_type": report_type})


def crawl_finance_data(start_code=STOCK_START_CODE, end_code=STOCK_END_CODE):
    """Crawl finance report events and the three finance sheets per security.

    A sheet is crawled when its file is missing or when the last stored
    ``reportPeriod`` differs from ``get_report_period()``. Any exception
    raised while handling one security is logged and the loop moves on.

    :param start_code: forwarded to ``get_security_list``.
    :param end_code: forwarded to ``get_security_list``.
    """
    for _, security_item in get_security_list(start_code=start_code, end_code=end_code).iterrows():
        try:
            # Crawl the events first; some later crawls depend on them.
            process_crawl(StockFinanceReportEventSpider, {"security_item": security_item})

            current_report_period = get_report_period()

            # Balance sheet.
            _crawl_finance_sheet_if_stale(security_item, "balance_sheet",
                                          get_balance_sheet_path(security_item),
                                          get_balance_sheet_items,
                                          current_report_period)

            # Income statement.
            _crawl_finance_sheet_if_stale(security_item, "income_statement",
                                          get_income_statement_path(security_item),
                                          get_income_statement_items,
                                          current_report_period)

            # Cash flow statement (the spider's report type is "cash_flow").
            _crawl_finance_sheet_if_stale(security_item, "cash_flow",
                                          get_cash_flow_statement_path(security_item),
                                          get_cash_flow_statement_items,
                                          current_report_period)
        except Exception as e:
            logger.exception(e)
Ejemplo n.º 15
0
def usa_stock_finance_to_es(force=False):
    """Index the finance summary of each configured NASDAQ stock.

    :param force: forwarded to ``df_to_es``.
    """
    nasdaq_stocks = get_security_list(security_type='stock',
                                      exchanges=['nasdaq'],
                                      codes=US_STOCK_CODES)

    for _, security_item in nasdaq_stocks.iterrows():
        summary = get_finance_summary_items(security_item)
        df_to_es(summary,
                 doc_type=FinanceSummary,
                 timestamp_filed='reportPeriod',
                 security_item=security_item,
                 force=force)
Ejemplo n.º 16
0
def security_meta_to_es(security_type='stock'):
    """Index the security list of the given type via ``df_to_es`` (forced).

    :param security_type: 'stock', 'cryptocurrency' or 'index'.
    :raises ValueError: if ``security_type`` is not one of the supported
        values (previously this surfaced as an ``UnboundLocalError``).
    """
    if security_type == 'stock':
        doc_type = StockMeta
    elif security_type == 'cryptocurrency':
        doc_type = CryptocurrencyMeta
    elif security_type == 'index':
        doc_type = IndexMeta
    else:
        raise ValueError("unsupported security_type: {!r}".format(security_type))

    df = get_security_list(security_type=security_type)

    df_to_es(df, doc_type, force=True)
Ejemplo n.º 17
0
 def start_requests(self):
     """Yield the requests for the configured security, or for a code range.

     Without a ``security_item`` in the settings, every security between
     ``STOCK_START_CODE`` and ``STOCK_END_CODE`` is processed.
     """
     configured_item = self.settings.get("security_item")
     if configured_item is None:
         securities = get_security_list(start_code=STOCK_START_CODE,
                                        end_code=STOCK_END_CODE)
         targets = (row for _, row in securities.iterrows())
     else:
         targets = (configured_item,)

     for target in targets:
         for request in self.yield_request(target):
             yield request
Ejemplo n.º 18
0
    def start_requests(self):
        """Prepare the sh/sz security frames and request the category page.

        The category column named by the ``category_type`` setting is reset
        on both frames before anything is requested. Nothing is yielded for
        a category type other than sinaIndustry/sinaConcept/sinaArea.
        """
        category_urls = {
            'sinaIndustry': 'http://vip.stock.finance.sina.com.cn/q/view/newSinaHy.php',
            'sinaConcept': 'http://money.finance.sina.com.cn/q/view/newFLJK.php?param=class',
            'sinaArea': 'http://money.finance.sina.com.cn/q/view/newFLJK.php?param=area',
        }

        self.category_type = self.settings.get("category_type")

        self.sh_df = get_security_list(exchanges=['sh'])
        self.sz_df = get_security_list(exchanges=['sz'])
        self.file_lock = threading.RLock()

        # Clear the old data.
        for frame in (self.sh_df, self.sz_df):
            frame[self.category_type] = None

        url = category_urls.get(self.category_type)
        if url is None:
            return

        yield Request(url=url, callback=self.download_sina_category)
Ejemplo n.º 19
0
def check_result():
    """Log a warning for every security with missing K-data or tick CSVs.

    The security directory is logged once per missing 'bfq'/'hfq' K-data
    file, and once more when the tick directory exists but holds no CSV.
    """
    for _, security_item in get_security_list().iterrows():
        for fuquan in ('bfq', 'hfq'):
            dayk_path = get_kdata_path(security_item, fuquan=fuquan)
            if not os.path.exists(dayk_path):
                # Logger.warn is a deprecated alias; warning() is the
                # documented method.
                logger.warning(get_security_dir(security_item))

        tick_dir = get_tick_dir(security_item)
        if os.path.exists(tick_dir):
            files = [os.path.join(tick_dir, f) for f in os.listdir(tick_dir) if
                     ('csv' in f and os.path.isfile(os.path.join(tick_dir, f)))]
            if not files:
                logger.warning(get_security_dir(security_item))
    def merge_kdata_to_one(security_item=None, replace=False, fuquan='bfq'):
        """Merge the per-file sina K-data CSVs of a security into one file.

        :param security_item: the security to process; ``None`` processes
            every row of ``get_security_list()``.
        :param replace: when true, overwrite the day K-data file with the
            merged frame; otherwise merge through
            ``StockKDataSinaSpider.merge_to_current_kdata``.
        :param fuquan: 'bfq' or 'hfq'; a falsy value processes both.

        The merged source files are removed afterwards, and for 'hfq'
        ``StockKDataSinaSpider.add_factor_to_163`` is called.
        """
        # The old check compared a type object with the string 'NoneType',
        # which is always unequal, so the "all securities" branch was dead.
        if security_item is not None:
            items = [(None, security_item)]
        else:
            items = get_security_list().iterrows()

        fuquans = [fuquan] if fuquan else ['bfq', 'hfq']

        for _, item in items:
            for fq in fuquans:
                dayk_path = get_kdata_path(item,
                                           source='sina',
                                           fuquan=fq)
                if fq == 'hfq':
                    df = pd.DataFrame(
                        columns=data_contract.KDATA_COLUMN_SINA_FQ)
                else:
                    df = pd.DataFrame(columns=data_contract.KDATA_COLUMN_SINA)

                the_dir = get_kdata_dir(item, fuquan=fq)

                # Reset per iteration: previously `files` was unbound when the
                # directory was missing, or still held the previous
                # iteration's (already deleted) paths.
                files = []
                if os.path.exists(the_dir):
                    files = [
                        os.path.join(the_dir, f) for f in os.listdir(the_dir)
                        if ('dayk.csv' not in f
                            and os.path.isfile(os.path.join(the_dir, f)))
                    ]
                    if files:
                        # DataFrame.append was removed in pandas 2.0; a single
                        # concat also avoids re-copying the frame per file.
                        frames = [pd.read_csv(f, dtype=str) for f in files]
                        df = pd.concat([df] + frames, ignore_index=True)

                if df.size > 0:
                    df = df.set_index(df['timestamp'])
                    df.index = pd.to_datetime(df.index)
                    df = df.sort_index()
                    logger.info("{} to {}".format(item['code'],
                                                  dayk_path))
                    if replace:
                        df.to_csv(dayk_path, index=False)
                    else:
                        StockKDataSinaSpider.merge_to_current_kdata(
                            item, df, fuquan=fq)

                for f in files:
                    logger.info("remove {}".format(f))
                    os.remove(f)

                if fq == 'hfq':
                    StockKDataSinaSpider.add_factor_to_163(item)
Ejemplo n.º 21
0
 def start_requests(self):
     """Yield download requests in one of two modes.

     1) No ``security_item`` in the settings: full download for every
        security between ``STOCK_START_CODE`` and ``STOCK_END_CODE``.
     2) ``security_item`` given: repair mode, restricted by the
        ``trading_dates`` and ``fuquan`` settings.
     """
     spider_settings = self.settings
     item = spider_settings.get("security_item")
     trading_dates = spider_settings.get("trading_dates")
     fuquan = spider_settings.get("fuquan")

     if item is None:
         securities = get_security_list(start_code=STOCK_START_CODE, end_code=STOCK_END_CODE)
         for _, item in securities.iterrows():
             for request in self.yield_request(item):
                 yield request
         return

     for request in self.yield_request(item, trading_dates, fuquan):
         yield request
Ejemplo n.º 22
0
def handle_error_tick():
    """Retry converting tick files whose name marks them as 'error'/'fatal'.

    Each such file is converted with ``sina_tick_to_csv`` unless the target
    CSV already exists. If anything fails, the error is logged and the
    source file is renamed with a '.fatal' suffix.
    """
    for _, security_item in get_security_list().iterrows():
        tick_dir = get_tick_dir(security_item)
        if not os.path.exists(tick_dir):
            continue

        files = [os.path.join(tick_dir, f) for f in os.listdir(tick_dir) if
                 (('fatal' in f or 'error' in f) and os.path.isfile(os.path.join(tick_dir, f)))]
        for f in files:
            try:
                the_date = get_file_name(f)
                csv_path = get_tick_path(security_item, the_date)
                if not os.path.exists(csv_path):
                    logger.info("{} to {}".format(f, csv_path))
                    sina_tick_to_csv(security_item, f, the_date)
            except Exception as e:
                # Logger.warn is a deprecated alias; warning() is the
                # documented method.
                logger.warning(e)
                # NOTE(review): files already named '*.fatal' match the filter
                # above too, so a repeated failure appends another suffix.
                os.rename(f, f + ".fatal")
Ejemplo n.º 23
0
def usa_stock_finance_to_es(force=False):
    """Index the finance summary of each configured NASDAQ stock.

    Unless ``force`` is set, a term query on the security id is passed to
    ``df_to_es``.

    :param force: forwarded to ``df_to_es``; also disables the term query.
    """
    nasdaq_stocks = get_security_list(security_type='stock',
                                      exchanges=['nasdaq'],
                                      codes=US_STOCK_CODES)

    for _, security_item in nasdaq_stocks.iterrows():
        if force:
            query = None
        else:
            query = {"term": {"securityId": security_item["id"]}}

        summary = get_finance_summary_items(security_item)

        df_to_es(summary,
                 doc_type=FinanceSummary,
                 timestamp_filed='reportPeriod',
                 query=query,
                 force=force)
Ejemplo n.º 24
0
def finance_sheet_to_es(sheet_type=None,
                        start_code=None,
                        end_code=None,
                        force=False):
    """Index finance sheets of every matching security via ``df_to_es``.

    :param sheet_type: 'balance_sheet', 'income_statement' or
        'cash_flow_statement'; ``None`` processes all three in that order.
    :param start_code: forwarded to ``get_security_list``.
    :param end_code: forwarded to ``get_security_list``.
    :param force: forwarded to ``df_to_es``.
    :raises ValueError: if ``sheet_type`` is not one of the supported values
        (previously this surfaced as an ``UnboundLocalError``).

    A failure while indexing one security is logged and does not stop the
    remaining securities.
    """
    if sheet_type is None:
        sheet_types = [
            'balance_sheet', 'income_statement', 'cash_flow_statement'
        ]
    else:
        sheet_types = [sheet_type]

    for current_type in sheet_types:
        if current_type == 'balance_sheet':
            doc_type = BalanceSheet
            load_items = get_balance_sheet_items
        elif current_type == 'income_statement':
            doc_type = IncomeStatement
            load_items = get_income_statement_items
        elif current_type == 'cash_flow_statement':
            doc_type = CashFlowStatement
            load_items = get_cash_flow_statement_items
        else:
            raise ValueError("unsupported sheet_type: {!r}".format(current_type))

        es_index_mapping(current_type, doc_type)

        for _, security_item in get_security_list(
                start_code=start_code, end_code=end_code).iterrows():
            try:
                items = load_items(security_item)

                df = pd.DataFrame(items)

                df = index_df_with_time(df, index='reportPeriod')

                df_to_es(df,
                         doc_type=doc_type,
                         timestamp_filed='reportPeriod',
                         security_item=security_item,
                         force=force)
            except Exception:
                # logger.exception already records the active exception; the
                # old extra positional argument had no placeholder in the
                # message and produced a logging format error instead.
                logger.exception(
                    "index {} {} failed".format(security_item['code'],
                                                current_type))
Ejemplo n.º 25
0
def crawl_stock_quote(start_code=STOCK_START_CODE, end_code=STOCK_END_CODE, crawl_tick=True):
    """Crawl stock K-line data from 163 and sina for a range of codes.

    For each security the 163 daily K-line is brought up to today, then any
    trading date present in the 163 data but missing from the sina 'bfq' or
    'hfq' data is crawled from sina. The tick crawl is currently disabled.

    :param start_code: forwarded to ``get_security_list``.
    :param end_code: forwarded to ``get_security_list``.
    :param crawl_tick: tick switch; has no effect while the tick branch is
        hard-disabled.
    """
    securities = get_security_list(start_code=start_code, end_code=end_code)

    for _, security_item in securities.iterrows():
        code = security_item['code']

        # Fetch the daily K-line.
        logger.info("{} get stock kdata start".format(code))

        start_date, _ = get_latest_download_trading_date(security_item, source='163')
        end_date = pd.Timestamp.today()
        if not start_date > end_date:
            process_crawl(StockKdata163Spider, {"security_item": security_item,
                                                "start_date": start_date,
                                                "end_date": end_date})
        else:
            logger.info("{} stock kdata is ok".format(code))

        logger.info("{} get stock kdata from 163 end".format(code))

        # Dates known to 163 are the reference set for the sina data.
        base_dates = set(get_trading_dates(security_item, source='163'))
        for fuquan in ('bfq', 'hfq'):
            sina_dates = set(get_trading_dates(security_item, source='sina', fuquan=fuquan))
            diff_dates = base_dates - sina_dates
            if not diff_dates:
                logger.info("{} {} kdata from sina is ok".format(code, fuquan))
                continue

            logger.info("{} get {} kdata from sina start".format(code, fuquan))
            process_crawl(StockKDataSinaSpider, {"security_item": security_item,
                                                 "trading_dates": diff_dates,
                                                 "fuquan": fuquan})
            logger.info("{} get {} kdata from sina end".format(code, fuquan))

        # Fetch ticks.
        # FIXME: this sina service is no longer available.
        if crawl_tick and False:
            tick_dates = {x for x in base_dates if x >= settings.START_TICK_DATE}
            diff_dates = tick_dates - set(get_available_tick_dates(security_item))

            if not diff_dates:
                logger.info("{} tick is ok".format(code))
            else:
                logger.info("{} get tick start".format(code))
                process_crawl(StockTickSpider, {"security_item": security_item,
                                                "trading_dates": diff_dates})
                logger.info("{} get tick end".format(code))
Ejemplo n.º 26
0
def finance_event_to_es(event_type='finance_forecast',
                        start_code=None,
                        end_code=None,
                        force=False):
    """Index finance events of every matching security via ``df_to_es``.

    :param event_type: 'finance_forecast' or 'finance_report'.
    :param start_code: forwarded to ``get_security_list``.
    :param end_code: forwarded to ``get_security_list``.
    :param force: forwarded to ``df_to_es``.
    :raises ValueError: if ``event_type`` is not one of the supported values
        (previously this surfaced as an ``UnboundLocalError`` once the first
        security was processed).
    """
    if event_type == 'finance_forecast':
        doc_type = FinanceForecastEvent
        load_events = get_finance_forecast_event
    elif event_type == 'finance_report':
        doc_type = FinanceReportEvent
        load_events = get_finance_report_event
    else:
        raise ValueError("unsupported event_type: {!r}".format(event_type))

    for _, security_item in get_security_list(start_code=start_code,
                                              end_code=end_code).iterrows():
        df = load_events(security_item)

        df_to_es(df,
                 doc_type=doc_type,
                 security_item=security_item,
                 force=force)
Ejemplo n.º 27
0
def _rewrite_deduplicated_kdata(path):
    """Rewrite one K-data CSV without 'id', duplicate timestamps, or raw time strings."""
    df = pd.read_csv(path, dtype=str)
    df = time_index_df(df)

    if 'id' in df.columns:
        df = df.drop(['id'], axis=1)
    # Copy so the column assignment below does not target a filtered view.
    df = df[~df.index.duplicated(keep='first')].copy()
    df['timestamp'] = df['timestamp'].apply(to_time_str)
    df.to_csv(path, index=False)


def restore_kdata():
    """Clean the 163 and sina K-data CSVs for codes 600000 through 600017.

    Each file is re-read, the 'id' column is dropped if present, rows with a
    duplicated time index are removed (first kept), the 'timestamp' column
    is normalized with ``to_time_str`` and the file is overwritten.
    """
    for _, security_item in get_security_list(start_code='600000', end_code='600017').iterrows():
        # The 163 file used to compute the normalized timestamps and then
        # discard them; it now goes through the same rewrite as the sina files.
        _rewrite_deduplicated_kdata(get_kdata_path(security_item, source='163', fuquan='bfq'))

        for fuquan in ('hfq', 'bfq'):
            _rewrite_deduplicated_kdata(get_kdata_path(security_item, source='sina', fuquan=fuquan))
Ejemplo n.º 28
0
def finance_event_to_es(event_type='finance_forecast',
                        start_code=None,
                        end_code=None,
                        force=False):
    """Index finance events of every matching security via ``df_to_es``.

    Unless ``force`` is set, a term query on the security id is passed along
    to ``df_to_es``.

    :param event_type: 'finance_forecast' or 'finance_report'.
    :param start_code: forwarded to ``get_security_list``.
    :param end_code: forwarded to ``get_security_list``.
    :param force: forwarded to ``df_to_es``; also disables the term query.
    :raises ValueError: if ``event_type`` is not one of the supported values
        (previously this surfaced as an ``UnboundLocalError`` once the first
        security was processed).
    """
    if event_type == 'finance_forecast':
        doc_type = FinanceForecastEvent
        load_events = get_finance_forecast_event
    elif event_type == 'finance_report':
        doc_type = FinanceReportEvent
        load_events = get_finance_report_event
    else:
        raise ValueError("unsupported event_type: {!r}".format(event_type))

    for _, security_item in get_security_list(start_code=start_code,
                                              end_code=end_code).iterrows():
        query = None
        if not force:
            query = {"term": {"securityId": security_item["id"]}}

        df = load_events(security_item)

        df_to_es(df, doc_type=doc_type, query=query, force=force)
Ejemplo n.º 29
0
def check_convert_result():
    """Verify that each merged day K-data file matches its per-file CSVs.

    For every security and both 'bfq'/'hfq' variants with an existing merged
    file, the non-'day' CSV files in the K-data directory are concatenated
    and compared to the merged file with ``assert_df``.
    """
    for _, security_item in get_security_list().iterrows():
        for fuquan in ('bfq', 'hfq'):
            dayk_path = get_kdata_path(security_item, fuquan=fuquan)
            if not os.path.exists(dayk_path):
                continue

            df_result = pd.read_csv(dayk_path)

            if fuquan == 'hfq':
                df = pd.DataFrame(
                    columns=data_contract.KDATA_COLUMN_SINA_FQ)
            else:
                df = pd.DataFrame(
                    columns=data_contract.KDATA_COLUMN_SINA)

            kdata_dir = get_kdata_dir(security_item, fuquan=fuquan)

            if os.path.exists(kdata_dir):
                files = [os.path.join(kdata_dir, f) for f in os.listdir(kdata_dir) if
                         ('day' not in f and 'csv' in f and os.path.isfile(os.path.join(kdata_dir, f)))]
                # DataFrame.append was removed in pandas 2.0; concatenate the
                # column template and all part files in one call instead.
                df = pd.concat([df] + [pd.read_csv(f) for f in files], ignore_index=True)
                assert_df(df, df_result)
                logger.info("{} merge as one ok".format(security_item['code']))
Ejemplo n.º 30
0
def finance_sheet_to_es(sheet_type='balance_sheet',
                        start_code=None,
                        end_code=None,
                        force=False):
    """Index one kind of finance sheet for every matching security.

    Unless ``force`` is set, a term query on the security id is passed along
    to ``df_to_es``.

    :param sheet_type: 'balance_sheet', 'income_statement' or
        'cash_flow_statement'.
    :param start_code: forwarded to ``get_security_list``.
    :param end_code: forwarded to ``get_security_list``.
    :param force: forwarded to ``df_to_es``; also disables the term query.
    :raises ValueError: if ``sheet_type`` is not one of the supported values
        (previously this surfaced as an ``UnboundLocalError``).
    """
    if sheet_type == 'balance_sheet':
        doc_type = BalanceSheet
        load_items = get_balance_sheet_items
    elif sheet_type == 'income_statement':
        doc_type = IncomeStatement
        load_items = get_income_statement_items
    elif sheet_type == 'cash_flow_statement':
        doc_type = CashFlowStatement
        load_items = get_cash_flow_statement_items
    else:
        raise ValueError("unsupported sheet_type: {!r}".format(sheet_type))

    es_index_mapping(sheet_type, doc_type)

    for _, security_item in get_security_list(start_code=start_code,
                                              end_code=end_code).iterrows():
        query = None
        if not force:
            query = {"term": {"securityId": security_item["id"]}}

        items = load_items(security_item)

        df = pd.DataFrame(items)

        df = index_df_with_time(df, index='reportPeriod')

        df_to_es(df,
                 doc_type=doc_type,
                 timestamp_filed='reportPeriod',
                 query=query,
                 force=force)