Ejemplo n.º 1
0
 def spider_closed(self, spider, reason):
     """Write the sh/sz frames to their security-list files and log the close.

     A frame is written only when its ``self.category_type`` column contains
     at least one truthy value.
     """
     for exchange in ('sh', 'sz'):
         frame = getattr(self, '{}_df'.format(exchange))
         if not frame[self.category_type].any():
             continue
         target = get_security_list_path('stock', exchange)
         frame.to_csv(target, index=False)
     spider.logger.info('Spider closed: %s,%s\n', spider.name, reason)
Ejemplo n.º 2
0
    def download_stock_list(self, response):
        """Add newly seen stocks from a downloaded CSV to the stored list.

        The response body is read as CSV, reduced to the Symbol / Name /
        IPOyear / Sector / industry columns and renamed to the local layout.
        Codes that are not yet in the stored security list of
        ``response.meta['exchange']`` are appended and the list file is
        rewritten; nothing is written when there are no new codes.
        """
        exchange = response.meta['exchange']
        path = files_contract.get_security_list_path('stock', exchange)
        fetched = pd.read_csv(io.BytesIO(response.body), dtype=str)
        if fetched is None:
            return

        # Existing list, indexed by code so membership can be compared below.
        if not os.path.exists(path):
            stored = pd.DataFrame()
        else:
            stored = pd.read_csv(path, dtype=str)
            stored = stored.set_index('code', drop=False)

        wanted = ['Symbol', 'Name', 'IPOyear', 'Sector', 'industry']
        fetched = fetched.loc[:, wanted].dropna(subset=['Symbol', 'Name'])
        fetched.columns = ['code', 'name', 'listDate', 'sector', 'industry']
        fetched['listDate'] = fetched['listDate'].apply(lambda value: to_time_str(value))
        fetched['exchange'] = exchange
        fetched['type'] = 'stock'

        def build_id(row):
            return '_'.join(row.astype(str))

        fetched['id'] = fetched[['type', 'exchange', 'code']].apply(build_id, axis=1)
        for blank_column in ('sinaIndustry', 'sinaConcept', 'sinaArea'):
            fetched[blank_column] = ''
        fetched = fetched.set_index('code', drop=False)

        # Only codes missing from the stored list are added.
        unseen = set(fetched.index.tolist()) - set(stored.index.tolist())
        unseen = [code for code in unseen if code != 'nan']
        if not unseen:
            return

        stored = stored.append(fetched.loc[unseen, :], ignore_index=False)
        stored = stored.loc[:, STOCK_META_COL]
        stored.columns = STOCK_META_COL
        stored.to_csv(path, index=False)
Ejemplo n.º 3
0
    def download_stock_list(self, response):
        """Append stocks from the downloaded CSV that the saved list lacks.

        Reads ``response.body`` as CSV, keeps rows that have both a Symbol
        and a Name, maps the columns to code / name / listDate / sector /
        industry and fills in exchange, type, id and empty sina* columns.
        Rows whose code is not in the saved list for
        ``response.meta['exchange']`` are appended and the file is rewritten
        with the STOCK_META_COL columns.
        """
        exchange = response.meta['exchange']
        path = files_contract.get_security_list_path('stock', exchange)
        incoming = pd.read_csv(io.BytesIO(response.body), dtype=str)
        if incoming is None:
            return

        existing = pd.DataFrame()
        if os.path.exists(path):
            existing = pd.read_csv(path, dtype=str)
            existing = existing.set_index('code', drop=False)

        incoming = incoming.loc[:, ['Symbol', 'Name', 'IPOyear', 'Sector', 'industry']]
        incoming = incoming.dropna(subset=['Symbol', 'Name'])
        incoming.columns = ['code', 'name', 'listDate', 'sector', 'industry']
        normalized_dates = incoming['listDate'].apply(lambda raw: to_time_str(raw))
        incoming['listDate'] = normalized_dates
        incoming['exchange'] = exchange
        incoming['type'] = 'stock'
        id_parts = incoming[['type', 'exchange', 'code']]
        incoming['id'] = id_parts.apply(lambda parts: '_'.join(parts.astype(str)), axis=1)
        incoming['sinaIndustry'] = ''
        incoming['sinaConcept'] = ''
        incoming['sinaArea'] = ''
        incoming = incoming.set_index('code', drop=False)

        # Work out which codes are new; the literal 'nan' is never a real code.
        known_codes = set(existing.index.tolist())
        new_codes = [
            code for code in set(incoming.index.tolist()) - known_codes
            if code != 'nan'
        ]

        if new_codes:
            additions = incoming.loc[new_codes, :]
            existing = existing.append(additions, ignore_index=False)
            existing = existing.loc[:, STOCK_META_COL]
            existing.columns = STOCK_META_COL
            existing.to_csv(path, index=False)
Ejemplo n.º 4
0
    def download_stock_list(self, response):
        """Merge stocks from an sh/sz exchange download into the saved list.

        For ``response.meta['exchange'] == 'sh'`` the body is parsed as
        whitespace-separated GB2312 text, for ``'sz'`` as an Excel workbook;
        any other exchange is ignored. The code / name / list-date columns
        are renamed to code / name / listDate, rows with any missing value
        are dropped, and codes that are not yet in the saved security list
        are appended before the list file is rewritten with the
        STOCK_META_COL columns.
        """
        exchange = response.meta['exchange']
        path = files_contract.get_security_list_path('stock', exchange)

        df = None
        if exchange == 'sh':
            # Raw string: backslash-s is not a valid escape in a normal literal.
            df = pd.read_csv(io.BytesIO(response.body), sep=r'\s+', encoding='GB2312', dtype=str)
        elif exchange == 'sz':
            df = pd.read_excel(io.BytesIO(response.body), sheet_name='上市公司列表', dtype=str)
        if df is None:
            return

        # Saved list indexed by code, so new codes can be found by set difference.
        if os.path.exists(path):
            df_current = pd.read_csv(path, dtype=str)
            df_current = df_current.set_index('code', drop=False)
        else:
            df_current = pd.DataFrame()

        df = df.loc[:, ['A股代码', 'A股简称', 'A股上市日期']]
        df.columns = ['code', 'name', 'listDate']
        df['exchange'] = exchange
        df['type'] = 'stock'
        df['id'] = df[['type', 'exchange', 'code']].apply(lambda x: '_'.join(x.astype(str)), axis=1)
        df = df.dropna(axis=0, how='any')
        df = df.set_index('code', drop=False)

        diff = set(df.index.tolist()) - set(df_current.index.tolist())
        diff = [item for item in diff if item != 'nan']

        if diff:
            df_current = df_current.append(df.loc[diff, :], ignore_index=False)
            df_current = df_current.loc[:, STOCK_META_COL]
            df_current.columns = STOCK_META_COL
            df_current.to_csv(path, index=False)
Ejemplo n.º 5
0
def get_security_list(security_type='stock', exchanges=['sh', 'sz'], start=STOCK_START_CODE, end=STOCK_END_CODE,
                      mode='simple', start_date=None):
    """
    Get the security list.

    Parameters
    ----------
    security_type : str
        'stock' reads the per-exchange list files, 'index' uses
        CHINA_STOCK_INDEX; any other value yields an empty frame
    exchanges : list
        exchanges whose list files are read (stock only), default: ['sh', 'sz'];
        the default list is only iterated, never modified
    start : str
        lowest code to keep (inclusive), default: STOCK_START_CODE
    end : str
        highest code to keep (inclusive), default: STOCK_END_CODE
    mode : str
        'simple' only forces ``code`` to str; any other value also runs the
        sinaIndustry / sinaConcept / sinaArea columns through
        convert_to_list_if_need, default: 'simple'
    start_date : Timestamp str or Timestamp
        keep only rows whose listDate is on or after this date, default: None

    Returns
    -------
    DataFrame
        the security list indexed by code; an empty frame when nothing
        was loaded
    """
    # Bound up front so an unsupported security_type returns an empty frame
    # instead of failing with UnboundLocalError at the size check below.
    df = pd.DataFrame()

    if security_type == 'stock':
        for exchange in exchanges:
            the_path = files_contract.get_security_list_path(security_type, exchange)
            if not os.path.exists(the_path):
                continue
            converters = {'code': str}
            if mode != 'simple':
                for column in ('sinaIndustry', 'sinaConcept', 'sinaArea'):
                    converters[column] = convert_to_list_if_need
            df1 = pd.read_csv(the_path, converters=converters)
            # pd.concat is what DataFrame.append delegated to; append itself
            # no longer exists in pandas 2.
            df = pd.concat([df, df1], ignore_index=True)
    elif security_type == 'index':
        df = pd.DataFrame(CHINA_STOCK_INDEX)

    if df.size > 0:
        df = df[df["code"] <= end]
        df = df[df["code"] >= start]
        if start_date:
            df['listDate'] = pd.to_datetime(df['listDate'])
            df = df[df['listDate'] >= pd.Timestamp(start_date)]
        df = df.set_index(df['code'], drop=False)

    return df
Ejemplo n.º 6
0
    def download_stock_list(self, response):
        """Merge stocks from an sh/sz exchange download into the saved list.

        For ``response.meta['exchange'] == 'sh'`` the body is parsed as
        whitespace-separated GB2312 text, for ``'sz'`` as an Excel workbook;
        any other exchange is ignored. The code / name / list-date columns
        are renamed to code / name / listDate, ``timestamp`` is set to the
        list date, rows with any missing value are dropped, and codes that
        are not yet in the saved security list are appended before the list
        file is rewritten with the STOCK_META_COL columns.
        """
        exchange = response.meta['exchange']
        path = files_contract.get_security_list_path('stock', exchange)

        df = None
        if exchange == 'sh':
            # Raw string: backslash-s is not a valid escape in a normal literal.
            df = pd.read_csv(io.BytesIO(response.body), sep=r'\s+', encoding='GB2312', dtype=str)
        elif exchange == 'sz':
            df = pd.read_excel(io.BytesIO(response.body), sheet_name='上市公司列表', dtype=str)
        if df is None:
            return

        # Saved list indexed by code, so new codes can be found by set difference.
        if os.path.exists(path):
            df_current = pd.read_csv(path, dtype=str)
            df_current = df_current.set_index('code', drop=False)
        else:
            df_current = pd.DataFrame()

        df = df.loc[:, ['A股代码', 'A股简称', 'A股上市日期']]
        df.columns = ['code', 'name', 'listDate']
        df['exchange'] = exchange
        df['type'] = 'stock'
        df['id'] = df[['type', 'exchange', 'code']].apply(lambda x: '_'.join(x.astype(str)), axis=1)
        df['timestamp'] = df['listDate']
        df = df.dropna(axis=0, how='any')
        df = df.set_index('code', drop=False)

        # Only add the increment: codes the saved list does not contain yet.
        diff = set(df.index.tolist()) - set(df_current.index.tolist())
        diff = [item for item in diff if item != 'nan']

        if diff:
            df_current = df_current.append(df.loc[diff, :], ignore_index=False)
            df_current = df_current.loc[:, STOCK_META_COL]
            df_current.columns = STOCK_META_COL
            df_current.to_csv(path, index=False)
Ejemplo n.º 7
0
def init_markets(exchanges=CRYPTOCURRENCY_EXCHANGES):
    """
    Initialise the local cryptocurrency directories, meta files and lists.

    For every name that is both in ``exchanges`` and in ``ccxt.exchanges``
    the markets are fetched; for each market the kdata directory is created
    and its market info dumped as JSON, and finally the security list of the
    exchange is written as CSV. A failure while handling one exchange is
    logged and the next exchange is processed.

    Parameters
    ----------
    exchanges : iterable of str
        ccxt exchange ids to initialise, default: CRYPTOCURRENCY_EXCHANGES
    """
    for exchange_str in set(ccxt.exchanges) & set(exchanges):
        exchange_dir = get_exchange_dir(security_type='cryptocurrency', exchange=exchange_str)

        # Create the exchange directory.
        if not os.path.exists(exchange_dir):
            os.makedirs(exchange_dir)

        # exchange_str comes from ccxt.exchanges, so a plain attribute lookup
        # does the job without evaluating a built string.
        exchange = getattr(ccxt, exchange_str)()
        try:
            markets = exchange.fetch_markets()
            df = pd.DataFrame()

            # Some exchanges return a dict keyed by symbol, others a list.
            keyed_by_symbol = isinstance(markets, dict)
            if not keyed_by_symbol and not isinstance(markets, list):
                # No exception is active here, so log a plain error.
                logger.error("unknown return markets type {}".format(type(markets)))
                # NOTE(review): this stops the whole run, not only this exchange.
                return

            for market in markets:
                if keyed_by_symbol:
                    name = market
                    security_info = markets[market]
                else:
                    name = market['symbol']
                    security_info = market
                code = name.replace('/', "-")

                security_item = generate_security_item(security_type='cryptocurrency', exchange=exchange_str,
                                                       code=code,
                                                       name=name, list_date=None)

                kdata_dir = get_kdata_dir(security_item)

                if not os.path.exists(kdata_dir):
                    os.makedirs(kdata_dir)

                df = df.append(security_item, ignore_index=True)

                logger.info("init_markets,exchange:{} security:{}".format(exchange_str, security_item))

                # Store the cryptocurrency meta information.
                if security_info:
                    with open(get_security_meta_path(security_type='cryptocurrency', exchange=exchange_str,
                                                     code=code), "w") as f:
                        json.dump(security_info, f, ensure_ascii=False)

            # Store the cryptocurrency list of this exchange.
            if not df.empty:
                df.to_csv(get_security_list_path(security_type='cryptocurrency', exchange=exchange_str),
                          index=False)
            logger.info("init_markets for {} success".format(exchange_str))
        except Exception:
            # logger.exception attaches the active traceback by itself; an
            # extra positional argument would be treated as a %-format
            # argument and make the record fail to format.
            logger.exception("init_markets for {} failed".format(exchange_str))
Ejemplo n.º 8
0
def get_security_list(security_type='stock', exchanges=['sh', 'sz'], start=None, end=None,
                      mode='simple', start_date=None, codes=None):
    """
    Get the security list.

    Parameters
    ----------
    security_type : str
        'stock' reads the per-exchange list files, 'index' uses the built-in
        index tables; any other value yields an empty frame
    exchanges : list
        'sh' / 'sz' / 'nasdaq' are recognised, default: ['sh', 'sz'];
        the default list is only iterated, never modified
    start : str
        lowest code to keep (inclusive) in the sh/sz part, default: None
    end : str
        highest code to keep (inclusive) in the sh/sz part, default: None
    mode : str
        'simple' only forces ``code`` to str; any other value also runs the
        sinaIndustry / sinaConcept / sinaArea columns through
        convert_to_list_if_need, default: 'simple'
    start_date : Timestamp str or Timestamp
        keep only sh/sz rows whose listDate is on or after this date,
        default: None
    codes : list
        exact codes to select from the nasdaq part, default: None

    Returns
    -------
    DataFrame
        the sh/sz rows followed by the nasdaq rows, with a fresh integer index
    """
    # Both frames are bound up front so an unsupported security_type returns
    # an empty frame instead of failing with UnboundLocalError.
    df = pd.DataFrame()
    df_usa = pd.DataFrame()

    if security_type == 'stock':
        for exchange in exchanges:
            the_path = files_contract.get_security_list_path(security_type, exchange)
            if not os.path.exists(the_path):
                continue
            if exchange in ('sh', 'sz'):
                converters = {'code': str}
                if mode != 'simple':
                    for column in ('sinaIndustry', 'sinaConcept', 'sinaArea'):
                        converters[column] = convert_to_list_if_need
                df1 = pd.read_csv(the_path, converters=converters)
                # pd.concat is what DataFrame.append delegated to; append
                # itself no longer exists in pandas 2.
                df = pd.concat([df, df1], ignore_index=True)
            elif exchange == 'nasdaq':
                df_usa = pd.read_csv(the_path, dtype=str)
    elif security_type == 'index':
        df = pd.DataFrame(CHINA_STOCK_INDEX)
        if 'nasdaq' in exchanges:
            df_usa = pd.DataFrame(USA_STOCK_INDEX)

    if df.size > 0:
        # Each bound is applied on its own; the guards used to be swapped, so
        # passing only one of them compared the codes against None.
        if start:
            df = df[df["code"] >= start]
        if end:
            df = df[df["code"] <= end]
        if start_date:
            df['listDate'] = pd.to_datetime(df['listDate'])
            df = df[df['listDate'] >= pd.Timestamp(start_date)]

        df = df.set_index(df['code'], drop=False)

    if df_usa.size > 0:
        df_usa = df_usa.set_index(df_usa['code'], drop=False)

        if codes:
            df_usa = df_usa.loc[codes]

    return pd.concat([df, df_usa], ignore_index=True)
Ejemplo n.º 9
0
def parse_shfe_data(force_parse=False):
    """
    Parse the cached shfe history archives into per-contract kdata files.

    Every ``.zip`` in the shfe cache directory that has no sibling ``.xls``
    yet is unpacked, and its single ``.xls`` member is moved next to the
    archive. Each workbook to parse is then split by contract: contracts
    missing from the saved future list are added to it, and the rows of the
    contract are appended to its kdata file.

    Parameters
    ----------
    force_parse : bool
        when True every ``.xls`` in the cache directory is parsed,
        otherwise only the workbooks extracted by this call, default: False
    """
    the_dir = get_exchange_cache_dir(security_type='future', exchange='shfe')

    need_parse_files = []

    zip_files = [
        os.path.join(the_dir, f) for f in os.listdir(the_dir)
        if f.endswith('.zip')
    ]
    for the_zip_file in zip_files:
        dst_file = the_zip_file.replace('.zip', ".xls")

        if not os.path.exists(dst_file):
            dst_dir = the_zip_file.replace('.zip', "")
            # The directory may be left over from an earlier run that did not
            # produce the workbook; that must not abort this run.
            os.makedirs(dst_dir, exist_ok=True)

            unzip(the_zip_file, dst_dir)
            files = [
                os.path.join(dst_dir, f) for f in os.listdir(dst_dir)
                if f.endswith('.xls')
            ]
            if len(files) == 1:
                os.rename(files[0], dst_file)
                need_parse_files.append(dst_file)
            else:
                # Without the rename dst_file does not exist, so queueing it
                # would only make read_excel fail below.
                logger.warning("expected one .xls in {}, found {}".format(
                    dst_dir, len(files)))

    if force_parse:
        need_parse_files = [
            os.path.join(the_dir, f) for f in os.listdir(the_dir)
            if f.endswith('.xls')
        ]
    for the_file in need_parse_files:
        logger.info("parse {}".format(the_file))

        # NOTE(review): newer pandas releases spell this option `skipfooter`
        # - confirm against the pandas version this project pins.
        df = pd.read_excel(the_file,
                           skiprows=2,
                           skip_footer=4,
                           index_col='合约',
                           converters={'日期': str})
        # The contract is forward-filled over the rows where it is missing.
        df.index = pd.Series(df.index).fillna(method='ffill')
        df = df.loc[:, [
            '日期', '前收盘', '前结算', '开盘价', '最高价', '最低价', '收盘价', '结算价', '涨跌1',
            '涨跌2', '成交量', '成交金额', '持仓量'
        ]]
        df.columns = [
            'timestamp', 'preClose', 'preSettlement', 'open', 'high', 'low',
            'close', 'settlement', 'change', 'change1', 'volume', 'turnover',
            'openInterest'
        ]

        unique_index = df.index.drop_duplicates()

        security_list = get_security_list(security_type='future',
                                          exchanges=['shfe'])

        for the_contract in unique_index:
            logger.info("start handling {} in {}".format(
                the_contract, the_file))
            security_item = {
                'code': the_contract,
                'name': get_future_name(the_contract),
                'id': 'future_{}_{}'.format('shfe', the_contract),
                'exchange': 'shfe',
                'type': 'future'
            }
            # Check whether the contract meta needs to be saved.
            if (not security_list.empty) and ('code' in security_list.columns):
                security_list = security_list.set_index(security_list['code'],
                                                        drop=False)
            if the_contract not in security_list.index:
                security_list = security_list.append(security_item,
                                                     ignore_index=True)
                security_list = security_list.sort_index()
                security_list.to_csv(get_security_list_path('future', 'shfe'),
                                     index=False)

            # List selection always yields a DataFrame, and the explicit copy
            # keeps the column assignments below from writing into a slice
            # of df.
            the_df = df.loc[[the_contract]].copy()
            the_df['code'] = the_contract
            the_df['name'] = get_future_name(the_contract)
            the_df['securityId'] = 'future_{}_{}'.format('shfe', the_contract)
            the_df['changePct'] = the_df['change'] / the_df['preClose']
            the_df['changePct1'] = the_df['change1'] / the_df['preSettlement']

            kdata_path = get_kdata_path(item=security_item, source='exchange')
            # TODO: this logic should be handled in one place
            kdata_dir = get_kdata_dir(item=security_item)
            if not os.path.exists(kdata_dir):
                os.makedirs(kdata_dir)

            if os.path.exists(kdata_path):
                saved_df = pd.read_csv(kdata_path, dtype=str)
            else:
                saved_df = pd.DataFrame()

            saved_df = saved_df.append(the_df, ignore_index=True)
            saved_df = saved_df.loc[:, KDATA_FUTURE_COL]

            if not saved_df.empty:
                kdata_df_save(saved_df, kdata_path)

            logger.info("end handling {} in {}".format(the_contract, the_file))
Ejemplo n.º 10
0
def parse_shfe_day_data(force_parse=False):
    """
    Parse the cached shfe daily JSON files into per-contract kdata files.

    Every file in this year's ``day_kdata`` cache directory (except the
    ``parsed`` bookkeeping file) holds one day of data. For each instrument
    with a four-digit delivery month the contract is added to the saved
    future list if missing, and a row for that day is added to the
    contract's kdata file unless the day is already in its index. Dates that
    produced a row are recorded in the ``parsed`` file.

    Parameters
    ----------
    force_parse : bool
        when True, files already listed in ``parsed`` are handled again,
        default: False
    """
    cache_dir = get_exchange_cache_dir(security_type='future',
                                       exchange='shfe',
                                       the_year=datetime.datetime.today().year,
                                       data_type="day_kdata")
    the_parsed_path = os.path.join(cache_dir, 'parsed')
    the_parsed = []
    if os.path.exists(the_parsed_path):
        with open(the_parsed_path) as data_file:
            the_parsed = json.load(data_file)

    if force_parse:
        the_dates = [f for f in os.listdir(cache_dir) if f != 'parsed']
    else:
        the_dates = [
            f for f in os.listdir(cache_dir)
            if f != 'parsed' and f not in the_parsed
        ]

    # date, code, name, low, open, close, high, volume (lots), turnover
    # (yuan), unique id, previous close, change, change pct (%), open
    # interest, settlement price, previous settlement, change (by settlement
    # price), change pct (by settlement price)
    kdata_columns = [
        'timestamp', 'code', 'name', 'low', 'open', 'close', 'high',
        'volume', 'turnover', 'securityId', 'preClose', 'change',
        'changePct', 'openInterest', 'settlement', 'preSettlement',
        'change1', 'changePct1'
    ]

    for the_date in the_dates:
        the_path = os.path.join(cache_dir, the_date)
        logger.info("start handling {}".format(the_path))

        with open(the_path, 'r', encoding='UTF8') as f:
            the_datas = json.load(f)['o_curinstrument']

        for the_data in the_datas:
            # Shape of one entry:
            # {'CLOSEPRICE': 11480,
            #  'DELIVERYMONTH': '1809',
            #  'HIGHESTPRICE': 11555,
            #  'LOWESTPRICE': 11320,
            #  'OPENINTEREST': 425692,
            #  'OPENINTERESTCHG': 3918,
            #  'OPENPRICE': 11495,
            #  'ORDERNO': 0,
            #  'PRESETTLEMENTPRICE': 11545,
            #  'PRODUCTID': 'ru_f    ',
            #  'PRODUCTNAME': '天然橡胶            ',  (natural rubber)
            #  'PRODUCTSORTNO': 100,
            #  'SETTLEMENTPRICE': 11465,
            #  'VOLUME': 456574,
            #  'ZD1_CHG': -65,
            #  'ZD2_CHG': -80}

            # Entries whose delivery month does not start with four digits
            # are skipped.
            if not re.match(r"\d{4}", the_data['DELIVERYMONTH']):
                continue

            product_id = the_data['PRODUCTID']
            code = "{}{}".format(product_id[:product_id.index('_')],
                                 the_data['DELIVERYMONTH'])
            logger.info("start handling {} for {}".format(code, the_date))

            name = get_future_name(code)
            security_id = "future_shfe_{}".format(code)

            security_list = get_security_list(security_type='future',
                                              exchanges=['shfe'])

            security_item = {
                'code': code,
                'name': name,
                'id': security_id,
                'exchange': 'shfe',
                'type': 'future'
            }
            # Check whether the contract meta needs to be saved.
            if security_list is not None and 'code' in security_list.columns:
                security_list = security_list.set_index(
                    security_list['code'], drop=False)
            if code not in security_list.index:
                security_list = security_list.append(security_item,
                                                     ignore_index=True)
                security_list.to_csv(get_security_list_path(
                    'future', 'shfe'),
                                     index=False)

            kdata_path = get_kdata_path(item=security_item,
                                        source='exchange')
            # TODO: this logic should be handled in one place
            kdata_dir = get_kdata_dir(item=security_item)
            if not os.path.exists(kdata_dir):
                os.makedirs(kdata_dir)

            if os.path.exists(kdata_path):
                saved_df = pd.read_csv(kdata_path, dtype=str)
                saved_df = saved_df.set_index(saved_df['timestamp'],
                                              drop=False)
            else:
                saved_df = pd.DataFrame()

            if saved_df.empty or the_date not in saved_df.index:
                # Falsy values (e.g. an empty string) count as 0.
                low_price = the_data['LOWESTPRICE'] or 0
                open_price = the_data['OPENPRICE'] or 0
                close_price = the_data['CLOSEPRICE'] or 0
                high_price = the_data['HIGHESTPRICE'] or 0
                volume = the_data['VOLUME'] or 0

                # A string where a number is expected is treated as 0.
                change = the_data['ZD1_CHG']
                if isinstance(change, str):
                    change = 0
                change1 = the_data['ZD2_CHG']
                if isinstance(change1, str):
                    change1 = 0

                pre_close = close_price - change
                pre_settlement = the_data['PRESETTLEMENTPRICE']

                # First trading day: no previous price to divide by.
                change_pct = change / pre_close if pre_close != 0 else 0
                if pre_settlement != 0:
                    change_pct1 = change1 / pre_settlement
                else:
                    change_pct1 = 0

                # Turnover is an estimate: mean of the four prices times the
                # volume. The whole sum is divided by 4; previously only the
                # high price was.
                average_price = (low_price + open_price + close_price +
                                 high_price) / 4

                kdata_item = {
                    "timestamp": to_time_str(the_date),
                    "code": code,
                    "name": name,
                    "low": low_price,
                    "open": open_price,
                    "close": close_price,
                    "high": high_price,
                    "volume": volume,
                    "turnover": average_price * volume,
                    "securityId": security_id,
                    "preClose": pre_close,
                    "change": change,
                    "changePct": change_pct,
                    "openInterest": the_data['OPENINTEREST'],
                    "settlement": the_data['SETTLEMENTPRICE'],
                    "preSettlement": the_data['PRESETTLEMENTPRICE'],
                    "change1": change1,
                    "changePct1": change_pct1
                }
                saved_df = saved_df.append(kdata_item, ignore_index=True)
                saved_df = saved_df.loc[:, kdata_columns]
                saved_df = saved_df.drop_duplicates(subset='timestamp',
                                                    keep='last')
                saved_df = saved_df.set_index(saved_df['timestamp'],
                                              drop=False)
                saved_df.index = pd.to_datetime(saved_df.index)
                saved_df = saved_df.sort_index()
                saved_df.to_csv(kdata_path, index=False)

                logger.info("end handling {} for {}".format(
                    code, the_date))

                if the_date not in the_parsed:
                    the_parsed.append(the_date)

        # The bookkeeping file is rewritten after every date.
        if the_parsed:
            result_list = drop_duplicate(the_parsed)
            result_list = sorted(result_list)

            with open(the_parsed_path, 'w') as outfile:
                json.dump(result_list, outfile)
        logger.info("end handling {}".format(the_path))
Ejemplo n.º 11
0
def get_security_list(security_type='stock',
                      exchanges=None,
                      start=None,
                      end=None,
                      mode='simple',
                      start_list_date=None,
                      codes=None):
    """
    Load the security list for a security type.

    Parameters
    ----------
    security_type : str
        'index' returns the built-in sh, sz and nasdaq index tables; any
        other type is read from the per-exchange list files, default: 'stock'
    exchanges : str or list
        a single exchange or a list of exchanges; when empty the exchanges
        come from SECURITY_TYPE_MAP_EXCHANGES[security_type], default: None
    start : str
        first code of the range, only used together with end and only when
        codes is not given, default: None
    end : str
        last code of the range, only used together with start and only when
        codes is not given, default: None
    mode : str
        'es' (stocks only) runs the sina* columns through
        convert_to_list_if_need, anything else reads every column as str,
        default: 'simple'
    start_list_date : Timestamp str or Timestamp
        keep only rows whose listDate is on or after this date, default: None
    codes : list
        the exact codes to select, default: None

    Returns
    -------
    DataFrame
        the security list indexed by code, or an empty frame
    """
    df = pd.DataFrame()
    if type(exchanges) is str:
        exchanges = [exchanges]

    if not exchanges:
        exchanges = SECURITY_TYPE_MAP_EXCHANGES[security_type]

    if security_type == 'index':
        index_tables = (CHINA_STOCK_SH_INDEX, CHINA_STOCK_SZ_INDEX,
                        USA_STOCK_NASDAQ_INDEX)
        for table in index_tables:
            df = df.append(pd.DataFrame(table), ignore_index=True)
    else:
        parse_for_es = mode == 'es' and security_type == 'stock'
        for exchange in exchanges:
            the_path = get_security_list_path(security_type, exchange)
            if not os.path.exists(the_path):
                continue
            if parse_for_es:
                es_converters = {
                    'code': str,
                    'sinaIndustry': convert_to_list_if_need,
                    'sinaConcept': convert_to_list_if_need,
                    'sinaArea': convert_to_list_if_need
                }
                loaded = pd.read_csv(the_path, converters=es_converters)
                df = df.append(loaded)
            else:
                loaded = pd.read_csv(the_path, dtype=str)
                df = df.append(loaded, ignore_index=True)

    if df.empty:
        return df

    if start_list_date:
        df['listDate'] = pd.to_datetime(df['listDate'])
        earliest = pd.Timestamp(start_list_date)
        df = df[df['listDate'] >= earliest]

    df = df.set_index(df['code'], drop=False)

    # Exact codes take precedence over the start/end range.
    if codes:
        df = df.loc[codes]
    elif start and end:
        in_range = (df["code"] >= start) & (df["code"] <= end)
        df = df[in_range]

    if security_type != 'cryptocurrency':
        df = df.drop_duplicates(subset='code', keep='last')

    return df
Ejemplo n.º 12
0
def get_security_list(security_type='stock',
                      exchanges=['sh', 'sz'],
                      start=None,
                      end=None,
                      mode='simple',
                      start_date=None,
                      codes=None):
    """
    get security list.

    Parameters
    ----------
    security_type : str
        'stock' reads the per-exchange list files, 'index' uses the built-in
        index tables; any other value yields an empty frame, default: stock
    exchanges : list
        'sh', 'sz' and 'nasdaq' are recognised, default: ['sh','sz'];
        the default list is only iterated, never modified
    start : str
        the start code (inclusive), default: None
        only applied to the sh/sz part of the result
    end : str
        the end code (inclusive), default: None
        only applied to the sh/sz part of the result
    mode : str
        whether parse more security info,{'simple','es'},default:'simple'
    start_date : Timestamp str or Timestamp
        the filter for start list date, only applied to the sh/sz part,
        default: None
    codes : list
        the exact codes to select from the nasdaq part, default: None

    Returns
    -------
    DataFrame
        the sh/sz rows followed by the nasdaq rows, with a fresh integer index

    """
    # Both frames are bound up front so an unsupported security_type returns
    # an empty frame instead of failing with UnboundLocalError.
    df = pd.DataFrame()
    df_usa = pd.DataFrame()

    if security_type == 'stock':
        for exchange in exchanges:
            the_path = files_contract.get_security_list_path(
                security_type, exchange)
            if not os.path.exists(the_path):
                continue
            if exchange in ('sh', 'sz'):
                converters = {'code': str}
                if mode != 'simple':
                    for column in ('sinaIndustry', 'sinaConcept', 'sinaArea'):
                        converters[column] = convert_to_list_if_need
                df1 = pd.read_csv(the_path, converters=converters)
                # pd.concat is what DataFrame.append delegated to; append
                # itself no longer exists in pandas 2.
                df = pd.concat([df, df1], ignore_index=True)
            elif exchange == 'nasdaq':
                df_usa = pd.read_csv(the_path, dtype=str)

    elif security_type == 'index':
        df = pd.DataFrame(CHINA_STOCK_INDEX)
        if 'nasdaq' in exchanges:
            df_usa = pd.DataFrame(USA_STOCK_INDEX)

    if df.size > 0:
        # Each bound is applied on its own; the guards used to be swapped, so
        # passing only one of them compared the codes against None.
        if start:
            df = df[df["code"] >= start]
        if end:
            df = df[df["code"] <= end]
        if start_date:
            df['listDate'] = pd.to_datetime(df['listDate'])
            df = df[df['listDate'] >= pd.Timestamp(start_date)]

        df = df.set_index(df['code'], drop=False)

    if df_usa.size > 0:
        df_usa = df_usa.set_index(df_usa['code'], drop=False)

        if codes:
            df_usa = df_usa.loc[codes]

    return pd.concat([df, df_usa], ignore_index=True)
Ejemplo n.º 13
0
 def spider_closed(self, spider, reason):
     """Write the sh and sz frames to their security-list files, then log the close."""
     frame_attributes = {'sh': 'sh_df', 'sz': 'sz_df'}
     for exchange in ('sh', 'sz'):
         frame = getattr(self, frame_attributes[exchange])
         destination = get_security_list_path('stock', exchange)
         frame.to_csv(destination, index=False)
     spider.logger.info('Spider closed: %s,%s\n', spider.name, reason)
Ejemplo n.º 14
0
def get_security_list(security_type='stock',
                      exchanges=['sh', 'sz'],
                      start=None,
                      end=None,
                      mode='simple',
                      start_list_date=None,
                      codes=None):
    """
    Load the security list for a security type.

    Parameters
    ----------
    security_type : str
        'stock' and 'future' are read from the per-exchange list files,
        'index' comes from the built-in index tables, default: 'stock'
    exchanges : list
        exchanges to include; for 'index' only 'sh', 'sz' and 'nasdaq'
        contribute rows, default: ['sh', 'sz']
    start : str
        first code of the range, only used together with end and only when
        codes is not given, default: None
    end : str
        last code of the range, only used together with start and only when
        codes is not given, default: None
    mode : str
        'es' (stocks only) runs the sina* columns through
        convert_to_list_if_need, anything else reads every column as str,
        default: 'simple'
    start_list_date : Timestamp str or Timestamp
        keep only rows whose listDate is on or after this date, default: None
    codes : list
        the exact codes to select, default: None

    Returns
    -------
    DataFrame
        the security list with duplicate codes removed (last one kept)
    """
    result = pd.DataFrame()

    if security_type in ('stock', 'future'):
        # Stock metadata needs some conversion if it is to be stored in es.
        parse_for_es = mode == 'es' and security_type == 'stock'
        for exchange in exchanges:
            the_path = get_security_list_path(security_type, exchange)
            if not os.path.exists(the_path):
                continue
            if parse_for_es:
                read_options = {
                    'converters': {
                        'code': str,
                        'sinaIndustry': convert_to_list_if_need,
                        'sinaConcept': convert_to_list_if_need,
                        'sinaArea': convert_to_list_if_need
                    }
                }
            else:
                read_options = {'dtype': str}
            loaded = pd.read_csv(the_path, **read_options)
            result = result.append(loaded, ignore_index=True)

    elif security_type == 'index':
        index_tables = {
            'sh': CHINA_STOCK_SH_INDEX,
            'sz': CHINA_STOCK_SZ_INDEX,
            'nasdaq': USA_STOCK_NASDAQ_INDEX
        }
        for exchange in exchanges:
            if exchange in index_tables:
                table = pd.DataFrame(index_tables[exchange])
                result = result.append(table, ignore_index=True)

    if result.size > 0:
        if start_list_date:
            result['listDate'] = pd.to_datetime(result['listDate'])
            earliest = pd.Timestamp(start_list_date)
            result = result[result['listDate'] >= earliest]

        result = result.set_index(result['code'], drop=False)

        # Exact codes take precedence over the start/end range.
        if codes:
            result = result.loc[codes]
        elif start and end:
            in_range = (result["code"] >= start) & (result["code"] <= end)
            result = result[in_range]

    # FIXME:
    # the future list contains duplicated data, this needs checking
    return result.drop_duplicates(subset='code', keep='last')