Ejemplo n.º 1
0
    def fix_symbol(self, stock: str, factors: list or f,
                   start_date: datetime.date or str, end_date: datetime.date
                   or str):
        """Collect several factors for a single stock over a date range.

        The requested factors are grouped by source mark with
        ``dis_mark2factor_map``. Every mark that has a registered instance in
        ``self.mark2instance_map`` is asked for its share via that instance's
        own ``fix_symbol``, and the partial results are merged column-wise.
        Marks without a registered instance are skipped silently.

        :param stock: stock code; the first element returned by
            ``convert_11code`` is used.
        :param factors: a factor or a list of factors.
        :param start_date: start of the range, passed through
            ``convert2datetime``.
        :param end_date: end of the range, passed through
            ``convert2datetime``.
        :return: numpy structured array whose first field is ``date``
            followed by the fields contributed by each source instance.
        """
        started_at = time.time()

        # Normalise the arguments.
        stock = convert_11code(stock)[0]
        start_date = convert2datetime(start_date)
        end_date = convert2datetime(end_date)
        factors_by_mark = dis_mark2factor_map(factors, self.factor2mark_map)

        # Seed the result with the calendar rows flagged as trading days.
        all_days = TradeCalendar().calendar(start_date, end_date)
        trade_days = all_days[all_days['trade']]['date']
        merged = numpy.array(trade_days, dtype=[('date', 'U10')])

        field_names = ['date']
        for mark, mark_factors in factors_by_mark.items():
            source = self.mark2instance_map.get(mark, None)
            if not source:
                continue
            part = source.fix_symbol(stock, mark_factors, start_date, end_date)
            # The first field of each partial result is its own key column;
            # only the remaining value fields are appended.
            part_fields = list(part.dtype.names)[1:]
            field_names.extend(part_fields)
            merged = rfn.merge_arrays((merged, part[part_fields]),
                                      asrecarray=True,
                                      flatten=True)

        merged.dtype.names = field_names
        elapsed = time.time() - started_at
        print('fix_symbol耗时:', elapsed)

        # Hand back a plain ndarray rather than the recarray built above.
        return numpy.array(merged)
    def fix_factor(self, stock_list: list, factor: f or list, start_date: str or datetime.date,
                   end_date: str or datetime.date):
        """Query one factor for several stocks over a date range.

        Only the first collection in the map built by
        ``dis_collection2factor_map`` is queried, and only the first factor
        mapped to that collection. Values are keyed by ``PubDate``,
        forward-filled and then restricted to the trading days of the
        calendar.

        :param stock_list: stock codes; normalised with ``convert_11code``.
        :param factor: the factor (or list of factors) to look up.
        :param start_date: start of the range, passed through
            ``convert2datetime``.
        :param end_date: end of the range, passed through
            ``convert2datetime``.
        :return: numpy structured array with one row per trading day; the
            first field is ``date`` and there is one float field per stock
            in ``stock_list`` (NaN where no value is available).
        """
        # Normalise the arguments.
        stock_list = convert_11code(stock_list)
        collection2factor_map = dis_collection2factor_map(factor, self.factor2collection_map)
        start_date = convert2datetime(start_date)
        end_date = convert2datetime(end_date)
        # Push the upper bound to 23:59:59 so the "$lte" filter below also
        # matches records stamped during the last day.
        end_date = end_date + datetime.timedelta(hours=23, minutes=59, seconds=59)

        # Build an empty frame indexed by the calendar rows flagged as trading days.
        calendar = TradeCalendar().calendar(start_date.strftime("%Y%m%d"),
                                            end_date.strftime("%Y%m%d"))
        trading_calendar = calendar['date'][calendar['trade']]
        trading_calendar_index = pandas.DataFrame(trading_calendar,
                                                  columns=['index']).set_index('index')
        rett = trading_calendar_index.copy()

        # Decide which collection and which field to query.
        collection, field = list(collection2factor_map.items())[0]
        field = field[0].name
        snap = ['SecuCode', 'PubDate', field]
        doc_snap = {k: 1 for k in snap}
        doc_snap["_id"] = 0

        # Run the query.
        db_coll = connect_coll(collection, self._db)
        cursor = db_coll.find({'SecuCode': {'$in': stock_list},
                               "PubDate": {"$gte": start_date, "$lte": end_date}}, doc_snap)
        data = pandas.DataFrame(list(cursor))

        # Pivot the result into a date x stock table and align it with the
        # calendar. An empty result is not merged: it has no columns and a
        # bare RangeIndex, so there is nothing to contribute.
        if not data.empty:
            data[snap[1]] = data[snap[1]].map(lambda x: x.strftime('%Y-%m-%d'))
            data = data[snap]
            data.columns = ['stock', 'time', field]
            data = pandas.crosstab(data['time'], data['stock'], values=data[field], aggfunc='last')
            rett = data.merge(rett, left_index=True, right_index=True, how='outer')

        # Forward-fill first, then keep only the calendar dates.
        # ``.ffill()`` / ``.loc`` replace ``fillna(method='pad')`` / ``.ix``,
        # which are deprecated / removed in current pandas.
        rett = rett.ffill()
        rett = rett.loc[trading_calendar_index.index]

        # Guarantee one column per requested stock: concatenate a throw-away
        # row (labelled '1') that carries every stock code as a column, then
        # drop that row again.
        to_concat_ret = pandas.DataFrame(dict(zip(stock_list, [1] * len(stock_list))), index=['1'])
        rett = pandas.concat([rett, to_concat_ret])
        rett = rett.drop(['1'])

        # Convert to a record array and name the index field 'date'.
        rett = rett.astype(float)
        rett = rett.to_records()
        rett.dtype.names = ['date'] + list(rett.dtype.names)[1:]

        # A structured array is returned for now; letting callers choose a
        # pandas object instead is a possible later extension.
        return numpy.array(rett)
    def fix_symbol(self, stock: str, factors: list or f, start_date: datetime.date or str,
                   end_date: datetime.date or str):
        """Query several factors for one stock over a date range.

        Factors are grouped by collection with ``dis_collection2factor_map``
        and each collection is queried once. Values are keyed by ``PubDate``,
        forward-filled and restricted to the trading days of the calendar.

        :param stock: stock code; the first element returned by
            ``convert_11code`` is used.
        :param factors: a factor or a list of factors.
        :param start_date: start of the range, passed through
            ``convert2datetime``.
        :param end_date: end of the range, passed through
            ``convert2datetime``.
        :return: numpy structured array with one row per trading day; the
            first field is ``date`` followed by one float field per factor
            name (NaN where no value is available).
        """
        # Normalise the arguments.
        stock = convert_11code(stock)[0]
        collection2factor_map = dis_collection2factor_map(factors, self.factor2collection_map)
        start_date = convert2datetime(start_date)
        end_date = convert2datetime(end_date)
        # Push the upper bound to 23:59:59 so the "$lte" filter below also
        # matches records stamped during the last day.
        end_date = end_date + datetime.timedelta(hours=23, minutes=59, seconds=59)

        # Build an empty frame indexed by the calendar rows flagged as trading days.
        calendar = TradeCalendar().calendar(start_date.strftime("%Y%m%d"),
                                            end_date.strftime("%Y%m%d"))
        trading_calendar = calendar['date'][calendar['trade']]
        trading_calendar_index = pandas.DataFrame(trading_calendar,
                                                  columns=['index']).set_index('index')
        rett = trading_calendar_index.copy()

        # The query window starts on 1 January of the year before the
        # requested start (presumably so that earlier publications are
        # available for the forward fill below).
        _year = int(start_date.strftime("%Y")) - 1
        start_date = datetime.datetime(_year, 1, 1, 0, 0, 0)
        for collection in collection2factor_map:
            factor_name_list = gen_factor_name_list(collection2factor_map[collection])
            snap = ['PubDate', ]
            snap.extend(factor_name_list)
            doc_snap = {k: 1 for k in snap}
            doc_snap["_id"] = 0

            # Run the query.
            db_coll = connect_coll(collection, self._db)
            cursor = db_coll.find({'SecuCode': stock,
                                   "PubDate": {"$gte": start_date, "$lte": end_date}}, doc_snap)
            data = pandas.DataFrame(list(cursor))

            if data.empty:
                # No documents: an empty frame has no factor columns, so the
                # column selection after the merge would raise KeyError.
                # Emit all-NaN columns for these factors instead.
                for name in factor_name_list:
                    rett[name] = float('nan')
                continue

            # Key the rows by the publication date formatted as YYYY-MM-DD.
            data[snap[0]] = data[snap[0]].map(lambda x: x.strftime('%Y-%m-%d'))
            data = data[snap].set_index(snap[0])

            # Merge, forward-fill this collection's factors, then keep only
            # the calendar dates. ``.ffill()`` / ``.loc`` replace
            # ``fillna(method='pad')`` / ``.ix``, which are deprecated /
            # removed in current pandas.
            rett = rett.merge(data, left_index=True, right_index=True, how='outer')
            rett[factor_name_list] = rett[factor_name_list].ffill()
            rett = rett.loc[trading_calendar_index.index]

        # Convert to a record array and name the index field 'date'.
        rett = rett.astype(float).to_records()
        rett.dtype.names = ['date'] + list(rett.dtype.names)[1:]

        return numpy.array(rett)
Ejemplo n.º 4
0
    def fix_time(self, stock_list: list or str, factors: list or f,
                 trade_date: datetime.date or str):
        """Collect several factors for several stocks on one date.

        The requested factors are grouped by source mark with
        ``dis_mark2factor_map``. Every mark that has a registered instance in
        ``self.mark2instance_map`` is asked for its share via that instance's
        own ``fix_time``, and the partial results are merged column-wise.
        Marks without a registered instance are skipped silently.

        :param stock_list: stock code(s); normalised with ``convert_11code``.
        :param factors: a factor or a list of factors.
        :param trade_date: the date to query, passed through
            ``convert2datetime``.
        :return: numpy structured array whose first field is ``stock``
            followed by the fields contributed by each source instance.
        """
        started_at = time.time()

        # Normalise the arguments.
        stock_list = convert_11code(stock_list)
        factors_by_mark = dis_mark2factor_map(factors, self.factor2mark_map)
        trade_date = convert2datetime(trade_date)

        # Seed the result with one single-field record per stock code.
        merged = numpy.array([(code,) for code in stock_list],
                             dtype=[('stock', 'U11')])

        field_names = ['stock']
        for mark, mark_factors in factors_by_mark.items():
            # ``source`` is the instance registered for this mark.
            source = self.mark2instance_map.get(mark, None)
            if not source:
                continue
            part = source.fix_time(stock_list, mark_factors, trade_date)
            # Skip the partial result's own key column; keep the value fields.
            part_fields = list(part.dtype.names)[1:]
            field_names.extend(part_fields)
            merged = rfn.merge_arrays((merged, part[part_fields]),
                                      asrecarray=True,
                                      flatten=True)

        merged.dtype.names = field_names

        elapsed = time.time() - started_at
        print('fix_time耗时:', elapsed)

        # Convert the numpy.recarray into a plain numpy.ndarray.
        return numpy.array(merged)
Ejemplo n.º 5
0
    def fix_factor(self, stock_list: list or str, factor: f,
                   start_date: datetime.date or str, end_date: datetime.date
                   or str):
        """Fetch one factor for several stocks over a date range.

        The factor is resolved to a source mark with ``dis_mark2factor_map``
        and the call is forwarded to the instance registered for that mark in
        ``self.mark2instance_map``.

        :param stock_list: stock code(s); normalised with ``convert_11code``.
        :param factor: the factor to fetch; a sequence of factors is also
            accepted, in which case the first element's name is used in the
            printed messages.
        :param start_date: start of the range, passed through
            ``convert2datetime``.
        :param end_date: end of the range, passed through
            ``convert2datetime``.
        :return: whatever the source instance's ``fix_factor`` returns, or
            ``None`` when the factor is not mapped to any mark or no
            instance is registered for its mark.
        """
        start = time.time()
        # Normalise the arguments.
        stock_list = convert_11code(stock_list)
        start_date = convert2datetime(start_date)
        end_date = convert2datetime(end_date)

        # Resolve the display name once. Previously the error path read
        # ``factor.name`` while the success path read ``factor[0].name``, so
        # one of the two failed depending on whether a single factor or a
        # list was passed.
        if hasattr(factor, 'name'):
            factor_name = factor.name
        elif factor:
            factor_name = factor[0].name
        else:
            factor_name = ''

        mark2factor_map = dis_mark2factor_map(factor, self.factor2mark_map)
        if not mark2factor_map:
            print('未收录因子' + factor_name + ',或您没有获取该因子权限,请联系管理员。')
            return None
        # Only the first mark is used.
        mark = next(iter(mark2factor_map))

        instance = self.mark2instance_map.get(mark, None)
        ret = instance.fix_factor(stock_list, factor, start_date,
                                  end_date) if instance else None
        end = time.time()
        print(factor_name)
        print('fix_factor耗时:', end - start)
        # This interface needs neither slicing nor conversion of the result.
        return ret
    def fix_time(self, stock_list: list or str, factors: list or f,
                 trade_date: datetime.date or str):
        """Query several factors for several stocks as of one date.

        Factors are grouped by collection with ``dis_collection2factor_map``
        and each collection is queried once. For every stock the record with
        the latest ``PubDate`` inside the query window is kept.

        :param stock_list: stock code(s); normalised with ``convert_11code``.
        :param factors: a factor or a list of factors.
        :param trade_date: the date to query, passed through
            ``convert2datetime``.
        :return: numpy structured array with one row per stock; the first
            field is ``stock`` followed by one float field per factor name
            (NaN where no value is available).
        """
        # Normalise the arguments.
        stock_list = convert_11code(stock_list)
        collection2factor_map = dis_collection2factor_map(factors, self.factor2collection_map)
        start_date = convert2datetime(trade_date)
        # Upper bound is 23:59:59 of the trade date so "$lte" covers that day.
        end_date = start_date + datetime.timedelta(hours=23, minutes=59, seconds=59)

        # Start from a frame indexed by every requested stock, so stocks with
        # no data still appear in the result.
        rett = pandas.DataFrame(stock_list, columns=['stock']).set_index('stock')

        # The query window starts on 1 January of the year before the trade date.
        _year = int(start_date.strftime("%Y")) - 1
        start_date = datetime.datetime(_year, 1, 1, 0, 0, 0)
        for collection in collection2factor_map:
            factor_name_list = gen_factor_name_list(collection2factor_map[collection])
            snap = ['SecuCode', 'PubDate',]
            snap.extend(factor_name_list)
            doc_snap = {k: 1 for k in snap}
            doc_snap["_id"] = 0

            # Run the query.
            db_coll = connect_coll(collection, self._db)
            cursor = db_coll.find({'SecuCode': {'$in': stock_list},
                                   "PubDate": {"$gte": start_date, "$lte": end_date}}, doc_snap)
            data = pandas.DataFrame(list(cursor))

            if data.empty:
                # No documents: an empty frame has no 'stock' column, so
                # ``set_index('stock')`` would raise KeyError. Emit all-NaN
                # columns for these factors instead.
                for name in factor_name_list:
                    rett[name] = float('nan')
                continue

            # Normalise the result: index by PubDate, rename SecuCode -> stock.
            data = data[snap].set_index('PubDate')
            data.columns = ['stock'] + factor_name_list
            # Sort by date (stable, so ties keep their returned order) so that
            # "last row per stock" means the latest publication rather than
            # whatever order the cursor happened to return.
            data = data.sort_index(kind='mergesort')
            data = data.groupby('stock').tail(1).set_index('stock')

            # Accumulate this collection's columns into the result.
            rett = rett.merge(data, left_index=True, right_index=True, how='outer')

        # Convert to a record array and name the index field 'stock'.
        rett = rett.astype(float).to_records()
        rett.dtype.names = ['stock'] + list(rett.dtype.names)[1:]

        return numpy.array(rett)