Example #1
0
    def fix_time(self, stock_list: list or str, factors: list or f,
                 trade_date: datetime.date or str):
        """Collect factor values for many stocks on a single date.

        The requested factors are grouped by "mark" and each group is
        delegated to the handler registered for that mark in
        ``self.mark2instance_map``. Marks without a (truthy) handler are
        skipped silently.

        Args:
            stock_list: One stock code or a list of codes; normalised with
                ``convert_11code``.
            factors: Factor(s) to fetch; grouped with ``dis_mark2factor_map``.
            trade_date: Date to query; normalised with ``convert2datetime``.

        Returns:
            A ``numpy.ndarray`` with structured dtype whose first field is
            ``'stock'`` followed by every field contributed by the handlers.
        """
        began = time.time()

        # Normalise the caller-supplied arguments.
        stock_list = convert_11code(stock_list)
        factors_by_mark = dis_mark2factor_map(factors, self.factor2mark_map)
        trade_date = convert2datetime(trade_date)

        # Seed the result with a single 'stock' column.
        merged = numpy.array(list(zip(stock_list)), dtype=[('stock', 'U11')])

        factor_columns = []
        for mark, mark_factors in factors_by_mark.items():
            handler = self.mark2instance_map.get(mark)
            if not handler:
                continue

            partial = handler.fix_time(stock_list, mark_factors, trade_date)
            # The first field of the handler's result is dropped (presumably
            # its own stock key); only the remaining value fields are kept.
            value_fields = list(partial.dtype.names)[1:]
            factor_columns.extend(value_fields)
            merged = rfn.merge_arrays((merged, partial[value_fields]),
                                      asrecarray=True,
                                      flatten=True)

        # Re-apply the field names over the flattened merge result.
        merged.dtype.names = ['stock'] + factor_columns

        print('fix_time耗时:', time.time() - began)

        # Convert the numpy.recarray back into a plain numpy.ndarray.
        return numpy.array(merged)
Example #2
0
    def fix_symbol(self, stock: str, factors: list or f,
                   start_date: datetime.date or str, end_date: datetime.date
                   or str):
        """Collect factor values for one stock across a date range.

        The requested factors are grouped by "mark" and each group is
        delegated to the handler registered for that mark in
        ``self.mark2instance_map``. Marks without a (truthy) handler are
        skipped silently.

        Args:
            stock: Stock code; the first element returned by
                ``convert_11code`` is used.
            factors: Factor(s) to fetch; grouped with ``dis_mark2factor_map``.
            start_date: Range start; normalised with ``convert2datetime``.
            end_date: Range end; normalised with ``convert2datetime``.

        Returns:
            A ``numpy.ndarray`` with structured dtype whose first field is
            ``'date'`` followed by every field contributed by the handlers.
        """
        began = time.time()

        # Normalise the caller-supplied arguments.
        stock = convert_11code(stock)[0]
        start_date = convert2datetime(start_date)
        end_date = convert2datetime(end_date)
        factors_by_mark = dis_mark2factor_map(factors, self.factor2mark_map)

        # Seed the result with the dates flagged as trading days.
        days = TradeCalendar().calendar(start_date, end_date)
        trading_dates = days[days['trade']]['date']
        merged = numpy.array(trading_dates, dtype=[('date', 'U10')])

        factor_columns = []
        for mark, mark_factors in factors_by_mark.items():
            handler = self.mark2instance_map.get(mark)
            if not handler:
                continue

            partial = handler.fix_symbol(stock, mark_factors, start_date,
                                         end_date)
            # Drop the handler's first field and keep only the value fields.
            value_fields = list(partial.dtype.names)[1:]
            factor_columns.extend(value_fields)
            merged = rfn.merge_arrays((merged, partial[value_fields]),
                                      asrecarray=True,
                                      flatten=True)

        # Re-apply the field names over the flattened merge result.
        merged.dtype.names = ['date'] + factor_columns

        print('fix_symbol耗时:', time.time() - began)

        return numpy.array(merged)
    def fix_factor(self, stock_list: list, factor: f or list, start_date: str or datetime.date,
                   end_date: str or datetime.date):
        """Return one factor for many stocks over a date range.

        Only the first collection produced by ``dis_collection2factor_map``
        and the first factor of that collection are queried.

        Args:
            stock_list: Stock codes; normalised with ``convert_11code``.
            factor: Factor (or list of factors) to fetch.
            start_date: Range start; normalised with ``convert2datetime``.
            end_date: Range end (inclusive); normalised with
                ``convert2datetime``.

        Returns:
            A ``numpy.ndarray`` with structured dtype: a ``'date'`` field
            followed by one float field per stock. Rows are the trading days
            of the range; values are forward-filled from the most recent
            record at or before each day.
        """
        # Normalise arguments.
        stock_list = convert_11code(stock_list)
        collection2factor_map = dis_collection2factor_map(factor, self.factor2collection_map)
        start_date = convert2datetime(start_date)
        end_date = convert2datetime(end_date)
        # Push the upper bound to 23:59:59 so the whole end day is matched.
        end_date = end_date + datetime.timedelta(hours=23, minutes=59, seconds=59)

        # Load the trading calendar and keep only rows flagged as trading days.
        calendar = TradeCalendar().calendar(start_date.strftime("%Y%m%d"),
                                            end_date.strftime("%Y%m%d"))
        trading_calendar = calendar['date'][calendar['trade']]
        trading_calendar_index = pandas.DataFrame(trading_calendar,
                                                  columns=['index']).set_index('index')
        rett = trading_calendar_index.copy()

        # Decide which collection and field to query (first of each only).
        collection, field = list(collection2factor_map.items())[0]
        field = field[0].name
        snap = ['SecuCode', 'PubDate', field]
        doc_snap = {k: 1 for k in snap}
        doc_snap["_id"] = 0

        # Query.
        db_coll = connect_coll(collection, self._db)
        ret = db_coll.find({'SecuCode': {'$in': stock_list},
                            "PubDate": {"$gte": start_date, "$lte": end_date}}, doc_snap)

        # Materialise the cursor.
        data = pandas.DataFrame(list(ret))

        # Pivot to a date x stock table, keeping the last value per cell.
        if not data.empty:
            data[snap[1]] = data[snap[1]].map(lambda x: x.strftime('%Y-%m-%d'))
            data = data[snap]
            data.columns = ['stock', 'time', field]
            data = pandas.crosstab(data['time'], data['stock'], values=data[field], aggfunc='last')

        # Align with the trading calendar.
        rett = data.merge(rett, left_index=True, right_index=True, how='outer')
        # Forward-fill first so each trading day sees the latest earlier value.
        # (`fillna(method='pad')` is deprecated; `ffill()` is the equivalent.)
        rett = rett.ffill()
        # Then restrict to trading days. `.ix` was removed in pandas 1.0; the
        # outer merge guarantees every calendar label exists, so `.loc` is safe.
        rett = rett.loc[trading_calendar_index.index]

        # Append and immediately drop a dummy row that has one column per
        # requested stock, so stocks without any record still get a column.
        to_concat_ret = pandas.DataFrame(dict(zip(stock_list, [1] * len(stock_list))),index=['1'])
        rett = pandas.concat([rett, to_concat_ret])
        rett = rett.drop(['1'])

        # Finalise: float values, structured records, first field named 'date'.
        rett = rett.astype(float)
        rett = rett.to_records()
        rett.dtype.names = ['date'] + list(rett.dtype.names)[1:]

        # Returns a structured array for now (a pandas option could be added
        # later). Rows are dates and fields are stocks.
        return numpy.array(rett)
    def fix_time(self, stock_list: list or str, factors: list or f,
                 trade_date: datetime.date or str):
        """Return several factors for many stocks as a structured array.

        Args:
            stock_list: One stock code or a list of codes; normalised with
                ``convert_11code``.
            factors: Iterable of factor objects exposing ``.name``.
            trade_date: Currently unused (see the review note below).

        Returns:
            A ``numpy.ndarray`` with a ``'stock'`` field (``U11``) and one
            ``<f8`` field per factor, initialised to NaN. Records are applied
            in ascending ``PubDate`` order, so later records overwrite
            earlier ones for the same stock.
        """
        stock_list = convert_11code(stock_list)
        collection2factor_map = dis_collection2factor_map(
            factors, self.factor2collection_map)

        # NOTE(review): the query window is hard-coded to 2010-08-14 and
        # `trade_date` is ignored. This looks like leftover test code and
        # should probably be derived from `trade_date`; confirm with the owner.
        start_date = datetime.datetime(2010, 8, 14)
        end_date = start_date + datetime.timedelta(
            hours=23, minutes=59, seconds=59)

        dtypes = [("stock", "U11")]
        for ft in factors:
            dtypes.append((ft.name, "<f8"))

        # `numpy.NAN` was removed in NumPy 2.0; `numpy.nan` works everywhere.
        result = numpy.full((len(stock_list), ), numpy.nan, dtype=dtypes)
        result["stock"] = stock_list

        # Cache of stock code -> row index in `result`.
        indexes = {}

        for collection in collection2factor_map:
            factor_name_list = gen_factor_name_list(
                collection2factor_map[collection])
            snap = [
                'SecuCode',
                'PubDate',
            ]
            snap.extend(factor_name_list)
            # Projection: requested fields only, without Mongo's `_id`.
            doc_snap = {k: 1 for k in snap}
            doc_snap["_id"] = 0

            db_coll = connect_coll(collection, self._db)
            data = db_coll.find(
                {
                    'SecuCode': {
                        '$in': stock_list
                    },
                    "PubDate": {
                        "$gte": start_date,
                        "$lte": end_date
                    }
                }, doc_snap).sort("PubDate", pymongo.ASCENDING)

            for d in data:
                code = d['SecuCode']
                idx = indexes.get(code)
                if idx is None:
                    idx = find_date_in_array(code, result["stock"])
                    if idx == -1:
                        system_log.warning(
                            f"[Finance.fix_time] code index not found. record={d}"
                        )
                        continue
                    indexes[code] = idx
                for ft in factor_name_list:
                    result[idx][ft] = d[ft]
        return result
    def fix_factor(self, stock_list: list, factor: f or list, time: str
                   or datetime.date, frequency: (1, 2, 3, 4)):
        """Return one factor for many stocks as a date-indexed structured array.

        Only the first collection produced by ``dis_collection2factor_map``
        and the first factor of that collection are queried.

        Args:
            stock_list: Stock codes; normalised with ``convert_11code``.
            factor: Factor (or list of factors) to fetch.
            time: Currently unused (see the review note below).
            frequency: Currently unused.

        Returns:
            A ``numpy.ndarray`` with a ``'date'`` field (``uint32``) and one
            ``<f8`` field per stock code, initialised to NaN.
        """
        # Normalise arguments.
        stock_list = convert_11code(stock_list)
        collection2factor_map = dis_collection2factor_map(
            factor, self.factor2collection_map)
        collection, field = list(collection2factor_map.items())[0]
        field = field[0].name

        snap = ['SecuCode', 'PubDate', field]
        # Projection: requested fields only, without Mongo's `_id`.
        doc_snap = {k: 1 for k in snap}
        doc_snap["_id"] = 0

        # Test time window.
        # NOTE(review): hard-coded; `time` and `frequency` are ignored.
        start_date = datetime.datetime(2015, 1, 1)
        end_date = datetime.datetime(2015, 3, 31)

        # Query, oldest record first.
        db_coll = connect_coll(collection, self._db)
        ret = db_coll.find(
            {
                'SecuCode': {
                    '$in': stock_list
                },
                "PubDate": {
                    "$gte": start_date,
                    "$lte": end_date
                }
            }, doc_snap).sort('PubDate', pymongo.ASCENDING)

        dtypes = [("date", "uint32")]
        # One float field per stock code.
        for code in stock_list:
            dtypes.append((code, "<f8"))

        trade_days = TradeCalendar().calendar(start_date, end_date)

        # `numpy.NaN` was removed in NumPy 2.0; `numpy.nan` works everywhere.
        result = numpy.full((trade_days.shape[0], ), numpy.nan, dtype=dtypes)

        result["date"] = trade_days
        # Cache of date -> row index in `result`.
        indexes = {}
        for r in ret:
            t = yyyymmdd_date(r['PubDate'])
            idx = indexes.get(t)
            # `is None` rather than `not idx`: row 0 is a valid cached index
            # and must not trigger another lookup.
            if idx is None:
                idx = find_date_in_array(t, result["date"])
                if idx == -1:  # not a trading day
                    system_log.warning(
                        f"[Finance.fix_factor] date index not found. record={r}"
                    )
                    continue
                indexes[t] = idx
            result[r.get('SecuCode')][idx] = r[field]
        return result
    def fix_symbol(self, stock: str, factors: list or f, start_date: datetime.date or str,
                   end_date: datetime.date or str):
        """Return several factors for one stock over a date range.

        Args:
            stock: Stock code; the first element returned by
                ``convert_11code`` is used.
            factors: Factor(s) to fetch; grouped by collection with
                ``dis_collection2factor_map``.
            start_date: Range start; normalised with ``convert2datetime``.
            end_date: Range end (inclusive); normalised with
                ``convert2datetime``.

        Returns:
            A ``numpy.ndarray`` with structured dtype: a ``'date'`` field
            followed by one float field per factor. Rows are the trading days
            of the range; values are forward-filled from the most recent
            record. A collection with no matching record contributes all-NaN
            columns.
        """
        # Normalise arguments.
        stock = convert_11code(stock)[0]
        collection2factor_map = dis_collection2factor_map(factors, self.factor2collection_map)
        start_date = convert2datetime(start_date)
        end_date = convert2datetime(end_date)
        # Push the upper bound to 23:59:59 so the whole end day is matched.
        end_date = end_date + datetime.timedelta(hours=23, minutes=59, seconds=59)

        # Load the trading calendar and keep only rows flagged as trading days.
        calendar = TradeCalendar().calendar(start_date.strftime("%Y%m%d"),
                                            end_date.strftime("%Y%m%d"))
        trading_calendar = calendar['date'][calendar['trade']]
        trading_calendar_index = pandas.DataFrame(trading_calendar,
                                                  columns=['index']).set_index('index')
        rett = trading_calendar_index.copy()

        # Widen the query window back to 1 January of the previous year so
        # the forward fill has earlier records to start from.
        _year = int(start_date.strftime("%Y")) - 1
        start_date = datetime.datetime(_year, 1, 1, 0, 0, 0)
        for collection in collection2factor_map:
            factor_name_list = gen_factor_name_list(collection2factor_map[collection])
            snap = ['PubDate', ]
            snap.extend(factor_name_list)
            # Projection: requested fields only, without Mongo's `_id`.
            doc_snap = {k: 1 for k in snap}
            doc_snap["_id"] = 0

            # Query.
            db_coll = connect_coll(collection, self._db)
            data = db_coll.find({'SecuCode': stock,
                                 "PubDate": {"$gte": start_date, "$lte": end_date}}, doc_snap)

            # Materialise the cursor.
            data = pandas.DataFrame(list(data))

            if data.empty:
                # No record at all: add all-NaN columns. Previously the column
                # selection below raised KeyError in this case.
                rett = rett.reindex(columns=list(rett.columns) + factor_name_list)
                continue

            # Index the records by 'YYYY-MM-DD' publication date.
            data[snap[0]] = data[snap[0]].map(lambda x: x.strftime('%Y-%m-%d'))
            data = data[snap]
            data = data.set_index(snap[0])

            # Fold this collection's columns into the running result.
            rett = rett.merge(data, left_index=True, right_index=True, how='outer')
            # Forward-fill first so each trading day sees the latest earlier value.
            # (`fillna(method='pad')` is deprecated; `ffill()` is the equivalent.)
            rett[factor_name_list] = rett[factor_name_list].ffill()
            # Then restrict to trading days. `.ix` was removed in pandas 1.0; the
            # outer merge guarantees every calendar label exists, so `.loc` is safe.
            rett = rett.loc[trading_calendar_index.index]

        # Finalise: float values, structured records, first field named 'date'.
        rett = rett.astype(float).to_records()
        rett.dtype.names = ['date'] + list(rett.dtype.names)[1:]

        return numpy.array(rett)
    def fix_time(self, stock_list: list or str, factors: list or f,
                 trade_date: datetime.date or str):
        """Return several factors for many stocks as of a single date.

        For each stock the record with the latest ``PubDate`` between
        1 January of the year before ``trade_date`` and the end of
        ``trade_date`` is used.

        Args:
            stock_list: One stock code or a list of codes; normalised with
                ``convert_11code``.
            factors: Factor(s) to fetch; grouped by collection with
                ``dis_collection2factor_map``.
            trade_date: As-of date; normalised with ``convert2datetime``.

        Returns:
            A ``numpy.ndarray`` with structured dtype: a ``'stock'`` field
            followed by one float field per factor. Stocks (or whole
            collections) without any record get NaN.
        """
        # Normalise arguments.
        stock_list = convert_11code(stock_list)
        collection2factor_map = dis_collection2factor_map(factors, self.factor2collection_map)
        start_date = convert2datetime(trade_date)
        # Upper bound: 23:59:59 on the trade date.
        end_date = start_date + datetime.timedelta(hours=23, minutes=59, seconds=59)

        # Start from every requested stock so missing ones still get a row.
        rett = pandas.DataFrame(stock_list, columns=['stock']).set_index('stock')

        # Lower bound: 1 January of the previous year.
        _year = int(start_date.strftime("%Y")) - 1
        start_date = datetime.datetime(_year, 1, 1, 0, 0, 0)
        for collection in collection2factor_map:
            factor_name_list = gen_factor_name_list(collection2factor_map[collection])
            snap = ['SecuCode', 'PubDate',]
            snap.extend(factor_name_list)
            # Projection: requested fields only, without Mongo's `_id`.
            doc_snap = {k: 1 for k in snap}
            doc_snap["_id"] = 0

            # Query.
            db_coll = connect_coll(collection, self._db)
            data = db_coll.find({'SecuCode': {'$in': stock_list},
                                 "PubDate": {"$gte": start_date, "$lte": end_date}}, doc_snap)

            # Materialise the cursor.
            data = pandas.DataFrame(list(data))

            if data.empty:
                # No record at all: add all-NaN columns. Previously
                # `set_index('stock')` raised KeyError on the column-less frame.
                rett = rett.reindex(columns=list(rett.columns) + factor_name_list)
                continue

            data = data[snap].set_index('PubDate')
            data.columns = ['stock'] + factor_name_list
            # The query is unsorted, so order by publication date explicitly
            # (stable sort) before taking the last record of each stock.
            data = data.sort_index(kind='mergesort')
            data = data.drop_duplicates(subset='stock', keep='last').set_index('stock')

            # Fold this collection's columns into the running result.
            rett = rett.merge(data, left_index=True, right_index=True, how='outer')

        # Finalise: float values, structured records, first field named 'stock'.
        rett = rett.astype(float).to_records()
        rett.dtype.names = ['stock'] + list(rett.dtype.names)[1:]

        return numpy.array(rett)
Example #8
0
    def fix_factor(self, stock_list: list or str, factor: f,
                   start_date: datetime.date or str, end_date: datetime.date
                   or str):
        """Fetch one factor for many stocks by delegating to its mark handler.

        Args:
            stock_list: One stock code or a list of codes; normalised with
                ``convert_11code``.
            factor: The factor to fetch (an object exposing ``.name``, or a
                sequence whose first element does).
            start_date: Range start; normalised with ``convert2datetime``.
            end_date: Range end; normalised with ``convert2datetime``.

        Returns:
            Whatever the handler's ``fix_factor`` returns, passed through
            unchanged. ``None`` when the factor maps to no mark (a message is
            printed) or when no handler is registered for the mark.
        """
        start = time.time()
        # Normalise arguments.
        stock_list = convert_11code(stock_list)
        start_date = convert2datetime(start_date)
        end_date = convert2datetime(end_date)

        # The original read `factor.name` on one path and `factor[0].name` on
        # another, so one of them failed for any given input type. Resolve the
        # display name once, accepting either a single factor or a sequence.
        if hasattr(factor, 'name'):
            factor_name = factor.name
        elif factor:
            factor_name = factor[0].name
        else:
            factor_name = ''

        mark2factor_map = dis_mark2factor_map(factor, self.factor2mark_map)
        if not mark2factor_map:
            print('未收录因子' + factor_name + ',或您没有获取该因子权限,请联系管理员。')
            return
        # Only the first mark is used: this entry point serves a single factor.
        mark = list(mark2factor_map.keys())[0]

        instance = self.mark2instance_map.get(mark, None)
        ret = instance.fix_factor(stock_list, factor, start_date,
                                  end_date) if instance else None
        end = time.time()
        print(factor_name)
        print('fix_factor耗时:', end - start)
        # No slicing or type conversion is needed for this interface.
        return ret
    def fix_symbol(self, stock: str, factors: list or f, time: str
                   or datetime.date, frequency: (1, 2, 3, 4)):
        """Return several factors for one stock as a date-indexed structured array.

        Args:
            stock: Stock code; the first element returned by
                ``convert_11code`` is used.
            factors: Iterable of factor objects exposing ``.name``.
            time: Currently unused (see the review note below). Note that this
                parameter shadows the ``time`` module inside the method.
            frequency: Currently unused.

        Returns:
            A ``numpy.ndarray`` with a ``'date'`` field (``uint32``) and one
            ``<f8`` field per factor, initialised to NaN and filled from the
            records of every collection.
        """
        stock = convert_11code(stock)[0]
        collection2factor_map = dis_collection2factor_map(
            factors, self.factor2collection_map)

        # Test time window.
        # NOTE(review): hard-coded; `time` and `frequency` are ignored.
        start_date = datetime.datetime(2010, 1, 1)
        end_date = datetime.datetime(2015, 3, 31)

        trade_days = TradeCalendar().calendar(start_date, end_date)

        dtypes = [("date", "uint32")]
        for ft in factors:
            dtypes.append((ft.name, "<f8"))

        # Build the result once, before the collection loop. It used to be
        # rebuilt per collection, which discarded earlier collections and left
        # `result` undefined when the map was empty.
        # `numpy.NAN` was removed in NumPy 2.0; `numpy.nan` works everywhere.
        result = numpy.full((trade_days.shape[0], ),
                            numpy.nan,
                            dtype=dtypes)
        result["date"] = trade_days

        # Cache of date -> row index in `result`, shared by all collections.
        indexes = {}

        for collection in collection2factor_map:
            factor_name_list = gen_factor_name_list(
                collection2factor_map[collection])
            snap = [
                'PubDate',
            ]
            snap.extend(factor_name_list)
            # Projection: requested fields only, without Mongo's `_id`.
            doc_snap = {k: 1 for k in snap}
            doc_snap["_id"] = 0

            db_coll = connect_coll(collection, self._db)
            data = db_coll.find(
                {
                    'SecuCode': stock,
                    "PubDate": {
                        "$gte": start_date,
                        "$lte": end_date
                    }
                }, doc_snap).sort("PubDate", pymongo.ASCENDING)

            for d in data:
                t = yyyymmdd_date(d["PubDate"])
                idx = indexes.get(t)
                # `is None` rather than `not idx`: row 0 is a valid cached
                # index and must not trigger another lookup.
                if idx is None:
                    idx = find_date_in_array(t, result["date"])
                    if idx == -1:
                        system_log.warning(
                            f"[Finance.fix_symbol] date index not found. record={d}"
                        )
                        continue
                    indexes[t] = idx
                for ft in factor_name_list:
                    result[idx][ft] = d[ft]
        return result