Ejemplo n.º 1
0
 def __QS_calcData__(self, raw_data, factor_names, ids, dts, args=None):
     """Pivot long-format raw rows into a factor panel, optionally filling gaps.

     Parameters:
         raw_data: table holding a "日期" (date) column, an "ID" column and
             one column per factor. Date values are parsed with the
             "%Y%m%d" format.
         factor_names: factor names used as the items of the result.
         ids: identifiers used as the minor axis of the result.
         dts: time points used as the major axis of the result.
         args: optional dict of per-call options. The key "回溯天数"
             (look-back days) overrides ``self.LookBack``. ``None`` is
             treated as an empty dict.

     Returns:
         A ``pd.Panel`` with items ``factor_names``, major axis ``dts`` and
         minor axis ``ids``. It is empty when ``raw_data`` has no rows or
         when none of ``ids`` occurs in the raw data.
     """
     # Use a None sentinel instead of a shared mutable default dict.
     if args is None:
         args = {}
     if raw_data.shape[0] == 0:
         return pd.Panel(items=factor_names, major_axis=dts, minor_axis=ids)
     raw_data = raw_data.set_index(["日期", "ID"])
     DataType = self.getFactorMetaData(factor_names=factor_names,
                                       key="DataType")
     Data = {}
     for iFactorName in raw_data.columns:
         # One date x ID table per factor column.
         iRawData = raw_data[iFactorName].unstack()
         if DataType[iFactorName] == "double":
             iRawData = iRawData.astype("float")
         Data[iFactorName] = iRawData
     Data = pd.Panel(Data).loc[factor_names]
     # The date labels are "%Y%m%d" strings; convert them to datetimes.
     Data.major_axis = [
         dt.datetime.strptime(iDate, "%Y%m%d") for iDate in Data.major_axis
     ]
     if Data.minor_axis.intersection(ids).shape[0] == 0:
         return pd.Panel(items=factor_names, major_axis=dts, minor_axis=ids)
     LookBack = args.get("回溯天数", self.LookBack)
     if LookBack == 0:
         return Data.loc[:, dts, ids]
     # Put the requested time points on the axis next to the observed ones
     # so that the fill step can carry values onto them.
     AllDTs = Data.major_axis.union(dts).sort_values()
     Data = Data.loc[:, AllDTs, ids]
     # Look-back window converted from days to seconds.
     Limits = LookBack * 24.0 * 3600
     for i in range(len(Data.items)):
         # NOTE(review): fillNaByLookback is defined elsewhere; presumably it
         # fills missing values from earlier rows within `lookback` seconds.
         Data.iloc[i] = fillNaByLookback(Data.iloc[i], lookback=Limits)
     return Data.loc[:, dts]
Ejemplo n.º 2
0
def _adjustData(data, look_back, factor_names, ids, dts):
    """Select factors/IDs from ``data`` and fill gaps by looking back in time.

    Parameters:
        data: input accepted by ``pd.Panel``.
        look_back: look-back window; ``0`` disables filling, an infinite
            value pads without limit, any other value is multiplied by
            ``24 * 3600`` and handed to ``fillNaByLookback``.
        factor_names: items to keep, in this order.
        ids: minor-axis labels to keep, or ``None`` to keep all of them.
        dts: major-axis labels to return, or ``None`` to return all of them.

    Returns:
        The adjusted ``pd.Panel``.
    """
    id_selector = slice(None) if ids is None else ids
    data = pd.Panel(data).loc[factor_names, :, id_selector]
    restrict_dts = dts is not None

    if look_back == 0:
        return data.loc[:, dts] if restrict_dts else data

    if restrict_dts:
        # Add the requested time points to the axis before filling.
        merged_dts = data.major_axis.union(dts).sort_values()
        data = data.loc[:, merged_dts, :]

    if np.isinf(look_back):
        for position in range(len(data.items)):
            data.iloc[position].fillna(method="pad", inplace=True)
    else:
        window_seconds = look_back * 24.0 * 3600
        filled = {
            name: fillNaByLookback(frame, lookback=window_seconds)
            for name, frame in dict(data).items()
        }
        data = pd.Panel(filled).loc[factor_names]

    return data.loc[:, dts] if restrict_dts else data
Ejemplo n.º 3
0
 def __QS_calcData__(self, raw_data, factor_names, ids, dts, args=None):
     """Aggregate raw rows per (date, ID) and return them as a factor panel.

     Parameters:
         raw_data: table holding a "日期" (date) column, an "ID" column and
             one column per requested factor. Date values are parsed with
             the "%Y%m%d" format.
         factor_names: factor columns to aggregate; items of the result.
         ids: identifiers used as the minor axis of the result.
         dts: time points used as the major axis of the result.
         args: optional dict of per-call options. "算子" (operator)
             overrides ``self.Operator`` and "回溯天数" (look-back days)
             overrides ``self.LookBack``. ``None`` is treated as an empty
             dict.

     Returns:
         A ``pd.Panel`` with items ``factor_names``, major axis ``dts`` and
         minor axis ``ids``. It is empty when ``raw_data`` has no rows.
     """
     # Use a None sentinel instead of a shared mutable default dict.
     if args is None:
         args = {}
     if raw_data.shape[0] == 0:
         return pd.Panel(items=factor_names, major_axis=dts, minor_axis=ids)
     raw_data = raw_data.set_index(["日期", "ID"])
     Operator = args.get("算子", self.Operator)
     if Operator is None:
         # Default aggregation: collect every value of a (date, ID) group
         # into a list.
         Operator = (lambda x: x.tolist())
     Data = {}
     for iFactorName in factor_names:
         iGrouped = raw_data[iFactorName].groupby(axis=0, level=[0, 1])
         Data[iFactorName] = iGrouped.apply(Operator).unstack()
     Data = pd.Panel(Data).loc[factor_names, :, ids]
     # The date labels are "%Y%m%d" strings; convert them to datetimes.
     Data.major_axis = [
         dt.datetime.strptime(iDate, "%Y%m%d") for iDate in Data.major_axis
     ]
     LookBack = args.get("回溯天数", self.LookBack)
     if LookBack == 0:
         return Data.loc[:, dts, ids]
     # Put the requested time points on the axis next to the observed ones
     # so that the fill step can carry values onto them.
     AllDTs = Data.major_axis.union(dts).sort_values()
     Data = Data.loc[:, AllDTs, ids]
     # Look-back window converted from days to seconds.
     Limits = LookBack * 24.0 * 3600
     for i in range(len(Data.items)):
         # NOTE(review): fillNaByLookback is defined elsewhere; presumably it
         # fills missing values from earlier rows within `lookback` seconds.
         Data.iloc[i] = fillNaByLookback(Data.iloc[i], lookback=Limits)
     return Data.loc[:, dts]
Ejemplo n.º 4
0
def adjustDataDTID(data,
                   look_back,
                   factor_names,
                   ids,
                   dts,
                   only_start_lookback=False,
                   only_lookback_nontarget=False,
                   only_lookback_dt=False,
                   logger=None):
    """Align a factor panel to ``dts`` x ``ids`` and fill gaps by look-back.

    Parameters:
        data: ``pd.Panel`` indexed by factor (items), time point (major
            axis) and ID (minor axis).
        look_back: look-back window. ``0`` disables filling; otherwise the
            value is multiplied by ``24 * 3600`` (days to seconds) to form
            the limit passed to ``fillNaByLookback``. An infinite value with
            neither ``only_lookback_*`` flag set pads without limit.
        factor_names: item order of the returned panel.
        ids: minor-axis labels of the returned panel.
        dts: major-axis labels of the returned panel (indexable sequence).
        only_start_lookback: fill only at the first requested time point.
        only_lookback_nontarget: per-time-point limits are capped using the
            day gaps computed from the target mask, so that filling uses
            only data from non-target time points.
        only_lookback_dt: fill only requested time points that are absent
            from ``data.major_axis``, with per-time-point limits.
        logger: optional logger; receives a warning when ``look_back == 0``
            and the requested labels are not all present in ``data``.

    Returns:
        A ``pd.Panel`` restricted to ``dts`` on the major axis.
    """
    if look_back == 0:
        try:
            return data.loc[:, dts, ids]
        except KeyError:
            if logger is not None:
                logger.warning("待提取的因子 %s 数据超出了原始数据的时点或 ID 范围, 将填充缺失值!" %
                               (str(list(data.items)), ))
            return pd.Panel(items=factor_names, major_axis=dts, minor_axis=ids)
    AllDTs = data.major_axis.union(dts).sort_values()
    AdjData = data.loc[:, AllDTs, ids]
    if only_start_lookback:  # look back to fill gaps only at the first time point
        AllAdjData = AdjData
        AdjData = AllAdjData.loc[:, :dts[0], :]
        TargetDTs = dts[:1]
    else:
        TargetDTs = dts
    if only_lookback_dt:
        # Keep only the requested time points that the raw data lacks.
        TargetDTs = sorted(set(TargetDTs).difference(data.major_axis))
    # Explicit length check: truth-testing an array/Index with several
    # elements raises instead of answering "non-empty".
    if len(TargetDTs) > 0:
        # Scalar limit in seconds; may become a per-cell DataFrame below.
        Limits = look_back * 24.0 * 3600
        if only_lookback_nontarget:  # fill using only data from non-target time points
            # Mask: True at target time points on the merged axis.
            Mask = pd.Series(np.full(shape=(AdjData.shape[1], ),
                                     fill_value=False,
                                     dtype=bool),
                             index=AdjData.major_axis)
            Mask[TargetDTs] = True
            # FillMask: target points directly preceded by a non-target point.
            FillMask = Mask.copy()
            FillMask[Mask.astype("int").diff() != 1] = False
            # Gap in days between consecutive axis entries (0 for the first).
            TimeDelta = pd.Series(np.r_[0,
                                        np.diff(Mask.index.values) /
                                        np.timedelta64(1, "D")],
                                  index=Mask.index)
            # Zero the gap at target points that do not start a run and at
            # the first non-target point after a run.
            TimeDelta[(Mask & (~FillMask)) |
                      (Mask.astype("int").diff() == -1)] = 0
            TimeDelta = TimeDelta.cumsum().loc[TargetDTs]
            FirstDelta = TimeDelta.iloc[0]
            TimeDelta = TimeDelta.diff().fillna(value=0)
            TimeDelta.iloc[0] = FirstDelta
            # Days -> seconds, capped by the global limit, one column per ID.
            NewLimits = np.minimum(TimeDelta.values * 24.0 * 3600,
                                   Limits).reshape(
                                       (TimeDelta.shape[0],
                                        1)).repeat(AdjData.shape[2], axis=1)
            Limits = pd.DataFrame(0,
                                  index=AdjData.major_axis,
                                  columns=AdjData.minor_axis)
            Limits.loc[TargetDTs, :] = NewLimits
        if only_lookback_dt:
            Mask = pd.Series(np.full(shape=(AdjData.shape[1], ),
                                     fill_value=False,
                                     dtype=bool),
                             index=AdjData.major_axis)
            Mask[TargetDTs] = True
            FillMask = Mask.copy()
            FillMask[Mask.astype("int").diff() != 1] = False
            FillMask = FillMask.loc[TargetDTs]
            TimeDelta = pd.Series(np.r_[0,
                                        np.diff(Mask.index.values) /
                                        np.timedelta64(1, "D")],
                                  index=Mask.index).loc[TargetDTs]
            # Combine the cumulative gaps with the values padded forward
            # from each run start (the points where FillMask is True).
            NewLimits = TimeDelta.cumsum().loc[TargetDTs]
            Temp = NewLimits.copy()
            Temp[~FillMask] = np.nan
            Temp = Temp.fillna(method="pad")
            TimeDelta[~FillMask] = np.nan
            NewLimits = NewLimits - Temp + TimeDelta.fillna(method="pad")
            if isinstance(Limits, pd.DataFrame):
                # Limits already holds seconds (set by the nontarget branch);
                # convert NewLimits from days to seconds before comparing.
                Limits.loc[TargetDTs, :] = np.minimum(
                    (NewLimits.values * 24.0 * 3600).reshape(
                        (NewLimits.shape[0], 1)).repeat(AdjData.shape[2],
                                                        axis=1),
                    Limits.loc[TargetDTs].values)
            else:
                NewLimits = np.minimum(NewLimits.values * 24.0 * 3600,
                                       Limits).reshape(
                                           (NewLimits.shape[0],
                                            1)).repeat(AdjData.shape[2],
                                                       axis=1)
                Limits = pd.DataFrame(0,
                                      index=AdjData.major_axis,
                                      columns=AdjData.minor_axis)
                Limits.loc[TargetDTs, :] = NewLimits
        if np.isinf(look_back) and (not only_lookback_nontarget) and (
                not only_lookback_dt):
            # Unlimited look-back with no per-point limits: plain pad.
            for i in range(len(AdjData.items)):
                AdjData.iloc[i].fillna(method="pad", inplace=True)
        else:
            AdjData = dict(AdjData)
            for iFactorName in AdjData:
                # NOTE(review): fillNaByLookback is defined elsewhere;
                # presumably `lookback` is a limit in seconds (scalar or
                # per-cell DataFrame) - confirm against its definition.
                AdjData[iFactorName] = fillNaByLookback(AdjData[iFactorName],
                                                        lookback=Limits)
            AdjData = pd.Panel(AdjData).loc[factor_names]
    if only_start_lookback:
        # Write the filled first time point back into the full panel.
        AllAdjData.loc[:, dts[0], :] = AdjData.loc[:, dts[0], :]
        return AllAdjData.loc[:, dts]
    else:
        return AdjData.loc[:, dts]