def __call__(self, center: float):
    """Turn this boundary definition into a concrete bound around ``center``.

    * ``ABSOLUTE``: ``self.val`` is an offset added to ``center``.
    * ``MAXABSREL`` / ``MINABSREL``: ``self.val[0]`` is an absolute threshold
      and ``self.val[1]`` a relative one (scaled by ``abs(center)``).
      ``MAXABSREL`` keeps the candidate farther from ``center``,
      ``MINABSREL`` the one closer to it.
    * any other type (the relative case, judging by the error message):
      ``center`` must be non-negative and the result is ``self.val * center``.

    :param center: the value the bound is built around.
    :return: the bound, or ``None`` when the type is ``MAXABSREL`` /
        ``MINABSREL`` and the direction is neither ``LOWER`` nor ``UPPER``.
    :raises ValueError: in the fall-through case when ``center`` is negative.
    """
    if self.b_type == BoundaryType.ABSOLUTE:
        return self.val + center

    keep_farthest = self.b_type == BoundaryType.MAXABSREL
    if keep_farthest or self.b_type == BoundaryType.MINABSREL:
        abs_threshold = self.val[0]
        rel_threshold = self.val[1]

        if self.direction == BoundaryDirection.LOWER:
            candidates = (center - abs(center) * rel_threshold,
                          center - abs_threshold)
            # below the centre, "farther" means smaller
            pick = min if keep_farthest else max
        elif self.direction == BoundaryDirection.UPPER:
            candidates = (center + abs(center) * rel_threshold,
                          center + abs_threshold)
            # above the centre, "farther" means larger
            pick = max if keep_farthest else min
        else:
            # unrecognised direction: nothing is computed
            return None

        return pick(*candidates)

    require(center >= 0., ValueError,
            "relative bounds only support positive back bone value")
    return self.val * center
def __init__(self, features=None, weights: dict = None, fit_target=None):
    """Set up the model and, when weights are supplied, its implementation.

    :param features: forwarded to the parent constructor.
    :param weights: mapping looked up by every name in ``self.features``
        (presumably populated by the parent constructor -- confirm there).
        When empty or ``None`` no implementation object is created here.
    :param fit_target: forwarded to the parent constructor.
    :raises ValueError: if both ``features`` and ``weights`` are given but
        their lengths differ.
    """
    super().__init__(features=features, fit_target=fit_target)

    neither_missing = not (features is None or weights is None)
    if neither_missing:
        require(len(features) == len(weights),
                ValueError,
                "length of features is not equal to length of weights")

    if not weights:
        return

    # Order the weights the same way as the features.
    ordered_weights = [weights[feature] for feature in self.features]
    self.impl = ConstLinearModelImpl(np.array(ordered_weights))
def _validation(self):
    """Check that ``self.b_type`` and ``self.direction`` hold known values.

    :raises ValueError: if the boundary type is not one of ``ABSOLUTE``,
        ``RELATIVE``, ``MAXABSREL``, ``MINABSREL``, or if the direction is
        neither ``LOWER`` nor ``UPPER``.
    """
    known_types = [
        BoundaryType.ABSOLUTE,
        BoundaryType.RELATIVE,
        BoundaryType.MAXABSREL,
        BoundaryType.MINABSREL,
    ]
    type_ok = self.b_type in known_types
    type_msg = "Boundary Type {0} is not recognized".format(self.b_type)
    require(type_ok, ValueError, type_msg)

    known_directions = (BoundaryDirection.LOWER, BoundaryDirection.UPPER)
    direction_ok = any(self.direction == candidate
                       for candidate in known_directions)
    direction_msg = "Boundary direction {0} is not recognized".format(
        self.direction)
    require(direction_ok, ValueError, direction_msg)
def __init__(self,
             bounds: Dict[str, BoxBoundary],
             cons_mat: pd.DataFrame,
             backbone: np.ndarray = None):
    """Store the constraint definition and check it against the backbone.

    :param bounds: boundary objects keyed by constraint name.
    :param cons_mat: constraint matrix; only columns whose name also appears
        in ``bounds`` are listed in ``self.names``.
    :param backbone: optional vector; when given it must have one entry per
        row of ``cons_mat``.
    :raises ValueError: if ``backbone`` is given and its length differs from
        the number of rows of ``cons_mat``.
    """
    # Names present on both sides; built from a set, so the order is arbitrary.
    self.names = list(
        set(bounds.keys()).intersection(set(cons_mat.columns)))
    self.bounds = bounds
    self.cons_mat = cons_mat
    self.backbone = backbone

    # `require` takes (condition, exception type, message): the exception
    # type must be passed explicitly, otherwise the message string ends up in
    # the exception slot and the intended error is never raised.
    require(
        backbone is None or cons_mat.shape[0] == len(backbone),
        ValueError,
        "length of back bond should be same as number of rows of cons_mat")
def factor_translator(factor_pool):
    """Normalise a factor specification into ``(factors, dependencies)``.

    Accepted inputs:

    * falsy value (``None``, ``''``, ``[]``, ``{}``): returns ``(None, None)``;
    * ``str``: the name maps to itself and is its own single dependency;
    * ``SecurityValueHolder``: keyed by ``str(expression)``, dependencies are
      its ``fields``;
    * ``dict``: keys must be strings, values strings or value holders; the
      dict itself is returned as the first element;
    * ``list``: strings and value holders are collected as above; elements of
      any other type are skipped.

    :param factor_pool: the factor specification in one of the forms above.
    :return: tuple of (name -> expression dict, sorted list of dependencies).
    :raises ValueError: for an unsupported container type, or for a dict with
        a non-string key or an unsupported value.
    """
    if not factor_pool:
        return None, None

    if isinstance(factor_pool, str):
        return {factor_pool: factor_pool}, [factor_pool]

    if isinstance(factor_pool, SecurityValueHolder):
        return {str(factor_pool): factor_pool}, sorted(factor_pool.fields)

    if isinstance(factor_pool, dict):
        dependency = set()
        for k, v in factor_pool.items():
            require(isinstance(k, str), ValueError,
                    'factor_name {0} should be string.'.format(k))
            require(
                isinstance(v, (SecurityValueHolder, str)),
                ValueError,
                'expression {0} should be a value hodler or a string.'.format(
                    v))
            if isinstance(v, str):
                dependency.add(v)
            else:
                dependency.update(v.fields)
        return factor_pool, sorted(dependency)

    if isinstance(factor_pool, list):
        factor_dict = {}
        dependency = set()
        for f in factor_pool:
            if isinstance(f, str):
                factor_dict[f] = f
                dependency.add(f)
            elif isinstance(f, SecurityValueHolder):
                factor_dict[str(f)] = f
                dependency.update(f.fields)
        return factor_dict, sorted(dependency)

    raise ValueError(
        '{0} is not in valid format as factors'.format(factor_pool))
def __init__(self,
             cost: np.ndarray,
             variance_target: float,
             factor_var: np.ndarray = None,
             factor_load: np.ndarray = None,
             factor_special: np.ndarray = None,
             variance: np.ndarray = None,
             cons_matrix: np.ndarray = None,
             lower_bound: Union[float, np.ndarray] = None,
             upper_bound: Union[float, np.ndarray] = None):
    """Configure the optimizer with a variance target and a risk description.

    The risk description is either the factor triple (``factor_var``,
    ``factor_load``, ``factor_special``) or a plain ``variance`` input.
    When ``factor_var`` is given the factor form is used and ``variance``
    is ignored.

    :param cost: forwarded to the parent constructor.
    :param variance_target: must be non-negative; stored as ``_var_target``.
    :param factor_var: selects the factor form when not ``None``.
    :param factor_load: stored alongside ``factor_var`` (not validated here).
    :param factor_special: stored alongside ``factor_var`` (not validated here).
    :param variance: used only when ``factor_var`` is ``None``.
    :param cons_matrix: forwarded to the parent constructor.
    :param lower_bound: forwarded to the parent constructor.
    :param upper_bound: forwarded to the parent constructor.
    :raises ValueError: if both ``factor_var`` and ``variance`` are ``None``,
        or if ``variance_target`` is negative.
    """
    super().__init__(cost, cons_matrix, lower_bound, upper_bound)

    has_factor_model = factor_var is not None
    require(has_factor_model or variance is not None,
            ValueError,
            "factor var or total var should not all be empty")

    if has_factor_model:
        self._factor_var = factor_var
        self._factor_load = factor_load
        self._factor_special = factor_special
    else:
        self._variance = variance
    self._use_factor = has_factor_model

    require(variance_target >= 0,
            ValueError,
            "variance target can't be negative")
    self._var_target = variance_target
def _prepare(self):
    """Create the decision variable and the list of constraints on it.

    Builds a ``cp.Variable`` of size ``self._n`` and then, for each piece
    that is not ``None``:

    * ``self._lower_bound`` / ``self._upper_bound``: a real scalar or a
      sequence with ``self._n`` entries, added as ``x >= lower`` /
      ``x <= upper``;
    * ``self._cons_matrix``: a matrix with ``self._n + 2`` columns. The first
      ``self._n`` columns are coefficients, column ``self._n`` is the lower
      limit and column ``self._n + 1`` the upper limit of each row.

    :return: tuple ``(x, constraints)``.
    :raises ValueError: if a bound or the constraint matrix has the wrong size.
    """
    # Function-scope import keeps this method self-contained.
    import numbers

    def _check_bound(bound, message):
        # Accept any real scalar (int, float, numpy scalar), not just
        # `float`: an integer bound such as 0 has no len() and would
        # otherwise fail with TypeError before the size check.
        require(isinstance(bound, numbers.Real) or len(bound) == self._n,
                ValueError,
                message)

    x = cp.Variable(self._n)
    constraints = []

    if self._lower_bound is not None:
        _check_bound(
            self._lower_bound,
            "lower bounds must be a single value or an array with same size as x")
        constraints.append(x >= self._lower_bound)

    if self._upper_bound is not None:
        _check_bound(
            self._upper_bound,
            "upper bounds must be a single value or an array with same size as x")
        constraints.append(x <= self._upper_bound)

    if self._cons_matrix is not None:
        require(self._cons_matrix.shape[1] == self._n + 2,
                ValueError,
                "constraints must be a matrix with size as x + 2")
        coefficients = self._cons_matrix[:, :self._n]
        constraints.append(coefficients @ x >= self._cons_matrix[:, self._n])
        constraints.append(coefficients @ x <= self._cons_matrix[:, self._n + 1])

    return x, constraints
def bounds(self, center):
    """Return the ``(lower, upper)`` pair obtained for ``center``.

    Both values come from calling ``self.lower`` and ``self.upper`` with
    ``center``.

    :param center: value passed to ``self.lower`` and ``self.upper``.
    :return: tuple ``(lower, upper)``.
    :raises ValueError: if the lower value exceeds the upper value.
    """
    lower_value = self.lower(center)
    upper_value = self.upper(center)
    require(lower_value <= upper_value,
            ValueError,
            "lower bound should be lower then upper bound")
    return lower_value, upper_value
def fetch_train_phase(engine,
                      alpha_factors: Union[Transformer, Iterable[object]],
                      ref_date,
                      frequency,
                      universe,
                      batch=1,
                      neutralized_risk: Iterable[str] = None,
                      risk_model: str = 'short',
                      pre_process: Iterable[object] = None,
                      post_process: Iterable[object] = None,
                      warm_start: int = 0,
                      fit_target: Union[Transformer, object] = None) -> dict:
    """Fetch and pre-process the training sample that precedes ``ref_date``.

    :param engine: data source; must provide ``fetch_factor_range``,
        ``fetch_dx_return_range`` and ``fetch_factor_range_forward``.
    :param alpha_factors: a ``Transformer`` or anything ``Transformer(...)``
        accepts.
    :param ref_date: reference date as a ``'%Y-%m-%d'`` string (it is parsed
        with that format below).
    :param frequency: period specification accepted by ``Period`` and
        ``makeSchedule``.
    :param universe: passed through to the engine calls.
    :param batch: number of schedule dates in the training window.
    :param neutralized_risk: passed through to ``_merge_df``.
    :param risk_model: passed through to ``_merge_df``.
    :param pre_process: passed to ``factor_processing`` for both x and y.
    :param post_process: passed to ``factor_processing`` for both x and y.
    :param warm_start: extra periods fetched before the training window.
    :param fit_target: when ``None`` the target is taken from
        ``engine.fetch_dx_return_range``; otherwise it is fetched with
        ``engine.fetch_factor_range_forward`` for this factor.
    :return: ``{'x_names': [...], 'train': {'x': DataFrame, 'y': ..., 'code': ...}}``.
    :raises ValueError: if the last available date equals ``ref_date`` and
        there is no earlier date to train on.
    """
    if isinstance(alpha_factors, Transformer):
        transformer = alpha_factors
    else:
        transformer = Transformer(alpha_factors)

    # Go back (warm_start + batch) periods from the reference date.
    p = Period(frequency)
    p = Period(length=-(warm_start + batch) * p.length(), units=p.units())

    start_date = advanceDateByCalendar('china.sse', ref_date, p,
                                       BizDayConventions.Following)
    dates = makeSchedule(start_date,
                         ref_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Backward)

    horizon = map_freq(frequency)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates)
    if fit_target is None:
        target_df = engine.fetch_dx_return_range(universe,
                                                 dates=dates,
                                                 horizon=horizon)
    else:
        # One extra date is requested for the forward-looking target, then
        # the frame is cut back to the schedule dates.
        one_more_date = advanceDateByCalendar('china.sse', dates[-1],
                                              frequency)
        target_df = engine.fetch_factor_range_forward(
            universe, factors=fit_target, dates=dates + [one_more_date])
        target_df = target_df[target_df.trade_date.isin(dates)]
        # Forward-fill within each code. `ffill()` replaces the deprecated
        # `fillna(method='pad')`; `group_keys=False` keeps 'code' out of the
        # result index so the merge on 'code' below stays unambiguous.
        target_df = target_df.groupby('code', group_keys=False).apply(
            lambda x: x.ffill())

    df = pd.merge(factor_df, target_df, on=['trade_date', 'code']).dropna()

    target_df = df[['trade_date', 'code', 'dx']]
    factor_df = df[['trade_date', 'code'] + transformer.names]

    target_df, dates, date_label, risk_exp, x_values, y_values, _, _, codes = \
        _merge_df(engine, transformer.names, factor_df, target_df, universe,
                  dates, risk_model, neutralized_risk)

    if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
        # The reference date itself has data: train on what precedes it.
        require(
            len(dates) >= 2, ValueError,
            "No previous data for training for the date {0}".format(ref_date))
        end = dates[-2]
        start = dates[-batch - 1] if batch <= len(dates) - 1 else dates[0]
    else:
        end = dates[-1]
        start = dates[-batch] if batch <= len(dates) else dates[0]

    index = (date_label >= start) & (date_label <= end)
    this_raw_x = x_values[index]
    this_raw_y = y_values[index]
    this_code = codes[index]
    if risk_exp is not None:
        this_risk_exp = risk_exp[index]
    else:
        this_risk_exp = None

    ne_x = factor_processing(this_raw_x,
                             pre_process=pre_process,
                             risk_factors=this_risk_exp,
                             post_process=post_process)

    ne_y = factor_processing(this_raw_y,
                             pre_process=pre_process,
                             risk_factors=this_risk_exp,
                             post_process=post_process)

    ret = dict()
    ret['x_names'] = transformer.names
    ret['train'] = {
        'x': pd.DataFrame(ne_x, columns=transformer.names),
        'y': ne_y,
        'code': this_code
    }

    return ret