def __init__(self, window=float('inf'), mu_estimator=None, cov_estimator=None,
             min_history=None, max_leverage=1., method='mpt', q=0.01, gamma=0.,
             allow_cash=False, **kwargs):
    """
    :param window: Window for calculating mean and variance. Use float('inf')
        for entire history (stored internally as ``int(1e+8)``).
    :param mu_estimator: Expected-return estimator. ``None`` or 'sharpe' select
        ``MuEstimator()``, 'historical' selects ``HistoricalEstimator(window)``;
        any non-string object is stored as given.
    :param cov_estimator: Covariance estimator. ``None`` means 'empirical'.
        Accepted names are 'empirical', 'ledoit-wolf', 'graph-lasso' and 'oas';
        ``BaseEstimator`` instances are wrapped in ``CovarianceEstimator``;
        any other object is stored as given.
    :param min_history: Use zero weights for first min_history periods.
        Defaults to 50 for an infinite window, otherwise to ``window``.
    :param max_leverage: Max leverage to use.
    :param method: optimization objective - can be "mpt", "sharpe" and "variance"
    :param q: depends on method, e.g. for "mpt" it is risk aversion parameter
        (higher means lower aversion to risk)
    :param gamma: Penalize changing weights (can be number or Series with
        individual weights such as fees)
    :param allow_cash: Allow holding cash (weights don't have to sum to 1)
    :raises NotImplementedError: if an estimator name is not recognised.
    """
    if np.isinf(window):
        # A huge finite integer stands in for "entire history".
        window = int(1e+8)
        min_history = min_history or 50
    else:
        min_history = min_history or window

    super(MPT, self).__init__(min_history=min_history, **kwargs)
    self.window = window
    self.max_leverage = max_leverage
    self.method = method
    self.q = q
    self.gamma = gamma
    self.allow_cash = allow_cash

    if cov_estimator is None:
        cov_estimator = 'empirical'

    if isinstance(cov_estimator, basestring):
        if cov_estimator == 'empirical':
            # use pandas covariance in init_step
            cov_estimator = covariance.EmpiricalCovariance()
        elif cov_estimator == 'ledoit-wolf':
            cov_estimator = covariance.LedoitWolf()
        elif cov_estimator == 'graph-lasso':
            # scikit-learn renamed GraphLasso to GraphicalLasso; prefer the new
            # name and fall back to the old one on older releases.
            graph_lasso_cls = (getattr(covariance, 'GraphicalLasso', None)
                               or covariance.GraphLasso)
            cov_estimator = graph_lasso_cls()
        elif cov_estimator == 'oas':
            cov_estimator = covariance.OAS()
        else:
            # NotImplemented is a non-callable constant; the exception class
            # is NotImplementedError.
            raise NotImplementedError(
                'Unknown covariance estimator {}'.format(cov_estimator))

    # handle sklearn models
    if isinstance(cov_estimator, BaseEstimator):
        cov_estimator = CovarianceEstimator(cov_estimator)

    if mu_estimator is None:
        mu_estimator = MuEstimator()

    if isinstance(mu_estimator, basestring):
        if mu_estimator == 'historical':
            mu_estimator = HistoricalEstimator(window)
        elif mu_estimator == 'sharpe':
            mu_estimator = MuEstimator()
        else:
            raise NotImplementedError(
                'Unknown mu estimator {}'.format(mu_estimator))

    self.cov_estimator = cov_estimator
    self.mu_estimator = mu_estimator
def _train(self, train_data, params, verbose):
    """Fit one graphical-lasso estimator per item of ``train_data``.

    :param train_data: iterable of datasets; each item is passed to ``fit``.
    :param params: mapping providing the 'alpha' and 'max_iter' settings.
    :param verbose: when truthy, print progress, failures and elapsed time.
    :return: ``(covs, None)`` where ``covs`` is the list of fitted
        ``covariance_`` matrices, or ``None`` if any step raised.
    """
    import sklearn.covariance as sk_cov

    if verbose:
        print("Training {} ...".format(self.name))
    start_time = time.time()
    try:
        # scikit-learn renamed GraphLasso to GraphicalLasso; prefer the new
        # name and fall back to the old one on older releases.
        graph_lasso_cls = (getattr(sk_cov, 'GraphicalLasso', None)
                           or sk_cov.GraphLasso)
        covs = []
        for x in train_data:
            est = graph_lasso_cls(alpha=params['alpha'],
                                  max_iter=params['max_iter'])
            est.fit(x)
            covs.append(est.covariance_)
    except Exception as e:
        # Deliberate best-effort: a failure is reported and yields covs=None.
        if verbose:
            # Format the exception itself: ``e.message`` does not exist on
            # Python 3, which made this handler raise AttributeError.
            print("\t{} failed with message: {}".format(self.name, e))
        covs = None
    finish_time = time.time()
    if verbose:
        print("\tElapsed time {:.1f}s".format(finish_time - start_time))
    return covs, None
def computeCovar(bed, shrinkMethod, fitIndividuals):
    """Build the matrix XXT from ``bed.val``, optionally with shrinkage.

    Two paths are selected by ``shrinkMethod``:

    * 'lw', 'oas', 'l1', 'cv': a scikit-learn covariance estimator is fitted
      on ``bed.val[fitIndividuals, :].T`` to choose the regularisation
      (``shrinkage_``, ``alpha_`` or the grid-search winner). A second
      estimator with that fixed value is then fitted on ``bed.val.T`` and its
      ``covariance_`` is multiplied by ``bed.val.shape[1]``.
    * anything else: XXT is computed with ``blas.dsyrk`` on ``bed.val``.
      If ``shrinkMethod`` parses as a positive float, or is 'mylw' (the
      coefficient is then estimated from the ``fitIndividuals`` sub-block),
      XXT is blended with ``bed.val.shape[1] * I``.

    :param bed: object exposing the data matrix as ``bed.val``.
    :param shrinkMethod: method name, or a value convertible to float.
    :param fitIndividuals: row selector used when estimating the shrinkage.
    :return: the (possibly shrunk) XXT matrix.
    """
    numCols = bed.val.shape[1]

    if shrinkMethod in ['lw', 'oas', 'l1', 'cv']:
        import sklearn.covariance as cov

        # scikit-learn renamed GraphLasso* to GraphicalLasso*; prefer the new
        # names and fall back to the old ones on older releases.
        graphLassoCls = getattr(cov, 'GraphicalLasso', None) or cov.GraphLasso

        t0 = time.time()
        print('Estimating shrunk covariance using %s estimator...' % shrinkMethod)

        if shrinkMethod == 'lw':
            covEstimator = cov.LedoitWolf(assume_centered=True,
                                          block_size=5 * bed.val.shape[0])
        elif shrinkMethod == 'oas':
            covEstimator = cov.OAS(assume_centered=True)
        elif shrinkMethod == 'l1':
            graphLassoCVCls = (getattr(cov, 'GraphicalLassoCV', None)
                               or cov.GraphLassoCV)
            covEstimator = graphLassoCVCls(assume_centered=True, verbose=True)
        elif shrinkMethod == 'cv':
            # ``import sklearn.covariance as cov`` does not bind ``sklearn``,
            # so GridSearchCV is imported explicitly (with a fallback for
            # releases that still ship ``sklearn.grid_search``).
            try:
                from sklearn.model_selection import GridSearchCV
            except ImportError:
                from sklearn.grid_search import GridSearchCV
            shrunkEstimator = cov.ShrunkCovariance(assume_centered=True)
            param_grid = {'shrinkage': [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99]}
            covEstimator = GridSearchCV(shrunkEstimator, param_grid)
        else:
            raise Exception('unknown covariance regularizer')

        # Choose the regularisation strength on the selected individuals only.
        covEstimator.fit(bed.val[fitIndividuals, :].T)

        if shrinkMethod == 'l1':
            alpha = covEstimator.alpha_
            print('l1 alpha chosen: %s' % alpha)
            covEstimator2 = graphLassoCls(alpha=alpha, assume_centered=True,
                                          verbose=True)
        else:
            if shrinkMethod == 'cv':
                # Was ``clf.best_params_``: ``clf`` is not defined here.
                shrinkEstimator = covEstimator.best_params_['shrinkage']
            else:
                shrinkEstimator = covEstimator.shrinkage_
            print('shrinkage estimator: %s' % shrinkEstimator)
            covEstimator2 = cov.ShrunkCovariance(shrinkage=shrinkEstimator,
                                                 assume_centered=True)

        # Refit on all rows of bed.val with the chosen regularisation.
        covEstimator2.fit(bed.val.T)
        XXT = covEstimator2.covariance_ * numCols
        print('Done in %0.2f seconds' % (time.time() - t0))

    else:
        print('Computing kinship matrix...')
        t0 = time.time()
        XXT = symmetrize(blas.dsyrk(1.0, bed.val, lower=1))
        print('Done in %0.2f seconds' % (time.time() - t0))

        # A numeric shrinkMethod is used directly as the shrinkage
        # coefficient; anything non-numeric disables it (-1).
        try:
            shrinkParam = float(shrinkMethod)
        except (TypeError, ValueError):
            shrinkParam = -1

        if shrinkMethod == 'mylw':
            XXT_fit = XXT[np.ix_(fitIndividuals, fitIndividuals)]
            # Sum of squared off-diagonal entries, scaled by numCols**2.
            sE2R = (np.sum(XXT_fit**2) - np.sum(np.diag(XXT_fit)**2)) / (numCols**2)
            #temp = (bed.val**2).dot((bed.val.T)**2)
            temp = symmetrize(
                blas.dsyrk(1.0, bed.val[fitIndividuals, :]**2, lower=1))
            sER2 = (temp.sum() - np.diag(temp).sum()) / numCols
            shrinkParam = (sER2 - sE2R) / (sE2R * (numCols - 1))

        if shrinkParam > 0:
            print('shrinkage estimator: %s' % (1 - shrinkParam))
            XXT = (1 - shrinkParam) * XXT + numCols * shrinkParam * np.eye(XXT.shape[0])

    return XXT
def __init__(
    self,
    window=None,
    mu_estimator=None,
    cov_estimator=None,
    mu_window=None,
    cov_window=None,
    min_history=None,
    bounds=None,
    max_leverage=1.0,
    method="mpt",
    q=0.01,
    gamma=0.0,
    optimizer_options=None,
    force_weights=None,
    **kwargs,
):
    """
    :param window: Window for calculating mean and variance. Use None for entire history.
    :param mu_estimator: Expected-return estimator. ``None`` or "sharpe" select
        ``SharpeEstimator()``, "historical" selects
        ``HistoricalEstimator(window=mu_window)``; any non-string object is
        stored as given.
    :param cov_estimator: Covariance estimator. ``None`` means "empirical".
        Accepted names are "empirical", "ledoit-wolf", "graph-lasso", "oas"
        and "single-index"; ``BaseEstimator`` instances are wrapped in
        ``CovarianceEstimator(..., window=cov_window)``.
    :param mu_window: Window for the "historical" mu estimator; falls back to ``window``.
    :param cov_window: Window for the covariance estimator; falls back to ``window``.
    :param min_history: Use zero weights for first min_periods. Default is 1 year
    :param bounds: Stored as ``self.bounds`` (``{}`` when falsy). Cannot be
        combined with ``max_leverage != 1``.
    :param max_leverage: Max leverage to use.
    :param method: optimization objective - can be "mpt", "sharpe" and "variance"
    :param q: depends on method, e.g. for "mpt" it is risk aversion parameter (higher means lower aversion to risk)
        from https://en.wikipedia.org/wiki/Modern_portfolio_theory#Efficient_frontier_with_no_risk-free_asset
        q=2 is equivalent to full-kelly, q=1 is equivalent to half kelly
    :param gamma: Penalize changing weights (can be number or Series with individual weights such as fees)
    :param optimizer_options: Stored as ``self.optimizer_options`` (``{}`` when falsy).
    :param force_weights: Stored as ``self.force_weights``.
    :raises NotImplementedError: if ``bounds`` is combined with a non-unit
        ``max_leverage`` or an estimator name is not recognised.
    """
    super().__init__(min_history=min_history, **kwargs)
    mu_window = mu_window or window
    cov_window = cov_window or window
    self.method = method
    self.q = q
    self.gamma = gamma
    self.bounds = bounds or {}
    self.force_weights = force_weights
    self.max_leverage = max_leverage
    self.optimizer_options = optimizer_options or {}

    # NotImplemented is a non-callable constant; the exception class is
    # NotImplementedError (here and in the two raises below).
    if bounds and max_leverage != 1:
        raise NotImplementedError(
            "max_leverage cannot be used with bounds, consider removing max_leverage and replace it with bounds1"
        )

    if cov_estimator is None:
        cov_estimator = "empirical"

    if isinstance(cov_estimator, string_types):
        if cov_estimator == "empirical":
            # use pandas covariance in init_step
            cov_estimator = covariance.EmpiricalCovariance()
        elif cov_estimator == "ledoit-wolf":
            cov_estimator = covariance.LedoitWolf()
        elif cov_estimator == "graph-lasso":
            # scikit-learn renamed GraphLasso to GraphicalLasso; prefer the
            # new name and fall back to the old one on older releases.
            graph_lasso_cls = (
                getattr(covariance, "GraphicalLasso", None) or covariance.GraphLasso
            )
            cov_estimator = graph_lasso_cls()
        elif cov_estimator == "oas":
            cov_estimator = covariance.OAS()
        elif cov_estimator == "single-index":
            cov_estimator = SingleIndexCovariance()
        else:
            raise NotImplementedError(
                "Unknown covariance estimator {}".format(cov_estimator)
            )

    # handle sklearn models
    if isinstance(cov_estimator, BaseEstimator):
        cov_estimator = CovarianceEstimator(cov_estimator, window=cov_window)

    if mu_estimator is None:
        mu_estimator = SharpeEstimator()

    if isinstance(mu_estimator, string_types):
        if mu_estimator == "historical":
            mu_estimator = HistoricalEstimator(window=mu_window)
        elif mu_estimator == "sharpe":
            mu_estimator = SharpeEstimator()
        else:
            raise NotImplementedError("Unknown mu estimator {}".format(mu_estimator))

    self.cov_estimator = cov_estimator
    self.mu_estimator = mu_estimator
# NOTE(review): this fragment arrived collapsed onto a single line, so the
# nesting below is reconstructed from the statements themselves. The first two
# statements presumably sit inside a ``for class_ix ...`` loop whose header is
# not visible here - confirm against the full script.

# Print the F1 score of the "paper" estimate for each time index.
print class_ix
for time_ix in range(len_t):
    print(
        getF1(A_list[class_ix][time_ix], Theta_paper_list[class_ix][time_ix]))

#---------------------------------------------------------------------------------------------------------
# Graphical-lasso sweep: for every class and time index, fit one model per
# alpha on a 10-point log-spaced grid from 1e-1 to 1e1 and collect the
# precision matrices.
set_length = 10
alpha_set = np.logspace(-1, 1, set_length)
Theta_glasso_list = []
for class_ix in range(len_class):
    Theta_c = []
    for time_ix in range(len_t):
        for alpha in alpha_set:
            ml_glasso = cov.GraphLasso(alpha, assume_centered=False)
            ml_glasso.fit(X_list[class_ix][time_ix])
            Theta = ml_glasso.get_precision()
            # One entry per (time_ix, alpha) pair, appended in loop order.
            Theta_c.append(Theta)
    Theta_glasso_list.append(Theta_c)

# Finer sweep (51 alphas from 1e-1 to 10**0.5) on class 0 / time index 0 only.
set_length = 51
alpha_set = np.logspace(-1, .5, set_length)
Theta_c = []
class_ix = 0
time_ix = 0
for alpha in alpha_set:
    ml_glasso = cov.GraphLasso(alpha, assume_centered=False)
    ml_glasso.fit(X_list[class_ix][time_ix])
    Theta = ml_glasso.get_precision()
    # NOTE(review): the getF1 result is discarded and Theta_c stays empty in
    # this loop - possibly a missing print/append; confirm intent.
    getF1(A_list[0][0], Theta)
def learnStructure(file_path_in, file_path_out, use_sample_weight):
    """Fit a graphical lasso and export the resulting graph and precision matrix.

    :param file_path_in: sequence of input CSV paths; ``[0]`` is the dataset
        and ``[1]`` the sample weights (read only if ``use_sample_weight``).
    :param file_path_out: sequence of output paths; ``[0]`` receives the
        node-link JSON of the component containing "NumberOfSmellReports",
        ``[1]`` the precision matrix as CSV.
    :param use_sample_weight: when truthy, use the weighted mean/covariance
        helpers instead of ``DataFrame.cov()``.
    :raises ValueError: if no connected component of the learned graph
        contains the "NumberOfSmellReports" node.
    """
    print("Run graphical Lasso...")
    round_to = 6  # round the graph data to a decimal
    label_x = "NumberOfSmellReports"  # the label of variable x that we want to inference

    # Check if directories exist
    for p in file_path_out:
        checkAndCreateDir(p)

    # Read the dataset
    df = pd.read_csv(file_path_in[0])
    df = df[df.columns[1:]]  # drop the index column
    col_names = df.columns
    print(col_names)

    # Compute covariance (weighted when sample weights are supplied)
    if use_sample_weight:
        df_w = pd.read_csv(file_path_in[1])
        df_w = df_w[df_w.columns[1:]]  # drop the index column
        ts_mu = computeWeightedMean(df, df_w)  # note that this is a pandas time series object
        df_cov = computeWeightedCov(df, df_w, ts_mu)
    else:
        df_cov = df.cov()

    # Run Graphical Lasso
    #model = sklcov.GraphLassoCV(cv=5, max_iter=1000, alphas=20) # used for choosing alpha
    model = sklcov.GraphLasso(alpha=3.5, max_iter=2000)  # for transformed dataset with sample weights
    # NOTE(review): scikit-learn's fit() expects samples (n_samples, n_features)
    # and computes the covariance itself; a covariance matrix is passed here.
    # Kept as-is to preserve results - confirm whether this is intended.
    model.fit(df_cov)
    #print model.get_params(), model.cv_alphas_, model.alpha_

    # Get the precision matrix; zero out near-zero entries, then round
    prec = model.get_precision()
    prec[abs(prec) < 0.001] = 0
    prec = np.round(prec, round_to)

    # Construct graph: one edge per non-zero entry above the diagonal
    prec_triu = np.triu(prec, 1)  # Get the upper triangle matrix without diagonal
    rows, cols = np.nonzero(prec_triu)
    edges = list(zip(rows.tolist(), cols.tolist()))
    G = nx.Graph()
    print("Number of edges: " + str(len(edges)))
    for i, j in edges:
        print("Edge: " + col_names[i] + " === " + col_names[j])
        G.add_edge(col_names[i], col_names[j], precision=round(prec[i, j], round_to))

    # Add the diagonal of the precision matrix to the nodes of the graph
    for (node, value) in zip(col_names, np.diag(prec)):
        if G.has_node(node):
            nx.set_node_attributes(G, "precision", {node: value})

    # Find the largest connected component
    #GC = max(nx.connected_component_subgraphs(G), key=len)

    # Find the connected component that contains the smell reports node
    GC = None
    for g in nx.connected_component_subgraphs(G):
        if g.has_node(label_x):
            GC = g
            break
    if GC is None:
        # Fail before touching the output files; previously GC was left
        # unbound and the JSON file was truncated before the crash.
        raise ValueError(
            "No connected component contains node " + label_x)

    # Export the graph structure to json for d3.js visualization
    with open(file_path_out[0], "w") as out_file:
        json.dump(json_graph.node_link_data(GC), out_file)
    print("Graphical model created at " + file_path_out[0])

    # Export the precision matrix in the format of pandas dataframe
    df_prec = pd.DataFrame(data=prec, columns=col_names)
    df_prec.to_csv(file_path_out[1])
    print("Precision matrix created at " + file_path_out[1])
def __init__(self, mu_estimator=None, cov_estimator=None, cov_window=None,
             min_history=None, bounds=None, max_leverage=1., method='mpt',
             q=0.01, gamma=0., optimizer_options=None, force_weights=None,
             **kwargs):
    """
    :param mu_estimator: Expected-return estimator. ``None`` or 'sharpe' select
        ``SharpeEstimator()``, 'historical' selects
        ``HistoricalEstimator(window=cov_window)``; any non-string object is
        stored as given.
    :param cov_estimator: Covariance estimator. ``None`` means 'empirical'.
        Accepted names are 'empirical', 'ledoit-wolf', 'graph-lasso', 'oas'
        and 'single-index'; ``BaseEstimator`` instances are wrapped in
        ``CovarianceEstimator(..., window=cov_window)``.
    :param cov_window: Window passed to the covariance estimator wrapper and
        to the 'historical' mu estimator. Use None for entire history.
    :param min_history: Use zero weights for first min_periods. Default is 1 year
    :param bounds: Stored as ``self.bounds``.
    :param max_leverage: Max leverage to use.
    :param method: optimization objective - can be "mpt", "sharpe" and "variance"
    :param q: depends on method, e.g. for "mpt" it is risk aversion parameter
        (higher means lower aversion to risk)
    :param gamma: Penalize changing weights (can be number or Series with
        individual weights such as fees)
    :param optimizer_options: Stored as ``self.optimizer_options`` (``{}`` when falsy).
    :param force_weights: Stored as ``self.force_weights``.
    :raises NotImplementedError: if an estimator name is not recognised.
    """
    super().__init__(min_history=min_history, **kwargs)
    self.method = method
    self.q = q
    self.gamma = gamma
    self.bounds = bounds
    self.force_weights = force_weights
    self.max_leverage = max_leverage
    self.optimizer_options = optimizer_options or {}

    if cov_estimator is None:
        cov_estimator = 'empirical'

    if isinstance(cov_estimator, string_types):
        if cov_estimator == 'empirical':
            # use pandas covariance in init_step
            cov_estimator = covariance.EmpiricalCovariance()
        elif cov_estimator == 'ledoit-wolf':
            cov_estimator = covariance.LedoitWolf()
        elif cov_estimator == 'graph-lasso':
            # scikit-learn renamed GraphLasso to GraphicalLasso; prefer the
            # new name and fall back to the old one on older releases.
            graph_lasso_cls = (getattr(covariance, 'GraphicalLasso', None)
                               or covariance.GraphLasso)
            cov_estimator = graph_lasso_cls()
        elif cov_estimator == 'oas':
            cov_estimator = covariance.OAS()
        elif cov_estimator == 'single-index':
            cov_estimator = SingleIndexCovariance()
        else:
            # NotImplemented is a non-callable constant; the exception class
            # is NotImplementedError.
            raise NotImplementedError(
                'Unknown covariance estimator {}'.format(cov_estimator))

    # handle sklearn models
    if isinstance(cov_estimator, BaseEstimator):
        cov_estimator = CovarianceEstimator(cov_estimator, window=cov_window)

    if mu_estimator is None:
        mu_estimator = SharpeEstimator()

    if isinstance(mu_estimator, string_types):
        if mu_estimator == 'historical':
            mu_estimator = HistoricalEstimator(window=cov_window)
        elif mu_estimator == 'sharpe':
            mu_estimator = SharpeEstimator()
        else:
            raise NotImplementedError(
                'Unknown mu estimator {}'.format(mu_estimator))

    self.cov_estimator = cov_estimator
    self.mu_estimator = mu_estimator
#########################################################
# Cluster ids by the sparse covariance of their 'y' series.

# Load the training table and keep only the columns used below.
with pd.HDFStore("../input/train.h5", "r") as data_file:
    df = data_file.get("train")

df = df[['timestamp', 'id', 'y']]
df = remove_expired_ids(df)
df = df[df.id < 1000]

# One row per id, one column per timestamp; missing observations become 0.
pivoted = df.pivot(index='id', columns='timestamp')
pivoted.fillna(0, inplace=True)

# Transpose so that each column of X is one id, then scale each column by
# its standard deviation.
x = np.array(pivoted)
X = x.copy().T
X /= X.std(axis=0)

#model = covariance.GraphLassoCV(cv=2, n_jobs=-1)
model = covariance.GraphLasso(alpha=1e-1)
model.fit(X)
sparse_covariance = model.covariance_

# The columns of X (and therefore ``labels``) follow the pivot's row index,
# so the names are taken from it rather than from df.id.unique(), whose
# order-of-appearance ordering can differ from the sorted pivot index.
# ``str``/``int`` replace the removed ``np.str``/``np.int`` aliases.
names = np.asarray(pivoted.index).astype(str)

_, labels = cluster.affinity_propagation(sparse_covariance)
n_labels = labels.max()

groups = {}
for i in range(n_labels + 1):
    members = names[labels == i]
    print('Cluster %i: %s' % ((i + 1), ', '.join(members)))
    groups[str(i)] = members.astype(int)

plot_y_in_cluster(df, group=3)
plot_cluster(X, labels, model)