Exemple #1
0
    def __init__(self, window=float('inf'), mu_estimator=None, cov_estimator=None,
                 min_history=None, max_leverage=1., method='mpt', q=0.01, gamma=0., allow_cash=False, **kwargs):
        """
        :param window: Window for calculating mean and variance. Use float('inf') for entire history
            (internally replaced by a very large integer window).
        :param mu_estimator: Expected-return estimator. Either a name ('historical' or 'sharpe')
            or an estimator object, which is stored as-is. Defaults to ``MuEstimator()``.
        :param cov_estimator: Covariance estimator. Either a name ('empirical', 'ledoit-wolf',
            'graph-lasso' or 'oas') or an estimator object. sklearn ``BaseEstimator`` instances
            are wrapped in ``CovarianceEstimator``. Defaults to 'empirical'.
        :param min_history: Use zero weights for first min_periods. Defaults to 50 for an infinite
            window and to ``window`` otherwise.
        :param max_leverage: Max leverage to use.
        :param method: optimization objective - can be "mpt", "sharpe" and "variance"
        :param q: depends on method, e.g. for "mpt" it is risk aversion parameter (higher means lower aversion to risk)
        :param gamma: Penalize changing weights (can be number or Series with individual weights such as fees)
        :param allow_cash: Allow holding cash (weights doesn't have to sum to 1)
        :raises NotImplementedError: if ``cov_estimator`` or ``mu_estimator`` is an unknown name.
        """
        if np.isinf(window):
            window = int(1e+8)
            min_history = min_history or 50
        else:
            min_history = min_history or window

        super(MPT, self).__init__(min_history=min_history, **kwargs)
        self.window = window
        self.max_leverage = max_leverage
        self.method = method
        self.q = q
        self.gamma = gamma
        self.allow_cash = allow_cash

        if cov_estimator is None:
            cov_estimator = 'empirical'

        if isinstance(cov_estimator, basestring):
            if cov_estimator == 'empirical':
                # use pandas covariance in init_step
                cov_estimator = covariance.EmpiricalCovariance()
            elif cov_estimator == 'ledoit-wolf':
                cov_estimator = covariance.LedoitWolf()
            elif cov_estimator == 'graph-lasso':
                cov_estimator = covariance.GraphLasso()
            elif cov_estimator == 'oas':
                cov_estimator = covariance.OAS()
            else:
                # NotImplemented is a non-callable singleton; the exception class is
                # NotImplementedError.
                raise NotImplementedError('Unknown covariance estimator {}'.format(cov_estimator))

        # handle sklearn models
        if isinstance(cov_estimator, BaseEstimator):
            cov_estimator = CovarianceEstimator(cov_estimator)

        if mu_estimator is None:
            mu_estimator = MuEstimator()

        if isinstance(mu_estimator, basestring):
            if mu_estimator == 'historical':
                mu_estimator = HistoricalEstimator(window)
            elif mu_estimator == 'sharpe':
                mu_estimator = MuEstimator()
            else:
                raise NotImplementedError('Unknown mu estimator {}'.format(mu_estimator))

        self.cov_estimator = cov_estimator
        self.mu_estimator = mu_estimator
Exemple #2
0
 def _train(self, train_data, params, verbose):
     """Fit one graphical-lasso model per data set and collect the covariances.

     :param train_data: iterable of data sets; each item is passed to
         ``GraphLasso.fit``.
     :param params: mapping providing the ``'alpha'`` and ``'max_iter'``
         settings of the estimator.
     :param verbose: when truthy, print progress, failures and elapsed time.
     :return: ``(covs, None)`` where ``covs`` is the list of fitted
         ``covariance_`` matrices, or ``None`` if any fit raised.
     """
     import sklearn.covariance as sk_cov
     if verbose:
         print("Training {} ...".format(self.name))
     start_time = time.time()
     try:
         covs = []
         for x in train_data:
             est = sk_cov.GraphLasso(alpha=params['alpha'],
                                     max_iter=params['max_iter'])
             est.fit(x)
             covs.append(est.covariance_)
     except Exception as e:
         if verbose:
             # Format the exception itself: ``e.message`` does not exist on
             # Python 3 exceptions and would raise inside this handler.
             print("\t{} failed with message: {}".format(
                 self.name, e))
         # A single failure discards all partial results.
         covs = None
     finish_time = time.time()
     if verbose:
         print("\tElapsed time {:.1f}s".format(finish_time - start_time))
     return covs, None
Exemple #3
0
def computeCovar(bed, shrinkMethod, fitIndividuals):
    """Compute the (optionally shrunk) matrix ``XXT`` from ``bed.val``.

    :param bed: object exposing a 2-D array ``val`` (presumably individuals
        in rows, variants in columns -- TODO confirm against the caller).
    :param shrinkMethod: selects the regularizer:
        'lw', 'oas', 'l1' or 'cv' use scikit-learn estimators; 'mylw' derives
        the shrinkage parameter from the data; a value convertible to a
        positive float is used directly as the shrinkage parameter; anything
        else yields the plain ``val . val^T`` product.
    :param fitIndividuals: row index into ``bed.val`` selecting the rows used
        to fit the shrinkage parameter.
    :return: the resulting square matrix ``XXT``.
    """
    if shrinkMethod in ('lw', 'oas', 'l1', 'cv'):
        import sklearn.covariance as cov
        t0 = time.time()
        print('Estimating shrunk covariance using %s estimator...' % (shrinkMethod,))

        if shrinkMethod == 'lw':
            covEstimator = cov.LedoitWolf(assume_centered=True,
                                          block_size=5 * bed.val.shape[0])
        elif shrinkMethod == 'oas':
            covEstimator = cov.OAS(assume_centered=True)
        elif shrinkMethod == 'l1':
            covEstimator = cov.GraphLassoCV(assume_centered=True, verbose=True)
        elif shrinkMethod == 'cv':
            shrunkEstimator = cov.ShrunkCovariance(assume_centered=True)
            param_grid = {'shrinkage': [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99]}
            # NOTE(review): relies on a file-level ``import sklearn`` /
            # ``sklearn.grid_search``; neither is visible in this block.
            covEstimator = sklearn.grid_search.GridSearchCV(
                shrunkEstimator, param_grid)
        else:
            # Defensive only: unreachable given the membership test above.
            raise Exception('unknown covariance regularizer')

        # First pass: estimate the regularization strength on the fitting rows.
        covEstimator.fit(bed.val[fitIndividuals, :].T)
        if shrinkMethod == 'l1':
            alpha = covEstimator.alpha_
            print('l1 alpha chosen: %s' % (alpha,))
            covEstimator2 = cov.GraphLasso(alpha=alpha,
                                           assume_centered=True,
                                           verbose=True)
        else:
            if shrinkMethod == 'cv':
                # The fitted grid search is ``covEstimator``; the original
                # code read an undefined name here.
                shrinkEstimator = covEstimator.best_params_['shrinkage']
            else:
                shrinkEstimator = covEstimator.shrinkage_
            print('shrinkage estimator: %s' % (shrinkEstimator,))
            covEstimator2 = cov.ShrunkCovariance(shrinkage=shrinkEstimator,
                                                 assume_centered=True)
        # Second pass: refit on all rows with the chosen strength.
        covEstimator2.fit(bed.val.T)
        XXT = covEstimator2.covariance_ * bed.val.shape[1]
        print('Done in %0.2f seconds' % (time.time() - t0))

    else:
        print('Computing kinship matrix...')
        t0 = time.time()
        XXT = symmetrize(blas.dsyrk(1.0, bed.val, lower=1))
        print('Done in %0.2f seconds' % (time.time() - t0))
        try:
            shrinkParam = float(shrinkMethod)
        except (TypeError, ValueError):
            # Not a number (e.g. 'mylw' or None): no explicit shrinkage.
            shrinkParam = -1
        if shrinkMethod == 'mylw':
            XXT_fit = XXT[np.ix_(fitIndividuals, fitIndividuals)]
            # Off-diagonal sum of squared entries, scaled by the squared
            # number of columns.
            sE2R = (np.sum(XXT_fit**2) -
                    np.sum(np.diag(XXT_fit)**2)) / (bed.val.shape[1]**2)
            # Equivalent to (bed.val**2).dot((bed.val.T)**2) on the fitting rows.
            temp = symmetrize(
                blas.dsyrk(1.0, bed.val[fitIndividuals, :]**2, lower=1))
            sER2 = (temp.sum() - np.diag(temp).sum()) / bed.val.shape[1]
            shrinkParam = (sER2 - sE2R) / (sE2R * (bed.val.shape[1] - 1))
        if shrinkParam > 0:
            print('shrinkage estimator: %s' % (1 - shrinkParam,))
            # Blend towards a scaled identity matrix.
            XXT = (1 - shrinkParam) * XXT + bed.val.shape[
                1] * shrinkParam * np.eye(XXT.shape[0])

    return XXT
    def __init__(
        self,
        window=None,
        mu_estimator=None,
        cov_estimator=None,
        mu_window=None,
        cov_window=None,
        min_history=None,
        bounds=None,
        max_leverage=1.0,
        method="mpt",
        q=0.01,
        gamma=0.0,
        optimizer_options=None,
        force_weights=None,
        **kwargs,
    ):
        """
        :param window: Window for calculating mean and variance. Use None for entire history.
        :param mu_estimator: Expected-return estimator. Either a name ("historical" or "sharpe")
            or an estimator object, which is stored as-is. Defaults to ``SharpeEstimator()``.
        :param cov_estimator: Covariance estimator. Either a name ("empirical", "ledoit-wolf",
            "graph-lasso", "oas" or "single-index") or an estimator object. sklearn
            ``BaseEstimator`` instances are wrapped in ``CovarianceEstimator``.
            Defaults to "empirical".
        :param mu_window: Window passed to the "historical" mu estimator; falls back to ``window``.
        :param cov_window: Window passed to ``CovarianceEstimator``; falls back to ``window``.
        :param min_history: Use zero weights for first min_periods. Default is 1 year
        :param bounds: Stored on the instance (empty dict when falsy). Cannot be combined with
            ``max_leverage != 1``.
        :param max_leverage: Max leverage to use.
        :param method: optimization objective - can be "mpt", "sharpe" and "variance"
        :param q: depends on method, e.g. for "mpt" it is risk aversion parameter (higher means lower aversion to risk)
            from https://en.wikipedia.org/wiki/Modern_portfolio_theory#Efficient_frontier_with_no_risk-free_asset
            q=2 is equivalent to full-kelly, q=1 is equivalent to half kelly
        :param gamma: Penalize changing weights (can be number or Series with individual weights such as fees)
        :param optimizer_options: Stored on the instance (empty dict when falsy); presumably
            forwarded to the optimizer -- not visible in this method.
        :param force_weights: Stored on the instance; its use is not visible in this method.
        :raises NotImplementedError: if ``bounds`` is combined with a non-unit ``max_leverage``,
            or if ``cov_estimator`` / ``mu_estimator`` is an unknown name.
        """
        super().__init__(min_history=min_history, **kwargs)
        mu_window = mu_window or window
        cov_window = cov_window or window
        self.method = method
        self.q = q
        self.gamma = gamma
        self.bounds = bounds or {}
        self.force_weights = force_weights
        self.max_leverage = max_leverage
        self.optimizer_options = optimizer_options or {}

        if bounds and max_leverage != 1:
            # NotImplemented is a non-callable singleton; the exception class is
            # NotImplementedError.
            raise NotImplementedError(
                "max_leverage cannot be used with bounds, consider removing max_leverage and replace it with bounds1"
            )

        if cov_estimator is None:
            cov_estimator = "empirical"

        if isinstance(cov_estimator, string_types):
            if cov_estimator == "empirical":
                # use pandas covariance in init_step
                cov_estimator = covariance.EmpiricalCovariance()
            elif cov_estimator == "ledoit-wolf":
                cov_estimator = covariance.LedoitWolf()
            elif cov_estimator == "graph-lasso":
                cov_estimator = covariance.GraphLasso()
            elif cov_estimator == "oas":
                cov_estimator = covariance.OAS()
            elif cov_estimator == "single-index":
                cov_estimator = SingleIndexCovariance()
            else:
                raise NotImplementedError(
                    "Unknown covariance estimator {}".format(cov_estimator)
                )

        # handle sklearn models
        if isinstance(cov_estimator, BaseEstimator):
            cov_estimator = CovarianceEstimator(cov_estimator, window=cov_window)

        if mu_estimator is None:
            mu_estimator = SharpeEstimator()

        if isinstance(mu_estimator, string_types):
            if mu_estimator == "historical":
                mu_estimator = HistoricalEstimator(window=mu_window)
            elif mu_estimator == "sharpe":
                mu_estimator = SharpeEstimator()
            else:
                raise NotImplementedError("Unknown mu estimator {}".format(mu_estimator))

        self.cov_estimator = cov_estimator
        self.mu_estimator = mu_estimator
Exemple #5
0
    print class_ix
    for time_ix in range(len_t):
        print(
            getF1(A_list[class_ix][time_ix],
                  Theta_paper_list[class_ix][time_ix]))
#---------------------------------------------------------------------------------------------------------

# Sweep 1: fit a graphical lasso for every (class, time step, alpha)
# combination, with alpha taken from 10 log-spaced values in [1e-1, 1e1].
set_length = 10
alpha_set = np.logspace(-1, 1, set_length)

Theta_glasso_list = []
for class_ix in range(len_class):
    # Precision matrices for this class, accumulated over all time steps and alphas.
    Theta_c = []
    for time_ix in range(len_t):
        for alpha in alpha_set:
            ml_glasso = cov.GraphLasso(alpha, assume_centered=False)
            ml_glasso.fit(X_list[class_ix][time_ix])
            Theta = ml_glasso.get_precision()
            Theta_c.append(Theta)
        # NOTE(review): this append sits inside the time_ix loop, so the same
        # Theta_c list object is appended once per time step (len_t references
        # per class). If one entry per class was intended, it should be
        # dedented one level -- confirm intent.
        Theta_glasso_list.append(Theta_c)

# Sweep 2: finer grid of 51 log-spaced alphas in [1e-1, 10**0.5], for the
# first class and first time step only.
set_length = 51
alpha_set = np.logspace(-1, .5, set_length)
# NOTE(review): Theta_c is reset here but not filled in the loop below.
Theta_c = []
class_ix = 0
time_ix = 0
for alpha in alpha_set:
    ml_glasso = cov.GraphLasso(alpha, assume_centered=False)
    ml_glasso.fit(X_list[class_ix][time_ix])
    Theta = ml_glasso.get_precision()
    # NOTE(review): the return value of getF1 is discarded; presumably it was
    # meant to be printed or collected -- confirm.
    getF1(A_list[0][0], Theta)
Exemple #6
0
def learnStructure(file_path_in, file_path_out, use_sample_weight):
    """Learn a sparse graph structure with graphical lasso and export it.

    :param file_path_in: sequence of input paths; ``[0]`` is the dataset CSV
        and ``[1]`` is the sample-weight CSV (read only when
        ``use_sample_weight`` is truthy). The first column of each is dropped
        as an index column.
    :param file_path_out: sequence of output paths; ``[0]`` receives the
        node-link JSON of the connected component containing the
        "NumberOfSmellReports" node, ``[1]`` receives the precision matrix CSV.
    :param use_sample_weight: when truthy, use the weighted mean/covariance
        helpers instead of ``DataFrame.cov``.
    :raises ValueError: if no connected component of the learned graph
        contains the "NumberOfSmellReports" node.
    """
    print("Run graphical Lasso...")
    round_to = 6 # round the graph data to a decimal
    label_x = "NumberOfSmellReports" # the label of variable x that we want to inference

    # Check if directories exist
    for p in file_path_out:
        checkAndCreateDir(p)

    # Read the dataset
    df = pd.read_csv(file_path_in[0])
    df = df[df.columns[1:]] # drop the index column
    col_names = df.columns
    print(col_names)

    # Compute covariance (weighted when sample weights are requested)
    if use_sample_weight:
        df_w = pd.read_csv(file_path_in[1])
        df_w = df_w[df_w.columns[1:]] # drop the index column
        ts_mu = computeWeightedMean(df, df_w) # note that this is a pandas time series object
        df_cov = computeWeightedCov(df, df_w, ts_mu)
    else:
        df_cov = df.cov()

    # Run Graphical Lasso
    #model = sklcov.GraphLassoCV(cv=5, max_iter=1000, alphas=20) # used for choosing alpha
    model = sklcov.GraphLasso(alpha=3.5, max_iter=2000) # for transformed dataset with sample weights
    # NOTE(review): the estimator is fitted on a covariance matrix rather than
    # on raw samples -- confirm this is intended.
    model.fit(df_cov)
    #print model.get_params(), model.cv_alphas_, model.alpha_

    # Get the precision matrix, zero out tiny entries, then round
    prec = model.get_precision()
    prec[abs(prec) < 0.001] = 0
    prec = np.round(prec, round_to)

    # Construct graph from the non-zero strictly-upper-triangular entries
    prec_triu = np.triu(prec, 1) # Get the upper triangle matrix without diagonal
    rows, cols = np.nonzero(prec_triu)
    rows = rows.tolist()
    cols = cols.tolist()
    G = nx.Graph()
    print("Number of edges: " + str(len(rows)))
    for i, j in zip(rows, cols):
        print("Edge: " + col_names[i] + " === " + col_names[j])
        G.add_edge(col_names[i], col_names[j], precision=round(prec[i, j], round_to))

    # Add the diagonal of the precision matrix to the graph nodes
    # NOTE(review): argument order (G, name, values) matches networkx 1.x;
    # networkx 2.x expects (G, values, name) -- confirm installed version.
    for (node, value) in zip(col_names, np.diag(prec)):
        if G.has_node(node):
            nx.set_node_attributes(G, "precision", {node: value})

    # Find the largest connected component
    #GC = max(nx.connected_component_subgraphs(G), key=len)

    # Find the connected component that contains the smell reports node
    GC = None
    for g in nx.connected_component_subgraphs(G):
        if g.has_node(label_x):
            GC = g
            break
    if GC is None:
        # Previously this fell through to an unbound-variable error below.
        raise ValueError("No connected component contains node " + label_x)

    # Export the graph structure to json for d3.js visualization
    with open(file_path_out[0], "w") as out_file:
        json.dump(json_graph.node_link_data(GC), out_file)
    print("Graphical model created at " + file_path_out[0])

    # Export the precision matrix in the format of pandas dataframe
    df_prec = pd.DataFrame(data=prec, columns=col_names)
    df_prec.to_csv(file_path_out[1])
    print("Precision matrix created at " + file_path_out[1])
Exemple #7
0
    def __init__(self,
                 mu_estimator=None,
                 cov_estimator=None,
                 cov_window=None,
                 min_history=None,
                 bounds=None,
                 max_leverage=1.,
                 method='mpt',
                 q=0.01,
                 gamma=0.,
                 optimizer_options=None,
                 force_weights=None,
                 **kwargs):
        """
        :param mu_estimator: Expected-return estimator. Either a name ('historical' or 'sharpe')
            or an estimator object, which is stored as-is. Defaults to ``SharpeEstimator()``.
        :param cov_estimator: Covariance estimator. Either a name ('empirical', 'ledoit-wolf',
            'graph-lasso', 'oas' or 'single-index') or an estimator object. sklearn
            ``BaseEstimator`` instances are wrapped in ``CovarianceEstimator``.
            Defaults to 'empirical'.
        :param cov_window: Window passed to ``CovarianceEstimator`` and also to the
            'historical' mu estimator.
        :param min_history: Use zero weights for first min_periods. Default is 1 year
        :param bounds: Stored on the instance; its use is not visible in this method.
        :param max_leverage: Max leverage to use.
        :param method: optimization objective - can be "mpt", "sharpe" and "variance"
        :param q: depends on method, e.g. for "mpt" it is risk aversion parameter (higher means lower aversion to risk)
        :param gamma: Penalize changing weights (can be number or Series with individual weights such as fees)
        :param optimizer_options: Stored on the instance (empty dict when falsy); presumably
            forwarded to the optimizer -- not visible in this method.
        :param force_weights: Stored on the instance; its use is not visible in this method.
        :raises NotImplementedError: if ``cov_estimator`` or ``mu_estimator`` is an unknown name.
        """
        super().__init__(min_history=min_history, **kwargs)
        self.method = method
        self.q = q
        self.gamma = gamma
        self.bounds = bounds
        self.force_weights = force_weights
        self.max_leverage = max_leverage
        self.optimizer_options = optimizer_options or {}

        if cov_estimator is None:
            cov_estimator = 'empirical'

        if isinstance(cov_estimator, string_types):
            if cov_estimator == 'empirical':
                # use pandas covariance in init_step
                cov_estimator = covariance.EmpiricalCovariance()
            elif cov_estimator == 'ledoit-wolf':
                cov_estimator = covariance.LedoitWolf()
            elif cov_estimator == 'graph-lasso':
                cov_estimator = covariance.GraphLasso()
            elif cov_estimator == 'oas':
                cov_estimator = covariance.OAS()
            elif cov_estimator == 'single-index':
                cov_estimator = SingleIndexCovariance()
            else:
                # NotImplemented is a non-callable singleton; the exception class is
                # NotImplementedError.
                raise NotImplementedError(
                    'Unknown covariance estimator {}'.format(cov_estimator))

        # handle sklearn models
        if isinstance(cov_estimator, BaseEstimator):
            cov_estimator = CovarianceEstimator(cov_estimator,
                                                window=cov_window)

        if mu_estimator is None:
            mu_estimator = SharpeEstimator()

        if isinstance(mu_estimator, string_types):
            if mu_estimator == 'historical':
                # This signature has no separate mu window, so cov_window is reused.
                mu_estimator = HistoricalEstimator(window=cov_window)
            elif mu_estimator == 'sharpe':
                mu_estimator = SharpeEstimator()
            else:
                raise NotImplementedError(
                    'Unknown mu estimator {}'.format(mu_estimator))

        self.cov_estimator = cov_estimator
        self.mu_estimator = mu_estimator
#########################################################
# Load the training table from the HDF5 store.
with pd.HDFStore("../input/train.h5", "r") as data_file:
    df = data_file.get("train")

# Keep only the columns needed, drop expired ids and restrict to ids below 1000.
df = df[['timestamp', 'id', 'y']]
df = remove_expired_ids(df)
df = df[df.id < 1000]
# One row per id, one column per timestamp; keyword arguments because newer
# pandas releases no longer accept them positionally.
pivoted = df.pivot(index='id', columns='timestamp')
pivoted.fillna(0, inplace=True)
x = np.array(pivoted)
# Transpose so that each column of X is one id, then scale each column by its
# standard deviation.
# NOTE(review): a column with zero standard deviation yields inf/NaN here.
X = x.copy().T
X /= X.std(axis=0)

#model = covariance.GraphLassoCV(cv=2, n_jobs=-1)
model = covariance.GraphLasso(alpha=1e-1)
model.fit(X)

sparse_covariance = model.covariance_
# np.str / np.int were plain aliases of the builtins and have been removed
# from NumPy; the builtins behave identically.
# NOTE(review): names follow order of appearance in df while the columns of X
# follow the pivot index order -- confirm the two orders agree.
names = df.id.unique().astype(str)
_, labels = cluster.affinity_propagation(sparse_covariance)
n_labels = labels.max()

# Map each cluster label (as a string) to the integer ids it contains.
groups = {}
for i in range(n_labels + 1):
    print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))
    groups[str(i)] = names[labels == i].astype(int)

plot_y_in_cluster(df, group=3)
plot_cluster(X, labels, model)