Ejemplo n.º 1
0
def lme_summary(output_dir: str, model: LMEModel, ndim: int = 10) -> None:
    """ Summarizes the ordinary linear mixed effects model.

    The regression results are deposited in `output_dir` and an
    ``index.html`` report is written next to them.  The report holds the
    model summary table, a radial tree plot built from the negated
    log likelihood of every submodel, and the prediction / residual plots.

    Parameters
    ----------
    output_dir : str
        Directory where all of the regression results and
        summaries will be stored.
    model : LMEModel
        Linear Mixed Effects model that contains the model fit and the
        regression results.
    ndim : int
        The number of dimensions to summarize.  This value is forwarded to
        ``model.summary``.
    """
    # log likelihood of every submodel, keyed by the name of its response
    loglike = pd.Series({
        r.model.endog_names: r.model.loglike(r.params)
        for r in model.results
    })

    # Summary object.  Forward the caller's `ndim`; it used to be pinned to
    # 10 here, which silently ignored the argument.
    smry = model.summary(ndim=ndim)

    # The tree is decorated with the negated log likelihood and the edges
    # are drawn using the resulting 'color' attribute.
    t = _decorate_tree(model.tree, -loglike)
    p1 = radialplot(t, edge_color='color', figsize=(800, 800))
    p1.title.text = 'Loglikelihood of submodels'
    p1.title_location = 'above'
    p1.title.align = 'center'
    p1.title.text_font_size = '18pt'

    # 2D scatter plot for prediction on PB
    p2 = _projected_prediction(model)
    p3 = _projected_residuals(model)

    # prediction and residual plots are shown side by side
    p23 = row(p2, p3)

    # Deposit all regression results
    _deposit_results(model, output_dir)

    index_fp = os.path.join(output_dir, 'index.html')
    with open(index_fp, 'w') as index_f:
        index_f.write('<html><body>\n')
        index_f.write('<h1>Simplicial Linear Mixed Effects Summary</h1>\n')
        index_f.write(smry.as_html())
        _deposit_results_html(index_f)
        # each bokeh layout is rendered to its own HTML snippet and appended
        ess_tree_html = file_html(p1, CDN, 'Loglikelihood')
        index_f.write(ess_tree_html)
        reg_smry_html = file_html(p23, CDN, 'Prediction and Residual plot')
        index_f.write(reg_smry_html)
        index_f.write('</body></html>\n')
Ejemplo n.º 2
0
    def test_basic_plot(self):
        # Builds a three tip tree from seeded random data, decorates every
        # node with plotting attributes and checks the data sources that
        # `radialplot` hands to its renderers.
        self.maxDiff = None
        exp_edges = {
            'dest_node': ['0', '1', '2', 'y3'],
            'edge_color': ['#00FF00', '#00FF00', '#00FF00', '#FF0000'],
            'edge_width': [2, 2, 2, 2],
            'src_node': ['y3', 'y4', 'y3', 'y4'],
            'x0': [
                338.2612593838583, 193.1688862557773, 338.2612593838583,
                193.1688862557773
            ],
            'x1': [
                487.5, 12.499999999999972, 324.89684138234867,
                338.2612593838583
            ],
            'y0': [
                271.7282256126416, 365.95231443706376, 271.7282256126416,
                365.95231443706376
            ],
            'y1': [
                347.7691620070637, 483.2800610261029, 16.719938973897143,
                271.7282256126416
            ]
        }

        exp_nodes = {
            'child0': [np.nan, np.nan, np.nan, '0', '1'],
            'child1': [np.nan, np.nan, np.nan, '2', 'y3'],
            'color': ['#1C9099', '#1C9099', '#1C9099', '#FF999F', '#FF999F'],
            'hover_var': [None, None, None, None, None],
            'is_tip': [True, True, True, False, False],
            'node_size': [10, 10, 10, 10, 10],
            'x': [
                12.499999999999972, 487.5, 324.89684138234867,
                338.26125938385832, 193.16888625577729
            ],
            'y': [
                483.28006102610289, 347.7691620070637, 16.719938973897143,
                271.72822561264161, 365.95231443706376
            ]
        }
        # fixed seed so that the random tree and branch lengths are stable
        np.random.seed(0)
        num_otus = 3  # otus
        x = np.random.rand(num_otus)
        dm = DistanceMatrix.from_iterable(x, lambda x, y: np.abs(x - y))
        lm = ward(dm.condensed_form())
        # builtin `str` replaces the `np.str` alias, which newer NumPy
        # releases no longer provide
        t = TreeNode.from_linkage_matrix(lm, np.arange(len(x)).astype(str))
        t = UnrootedDendrogram.from_tree(t)
        # incorporate colors in tree
        for i, n in enumerate(t.postorder(include_self=True)):
            if not n.is_tip():
                n.name = "y%d" % i
                n.color = '#FF999F'
                n.edge_color = '#FF0000'
                n.node_size = 10
            else:
                n.color = '#1C9099'
                n.edge_color = '#00FF00'
                n.node_size = 10
            n.length = np.random.rand() * 3
            n.edge_width = 2
        p = radialplot(t,
                       node_color='color',
                       edge_color='edge_color',
                       node_size='node_size',
                       edge_width='edge_width')

        # the first renderer is compared against the expected edges
        edge_data = p.renderers[0].data_source.data
        for key, expected in exp_edges.items():
            # Look at the element type: the expected value itself is always
            # a list, so testing the list against `float` never selected the
            # tolerant comparison.
            if isinstance(expected[0], float):
                npt.assert_allclose(edge_data[key], np.array(expected))
            else:
                self.assertListEqual(list(edge_data[key]), expected)

        # the second renderer is compared against the expected nodes
        node_data = p.renderers[1].data_source.data
        for key, expected in exp_nodes.items():
            self.assertListEqual(list(node_data[key]), expected)

        self.assertIsInstance(t, TreeNode)
Ejemplo n.º 3
0
def ols_summary(output_dir: str, model: OLSModel, ndim: int = 10) -> None:
    """ Summarizes the ordinary least squares fit.

    The regression results are deposited in `output_dir` and an
    ``index.html`` report is written next to them.  The report holds the
    model summary table, the relative importance table, histograms of the
    cross validation errors, a radial tree plot of the explained sum of
    squares, and the prediction / residual plots.

    Parameters
    ----------
    output_dir : str
        Directory where all of the regression results and
        summaries will be stored.
    model : OLSModel
        Ordinary Least Squares model that contains the model fit and the
        regression results.
    ndim : int
        The number of dimensions to summarize.  This value is forwarded to
        ``model.summary``.
    """
    # Cross validation
    cv = model.loo()
    # Relative importance of explanatory variables
    relimp = model.lovo()
    w, h = 400, 400  # plot width and height
    # Histogram of model mean squared error from cross validation
    mse_p = figure(title="Cross Validation Mean Squared Error",
                   plot_width=w,
                   plot_height=h)
    mse_hist, edges = np.histogram(cv.mse, density=True, bins=20)
    mse_p.quad(top=mse_hist,
               bottom=0,
               left=edges[:-1],
               right=edges[1:],
               fill_color="#FFFF00",
               line_color="#033649",
               fill_alpha=0.5,
               legend='CV Mean Squared Error')
    # vertical marker (angle is pi / 2 radians) placed at the model's own mse
    mse_p.ray(x=model.mse,
              y=0,
              length=h,
              angle=1.57079633,
              color='red',
              legend='Model Error',
              line_width=0.5)

    # Histogram of prediction error from cross validation
    pred_p = figure(title="Prediction Error", plot_width=w, plot_height=h)
    pred_hist, edges = np.histogram(cv.pred_err, density=True, bins=20)
    pred_p.quad(top=pred_hist,
                bottom=0,
                left=edges[:-1],
                right=edges[1:],
                fill_color="#00FFFF",
                line_color="#033649",
                fill_alpha=0.5,
                legend='Prediction Error')
    # same vertical marker as on the mse histogram
    pred_p.ray(x=model.mse,
               y=0,
               length=h,
               angle=1.57079633,
               color='red',
               legend='Model Error',
               line_width=0.5)

    # both cross validation histograms are shown side by side
    cvp = row(mse_p, pred_p)

    # Explained sum of squares of every submodel, keyed by response name
    ess = pd.Series({r.model.endog_names: r.ess for r in model.results})
    # Summary object.  Forward the caller's `ndim`; it used to be pinned to
    # 10 here, which silently ignored the argument.
    smry = model.summary(ndim=ndim)

    t = _decorate_tree(model.tree, ess)

    p1 = radialplot(t, edge_color='color', figsize=(800, 800))
    p1.title.text = 'Explained Sum of Squares'
    p1.title_location = 'above'
    p1.title.align = 'center'
    p1.title.text_font_size = '18pt'

    # 2D scatter plot for prediction on PB
    p2 = _projected_prediction(model)
    p3 = _projected_residuals(model)

    p23 = row(p2, p3)

    # Deposit all regression results
    _deposit_results(model, output_dir)

    index_fp = os.path.join(output_dir, 'index.html')
    with open(index_fp, 'w') as index_f:
        index_f.write('<html><body>\n')
        index_f.write('<h1>Simplicial Linear Regression Summary</h1>\n')
        index_f.write(smry.as_html())
        index_f.write('<th>Relative importance</th>\n')
        index_f.write(relimp.to_html())
        _deposit_results_html(index_f)
        index_f.write('<th>Cross Validation</th>')
        # each bokeh layout is rendered to its own HTML snippet and appended
        cv_html = file_html(cvp, CDN, 'Cross Validation')
        index_f.write(cv_html)
        ess_tree_html = file_html(p1, CDN, 'Explained Sum of Squares')
        index_f.write(ess_tree_html)
        reg_smry_html = file_html(p23, CDN, 'Prediction and Residual plot')
        index_f.write(reg_smry_html)
        index_f.write('</body></html>\n')
Ejemplo n.º 4
0
    def test_basic_plot(self):
        # Builds a three tip tree from seeded random data, decorates every
        # node with plotting attributes and checks the data sources that
        # `radialplot` hands to its renderers.
        self.maxDiff = None
        exp_edges = {'dest_node': ['0', '1', '2', 'y3'],
                     'edge_color': ['#00FF00', '#00FF00',
                                    '#00FF00', '#FF0000'],
                     'edge_width': [2, 2, 2, 2],
                     'src_node': ['y3', 'y4', 'y3', 'y4'],
                     'x0': [338.2612593838583,
                            193.1688862557773,
                            338.2612593838583,
                            193.1688862557773],
                     'x1': [487.5, 12.499999999999972,
                            324.89684138234867, 338.2612593838583],
                     'y0': [271.7282256126416,
                            365.95231443706376,
                            271.7282256126416,
                            365.95231443706376],
                     'y1': [347.7691620070637,
                            483.2800610261029,
                            16.719938973897143,
                            271.7282256126416]}

        exp_nodes = {'child0': [np.nan, np.nan, np.nan, '0', '1'],
                     'child1': [np.nan, np.nan, np.nan, '2', 'y3'],
                     'color': ['#1C9099', '#1C9099', '#1C9099',
                               '#FF999F', '#FF999F'],
                     'hover_var': [None, None, None, None, None],
                     'is_tip': [True, True, True, False, False],
                     'node_size': [10, 10, 10, 10, 10],
                     'x': [487.5,
                           12.499999999999972,
                           324.89684138234867,
                           338.26125938385832,
                           193.16888625577729],
                     'y': [347.7691620070637,
                           483.28006102610289,
                           16.719938973897143,
                           271.72822561264161,
                           365.95231443706376]}
        # fixed seed so that the random tree and branch lengths are stable
        np.random.seed(0)
        num_otus = 3  # otus
        x = np.random.rand(num_otus)
        dm = DistanceMatrix.from_iterable(x, lambda x, y: np.abs(x-y))
        lm = ward(dm.condensed_form())
        # builtin `str` replaces the `np.str` alias, which newer NumPy
        # releases no longer provide
        t = TreeNode.from_linkage_matrix(lm, np.arange(len(x)).astype(str))
        t = UnrootedDendrogram.from_tree(t)
        # incorporate colors in tree
        for i, n in enumerate(t.postorder(include_self=True)):
            if not n.is_tip():
                n.name = "y%d" % i
                n.color = '#FF999F'
                n.edge_color = '#FF0000'
                n.node_size = 10
            else:
                n.color = '#1C9099'
                n.edge_color = '#00FF00'
                n.node_size = 10
            n.length = np.random.rand()*3
            n.edge_width = 2
        p = radialplot(t, node_color='color', edge_color='edge_color',
                       node_size='node_size', edge_width='edge_width')

        # the first renderer is compared against the expected edges
        edge_data = p.renderers[0].data_source.data
        for key, expected in exp_edges.items():
            self.assertListEqual(list(edge_data[key]), expected)

        # the second renderer is compared against the expected nodes
        node_data = p.renderers[1].data_source.data
        for key, expected in exp_nodes.items():
            self.assertListEqual(list(node_data[key]), expected)

        self.assertIsInstance(t, TreeNode)
Ejemplo n.º 5
0
def lme_summary(output_dir: str, model: LMEModel, tree: TreeNode) -> None:
    """ Writes an HTML report for a linear mixed effects model.

    The regression results are deposited in `output_dir` and an
    ``index.html`` file is written there, holding the model summary table,
    download links for the result tables and a single diagnostic figure
    (heatmap on top; prediction and residual plots next to the tree below).

    Parameters
    ----------
    output_dir : str
        Directory where all of the regression results and
        summaries will be stored.
    model : LMEModel
        Linear Mixed Effects model that contains the model fit and the
        regression results.
    tree : TreeNode
        Tree object that defines the partitions of the features. Each of the
        leaves correspond to the balances in the model.
    """
    # log likelihood of every submodel, keyed by the name of its response
    per_submodel = {}
    for res in model.results:
        per_submodel[res.model.endog_names] = res.model.loglike(res.params)
    loglike = pd.Series(per_submodel)

    scatter_w, scatter_h = 500, 300  # size of the two scatter plots

    # Summary object
    summary = model.summary()

    # tree decorated with the negated log likelihood
    decorated = _decorate_tree(tree, -loglike)

    tree_plot = radialplot(decorated, figsize=(800, 800))
    tree_plot.title.text = 'Loglikelihood of submodels'
    tree_plot.title_location = 'above'
    tree_plot.title.align = 'center'
    tree_plot.title.text_font_size = '18pt'

    # 2D scatter plots for prediction and residuals
    pred_plot = _projected_prediction(model,
                                      plot_width=scatter_w,
                                      plot_height=scatter_h)
    resid_plot = _projected_residuals(model,
                                      plot_width=scatter_w,
                                      plot_height=scatter_h)
    heatmap = _heatmap_summary(model.pvalues.T,
                               model.coefficients().T,
                               plot_width=900,
                               plot_height=400)

    # heatmap on top; below it the scatter plots stacked beside the tree
    diagnostics = column(heatmap,
                         row(column(pred_plot, resid_plot), tree_plot))

    # Deposit all regression results
    _deposit_results(model, output_dir)

    # one heading plus one download link per deposited table
    download_links = [
        '<th>Coefficients</th>\n',
        '<a href="coefficients.csv">Download as CSV</a><br>\n',
        '<th>Coefficient pvalues</th>\n',
        '<a href="pvalues.csv">Download as CSV</a><br>\n',
        '<th>Predicted Balances</th>\n',
        '<a href="predicted.csv">Download as CSV</a><br>\n',
        '<th>Residuals</th>\n',
        '<a href="residuals.csv">Download as CSV</a><br>\n',
    ]

    with open(os.path.join(output_dir, 'index.html'), 'w') as index_f:
        index_f.write('<html><body>\n')
        index_f.write('<h1>Simplicial Linear Mixed Effects Summary</h1>\n')
        index_f.write(summary.as_html())
        index_f.write(''.join(download_links))

        index_f.write(file_html(diagnostics, CDN, 'Diagnostic plots'))
        index_f.write('</body></html>\n')
Ejemplo n.º 6
0
def ols_summary(output_dir: str, model: OLSModel, tree: TreeNode) -> None:
    """ Writes an HTML report for an ordinary least squares fit.

    The regression results are deposited in `output_dir` and an
    ``index.html`` file is written there, holding the model summary table,
    download links for the result tables and a single diagnostic figure
    (heatmap on top; prediction and residual plots next to the tree below).

    Parameters
    ----------
    output_dir : str
        Directory where all of the regression results and
        summaries will be stored.
    model : OLSModel
        Ordinary Least Squares model that contains the model fit and the
        regression results.
    tree : TreeNode
        Tree object that defines the partitions of the features. Each of the
        leaves correspond to the balances in the model.
    """
    scatter_w, scatter_h = 500, 300  # size of the two scatter plots

    # Explained sum of squares
    explained = model.ess

    # Summary object, built from the outputs of kfold() and lovo()
    kfold_res = model.kfold()
    lovo_res = model.lovo()
    summary = model.summary(kfold_res, lovo_res)

    # Deposit all regression results
    _deposit_results(model, output_dir)

    decorated = _decorate_tree(tree, explained)

    tree_plot = radialplot(decorated, figsize=(800, 800))
    tree_plot.title.text = 'Explained Sum of Squares'
    tree_plot.title_location = 'above'
    tree_plot.title.align = 'center'
    tree_plot.title.text_font_size = '18pt'

    # 2D scatter plots for prediction and residuals
    pred_plot = _projected_prediction(model,
                                      plot_width=scatter_w,
                                      plot_height=scatter_h)
    resid_plot = _projected_residuals(model,
                                      plot_width=scatter_w,
                                      plot_height=scatter_h)
    heatmap = _heatmap_summary(model.pvalues.T, model.coefficients().T)

    # heatmap on top; below it the scatter plots stacked beside the tree
    diagnostics = column(heatmap,
                         row(column(pred_plot, resid_plot), tree_plot))

    # one heading plus one download link per deposited table
    download_links = [
        '<th>Coefficients</th>\n',
        '<a href="coefficients.csv">Download as CSV</a><br>\n',
        '<th>Coefficient pvalues</th>\n',
        '<a href="pvalues.csv">Download as CSV</a><br>\n',
        '<th>Predicted Balances</th>\n',
        '<a href="predicted.csv">Download as CSV</a><br>\n',
        '<th>Residuals</th>\n',
        '<a href="residuals.csv">Download as CSV</a><br>\n',
    ]

    with open(os.path.join(output_dir, 'index.html'), 'w') as index_f:
        index_f.write('<html><body>\n')
        index_f.write('<h1>Simplicial Linear Regression Summary</h1>\n')
        index_f.write(summary.as_html())
        index_f.write(''.join(download_links))

        index_f.write(file_html(diagnostics, CDN, 'Diagnostics'))
        index_f.write('</body></html>\n')
Ejemplo n.º 7
0
def lme_summary(output_dir: str, model: LMEModel, tree: TreeNode) -> None:
    """ Writes an HTML report for a linear mixed effects model.

    The regression results are deposited in `output_dir` and an
    ``index.html`` file is written there, holding the model summary table,
    download links for the result tables (including the FDR corrected
    pvalues) and a single diagnostic figure.

    Parameters
    ----------
    output_dir : str
        Directory where all of the regression results and
        summaries will be stored.
    model : LMEModel
        Linear Mixed Effects model that contains the model fit and the
        regression results.
    tree : TreeNode
        Tree object that defines the partitions of the features. Each of the
        leaves correspond to the balances in the model.
    """
    # log likelihood of every submodel, keyed by the name of its response
    submodel_ll = {}
    for fit in model.results:
        submodel_ll[fit.model.endog_names] = fit.model.loglike(fit.params)
    loglike = pd.Series(submodel_ll)

    # width and height shared by the prediction and residual plots
    scatter_size = {'plot_width': 500, 'plot_height': 300}

    # Summary object
    model_summary = model.summary()

    # tree decorated with the negated log likelihood
    ll_tree = _decorate_tree(tree, -loglike)

    radial = radialplot(ll_tree, figsize=(800, 800))
    radial.title.text = 'Loglikelihood of submodels'
    radial.title_location = 'above'
    radial.title.align = 'center'
    radial.title.text_font_size = '18pt'

    # 2D scatter plots for prediction and residuals
    prediction = _projected_prediction(model, **scatter_size)
    residuals = _projected_residuals(model, **scatter_size)
    heatmap = _heatmap_summary(model.pvalues.T, model.coefficients().T,
                               plot_width=900, plot_height=400)

    # scatter plots stacked on the left, tree on the right, heatmap on top
    lower_panel = row(column(prediction, residuals), radial)
    figure_layout = column(heatmap, lower_panel)

    # Deposit all regression results
    _deposit_results(model, output_dir)

    # one heading plus one download link per deposited table
    link_section = ''.join((
        '<th>Coefficients</th>\n',
        '<a href="coefficients.csv">Download as CSV</a><br>\n',
        '<th>Coefficient pvalues</th>\n',
        '<a href="pvalues.csv">Download as CSV</a><br>\n',
        '<th>FDR corrected coefficient pvalues</th>\n',
        '<a href="fdr-corrected-pvalues.csv">Download as CSV</a><br>\n',
        '<th>Predicted Balances</th>\n',
        '<a href="predicted.csv">Download as CSV</a><br>\n',
        '<th>Residuals</th>\n',
        '<a href="residuals.csv">Download as CSV</a><br>\n',
    ))

    report_fp = os.path.join(output_dir, 'index.html')
    with open(report_fp, 'w') as report:
        report.write('<html><body>\n')
        report.write('<h1>Simplicial Linear Mixed Effects Summary</h1>\n')
        report.write(model_summary.as_html())
        report.write(link_section)

        report.write(file_html(figure_layout, CDN, 'Diagnostic plots'))
        report.write('</body></html>\n')
Ejemplo n.º 8
0
def ols_summary(output_dir: str, model: OLSModel,
                tree: TreeNode) -> None:
    """ Writes an HTML report for an ordinary least squares fit.

    The regression results are deposited in `output_dir` and an
    ``index.html`` file is written there, holding the model summary table,
    download links for the result tables (including the FDR corrected
    pvalues) and a single diagnostic figure.

    Parameters
    ----------
    output_dir : str
        Directory where all of the regression results and
        summaries will be stored.
    model : OLSModel
        Ordinary Least Squares model that contains the model fit and the
        regression results.
    tree : TreeNode
        Tree object that defines the partitions of the features. Each of the
        leaves correspond to the balances in the model.
    """
    # width and height shared by the prediction and residual plots
    scatter_size = {'plot_width': 500, 'plot_height': 300}

    # Explained sum of squares
    explained_ss = model.ess

    # Summary object, built from the outputs of kfold() and lovo()
    kfold_out = model.kfold()
    lovo_out = model.lovo()
    model_summary = model.summary(kfold_out, lovo_out)

    # Deposit all regression results
    _deposit_results(model, output_dir)

    ess_tree = _decorate_tree(tree, explained_ss)

    radial = radialplot(ess_tree, figsize=(800, 800))
    radial.title.text = 'Explained Sum of Squares'
    radial.title_location = 'above'
    radial.title.align = 'center'
    radial.title.text_font_size = '18pt'

    # 2D scatter plots for prediction and residuals
    prediction = _projected_prediction(model, **scatter_size)
    residuals = _projected_residuals(model, **scatter_size)
    heatmap = _heatmap_summary(model.pvalues.T, model.coefficients().T)

    # scatter plots stacked on the left, tree on the right, heatmap on top
    lower_panel = row(column(prediction, residuals), radial)
    figure_layout = column(heatmap, lower_panel)

    # one heading plus one download link per deposited table
    link_section = ''.join((
        '<th>Coefficients</th>\n',
        '<a href="coefficients.csv">Download as CSV</a><br>\n',
        '<th>Coefficient pvalues</th>\n',
        '<a href="pvalues.csv">Download as CSV</a><br>\n',
        '<th>FDR corrected coefficient pvalues</th>\n',
        '<a href="fdr-corrected-pvalues.csv">Download as CSV</a><br>\n',
        '<th>Predicted Balances</th>\n',
        '<a href="predicted.csv">Download as CSV</a><br>\n',
        '<th>Residuals</th>\n',
        '<a href="residuals.csv">Download as CSV</a><br>\n',
    ))

    report_fp = os.path.join(output_dir, 'index.html')
    with open(report_fp, 'w') as report:
        report.write('<html><body>\n')
        report.write('<h1>Simplicial Linear Regression Summary</h1>\n')
        report.write(model_summary.as_html())
        report.write(link_section)

        report.write(file_html(figure_layout, CDN, 'Diagnostics'))
        report.write('</body></html>\n')