def run_edgeR(gene_expression, bio_assignment, gene_names, batch_info=None, batch=True):
    """Run an edgeR GLM likelihood-ratio test through the embedded R session.

    The inputs are converted with ``conversion_pydataframe``, a ``DGEList``
    is built from the transposed counts, normalisation factors and
    dispersions are estimated, a GLM is fitted and ``glmLRT`` is applied.

    Parameters
    ----------
    gene_expression
        Count data; it is transposed before being passed to ``DGEList``
        (presumably samples are in rows -- confirm against the caller).
    bio_assignment
        Biological group assignment, turned into the R factor ``bio_group``.
    gene_names
        Passed unchanged as the ``genes`` argument of ``DGEList``.
    batch_info
        Optional batch assignment, turned into the R factor ``batch_group``.
        When it is ``None`` no batch term is modelled, whatever ``batch`` is.
    batch
        Whether to add the batch term to the design (only honoured when
        ``batch_info`` is supplied).

    Returns
    -------
    The result of evaluating ``lrt$table$PValue`` in R.

    Notes
    -----
    The R variables ``dge``, ``bio_group``, ``design``, ``fit``, ``lrt`` (and
    ``batch_group`` when a batch term is used) are created or overwritten in
    the R session.  The design columns are renamed with fixed-length name
    vectors, so each factor is expected to contribute exactly one column,
    i.e. to have two levels.
    """
    # A batch term is modelled only if it is both requested and supplied.
    with_batch = batch_info is not None and bool(batch)

    counts_r = conversion_pydataframe(gene_expression)
    bio_r = conversion_pydataframe(bio_assignment)
    r.assign("dge", r.DGEList(counts=r.t(counts_r), genes=gene_names))
    r.assign("bio_group", r.factor(bio_r))
    r("dge$samples$bio_group <- bio_group")

    if with_batch:
        batch_r = conversion_pydataframe(batch_info)
        r.assign("batch_group", r.factor(batch_r))
        r("dge$samples$batch_group <- batch_group")
        design_steps = (
            """design <- model.matrix(~bio_group+batch_group, data = dge$samples)""",
            """colnames(design) <- c("Intercept", "bio", "batch")""",
        )
        # Test the biological coefficient explicitly; "batch" is the last column.
        test_step = """lrt <- glmLRT(fit, coef="bio")"""
    else:
        design_steps = (
            """design <- model.matrix(~bio_group, data = dge$samples)""",
            """colnames(design) <- c("Intercept", "bio")""",
        )
        # No coef given: glmLRT falls back to its default coefficient.
        test_step = """lrt <- glmLRT(fit)"""

    # The remaining R commands, executed strictly in this order.
    pipeline = (
        ("""dge <- suppressWarnings(edgeR::calcNormFactors(dge))""",)
        + design_steps
        + (
            """dge <- estimateDisp(dge, design)""",
            """fit <- glmFit(dge, design)""",
            test_step,
        )
    )
    for command in pipeline:
        r(command)

    return r("lrt$table$PValue")
Esempio n. 2
0
def ancova(lm1, lm2, names=('lm1', 'lm2')):
    """
    Test whether two linear fits differ in slope and in intercept.

    Only single-variable models are supported: `lm1` and `lm2` must each
    expose `x` and `y` attributes.  The observations of both models are
    pooled, every point is tagged with the matching entry of `names`, and two
    R `aov` models are fitted on the pooled data.

    Returns (pval of slope difference, pval of intercept difference).

    Keep in mind that when the slopes differ significantly, the intercept
    comparison is not really interpretable.

    """
    # Approach taken from this blog post:
    # http://r-eco-evo.blogspot.com/2011/08/
    #           comparing-two-regression-slopes-by.html
    #
    #   aov(y ~ x * factor): the interaction term, on the 3rd line of the
    #       summary table, measures the difference in slope.
    #   aov(y ~ x + factor): the "factor" term, on the 2nd line of the
    #       summary table, measures the difference in intercept.
    #
    #   anova(model1, model2) would additionally tell whether dropping the
    #       interaction term affects the model fit.

    # Pool both data sets and tag each observation with its model's name.
    tags = [names[0] for _ in lm1.x] + [names[1] for _ in lm2.x]
    group = r.factor(np.array(tags))
    pooled_x = np.concatenate((lm1.x, lm2.x))
    pooled_y = np.concatenate((lm1.y, lm2.y))

    def _aov_pvalues(formula_text):
        # Populate the formula's environment with the pooled variables, fit
        # the aov model and return the 'Pr(>F)' column of its first summary
        # table.
        formula = robjects.Formula(formula_text)
        env = formula.environment
        env['xi'] = pooled_x
        env['yi'] = pooled_y
        env['labels'] = group
        fitted = r('aov(%s)' % formula.r_repr())
        return r.summary(fitted)[0].rx2('Pr(>F)')

    # Model with the interaction term: third row is xi:labels.
    slope_diff_pval = _aov_pvalues('yi~xi*labels')[2]

    # Model without the interaction term: second row is labels.
    intercept_diff_pval = _aov_pvalues('yi~xi+labels')[1]

    # TODO: anova(result1, result2)?

    return slope_diff_pval, intercept_diff_pval
Esempio n. 3
0
                  rownames=['true'],
                  colnames=['predicted'])

# <headingcell level=4>

# Using non-base packages in Rpy2

# <codecell>

import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
r = robjects.r

# importr() loads an installed R package and exposes its functions as
# attributes of the returned object.
e1071 = importr('e1071')
# NOTE: np2r, iris and Xr are not defined in this cell; they are expected to
# come from earlier cells of the notebook.
Yr = np2r(iris['Type'])
# e1071's svm() treats a factor response as a classification task.
Yr = r.factor(Yr)
svm = e1071.svm(Xr, Yr)
# Predict on the same data the model was fitted on.
yhat = r.predict(svm, Xr)
# Cross-tabulate predictions against the labels.  The parenthesised
# single-argument form prints the same thing on Python 2 and Python 3.
print(r.table(yhat, Yr))

# <headingcell level=4>

# ggplot2 in python with Rpy2

# <markdowncell>

# Thanks to [Fei Yu](http://www.thefeiyu.com/) for this vignette.

# <codecell>

import rpy2.robjects as robjects
# Contingency table of the labels against yhat_hclust.  The parenthesised
# single-argument form prints the same thing on Python 2 and Python 3.
print(pd.crosstab(iris['Type'], yhat_hclust,
                  rownames=['true'],
                  colnames=['predicted']))

# <headingcell level=4>

# Using non-base packages in Rpy2

# <codecell>

import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
r = robjects.r

# importr() loads an installed R package and exposes its functions as
# attributes of the returned object.
e1071 = importr('e1071')
# NOTE: np2r, iris and Xr are not defined in this cell; they are expected to
# come from earlier cells of the notebook.
Yr = np2r(iris['Type'])
# e1071's svm() treats a factor response as a classification task.
Yr = r.factor(Yr)
svm = e1071.svm(Xr, Yr)
# Predict on the same data the model was fitted on.
yhat = r.predict(svm, Xr)
# Cross-tabulate predictions against the labels.  The parenthesised
# single-argument form prints the same thing on Python 2 and Python 3.
print(r.table(yhat, Yr))

# <headingcell level=4>

# ggplot2 in python with Rpy2

# <markdowncell>

# Thanks to [Fei Yu](http://www.thefeiyu.com/) for this vignette.

# <codecell>

import rpy2.robjects as robjects