def run_edgeR(gene_expression, bio_assignment, gene_names, batch_info=None, batch=True):
    """Run an edgeR GLM likelihood-ratio test in the embedded R session.

    The function builds a ``DGEList`` in R, attaches the biological (and
    optionally batch) grouping to its sample table, normalizes, estimates
    dispersions, fits a GLM and runs ``glmLRT``.

    Parameters:
    -----------
    - gene_expression: count data; converted with ``conversion_pydataframe``
      and transposed with R's ``t()`` before being used as ``counts``
      (presumably samples x genes on the Python side -- confirm with caller).
    - bio_assignment: biological group label per sample; turned into an R
      factor named ``bio_group``.
    - gene_names: passed unchanged as the ``genes`` argument of ``DGEList``.
    - batch_info: optional batch label per sample; turned into an R factor
      named ``batch_group``. When ``None``, no batch term is modelled.
    - batch: when falsy, the batch term is left out even if ``batch_info``
      is given.

    Returns:
    --------
    The result of evaluating ``lrt$table$PValue`` in R.

    Side effects: assigns ``dge``, ``bio_group``, ``design``, ``fit``, ``lrt``
    (and ``batch_group`` when batch is modelled) in the R global environment.

    NOTE(review): the design columns are renamed to fixed names, which
    presumably requires each factor to have exactly two levels -- confirm.
    """
    # A batch term can only be modelled when batch labels were supplied.
    use_batch = False if batch_info is None else batch

    counts = conversion_pydataframe(gene_expression)
    bio_labels = conversion_pydataframe(bio_assignment)
    r.assign("dge", r.DGEList(counts=r.t(counts), genes=gene_names))
    r.assign("bio_group", r.factor(bio_labels))
    r("dge$samples$bio_group <- bio_group")

    if use_batch:
        batch_labels = conversion_pydataframe(batch_info)
        r.assign("batch_group", r.factor(batch_labels))
        r("dge$samples$batch_group <- batch_group")
        design_commands = (
            "design <- model.matrix(~bio_group+batch_group, data = dge$samples)",
            'colnames(design) <- c("Intercept", "bio", "batch")',
        )
        # Test the biological coefficient explicitly; batch is a covariate.
        lrt_command = 'lrt <- glmLRT(fit, coef="bio")'
    else:
        design_commands = (
            "design <- model.matrix(~bio_group, data = dge$samples)",
            'colnames(design) <- c("Intercept", "bio")',
        )
        # No coef given here, so glmLRT falls back to its default coefficient.
        lrt_command = "lrt <- glmLRT(fit)"

    # The R steps below must run in exactly this order.
    pipeline = (
        ("dge <- suppressWarnings(edgeR::calcNormFactors(dge))",)
        + design_commands
        + (
            "dge <- estimateDisp(dge, design)",
            "fit <- glmFit(dge, design)",
            lrt_command,
        )
    )
    for command in pipeline:
        r(command)

    return r("lrt$table$PValue")
# Example no. 2
# 0
 def calc_size_factors(self, method="TMM"):
     """
     Compute one size factor per column of ``self.count_df``.

     Each size factor is the sample's ``lib.size`` multiplied by its
     ``norm.factors`` entry, as reported by ``calcNormFactors``; the result
     is then divided by its maximum so the largest factor equals 1.0.

     Parameters:
     -----------
     - method: normalization method forwarded to ``calcNormFactors``

     Returns:
     --------
     pandas Series of size factors indexed by ``self.count_df.columns``.
     """
     # NOTE(review): count_df is handed to R as-is; presumably this relies on
     # automatic pandas-to-R conversion being active -- confirm.
     dge = r.calcNormFactors(r.DGEList(self.count_df), method=method)

     samples = dge.rx2("samples")
     lib_sizes = np.array(samples.rx2("lib.size"))
     norm_factors = np.array(samples.rx2("norm.factors"))

     unscaled = pd.Series(lib_sizes * norm_factors, index=self.count_df.columns)
     # Rescale relative to the largest sample.
     return unscaled / unscaled.max()
# Example no. 3
# 0
    def norm_expr_vals(self, ref_col, method="TMM"):
        """
        Normalize expression values relative to a
        reference sample and compute counts per million.

        The counts in ``self.exp_obj.counts_df`` are converted to an R
        object, wrapped in a ``DGEList``, normalized with
        ``calcNormFactors`` and passed to ``cpm``. A slice of each result
        is printed to stdout as a progress/debug aid.

        Parameters:
        -----------
        - ref_col: Reference column, forwarded as ``refColumn``
        - method: Method to use for normalization, e.g. 'TMM'

        Returns:
        --------
        Tuple ``(r_dge, r_cpm_result)``: the normalized DGEList R object
        and the R result of ``cpm`` on it.
        """
        # Convert the pandas counts table to an R object
        r_counts = conversion_pydataframe(self.exp_obj.counts_df)
        r_dge = r.DGEList(r_counts)
        # Calculate normalization factors
        r_dge = r.calcNormFactors(r_dge, refColumn=ref_col, method=method)
        # Single-argument print(...) emits the same text on Python 2 and 3;
        # the former print statements were a SyntaxError on Python 3.
        print(r_dge[0:10])
        # Get counts per million
        r_cpm_result = r.cpm(r_dge)
        print("Counts per million: ")
        print(r_cpm_result[0:10])
        return r_dge, r_cpm_result