Example #1
0
    def calc(df_tpm, genes_for_estimation):
        """Estimate the contamination share with DeconRNASeq and correct for it.

        Parameters:
            df_tpm: pandas-style DataFrame that provides the columns
                'observed', 'reference_sample' and 'reference_contamination'.
            genes_for_estimation: either a list of row labels of ``df_tpm``
                from which the two reference columns are taken as the
                signature, or an already prepared signature (a DataFrame or
                an rpy2 R data frame) that is used as given.

        Returns:
            dict with the keys
            'percentage' (the contamination percentage read from the
            DeconRNASeq result), 'percentage_std' (always NaN here) and
            'corrected' (second value returned by correct_with_percentage).

        Raises:
            ValueError: if the signature ends up being None after the
                conversion to an R data frame.
        """
        if isinstance(genes_for_estimation, list):
            # .loc is the label-based replacement for the removed .ix indexer;
            # the list is expected to hold row labels (gene ids), not positions.
            signature_df = df_tpm.loc[genes_for_estimation, [
                'reference_sample', 'reference_contamination']]
        else:
            signature_df = genes_for_estimation
        if not isinstance(signature_df, rpy2.robjects.vectors.DataFrame):
            signature_df = gg.convert_dataframe_to_r(signature_df, True)
        if signature_df is None:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(genes_for_estimation)
            print(type(genes_for_estimation))
            raise ValueError("Here it is")

        # Explicit copy: insert() below mutates the frame, and it must neither
        # touch df_tpm nor trip pandas' chained-assignment checks.
        mixture_df = df_tpm[['observed']].copy()
        # for some arcane reason it won't work if you run it with one sample
        # only...
        mixture_df.insert(1, 'duplicate', mixture_df['observed'])
        mixture_df = gg.convert_dataframe_to_r(mixture_df, True)
        result = rpy2.robjects.r('DeconRNASeq')(
            mixture_df,
            signature_df,
        )
        calc_percentage = result[0][1]  # the contamination percentage
        # Only the corrected values are used; the first return value is
        # deliberately ignored.
        _percentage, corrected = correct_with_percentage(
            df_tpm,
            'observed',
            ['reference_sample'],
            ['reference_contamination'],
            calc_percentage)
        return {
            'percentage': calc_percentage,
            'percentage_std': np.nan,
            'corrected': corrected,
        }
Example #2
0
    def calc(df_tpm, genes_for_estimation):
        """Estimate the contamination share via the R helper ``call_undo``.

        The helper is (re)defined in the embedded R session on every call. It
        turns its input into a two-column matrix and runs
        ``gene_expression_input``, ``marker_gene_selection`` and
        ``two_source_deconv`` on it.

        Parameters:
            df_tpm: DataFrame providing the columns 'observed' and
                'observed_1'; it is also handed to correct_with_percentage
                together with the 'reference_sample' and
                'reference_contamination' column names.
            genes_for_estimation: not used by this implementation.

        Returns:
            dict with the keys 'percentage' and 'corrected'. Both are NaN
            when R fails with an 'infinite or missing values in' error.

        Raises:
            rpy2.rinterface.RRuntimeError: for any other R runtime error.
        """
        undo_wrapper_source = """call_undo = function(x) {
            library("Biobase")
            y = as.matrix(x)
            colnames(y) = c(1,2)
            rownames(y) = rownames(x)
            a = unlist(gene_expression_input(y))
            markergene <- marker_gene_selection(a,lowper=0.4,highper=0.1,epsilon1=0.01,epsilon2=0.01)
            two_source_deconv(y,lowper=0.4,highper=0.1,epsilon1=0.01, epsilon2=0.01,return=0)
            }
            """
        rpy2.robjects.r(undo_wrapper_source)

        try:
            shifted = df_tpm[['observed', 'observed_1']].copy()
            # UNDO can't handle zeros in the dataset, so shift both columns
            for column in ('observed', 'observed_1'):
                shifted[column] += 1

            call_undo = rpy2.robjects.r('call_undo')
            r_input = gg.convert_dataframe_to_r(shifted, True)
            undo_result = call_undo(r_input)
            calc_percentage = np.array(undo_result)[0][0]
            _unused, corrected = correct_with_percentage(
                df_tpm,
                'observed',
                ['reference_sample'],
                ['reference_contamination'],
                calc_percentage,
            )
        except rpy2.rinterface.RRuntimeError as r_error:
            # Only the known "infinite or missing values" failure is tolerated
            if 'infinite or missing values in' not in str(r_error):
                raise
            calc_percentage = np.nan
            corrected = np.nan

        return {'percentage': calc_percentage, 'corrected': corrected}