def calc(df_tpm, genes_for_estimation):
    """Estimate the contamination fraction of ``observed`` via DeconRNASeq.

    Args:
        df_tpm: pandas DataFrame providing the columns ``observed``,
            ``reference_sample`` and ``reference_contamination``.
        genes_for_estimation: either a list of row labels of ``df_tpm``
            (the signature is then read from the two reference columns),
            or a ready-made signature table (a pandas or an R data frame).

    Returns:
        dict with the keys ``'percentage'`` (value taken from the
        DeconRNASeq result), ``'percentage_std'`` (always NaN in this
        variant) and ``'corrected'`` (second value returned by
        ``correct_with_percentage``).

    Raises:
        ValueError: if no signature data frame could be built.
        KeyError: in current pandas, if a label in a list-valued
            ``genes_for_estimation`` is missing from ``df_tpm``.
    """
    if isinstance(genes_for_estimation, list):
        # .loc is the label-based replacement for the removed .ix indexer.
        signature_df = df_tpm.loc[
            genes_for_estimation,
            ['reference_sample', 'reference_contamination']
        ]
    else:
        signature_df = genes_for_estimation
    if not isinstance(signature_df, rpy2.robjects.vectors.DataFrame):
        signature_df = gg.convert_dataframe_to_r(signature_df, True)
    if signature_df is None:
        # Report the offending input in the exception itself rather than
        # printing it to stdout.
        raise ValueError(
            "Could not build a signature data frame from "
            "genes_for_estimation=%r (type %s)"
            % (genes_for_estimation, type(genes_for_estimation).__name__)
        )

    # Explicit copy: the insert below must only ever modify our own frame.
    mixture_df = df_tpm[['observed']].copy()
    # for some arcane reason it won't work if you run it with one sample
    # only... so the observed column is passed twice.
    mixture_df.insert(1, 'duplicate', mixture_df['observed'])
    mixture_df = gg.convert_dataframe_to_r(mixture_df, True)

    # NOTE(review): presumably the DeconRNASeq R package has been loaded
    # elsewhere before this lookup - confirm.
    result = rpy2.robjects.r('DeconRNASeq')(
        mixture_df,
        signature_df
    )
    calc_percentage = result[0][1]  # the contamination percentage
    calc_percentage_std = np.nan  # no spread estimate is derived here

    # Only the corrected values are used; the first return value is ignored.
    _, corrected = correct_with_percentage(
        df_tpm,
        'observed',
        ['reference_sample'],
        ['reference_contamination'],
        calc_percentage)
    return {
        'percentage': calc_percentage,
        'percentage_std': calc_percentage_std,
        'corrected': corrected,
    }
def calc(df_tpm, genes_for_estimation):
    """Estimate the contamination fraction of ``observed`` via UNDO.

    Args:
        df_tpm: pandas DataFrame providing the columns ``observed`` and
            ``observed_1`` (handed to UNDO) as well as ``reference_sample``
            and ``reference_contamination`` (handed to
            ``correct_with_percentage`` by name).
        genes_for_estimation: not used by this variant.

    Returns:
        dict with the keys ``'percentage'`` and ``'corrected'``. Both are
        NaN when R fails with an "infinite or missing values in" error.
        No standard deviation or p-value is reported.

    Raises:
        rpy2.rinterface.RRuntimeError: for any other R-side failure.
    """
    # The R-side wrapper is (re)defined on every call.
    # NOTE(review): it only loads Biobase; presumably the UNDO package that
    # provides the deconvolution functions is loaded elsewhere - confirm.
    rpy2.robjects.r("""call_undo = function(x) {
        library("Biobase")
        y = as.matrix(x)
        colnames(y) = c(1,2)
        rownames(y) = rownames(x)
        a = unlist(gene_expression_input(y))
        markergene <- marker_gene_selection(a,lowper=0.4,highper=0.1,epsilon1=0.01,epsilon2=0.01)
        two_source_deconv(y,lowper=0.4,highper=0.1,epsilon1=0.01, epsilon2=0.01,return=0)
    }
    """
    )

    # UNDO can't handle zeros in the dataset, so shift both samples by one
    # on a private copy.
    shifted = df_tpm[['observed', 'observed_1']].copy()
    for column in ('observed', 'observed_1'):
        shifted[column] += 1

    try:
        undo_result = rpy2.robjects.r('call_undo')(
            gg.convert_dataframe_to_r(shifted, True)
        )
        # Top-left entry of the returned matrix is used as the percentage.
        calc_percentage = np.array(undo_result)[0][0]
        _, corrected = correct_with_percentage(
            df_tpm,
            'observed',
            ['reference_sample'],
            ['reference_contamination'],
            calc_percentage)
    except rpy2.rinterface.RRuntimeError as err:
        if 'infinite or missing values in' not in str(err):
            raise
        # Known failure mode: report "no estimate" instead of crashing.
        calc_percentage = np.nan
        corrected = np.nan

    return {
        'percentage': calc_percentage,
        'corrected': corrected,
    }