def R_norm_ma_lowess(exp_obj, pairs, frac=0.75, value_method="tc"):
    """
    MA-normalize pairs of samples with the loess fit from R_run_loess().

    For each pair (X, Y) this computes, on the log2 scale,
      M = log2(X) - log2(Y)
      A = 0.5 * (log2(X) + log2(Y))
    passes (A, M) to R_run_loess() and rebuilds X and Y from A and the
    corrected M it returns.

    Parameters:
    -----------
    exp_obj: object handed to norm_tc() to obtain the values

    pairs: pairs of column names; element 0 and element 1 of each
    pair select the two samples

    frac: accepted but never used by this function

    value_method: how values are obtained; only 'tc' (norm_tc) is
    supported

    Returns the normalized dataframe followed by the dataframe of
    values the normalization was computed from.

    Raises Exception when value_method is not 'tc'.
    """
    if value_method != "tc":
        raise Exception("Unknown value method %s" % value_method)
    # Total counts normalized values, filtered on the columns named in pairs
    df = utils.drop_null_rows(norm_tc(exp_obj), columns=utils.flatten(pairs))
    print("LOWESS MA normalizing %d pairs" % len(pairs))
    norm_df = utils.drop_null_rows(df.copy(), columns=utils.flatten(pairs))
    for pair in pairs:
        name_x, name_y = pair[0], pair[1]
        # Inputs always come from df, never from the partially
        # corrected norm_df
        sample_x = df[name_x]
        sample_y = df[name_y]
        log_x = np.log2(sample_x)
        log_y = np.log2(sample_y)
        log_ratio = log_x - log_y
        # Average log2 intensity: 0.5 * log2(X * Y)
        mean_intensity = 0.5 * (log_x + log_y)
        # Fit M ~ A; the second returned value is not needed here
        fitted_ratio, _ = R_run_loess(mean_intensity, log_ratio)
        new_x = 2**((2*mean_intensity + fitted_ratio)/2.)
        new_y = 2**((2*mean_intensity - fitted_ratio)/2.)
        # Remove both old columns before inserting the corrected ones
        del norm_df[name_x]
        del norm_df[name_y]
        norm_df[name_x] = new_x
        norm_df[name_y] = new_y
    return norm_df, df
def R_norm_ma_lowess(exp_obj, pairs, frac=0.75, value_method="tc"):
    """
    Use R lowess to MA-normalize pairs of samples.

    For each pair (X, Y):
      M = log2(X) - log2(Y)
      A = 0.5 * (log2(X) + log2(Y))
    M ~ A is fitted by R_run_loess() and X, Y are recomputed from A
    and the corrected M.

    Parameters:
    -----------
    exp_obj: object passed to norm_tc() to obtain the values

    pairs: pairs of column names (pair[0], pair[1]) to normalize
    against each other

    frac: currently unused; it is not forwarded to R_run_loess()

    value_method: method used to get values for each sample. Only
    'tc' (norm_tc) is supported.

    Returns a normalized dataframe of values followed by the
    dataframe of values used in the normalization.

    Raises Exception if value_method is not 'tc'.
    """
    # Total counts normalized dataframe of counts
    if value_method == "tc":
        df = norm_tc(exp_obj)
        df = utils.drop_null_rows(df, columns=utils.flatten(pairs))
    else:
        # Call form of raise: valid on both Python 2 and Python 3
        raise Exception("Unknown value method %s" % (value_method,))
    # print() with a single pre-formatted string behaves the same on
    # Python 2 and Python 3
    print("LOWESS MA normalizing %d pairs" % (len(pairs),))
    norm_df = df.copy()
    norm_df = utils.drop_null_rows(norm_df, columns=utils.flatten(pairs))
    for pair in pairs:
        x, y = df[pair[0]], df[pair[1]]
        log2_x, log2_y = np.log2(x), np.log2(y)
        M = log2_x - log2_y
        # A = average log2 intensity, i.e. 0.5 * log2(X * Y)
        A = 0.5 * (log2_x + log2_y)
        # Fit M ~ A; only the corrected M is used, the second return
        # value is discarded
        corrected_M, _ = R_run_loess(A, M)
        corrected_x = 2**((2*A + corrected_M)/2.)
        corrected_y = 2**((2*A - corrected_M)/2.)
        # Drop the uncorrected columns, then store the corrected ones
        del norm_df[pair[0]]
        del norm_df[pair[1]]
        norm_df[pair[0]] = corrected_x
        norm_df[pair[1]] = corrected_y
    return norm_df, df
def norm_ma_lowess(exp_obj, pairs, frac=0.75, missing="none",
                   value_method="tc"):
    """
    MA-based lowess normalization of total count normalized values.

    For every pair of samples (X, Y), with X and Y non-logged:
      M = log2(X) - log2(Y)        (log ratio)
      A = 0.5 * (log2(X) + log2(Y))  (average log intensity)
    The regression M ~ A is fitted with run_lowess() and X and Y are
    rebuilt from A and the corrected M.

    Parameters:
    -----------
    exp_obj: Experiment object, handed to norm_tc()

    pairs: pairs of column names; element 0 and element 1 of each
    pair select the two samples

    frac: forwarded to run_lowess() as 'frac'

    missing: forwarded to run_lowess() as 'missing'

    value_method: how the per-sample values are obtained. Only 'tc'
    (total count normalization through norm_tc) is supported.

    Returns the normalized dataframe followed by the dataframe of
    values that fed the normalization (the 'value_method' values,
    since lowess is not run on the raw counts).

    Raises Exception when value_method is not 'tc'.
    """
    if value_method != "tc":
        raise Exception("Unknown value method %s" % value_method)
    # Total counts normalized values, filtered on the columns named in pairs
    df = utils.drop_null_rows(norm_tc(exp_obj), columns=utils.flatten(pairs))
    print("LOWESS MA normalizing %d pairs" % len(pairs))
    norm_df = utils.drop_null_rows(df.copy(), columns=utils.flatten(pairs))
    for pair in pairs:
        name_x, name_y = pair[0], pair[1]
        # Inputs always come from df, never from the partially
        # corrected norm_df
        sample_x = df[name_x]
        sample_y = df[name_y]
        log_x = np.log2(sample_x)
        log_y = np.log2(sample_y)
        log_ratio = log_x - log_y
        # Average log2 intensity: 0.5 * log2(X * Y)
        mean_intensity = 0.5 * (log_x + log_y)
        # Fit M ~ A
        fitted_ratio = run_lowess(mean_intensity, log_ratio,
                                  frac=frac, missing=missing)
        new_x = 2**((2*mean_intensity + fitted_ratio)/2.)
        new_y = 2**((2*mean_intensity - fitted_ratio)/2.)
        # Remove both old columns before inserting the corrected ones
        del norm_df[name_x]
        del norm_df[name_y]
        norm_df[name_x] = new_x
        norm_df[name_y] = new_y
    return norm_df, df
def norm_ma_lowess(exp_obj, pairs, frac=0.75, missing="none",
                   value_method="tc"):
    """
    Run MA-based loess normalization on total count normalized
    expression values. Computes

      M = log2(X/Y)
      A = 0.5 * log2(X*Y)

    where X, Y are total count normalized expression values for
    sample X and sample Y, respectively. Fits loess regression
    M ~ A and corrects X and Y accordingly. Assumes input X and Y
    values are non-logged.

    Parameters:
    -----------
    exp_obj: Experiment object

    pairs: Pairs to consider when doing the normalization; pair[0]
    and pair[1] are used as column names

    frac: frac parameter passed to run_lowess()

    missing: missing parameter passed to run_lowess()

    value_method: Method to use to get values for each sample. If
    'tc', then normalize counts by total counts and then use that
    for lowess. No other method is supported.

    Returns a normalized dataframe of values followed by the
    dataframe of values used in the normalization (the ones obtained
    by 'value_method', since lowess is not done on the counts.)

    Raises Exception if value_method is not 'tc'.
    """
    # Total counts normalized dataframe of counts
    if value_method == "tc":
        df = norm_tc(exp_obj)
        df = utils.drop_null_rows(df, columns=utils.flatten(pairs))
    else:
        # Call form of raise: valid on both Python 2 and Python 3
        raise Exception("Unknown value method %s" % (value_method,))
    # print() with a single pre-formatted string behaves the same on
    # Python 2 and Python 3
    print("LOWESS MA normalizing %d pairs" % (len(pairs),))
    norm_df = df.copy()
    norm_df = utils.drop_null_rows(norm_df, columns=utils.flatten(pairs))
    for pair in pairs:
        x, y = df[pair[0]], df[pair[1]]
        log2_x, log2_y = np.log2(x), np.log2(y)
        M = log2_x - log2_y
        # A = average log2 intensity, i.e. 0.5 * log2(X * Y)
        A = 0.5 * (log2_x + log2_y)
        # Fit M ~ A
        corrected_M = run_lowess(A, M, frac=frac, missing=missing)
        corrected_x = 2**((2*A + corrected_M)/2.)
        corrected_y = 2**((2*A - corrected_M)/2.)
        # Drop the uncorrected columns, then store the corrected ones
        del norm_df[pair[0]]
        del norm_df[pair[1]]
        norm_df[pair[0]] = corrected_x
        norm_df[pair[1]] = corrected_y
    return norm_df, df