Beispiel #1
0
def R_norm_ma_lowess(exp_obj, pairs,
                     frac=0.75,
                     value_method="tc"):
    """
    MA-normalize pairs of samples using the fit from R_run_loess.

    For each pair (X, Y) of column names, computes on the values
    returned by norm_tc(exp_obj):

      M = log2(X) - log2(Y)
      A = 0.5 * (log2(X) + log2(Y))

    hands (A, M) to R_run_loess and rebuilds the two columns as
    2**(A + M'/2) and 2**(A - M'/2), where M' is the corrected M
    that R_run_loess returns.

    Parameters:
    -----------

    exp_obj: object passed to norm_tc() to obtain the values
    pairs: pairs of column names to normalize
    frac: accepted but not used by this function
    value_method: only 'tc' is supported; anything else raises
    an Exception.

    Returns the normalized dataframe followed by the dataframe of
    values that went into the normalization.
    """
    # Reject unsupported value methods before doing any work
    if value_method != "tc":
        raise Exception("Unknown value method %s" % value_method)
    # Values from norm_tc, minus rows that are null in any paired column
    tc_df = norm_tc(exp_obj)
    tc_df = utils.drop_null_rows(tc_df, columns=utils.flatten(pairs))
    print("LOWESS MA normalizing %d pairs" % len(pairs))
    result_df = utils.drop_null_rows(tc_df.copy(),
                                     columns=utils.flatten(pairs))
    for sample_pair in pairs:
        vals_x = tc_df[sample_pair[0]]
        vals_y = tc_df[sample_pair[1]]
        # M: log2 ratio of the two samples
        log_ratio = np.log2(vals_x) - np.log2(vals_y)
        # A: mean of the two log2 intensities
        mean_log = 0.5 * (np.log2(vals_x) + np.log2(vals_y))
        # Fit M ~ A; the second returned value is not needed here
        fitted_M, _unused_factor = R_run_loess(mean_log, log_ratio)
        # Invert the MA transform: log2(X) = A + M/2, log2(Y) = A - M/2
        exponent_x = (2 * mean_log + fitted_M) / 2.
        exponent_y = (2 * mean_log - fitted_M) / 2.
        rebuilt_x = 2 ** exponent_x
        rebuilt_y = 2 ** exponent_y
        # Delete first, then re-add, so the pair ends up as the last columns
        del result_df[sample_pair[0]]
        del result_df[sample_pair[1]]
        result_df[sample_pair[0]] = rebuilt_x
        result_df[sample_pair[1]] = rebuilt_y
    return result_df, tc_df
Beispiel #2
0
def R_norm_ma_lowess(exp_obj, pairs,
                     frac=0.75,
                     value_method="tc"):
    """
    Run MA-based normalization on pairs of samples using R_run_loess.

    For each pair (X, Y) of column names, computes on the values
    returned by norm_tc(exp_obj):

      M = log2(X) - log2(Y)
      A = 0.5 * (log2(X) + log2(Y))

    fits M ~ A with R_run_loess and rebuilds X and Y from A and the
    corrected M.

    Parameters:
    -----------

    exp_obj: object passed to norm_tc() to obtain the values
    pairs: pairs of column names to normalize
    frac: accepted for signature compatibility; not used by this
    function
    value_method: Method to use to get values for each sample.
    Only 'tc' is supported; any other value raises an Exception.

    Returns a normalized dataframe of values followed by the
    dataframe of values used in the normalization.
    """
    # Total counts normalized dataframe of counts
    if value_method == "tc":
        df = norm_tc(exp_obj)
        df = utils.drop_null_rows(df, columns=utils.flatten(pairs))
    else:
        # Call form of raise is valid on both Python 2 and Python 3
        raise Exception("Unknown value method %s" % (value_method))
    num_pairs = len(pairs)
    # Parenthesized single argument prints identically on Python 2 and 3
    print("LOWESS MA normalizing %d pairs" % (num_pairs))
    norm_df = df.copy()
    # NOTE(review): df was already filtered on the same columns above, so
    # this second pass looks redundant -- confirm before removing.
    norm_df = utils.drop_null_rows(norm_df, columns=utils.flatten(pairs))
    for pair in pairs:
        x, y = df[pair[0]], df[pair[1]]
        # Take each log once; both M and A are built from them
        log_x = np.log2(x)
        log_y = np.log2(y)
        # M = log2 ratio of the two samples
        M = log_x - log_y
        # A = average log2 intensity, i.e. 1/2 * log2(X*Y)
        A = 0.5 * (log_x + log_y)
        # Fit M ~ A; the second returned value is not needed here
        corrected_M, _ = R_run_loess(A, M)
        # Invert the MA transform: log2(X) = A + M/2, log2(Y) = A - M/2
        corrected_x = 2**((2*A + corrected_M)/2.)
        corrected_y = 2**((2*A - corrected_M)/2.)
        # Delete first, then re-add, so the pair ends up as the last columns
        del norm_df[pair[0]]
        del norm_df[pair[1]]
        norm_df[pair[0]] = corrected_x
        norm_df[pair[1]] = corrected_y
    return norm_df, df
Beispiel #3
0
def norm_ma_lowess(exp_obj, pairs,
                   frac=0.75,
                   missing="none",
                   value_method="tc"):
    """
    MA-normalize pairs of samples with a lowess fit.

    Works on the values returned by norm_tc(exp_obj). For each
    pair (X, Y) of column names it computes

      M = log2(X) - log2(Y)
      A = 0.5 * (log2(X) + log2(Y))

    fits M ~ A through run_lowess() and rebuilds the two columns
    as 2**(A + M'/2) and 2**(A - M'/2), where M' is the value
    run_lowess() returns.

    The input values are expected to be non-logged, since log2 is
    applied here.

    Parameters:
    -----------

    exp_obj: object passed to norm_tc() to obtain the values
    pairs: pairs of column names to normalize
    frac: forwarded to run_lowess() as 'frac'
    missing: forwarded to run_lowess() as 'missing'
    value_method: only 'tc' is supported; anything else raises
    an Exception.

    Returns the normalized dataframe followed by the dataframe of
    values that went into the normalization.
    """
    # Reject unsupported value methods before doing any work
    if value_method != "tc":
        raise Exception("Unknown value method %s" % value_method)
    # Values from norm_tc, minus rows that are null in any paired column
    tc_df = norm_tc(exp_obj)
    tc_df = utils.drop_null_rows(tc_df, columns=utils.flatten(pairs))
    print("LOWESS MA normalizing %d pairs" % len(pairs))
    result_df = utils.drop_null_rows(tc_df.copy(),
                                     columns=utils.flatten(pairs))
    for sample_pair in pairs:
        vals_x = tc_df[sample_pair[0]]
        vals_y = tc_df[sample_pair[1]]
        # M: log2 ratio of the two samples
        log_ratio = np.log2(vals_x) - np.log2(vals_y)
        # A: mean of the two log2 intensities
        mean_log = 0.5 * (np.log2(vals_x) + np.log2(vals_y))
        # Fit M ~ A
        fitted_M = run_lowess(mean_log, log_ratio,
                              frac=frac, missing=missing)
        # Invert the MA transform: log2(X) = A + M/2, log2(Y) = A - M/2
        exponent_x = (2 * mean_log + fitted_M) / 2.
        exponent_y = (2 * mean_log - fitted_M) / 2.
        rebuilt_x = 2 ** exponent_x
        rebuilt_y = 2 ** exponent_y
        # Delete first, then re-add, so the pair ends up as the last columns
        del result_df[sample_pair[0]]
        del result_df[sample_pair[1]]
        result_df[sample_pair[0]] = rebuilt_x
        result_df[sample_pair[1]] = rebuilt_y
    return result_df, tc_df
Beispiel #4
0
def norm_ma_lowess(exp_obj, pairs,
                   frac=0.75,
                   missing="none",
                   value_method="tc"):
    """
    Run MA-based loess normalization on total
    count normalized expression values. Computes

      M = log2(X) - log2(Y)
      A = 0.5 * (log2(X) + log2(Y))

    where X, Y are the values returned by norm_tc(exp_obj)
    for sample X and sample Y, respectively.

    Fits M ~ A with run_lowess() and corrects X and Y accordingly.

    Assumes input X and Y values are non-logged.

    Parameters:
    -----------

    exp_obj: object passed to norm_tc() to obtain the values
    pairs: Pairs of column names to consider when doing the
    normalization
    frac: forwarded to run_lowess() as 'frac'
    missing: forwarded to run_lowess() as 'missing'
    value_method: Method to use to get values for each sample.
    Only 'tc' is supported; any other value raises an Exception.

    Returns a normalized dataframe of values followed by the
    dataframe of values used in the normalization (the ones obtained
    by 'value_method').
    """
    # Total counts normalized dataframe of counts
    if value_method == "tc":
        df = norm_tc(exp_obj)
        df = utils.drop_null_rows(df, columns=utils.flatten(pairs))
    else:
        # Call form of raise is valid on both Python 2 and Python 3
        raise Exception("Unknown value method %s" % (value_method))
    num_pairs = len(pairs)
    # Parenthesized single argument prints identically on Python 2 and 3
    print("LOWESS MA normalizing %d pairs" % (num_pairs))
    norm_df = df.copy()
    # NOTE(review): df was already filtered on the same columns above, so
    # this second pass looks redundant -- confirm before removing.
    norm_df = utils.drop_null_rows(norm_df, columns=utils.flatten(pairs))
    for pair in pairs:
        x, y = df[pair[0]], df[pair[1]]
        # Take each log once; both M and A are built from them
        log_x = np.log2(x)
        log_y = np.log2(y)
        # M = log2 ratio of the two samples
        M = log_x - log_y
        # A = average log2 intensity, i.e. 1/2 * log2(X*Y)
        A = 0.5 * (log_x + log_y)
        # Fit M ~ A
        corrected_M = run_lowess(A, M,
                                 frac=frac,
                                 missing=missing)
        # Invert the MA transform: log2(X) = A + M/2, log2(Y) = A - M/2
        corrected_x = 2**((2*A + corrected_M)/2.)
        corrected_y = 2**((2*A - corrected_M)/2.)
        # Delete first, then re-add, so the pair ends up as the last columns
        del norm_df[pair[0]]
        del norm_df[pair[1]]
        norm_df[pair[0]] = corrected_x
        norm_df[pair[1]] = corrected_y
    return norm_df, df