Beispiel #1
0
def equalize_lib_sizes(counts, groups, lib_size, dispersion=0, common_size=None):
    """Adjust counts so that every library behaves as if it had a common size.

    For each group of columns a per-row coefficient is fitted with
    ``glm_one_group_numba``; its exponential is multiplied by the observed
    library sizes (input means) and by ``common_size`` (output means), and
    ``q2qnbinom`` maps the counts from the former to the latter.

    Parameters
    ---------
    counts: 2-D table of counts (pandas DataFrame or array-like); columns are
        addressed by position.
    groups: mapping (dict or pandas Series) whose values are sequences of
        integer column positions belonging to one group.
        NOTE(review): columns not covered by any group keep uninitialised
        values in the mean matrices -- confirm callers always cover them all.
    lib_size: one library size per column of ``counts``.
    dispersion: scalar or one value per row of ``counts``.
    common_size: target library size; when falsy (``None`` or ``0``) the
        geometric mean of ``lib_size`` is used.

    Returns
    -------
    (pseudo, common_size): the adjusted counts with negative values clipped to
        zero, and the common library size that was used.
    """
    # Work on plain arrays so that every lookup below is positional; this
    # replaces the removed pandas accessors ``.ix`` / ``.as_matrix()`` and keeps
    # the column selection consistent with ``input_mean[:, group]``.
    count_matrix = np.asarray(counts)
    lib_sizes = np.asarray(lib_size, dtype=np.double)

    # Kept as a truthiness test so that ``common_size=0`` still means "unset".
    if not common_size:
        common_size = np.exp(np.mean(np.log(lib_sizes)))

    # A scalar dispersion is expanded to one value per row.
    if np.ndim(dispersion) == 0:
        dispersion = np.repeat(dispersion, count_matrix.shape[0])

    input_mean = np.empty(count_matrix.shape, dtype=np.double)
    output_mean = input_mean.copy()

    # ``.items()`` works for dicts on Python 2/3 and for pandas Series.
    for _, group in groups.items():
        group_sizes = lib_sizes[group]
        beta = glm_one_group_numba(
            np.ascontiguousarray(count_matrix[:, group], dtype=np.int32),
            dispersion,
            np.ascontiguousarray(np.log(group_sizes), dtype=np.double),
        )
        # Column vector of exp(beta); presumably a per-row expected proportion
        # since the offsets passed above are log library sizes -- TODO confirm.
        rate = np.exp(np.ravel(np.asarray(beta))).reshape(-1, 1)
        # Outer products via broadcasting: (rows, 1) * (group columns,).
        input_mean[:, group] = rate * group_sizes
        output_mean[:, group] = rate * common_size

    pseudo = q2qnbinom(count_matrix, input_mean, output_mean, dispersion)
    pseudo[pseudo < 0] = 0
    return pseudo, common_size
Beispiel #2
0
def equalize_lib_sizes(counts,
                       groups,
                       lib_size,
                       dispersion=0,
                       common_size=None):
    """Adjust counts so that every library behaves as if it had a common size.

    For each group of columns a per-row coefficient is fitted with
    ``glm_one_group_numba``; its exponential is multiplied by the observed
    library sizes (input means) and by ``common_size`` (output means), and
    ``q2qnbinom`` maps the counts from the former to the latter.

    Parameters
    ---------
    counts: 2-D table of counts (pandas DataFrame or array-like); columns are
        addressed by position.
    groups: mapping (dict or pandas Series) whose values are sequences of
        integer column positions belonging to one group.
        NOTE(review): columns not covered by any group keep uninitialised
        values in the mean matrices -- confirm callers always cover them all.
    lib_size: one library size per column of ``counts``.
    dispersion: scalar or one value per row of ``counts``.
    common_size: target library size; when falsy (``None`` or ``0``) the
        geometric mean of ``lib_size`` is used.

    Returns
    -------
    (pseudo, common_size): the adjusted counts with negative values clipped to
        zero, and the common library size that was used.
    """
    # Plain arrays make every lookup positional; this replaces the removed
    # pandas accessors ``.ix`` / ``.as_matrix()`` and keeps the column
    # selection consistent with ``input_mean[:, group]``.
    count_matrix = np.asarray(counts)
    lib_sizes = np.asarray(lib_size, dtype=np.double)

    # Kept as a truthiness test so that ``common_size=0`` still means "unset".
    if not common_size:
        common_size = np.exp(np.mean(np.log(lib_sizes)))

    # A scalar dispersion is expanded to one value per row.
    if np.ndim(dispersion) == 0:
        dispersion = np.repeat(dispersion, count_matrix.shape[0])

    input_mean = np.empty(count_matrix.shape, dtype=np.double)
    output_mean = input_mean.copy()

    # ``.items()`` works for dicts on Python 2/3 and for pandas Series.
    for _, group in groups.items():
        group_sizes = lib_sizes[group]
        beta = glm_one_group_numba(
            np.ascontiguousarray(count_matrix[:, group], dtype=np.int32),
            dispersion,
            np.ascontiguousarray(np.log(group_sizes), dtype=np.double))
        # Column vector of exp(beta); presumably a per-row expected proportion
        # since the offsets passed above are log library sizes -- TODO confirm.
        rate = np.exp(np.ravel(np.asarray(beta))).reshape(-1, 1)
        # Outer products via broadcasting: (rows, 1) * (group columns,).
        input_mean[:, group] = rate * group_sizes
        output_mean[:, group] = rate * common_size

    pseudo = q2qnbinom(count_matrix, input_mean, output_mean, dispersion)
    pseudo[pseudo < 0] = 0
    return pseudo, common_size
Beispiel #3
0
def average_cpm(y, lib_size=None, prior_count=2, dispersion=0.05):
    """Return an average log2 counts-per-million value for each row of ``y``.

    A prior count, scaled by each library's size relative to the mean library
    size, is added to the counts; ``glm_one_group_numba`` is then fitted with
    offsets ``log(lib_size + 2 * scaled_prior)`` and the result is shifted by
    ``log(1e6)`` and converted from natural log to log2.

    Parameters
    ---------
    y: matrix of counts, one row per feature and one column per library.
    lib_size: one library size per column of ``y``; defaults to the column
        sums of ``y`` when ``None``.
    prior_count: average prior count added to every observation before
        fitting.
    dispersion: scalar applied to every row, or one value per row of ``y``.

    Returns
    -------
    numpy array built from the fitted values; presumably one entry per row of
    ``y`` -- TODO confirm against ``glm_one_group_numba``.
    """
    y = np.ascontiguousarray(y)
    if lib_size is None:
        # The signature advertises ``None`` as the default, so fall back to
        # the total count of each library instead of failing on ``None``.
        lib_size = y.sum(axis=0)
    # Column vector so it broadcasts against ``y.T`` (libraries x features).
    lib_size = np.asarray(lib_size, dtype=np.double).reshape(-1, 1)

    prior_counts_scaled = lib_size / np.mean(lib_size) * prior_count
    offset = np.log(lib_size + 2 * prior_counts_scaled)

    # Only a scalar dispersion needs expanding; repeating a per-row vector
    # would produce an array of the wrong length.
    if np.ndim(dispersion) == 0:
        dispersion = np.repeat(dispersion, y.shape[0])
    else:
        dispersion = np.asarray(dispersion, dtype=np.double)

    abundance = glm_one_group_numba(
        np.ascontiguousarray((y.T + prior_counts_scaled).T),
        dispersion,
        offset.T[0],
    )
    # Natural-log scale -> per-million -> log2.
    return (np.asarray(abundance) + np.log(1e6)) / np.log(2)
Beispiel #4
0
def average_cpm(y, lib_size=None, prior_count=2, dispersion=0.05):
    """Return an average log2 counts-per-million value for each row of ``y``.

    A prior count, scaled by each library's size relative to the mean library
    size, is added to the counts; ``glm_one_group_numba`` is then fitted with
    offsets ``log(lib_size + 2 * scaled_prior)`` and the result is shifted by
    ``log(1e6)`` and converted from natural log to log2.

    Parameters
    ---------
    y: matrix of counts, one row per feature and one column per library.
    lib_size: one library size per column of ``y``; defaults to the column
        sums of ``y`` when ``None``.
    prior_count: average prior count added to every observation before
        fitting.
    dispersion: scalar applied to every row, or one value per row of ``y``.

    Returns
    -------
    numpy array built from the fitted values; presumably one entry per row of
    ``y`` -- TODO confirm against ``glm_one_group_numba``.
    """
    y = np.ascontiguousarray(y)
    if lib_size is None:
        # The signature advertises ``None`` as the default, so fall back to
        # the total count of each library instead of failing on ``None``.
        lib_size = y.sum(axis=0)
    # Column vector so it broadcasts against ``y.T`` (libraries x features).
    lib_size = np.asarray(lib_size, dtype=np.double).reshape(-1, 1)

    prior_counts_scaled = lib_size / np.mean(lib_size) * prior_count
    offset = np.log(lib_size + 2 * prior_counts_scaled)

    # Only a scalar dispersion needs expanding; repeating a per-row vector
    # would produce an array of the wrong length.
    if np.ndim(dispersion) == 0:
        dispersion = np.repeat(dispersion, y.shape[0])
    else:
        dispersion = np.asarray(dispersion, dtype=np.double)

    abundance = glm_one_group_numba(
        np.ascontiguousarray((y.T + prior_counts_scaled).T), dispersion,
        offset.T[0])
    # Natural-log scale -> per-million -> log2.
    return (np.asarray(abundance) + np.log(1e6)) / np.log(2)