def equalize_lib_sizes(counts, groups, lib_size, dispersion=0, common_size=None):
    """Equalize library sizes across samples via quantile-to-quantile NB mapping.

    Parameters
    ----------
    counts : pandas.DataFrame
        Row-by-sample count matrix (rows are fitted independently).
    groups : mapping
        Group label -> collection of sample column labels.
    lib_size : pandas.Series or array-like
        Per-sample library sizes, indexable by the labels in ``groups``.
    dispersion : float or array-like, optional
        Negative-binomial dispersion; a scalar is broadcast to one value
        per row of ``counts``.
    common_size : float, optional
        Target common library size. Defaults to the geometric mean of
        ``lib_size``.

    Returns
    -------
    (pseudo, common_size)
        ``pseudo`` is the matrix of pseudo-counts rescaled to
        ``common_size`` (negative values clipped to 0).
    """
    # BUG FIX: `if not common_size` also fired for an explicit
    # common_size == 0.0; test the None sentinel explicitly.
    if common_size is None:
        # Geometric mean of the observed library sizes.
        common_size = np.exp(np.mean(np.log(lib_size)))
    # Broadcast a scalar dispersion to one value per row (EAFP).
    try:
        len(dispersion)
    except TypeError:
        dispersion = np.repeat(dispersion, counts.shape[0])
    input_mean = np.empty(counts.shape, dtype=np.double)
    output_mean = input_mean.copy()
    # FIX: dict.iteritems() is Python-2-only; .items() behaves the same
    # here and also exists on pandas objects.
    for key, group in groups.items():
        # Per-row log-abundance fitted within this group.
        # NOTE(review): DataFrame.ix and .as_matrix() were removed in
        # pandas 1.0 — kept as-is to preserve behavior on the pinned
        # pandas version; migrate to .loc / .to_numpy() when upgrading.
        beta = glm_one_group_numba(
            np.ascontiguousarray(counts.ix[:, group].as_matrix(), dtype=np.int32),
            dispersion,
            np.ascontiguousarray(np.log(lib_size[group]), dtype=np.double))
        beta = np.asarray(beta)
        # Column vector of fitted rates, one per row of `counts`.
        bn_lambda = np.exp(beta).T.reshape(len(beta), 1)
        # Row vectors: observed and target library sizes for this group.
        temp_lib_size = np.array(lib_size[group]).reshape(1, len(lib_size[group]))
        out_size = np.repeat(common_size, len(group)).reshape(1, len(group))
        # Outer products: expected counts under observed vs. common sizes.
        input_mean[:, group] = np.dot(bn_lambda, temp_lib_size)
        output_mean[:, group] = np.dot(bn_lambda, out_size)
    pseudo = q2qnbinom(np.asarray(counts.as_matrix()), input_mean, output_mean,
                       dispersion)
    # Quantile mapping can produce small negative values; clip to zero.
    pseudo[pseudo < 0] = 0
    return pseudo, common_size
def equalize_lib_sizes(counts, groups, lib_size, dispersion=0, common_size=None):
    """Equalize library sizes across samples via quantile-to-quantile NB mapping.

    Parameters
    ----------
    counts : pandas.DataFrame
        Row-by-sample count matrix (rows are fitted independently).
    groups : mapping
        Group label -> collection of sample column labels.
    lib_size : pandas.Series or array-like
        Per-sample library sizes, indexable by the labels in ``groups``.
    dispersion : float or array-like, optional
        Negative-binomial dispersion; a scalar is broadcast to one value
        per row of ``counts``.
    common_size : float, optional
        Target common library size; defaults to the geometric mean of
        ``lib_size``.

    Returns
    -------
    (pseudo, common_size)
        ``pseudo`` is the matrix of pseudo-counts rescaled to
        ``common_size`` (negative values clipped to 0).
    """
    # NOTE(review): the falsy test also triggers for an explicit
    # common_size == 0; presumably `common_size is None` was intended.
    if not common_size:
        # Geometric mean of the observed library sizes.
        common_size = np.exp(np.mean(np.log(lib_size)))
    # Broadcast a scalar dispersion to one value per row (EAFP).
    try:
        len(dispersion)
    except TypeError:
        dispersion = np.repeat(dispersion, counts.shape[0])
    input_mean = np.empty(counts.shape, dtype=np.double)
    output_mean = input_mean.copy()
    # NOTE(review): .iteritems(), DataFrame.ix and .as_matrix() are
    # Python-2 / old-pandas APIs, all removed in Python 3 / pandas 1.0.
    for key, group in groups.iteritems():
        # Per-row log-abundance fitted within this group.
        beta = glm_one_group_numba(
            np.ascontiguousarray(counts.ix[:, group].as_matrix(), dtype=np.int32),
            dispersion,
            np.ascontiguousarray(np.log(lib_size[group]), dtype=np.double))
        beta = np.asarray(beta)
        # Column vector of fitted rates, one per row of `counts`.
        bn_lambda = np.exp(beta).T.reshape(len(beta), 1)
        # Row vectors: observed and target library sizes for this group.
        temp_lib_size = np.array(lib_size[group]).reshape(
            1, len(lib_size[group]))
        out_size = np.repeat(common_size, len(group)).reshape(1, len(group))
        # Outer products: expected counts under observed vs. common sizes.
        input_mean[:, group] = np.dot(bn_lambda, temp_lib_size)
        output_mean[:, group] = np.dot(bn_lambda, out_size)
    pseudo = q2qnbinom(np.asarray(counts.as_matrix()), input_mean, output_mean,
                       dispersion)
    # Quantile mapping can produce small negative values; clip to zero.
    pseudo[pseudo < 0] = 0
    return pseudo, common_size
def average_cpm(y, lib_size=None, prior_count=2, dispersion=0.05):
    """Average abundance per row of ``y``, in log2 counts-per-million.

    Parameters
    ----------
    y : array-like
        Row-by-sample matrix of counts (rows are fitted independently).
    lib_size : array-like, optional
        Per-sample library sizes; defaults to the column sums of ``y``.
    prior_count : float, optional
        Prior count, scaled by each sample's relative library size, added
        to every observation to stabilise the log for low counts.
    dispersion : float, optional
        Negative-binomial dispersion, broadcast to every row.

    Returns
    -------
    numpy.ndarray
        Per-row average abundance in log2 counts-per-million.
    """
    y = np.ascontiguousarray(y)
    # BUG FIX: the default library-size computation was commented out, so
    # calling with lib_size=None crashed on `lib_size / np.mean(lib_size)`.
    # Restore the intended fallback: per-sample column sums of the counts.
    if lib_size is None:
        lib_size = np.sum(y, axis=0)
    # Prior count scaled by each sample's relative library size (column vec).
    prior_counts_scaled = np.asarray(
        lib_size / np.mean(lib_size) * prior_count,
        dtype=np.double).reshape(len(lib_size), 1)
    # Offset: log effective library size after adding the (doubled) prior.
    offset = np.log(
        np.asarray(lib_size, dtype=np.double).reshape(len(lib_size), 1)
        + 2 * prior_counts_scaled)
    dispersion = np.repeat(dispersion, y.shape[0])
    # Fit per-row abundance on prior-augmented counts (typo fixed:
    # "abundence" -> "abundance"; local name only).
    abundance = glm_one_group_numba(
        np.ascontiguousarray((y.T + prior_counts_scaled).T),
        dispersion, offset.T[0])
    # Shift natural-log abundance to the CPM scale, then convert to log2.
    return (np.asarray(abundance) + np.log(1e6)) / np.log(2)
def average_cpm(y, lib_size=None, prior_count=2, dispersion=0.05):
    """Average abundance per row of ``y``, in log2 counts-per-million.

    Parameters
    ---------
    y: row-by-sample matrix of counts (rows are fitted independently)
    lib_size: per-sample library sizes. NOTE(review): the None default is
        never resolved — the fallback below is commented out and did not
        assign anyway — so calling with lib_size=None raises; confirm
        callers always supply it.
    prior_count: prior count, scaled by each sample's relative library
        size, added to every observation to stabilise the log for low counts
    dispersion: negative-binomial dispersion, broadcast to every row
    """
    y = np.ascontiguousarray(y)
    #if lib_size == None: np.sum(y, axis=0)
    # Prior count scaled by each sample's relative library size (column vec).
    prior_counts_scaled = np.asarray(
        lib_size / np.mean(lib_size) * prior_count,
        dtype=np.double).reshape(len(lib_size), 1)
    # Offset: log effective library size after adding the (doubled) prior.
    offset = np.log(
        np.asarray(lib_size, dtype=np.double).reshape(len(lib_size), 1)
        + 2 * prior_counts_scaled)
    dispersion = np.repeat(dispersion, y.shape[0])
    # Fit per-row abundance on prior-augmented counts.
    abundence = glm_one_group_numba(
        np.ascontiguousarray((y.T + prior_counts_scaled).T),
        dispersion, offset.T[0])
    # Shift natural-log abundance to the CPM scale, then convert to log2.
    return ((np.asarray(abundence) + np.log(1e6)) / np.log(2))