def generateY(rep=None, Genes=None, Cond=None, TF=None, coeff=None, offset_beta=None, covar_cond=None,
              covar_data=None, offset_data=None, frac=None, fracNoise=None):
    r"""
    Generate ``rep`` random responses Y (and the coefficients beta they are based on).

    An initial draw for every repetition is produced in parallel with ``genData``.
    Afterwards, for each repetition, the motif variance parameter (starting at
    ``offset_beta``) is adjusted iteratively and the data redrawn until
    ``tp.checkRange(rsq=..., frac=frac)`` accepts the value returned by
    ``var.varCoeff`` or 1000 iterations have been performed.

    :param rep: number of repetitions to draw Y (Genes x Cond)
    :param Genes: first dimension of Y - number of genes
    :param Cond: second dimension of Y - number of conditions
    :param TF: parameter dimension - number of Transcription Factors
    :param coeff: coefficients in model (Genes x TF) - motifs
    :param offset_beta: constant variation parameter - sigma_beta^2; a one-element list is unwrapped
    :param covar_cond: covariance of conditions (Cond x Cond) - V_Cond
    :param covar_data: covariance of data (Cond x Cond) - Sigma_C
    :param offset_data: constant variation parameter of data - delta
    :param frac: target fraction between signal and noise; defaults to 0.2 when None
    :param fracNoise: not used in the computation, only stored in the returned parameters
    :return: ``Ybp.Ybetaparam`` built from Y (one column per repetition, flattened in
             column-major order), beta (same layout) and a parameter dictionary;
             per the model description Y | beta is multivariate normal distributed with
             N( 0, sigma_beta^2 * V \otimes coeff'coeff.T + delta * Sigma \otimes I_G)
    :raises RuntimeError: if a pooled result does not contain exactly two entries
    """
    # unwrap a single-element list first so that every later product uses the scalar
    if isinstance(offset_beta, list) and len(offset_beta) == 1:
        offset_beta = offset_beta[0]

    # if no fraction between signal and noise is set, set to 0.2
    if frac is None:
        frac = 0.2

    # precompute some values
    cov_noise = offset_data * covar_data
    cov_motif = offset_beta * covar_cond

    # POOLING: one initial draw per repetition, using half of the available cores (at least one)
    pool = Pool(max(cpu_count() // 2, 1))
    try:
        Y_Pool = pool.map(
            partial(genData, TF=TF, Genes=Genes, Cond=Cond, coeff=coeff, cov_motif=cov_motif,
                    cov_noise=cov_noise),
            range(rep))
    finally:
        # release the worker processes even if a worker raised
        pool.close()
        pool.join()

    # initialize dataframes (one column per repetition)
    Y = pd.DataFrame(columns=range(rep))
    beta = pd.DataFrame(columns=range(rep))
    offset_beta_df = pd.DataFrame(columns=range(rep))

    # run over all repetitions
    for rand, Ybeta in enumerate(Y_Pool):
        # each pooled result is expected to hold exactly 'Y_r' and 'beta_r'
        if len(Ybeta.keys()) != 2:
            raise RuntimeError('Error: Pooling error')
        Y[rand] = pd.Series(Ybeta['Y_r'].reshape(-1, order='F'))
        beta[rand] = Ybeta['beta_r'].reshape(-1, order='F')

        # bisection-style search over the motif variance parameter
        rsqrd = var.varCoeff(Y=Y[rand], A=coeff, b=Ybeta['beta_r'])
        offset_beta_old = offset_beta
        lim_low = 0
        lim_up = 1e+4 * offset_beta_old
        idx = 0
        # True when the last step changed rsqrd by less than 0.01 (search is stuck)
        abs_diff = False

        # find right parameters such that signal is $frac
        # NOTE(review): the updates below take half the distance to a limit rather than
        # the midpoint (lim_low + lim_up) / 2 - kept unchanged, confirm this is intended.
        while not tp.checkRange(rsq=rsqrd, frac=frac) and idx < 1000:
            if rsqrd < (frac - 0.01):
                # value too small: widen the upper limit if stuck, raise the lower limit
                if abs_diff:
                    lim_up = 1e+1 * lim_up
                lim_low = offset_beta_old
                offset_beta_new = (lim_up - offset_beta_old) / 2
            else:
                # otherwise: reset the lower limit if stuck, lower the upper limit
                if abs_diff:
                    lim_low = 0
                lim_up = offset_beta_old
                offset_beta_new = (offset_beta_old - lim_low) / 2

            rsqrd_old = rsqrd

            # redraw the data for this repetition with the updated motif covariance
            cov_motif_new = offset_beta_new * covar_cond
            Ybeta = genData(rand, Cond=Cond, Genes=Genes, TF=TF, coeff=coeff, cov_motif=cov_motif_new,
                            cov_noise=cov_noise)
            rsqrd = var.varCoeff(Y=Ybeta['Y_r'].reshape(-1, order='F'), A=coeff, b=Ybeta['beta_r'])

            offset_beta_old = offset_beta_new
            abs_diff = abs(rsqrd - rsqrd_old) < 0.01
            idx += 1

        # update parameter
        offset_beta_df[rand] = [offset_beta_old]

        # reshape updated gene expression and motif influence
        Y[rand] = Ybeta['Y_r'].reshape(-1, order='F')
        beta[rand] = Ybeta['beta_r'].reshape(-1, order='F')

    # create dictionary of input parameters
    params = {
        'G': Genes,
        'C': Cond,
        'TF': TF,
        'sigma_beta2': offset_beta_df,
        'delta': offset_data,
        'V': covar_cond,
        'Sigma': covar_data,
        'motif_TG': coeff,
        'frac': frac,
        'fracNoise': fracNoise
    }

    # return Ybp object
    return Ybp.Ybetaparam(Y=Y, beta=beta, parameter=params)