def main(df, model_df, T_LibCounts, T_mRNACounts, start=0, end=None):
    """Simulate library and mRNA expression counts for each sequence.

    Energies are predicted for every sequence using ``model_df``, then
    counts are drawn through a Poisson noise model (approximating the
    underlying binomial sampling noise).

    Parameters
    ----------
    df : pandas.DataFrame
        Sequence dataframe (schema expected by ``evaluate_model.main``).
    model_df : pandas.DataFrame
        Model dataframe used to score each sequence.
    T_LibCounts, T_mRNACounts :
        Total library / mRNA counts to distribute across sequences.
    start, end : int or None
        Sub-range of sequence positions to evaluate; ``end=None`` means
        evaluate through the end of the sequence.

    Returns
    -------
    tuple
        ``(libcounts, expcounts)`` as produced by the noise model.
    """
    # Calculate predicted energy of each sequence over [start, end).
    # Bugfix: previously `right` was hard-coded to None, silently
    # ignoring a caller-supplied `end`.
    df = evaluate_model.main(df, model_df, left=start, right=end)
    # We assume the only noise is binomial noise (approximated as Poisson).
    mymodel = Models.PoissonNoise()
    # Generate noisy library and expression counts from the energies.
    libcounts, expcounts = mymodel.gennoisyexp(df, T_LibCounts, T_mRNACounts)
    return libcounts, expcounts
def main(df, mp, noisetype, npar, nbins, sequence_library=True, start=0, end=None):
    """Simulate a sort-seq experiment: score sequences, add noise, bin them.

    Each sequence is assigned a predicted energy from model ``mp``; a
    noise model of type ``noisetype`` perturbs those energies, and the
    noisy values are partitioned into ``nbins`` equal-occupancy bins.

    Parameters
    ----------
    df : pandas.DataFrame
        Sequence dataframe with a ``ct`` count column.
    mp : pandas.DataFrame
        Model dataframe used to score each sequence.
    noisetype : str
        One of ``"Normal"``, ``"LogNormal"``, ``"None"``; any other
        value is treated as the name of a custom noise model.
    npar : list
        Noise-model parameters (length validated per ``noisetype``).
    nbins : int
        Number of sorting bins; must be greater than 1.
    sequence_library : bool
        If True, also draw a ``ct_0`` library sample column.
    start, end : int or None
        Sub-range of sequence positions to evaluate; ``end=None`` means
        evaluate through the end of the sequence.

    Returns
    -------
    pandas.DataFrame
        Input dataframe with per-bin count columns ``ct_1..ct_nbins``
        and an updated total ``ct`` column; the ``val`` column is dropped.

    Raises
    ------
    SortSeqError
        If ``npar`` is not a list, has the wrong length for the chosen
        noise model, or ``nbins`` is not greater than 1.
    """
    # Validate noise parameters. The noisetype branches are mutually
    # exclusive, so chain them with elif.
    if not isinstance(npar, list):
        raise SortSeqError("Noise parameters must be given as a list")
    if noisetype == "Normal":
        if len(npar) != 1:
            raise SortSeqError(
                """For a normal noise model, there must be one input 
                parameter (width of normal distribution)"""
            )
    elif noisetype == "LogNormal":
        if len(npar) != 2:
            raise SortSeqError(
                """For a LogNormal noise model there must 
                be 2 input parameters"""
            )
    if nbins <= 1:
        raise SortSeqError("number of bins must be greater than 1")

    # Generate predicted energy of each sequence over [start, end).
    # Bugfix: previously `right` was hard-coded to None, silently
    # ignoring a caller-supplied `end`.
    df = evaluate_model.main(df, mp, left=start, right=end)

    # Select the noise model used for sorting.
    if noisetype == "LogNormal":
        NoiseModelSort = Models.LogNormalNoise(npar)
    elif noisetype == "Normal":
        NoiseModelSort = Models.NormalNoise(npar)
    elif noisetype == "None":
        # "None" still needs a model object; use an effectively
        # zero-width normal distribution.
        NoiseModelSort = Models.NormalNoise([1e-16])
    else:
        # Any unrecognized name is treated as a custom model.
        NoiseModelSort = Models.CustomModel(noisetype, npar)

    # Apply noise to the calculated energies.
    noisyexp, listnoisyexp = NoiseModelSort.genlist(df)

    # Determine expression cutoffs so each bin holds ~equal occupancy.
    noisyexp.sort()
    cutoffs = list(
        noisyexp[np.linspace(0, len(noisyexp), nbins, endpoint=False, dtype=int)]
    )
    cutoffs.append(np.inf)

    # Split each sequence's noisy values into bins via the cutoffs.
    seqs_arr = np.zeros([len(listnoisyexp), nbins], dtype=int)
    for i, entry in enumerate(listnoisyexp):
        seqs_arr[i, :] = np.histogram(entry, bins=cutoffs)[0]
    col_labels = ["ct_" + str(i + 1) for i in range(nbins)]

    if sequence_library:
        # Draw an unsorted library sample with ~1/nbins of total counts.
        df["ct_0"] = utils.sample(df["ct"], int(df["ct"].sum() / nbins))
    output_df = pd.concat([df, pd.DataFrame(seqs_arr, columns=col_labels)], axis=1)

    # Recompute the total count across all ct_* columns.
    col_labels = utils.get_column_headers(output_df)
    output_df["ct"] = output_df[col_labels].sum(axis=1)
    output_df = output_df.drop("val", axis=1)
    return output_df