def main(df, model_df, T_LibCounts, T_mRNACounts, start=0, end=None):
    """Generate noisy library and expression counts for a dataset.

    The dataset is first run through ``evaluate_model.main`` and the result
    is handed to a ``Models.PoissonNoise`` instance; the pair produced by its
    ``gennoisyexp`` method is returned.

    Args:
        df: Dataset forwarded to ``evaluate_model.main``.
        model_df: Model forwarded to ``evaluate_model.main``.
        T_LibCounts: Forwarded to ``gennoisyexp`` (presumably the total
            number of library counts -- TODO confirm).
        T_mRNACounts: Forwarded to ``gennoisyexp`` (presumably the total
            number of mRNA counts -- TODO confirm).
        start: Passed to ``evaluate_model.main`` as ``left``.
        end: Accepted but not used by this function; ``right`` is always
            passed as ``None``.

    Returns:
        A ``(libcounts, expcounts)`` tuple as produced by ``gennoisyexp``.
    """
    scored_df = evaluate_model.main(df, model_df, left=start, right=None)

    # The only noise assumed is binomial noise, approximated here as Poisson.
    noise_model = Models.PoissonNoise()

    # New expression levels are derived from the energy of each sequence.
    lib_counts, exp_counts = noise_model.gennoisyexp(
        scored_df, T_LibCounts, T_mRNACounts
    )
    return lib_counts, exp_counts
# Example #2
0
def main(df, mp, noisetype, npar, nbins, sequence_library=True, start=0, end=None):
    """Simulate sorting sequences into expression bins under a noise model.

    The dataset is evaluated with ``evaluate_model.main``, noise is applied
    through a model from ``Models``, and the noisy values of every sequence
    are histogrammed into ``nbins`` bins whose lower edges are taken at
    evenly spaced positions of the sorted pooled noisy values.

    Args:
        df: Dataset forwarded to ``evaluate_model.main``; the evaluated
            result must provide the ``"ct"`` and ``"val"`` columns used here.
        mp: Model forwarded to ``evaluate_model.main``.
        noisetype: ``"Normal"``, ``"LogNormal"`` or ``"None"``; any other
            value is handed to ``Models.CustomModel``.
        npar: List of noise parameters (one entry for ``"Normal"``, two for
            ``"LogNormal"``).
        nbins: Number of bins; must be greater than 1.
        sequence_library: When true, a ``"ct_0"`` column is added by calling
            ``utils.sample`` on the ``"ct"`` column.
        start: Passed to ``evaluate_model.main`` as ``left``.
        end: Accepted but not used by this function; ``right`` is always
            passed as ``None``.

    Returns:
        The evaluated dataframe joined with one ``ct_<k>`` column per bin,
        with ``"ct"`` recomputed as a row-wise sum and ``"val"`` removed.

    Raises:
        SortSeqError: If ``npar`` is not a list, has the wrong length for
            the chosen noise type, or ``nbins`` is not greater than 1.
    """
    # ---- validate noise parameters ----
    if not isinstance(npar, list):
        raise SortSeqError("Noise parameters must be given as a list")

    # (noise type, required parameter count, error message), checked in order.
    # The messages deliberately keep their embedded newline and indentation.
    param_rules = (
        (
            "Normal",
            1,
            "For a normal noise model, there must be one \n"
            "                 input parameter (width of normal distribution)",
        ),
        (
            "LogNormal",
            2,
            "For a LogNormal noise model there must \n"
            "                 be 2 input parameters",
        ),
    )
    for kind, n_required, message in param_rules:
        if noisetype == kind and len(npar) != n_required:
            raise SortSeqError(message)

    if nbins <= 1:
        raise SortSeqError("number of bins must be greater than 1")

    # ---- predicted energy of each sequence ----
    scored_df = evaluate_model.main(df, mp, left=start, right=None)

    # ---- pick the noise model ----
    if noisetype == "LogNormal":
        noise_model = Models.LogNormalNoise(npar)
    elif noisetype == "Normal":
        noise_model = Models.NormalNoise(npar)
    elif noisetype == "None":
        # "None" is represented as normal noise with a negligible parameter.
        noise_model = Models.NormalNoise([1e-16])
    else:
        noise_model = Models.CustomModel(noisetype, npar)

    # ---- apply noise to the calculated energies ----
    pooled_exp, per_seq_exp = noise_model.genlist(scored_df)

    # ---- expression cutoffs: evenly spaced positions of the sorted pool ----
    pooled_exp.sort()  # in place, as the array is indexed right afterwards
    edge_positions = np.linspace(
        0, len(pooled_exp), nbins, endpoint=False, dtype=int
    )
    # The last bin is open-ended so every value at or above the last edge lands in it.
    cutoffs = list(pooled_exp[edge_positions]) + [np.inf]

    # ---- count, per sequence, how many noisy values fall in each bin ----
    bin_counts = np.zeros([len(per_seq_exp), nbins], dtype=int)
    for row, values in enumerate(per_seq_exp):
        hist, _edges = np.histogram(values, bins=cutoffs)
        bin_counts[row, :] = hist

    bin_labels = ["ct_{}".format(b) for b in range(1, nbins + 1)]

    if sequence_library:
        # Library column sampled with a size of total counts divided by nbins.
        library_size = int(scored_df["ct"].sum() / nbins)
        scored_df["ct_0"] = utils.sample(scored_df["ct"], library_size)

    binned_df = pd.DataFrame(bin_counts, columns=bin_labels)
    output_df = pd.concat([scored_df, binned_df], axis=1)

    # NOTE(review): presumably get_column_headers returns the ct_* columns -- confirm.
    count_cols = utils.get_column_headers(output_df)
    output_df["ct"] = output_df[count_cols].sum(axis=1)
    return output_df.drop("val", axis=1)