def generate_2x0(consensus, rna=False):
    """Return all variants of ``consensus`` carrying a two-base insertion.

    The input is first passed through ``seqlib.standardize``. At every
    internal junction (after the first base, up to just before the last
    base) each ordered pair of bases from ``seqlib.allBases(rna)`` is
    inserted. Variants are ordered by insertion site, then by the first
    inserted base, then by the second.
    """
    template = seqlib.standardize(consensus, rna)
    # ``cut`` is the index at which the two new bases are spliced in; it
    # never touches either end of the sequence.
    return [
        template[:cut] + first + second + template[cut:]
        for cut in range(1, len(template))
        for first in seqlib.allBases(rna)
        for second in seqlib.allBases(rna)
    ]
def generate_2x0(consensus, rna=False):
    """Enumerate two-base insertion variants of ``consensus``.

    After ``seqlib.standardize`` is applied, two bases drawn from
    ``seqlib.allBases(rna)`` are inserted between every pair of adjacent
    positions. Nothing is inserted before the first or after the last base.
    Returns the variants as a list of strings.
    """
    std_seq = seqlib.standardize(consensus, rna)
    variants = []
    for split_at in range(1, len(std_seq)):
        # Everything left and right of the insertion point is unchanged.
        head = std_seq[:split_at]
        tail = std_seq[split_at:]
        for first_insert in seqlib.allBases(rna):
            variants.extend(
                head + first_insert + second_insert + tail
                for second_insert in seqlib.allBases(rna)
            )
    return variants
def generate_4x4(consensus, rna=False):
    """Return all variants of ``consensus`` with a four-base window replaced.

    The input is first passed through ``seqlib.standardize``. For every
    window of four consecutive positions, the first and last positions are
    replaced with each base from ``seqlib.allOtherBases`` (called with the
    original base at that position), while the two middle positions take
    every base from ``seqlib.allBases(rna)``. Sequences shorter than four
    bases yield an empty list.
    """
    template = seqlib.standardize(consensus, rna)
    return [
        template[:start] + left + mid_a + mid_b + right + template[start + 4:]
        for start in range(len(template) - 3)
        for left in seqlib.allOtherBases(template[start], rna)
        for mid_a in seqlib.allBases(rna)
        for mid_b in seqlib.allBases(rna)
        for right in seqlib.allOtherBases(template[start + 3], rna)
    ]
def generateSatLib(consensus, rna=False):
    """Expand a consensus with ``'N'`` wildcards into all concrete sequences.

    Args:
        consensus: Sequence in which every ``'N'`` marks a position to be
            saturated; all other positions are kept fixed.
        rna: Forwarded to ``seqlib.standardize`` and ``seqlib.allBases`` so
            that the fixed positions and the bases substituted at ``'N'``
            positions use the same alphabet.

    Returns:
        A list of strings, one per combination of bases at the ``'N'``
        positions, in ``itertools.product`` order.

    NOTE(review): this assumes ``seqlib.standardize`` leaves ``'N'``
    positions intact -- confirm against seqlib.
    """
    seq = seqlib.standardize(consensus, rna)
    # Expand the standardized sequence (not the raw input) and honour the
    # caller's ``rna`` flag when choosing the bases for each 'N'.
    choices = [seqlib.allBases(rna) if base == 'N' else [base] for base in seq]
    return [''.join(combo) for combo in product(*choices)]
def generate_4x4(consensus, rna=False):
    """Enumerate four-base replacement variants of ``consensus``.

    The sequence is run through ``seqlib.standardize``; then, for each
    stretch of four adjacent positions, the outer two positions are swapped
    for every base returned by ``seqlib.allOtherBases`` for the original
    base, and the inner two positions for every base returned by
    ``seqlib.allBases(rna)``. Returns the variants as a list of strings.
    """
    std_seq = seqlib.standardize(consensus, rna)
    window = 4
    variants = []
    for offset in range(len(std_seq) - window + 1):
        # Flanks outside the four-base window are carried over untouched.
        upstream = std_seq[:offset]
        downstream = std_seq[offset + window:]
        first_original = std_seq[offset]
        last_original = std_seq[offset + window - 1]
        for first_sub in seqlib.allOtherBases(first_original, rna):
            for inner_a in seqlib.allBases(rna):
                for inner_b in seqlib.allBases(rna):
                    for last_sub in seqlib.allOtherBases(last_original, rna):
                        variants.append(
                            upstream + first_sub + inner_a + inner_b
                            + last_sub + downstream
                        )
    return variants
def generate_1x3(consensus, rna=False):
    """Return all variants of ``consensus`` with three bases collapsed to one.

    The input is first passed through ``seqlib.standardize``. For every
    window of three consecutive positions, the window is replaced by a
    single base, once for each base in ``seqlib.allBases(rna)``. Sequences
    shorter than three bases yield an empty list.
    """
    template = seqlib.standardize(consensus, rna)
    return [
        template[:start] + replacement + template[start + 3:]
        for start in range(len(template) - 2)
        for replacement in seqlib.allBases(rna)
    ]
def generateSatLib(consensus, rna=False):
    """Build the saturation library encoded by a consensus with ``'N'`` sites.

    Args:
        consensus: Sequence whose ``'N'`` positions are to be replaced by
            every possible base; other positions stay as they are.
        rna: Passed to both ``seqlib.standardize`` and ``seqlib.allBases``
            so the whole library is written in one alphabet.

    Returns:
        A list with one string per combination of bases at the ``'N'``
        positions, ordered as ``itertools.product`` yields them.

    NOTE(review): relies on ``seqlib.standardize`` preserving ``'N'``
    characters -- confirm against seqlib.
    """
    seq = seqlib.standardize(consensus, rna)
    per_position = []
    for base in seq:
        if base == 'N':
            # Wildcard: use the alphabet selected by ``rna`` rather than a
            # fixed DNA alphabet.
            per_position.append(seqlib.allBases(rna))
        else:
            per_position.append([base])
    return [''.join(combo) for combo in product(*per_position)]
def generate_1x3(consensus, rna=False):
    """Enumerate variants of ``consensus`` where three bases become one.

    After ``seqlib.standardize`` is applied, each run of three adjacent
    positions is cut out and a single base from ``seqlib.allBases(rna)`` is
    put in its place. Returns the variants as a list of strings, ordered by
    window start and then by replacement base.
    """
    std_seq = seqlib.standardize(consensus, rna)
    variants = []
    for offset in range(len(std_seq) - 2):
        # The three bases at offset..offset+2 are dropped from the variant.
        kept_left = std_seq[:offset]
        kept_right = std_seq[offset + 3:]
        variants.extend(
            kept_left + single + kept_right for single in seqlib.allBases(rna)
        )
    return variants
def seqCompo(seqs, libSeq=None, startPos=1, norm=True, RNA=False, legendloc=2):
    """Plot sequence composition as a line graph.

    Args:
        seqs: Collection of sequence strings; converted to a ``pd.Series``.
        libSeq: Template whose ``'N'`` positions (case-insensitive) select
            the positions to plot. Defaults to every position of the first
            sequence.
        startPos: Number shown on the x axis for index 0 of ``libSeq``.
        norm: If true, plot each base as a percentage of ``len(seqs)``;
            otherwise plot raw counts.
        RNA: Forwarded to ``seqlib.allBases`` to pick the alphabet.
        legendloc: Legend location forwarded to ``setproperties``.

    Returns:
        The current matplotlib axes (``plt.gca()``).
    """
    # Convert first so the default libSeq can be taken positionally; label
    # lookup with ``seqs[0]`` fails on a Series whose index lacks 0.
    seqs = pd.Series(seqs)

    # Generate default libSeq
    if libSeq is None:
        libSeq = 'N' * len(seqs.iloc[0])

    # Get library positions
    libPos = [i for (i, base) in enumerate(libSeq.upper()) if base == 'N']
    xTickLabels = [str(i + startPos) for i in libPos]

    # Constants
    allBases = seqlib.allBases(RNA)
    numSeqs = len(seqs)
    numPos = len(libPos)
    colors = sns.color_palette("Paired", 12)

    # Compute sequence composition matrix. The counts are collected per
    # base and the frame is built in one go: chained assignment such as
    # ``compo[base][pos] = ...`` does not write through under pandas
    # Copy-on-Write.
    counts = {base: [] for base in allBases}
    for pos in libPos:
        observed = seqs.str[pos]
        for base in allBases:
            counts[base].append(int((observed == base).sum()))
    compo = pd.DataFrame(counts, index=libPos, columns=list(allBases))

    if norm:
        compo = compo / numSeqs * 100
        ylabel = 'Sequence content (%)'
    else:
        ylabel = 'Sequence content (count)'

    # Plot one line per base; odd palette indices are used for the colors
    for i, base in enumerate(allBases):
        plt.plot(range(0, numPos), compo[base],
                 color=colors[2 * i + 1], linewidth=3, label=base)

    # Add legend and axis labels, and change x tick labels
    setproperties(legend=True, legendloc=legendloc,
                  xlabel='Position', ylabel=ylabel)
    plt.xticks(range(0, numPos), xTickLabels)
    return plt.gca()