Beispiel #1
0
def generate_2x0(consensus, rna=False):
    """Build all variants carrying a two-base insertion inside the consensus.

    The consensus is first passed through seqlib.standardize. At every
    junction between two adjacent positions (never before the first or after
    the last base), each ordered pair of bases drawn from
    seqlib.allBases(rna) is inserted, so every variant is two bases longer
    than the standardized sequence.

    Returns a list of variants ordered by junction, then by first inserted
    base, then by second inserted base.
    """
    seq = seqlib.standardize(consensus, rna)
    listVariants = []
    # Junction k sits between seq[k-1] and seq[k]; only internal junctions
    # are used.
    for junction in range(1, len(seq)):
        head, tail = seq[:junction], seq[junction:]
        for firstInsert in seqlib.allBases(rna):
            for secondInsert in seqlib.allBases(rna):
                listVariants.append(head + firstInsert + secondInsert + tail)

    return listVariants
Beispiel #2
0
def generate_2x0(consensus, rna=False):
    """Return every variant with two extra bases inserted between neighbours.

    After standardizing the consensus with seqlib.standardize, each internal
    junction (between position k-1 and position k, for k = 1 .. len-1)
    receives every ordered pair of bases from seqlib.allBases(rna). The two
    ends of the sequence are never extended.

    The result is a list ordered by junction, then first inserted base, then
    second inserted base.
    """
    seq = seqlib.standardize(consensus, rna)
    return [
        seq[:cut] + insertA + insertB + seq[cut:]
        for cut in range(1, len(seq))
        for insertA in seqlib.allBases(rna)
        for insertB in seqlib.allBases(rna)
    ]
Beispiel #3
0
def generate_4x4(consensus, rna=False):
    """Build all variants that rewrite one four-base window of the consensus.

    The consensus is standardized with seqlib.standardize, then every window
    of four consecutive positions is rewritten as follows:

    * first and fourth positions: each base returned by
      seqlib.allOtherBases(<original base>, rna) -- presumably every base
      except the original one; confirm against seqlib;
    * second and third positions: each base returned by seqlib.allBases(rna),
      which may include the original base.

    Variants keep the length of the standardized sequence. Sequences shorter
    than four bases yield an empty list.
    """
    seq = seqlib.standardize(consensus, rna)
    listVariants = []
    for start in range(len(seq) - 3):
        leftFlank = seq[:start]
        rightFlank = seq[start + 4:]
        firstOriginal = seq[start]
        fourthOriginal = seq[start + 3]
        for first in seqlib.allOtherBases(firstOriginal, rna):
            for second in seqlib.allBases(rna):
                for third in seqlib.allBases(rna):
                    for fourth in seqlib.allOtherBases(fourthOriginal, rna):
                        window = first + second + third + fourth
                        listVariants.append(leftFlank + window + rightFlank)

    return listVariants
Beispiel #4
0
def generateSatLib(consensus, rna=False):
    """Expand a consensus with 'N' wildcards into every concrete sequence.

    The consensus is standardized with seqlib.standardize(consensus, rna).
    Each 'N' in the standardized sequence is then replaced by every base from
    seqlib.allBases(rna); all other positions are kept as they are.

    Args:
        consensus: Template sequence; 'N' marks a fully randomized position.
        rna: Forwarded to seqlib.standardize and seqlib.allBases so that the
            fixed positions and the expanded positions use the same alphabet.

    Returns:
        A list with one string per combination of wildcard substitutions,
        in the order produced by itertools.product (rightmost wildcard
        varies fastest). A consensus without 'N' yields a single-element
        list.

    NOTE(review): assumes seqlib.standardize returns a string that keeps 'N'
    placeholders in place -- confirm against seqlib.
    """
    seq = seqlib.standardize(consensus, rna)
    allBases = seqlib.allBases(rna)
    # Walk the standardized sequence (not the raw input) so that case and
    # alphabet normalization also apply to the fixed positions.
    listListMutations = [allBases if base == 'N' else [base] for base in seq]

    return [''.join(combo) for combo in product(*listListMutations)]
Beispiel #5
0
def generate_4x4(consensus, rna=False):
    """Return every variant with one four-base window of the consensus rewritten.

    After standardizing the consensus via seqlib.standardize, each run of
    four consecutive positions is replaced by:

    * an outer pair (positions 1 and 4 of the window) drawn from
      seqlib.allOtherBases(<original base>, rna) -- presumably the bases
      that differ from the original; confirm against seqlib;
    * an inner pair (positions 2 and 3) drawn from seqlib.allBases(rna).

    The list is ordered by window start, then by the four replacement bases
    from left to right. Inputs shorter than four bases give an empty list.
    """
    seq = seqlib.standardize(consensus, rna)
    return [
        seq[:start] + outerLeft + innerLeft + innerRight + outerRight
        + seq[start + 4:]
        for start in range(len(seq) - 3)
        for outerLeft in seqlib.allOtherBases(seq[start], rna)
        for innerLeft in seqlib.allBases(rna)
        for innerRight in seqlib.allBases(rna)
        for outerRight in seqlib.allOtherBases(seq[start + 3], rna)
    ]
Beispiel #6
0
def generate_1x3(consensus, rna=False):
    """Return every variant with a three-base window collapsed to one base.

    The consensus is standardized with seqlib.standardize. Each run of three
    consecutive positions is then replaced by a single base, once for every
    base in seqlib.allBases(rna), so each variant is two bases shorter than
    the standardized sequence.

    The list is ordered by window start, then by replacement base. Inputs
    shorter than three bases give an empty list.
    """
    seq = seqlib.standardize(consensus, rna)
    return [
        seq[:start] + replacement + seq[start + 3:]
        for start in range(len(seq) - 2)
        for replacement in seqlib.allBases(rna)
    ]
Beispiel #7
0
def generateSatLib(consensus, rna=False):
    """Generate the saturation library encoded by a consensus with 'N' wildcards.

    The consensus is standardized with seqlib.standardize(consensus, rna).
    Every 'N' in the standardized sequence is substituted with each base
    from seqlib.allBases(rna); every other position stays fixed.

    Args:
        consensus: Template sequence; 'N' marks a fully randomized position.
        rna: Forwarded to both seqlib.standardize and seqlib.allBases, so
            fixed and randomized positions share one alphabet.

    Returns:
        A list of strings, one per combination of substitutions, in
        itertools.product order (the rightmost wildcard varies fastest).
        A consensus without 'N' yields a single-element list.

    NOTE(review): assumes seqlib.standardize returns a string in which 'N'
    placeholders are preserved -- confirm against seqlib.
    """
    seq = seqlib.standardize(consensus, rna)
    allBases = seqlib.allBases(rna)
    # Iterate the standardized sequence rather than the raw input so the
    # normalization done above is actually used.
    listListMutations = [allBases if base == 'N' else [base] for base in seq]

    return [''.join(combo) for combo in product(*listListMutations)]
Beispiel #8
0
def generate_1x3(consensus, rna=False):
    """Build all variants that swap three consecutive bases for a single base.

    After standardizing the consensus through seqlib.standardize, every
    window of three adjacent positions is cut out and one base from
    seqlib.allBases(rna) is put in its place. Each variant is therefore two
    bases shorter than the standardized sequence.

    Returns a list ordered by window start, then by replacement base; it is
    empty when the sequence has fewer than three bases.
    """
    seq = seqlib.standardize(consensus, rna)
    listVariants = []
    for windowStart in range(len(seq) - 2):
        before = seq[:windowStart]
        after = seq[windowStart + 3:]
        for replacement in seqlib.allBases(rna):
            listVariants.append(before + replacement + after)

    return listVariants
Beispiel #9
0
def seqCompo(seqs, libSeq=None, startPos=1, norm=True, RNA=False, legendloc=2):
    """Plot sequence composition as a line graph.

    For every library position, counts how many sequences carry each base
    and draws one line per base on the current matplotlib axes.

    Args:
        seqs: Sequences to summarize; anything pd.Series accepts (list,
            Series, ...) holding strings.
        libSeq: Library template; only positions where it holds 'N'
            (case-insensitive) are plotted. Defaults to all positions of
            the first sequence.
        startPos: Number shown on the x axis for index 0 of libSeq.
        norm: If True, plot percentages of the number of sequences;
            otherwise plot raw counts.
        RNA: Forwarded to seqlib.allBases to choose the set of bases.
        legendloc: Forwarded to setproperties as the legend location.

    Returns:
        The current matplotlib axes (plt.gca()).
    """
    # Convert up front so the positional lookup below works for lists and
    # for Series whose index does not contain the label 0.
    seqs = pd.Series(seqs)

    # Generate default libSeq
    if libSeq is None:
        libSeq = 'N' * len(seqs.iloc[0])

    # Get library positions
    libPos = [i for (i, base) in enumerate(libSeq.upper()) if base == 'N']
    xTickLabels = [str(i + startPos) for i in libPos]

    # Constants
    allBases = seqlib.allBases(RNA)
    numSeqs = len(seqs)
    numPos = len(libPos)
    colors = sns.color_palette("Paired", 12)

    # Compute sequence composition matrix (rows: positions, columns: bases)
    compo = pd.DataFrame(0, index=libPos, columns=allBases)
    for pos in libPos:
        basesAtPos = seqs.str[pos]
        for base in allBases:
            # Single .loc assignment: chained compo[base][pos] = ... may
            # write to a temporary copy and leave compo unchanged.
            compo.loc[pos, base] = (basesAtPos == base).sum()

    if norm:
        compo = compo / numSeqs * 100
        ylabel = 'Sequence content (%)'
    else:
        ylabel = 'Sequence content (count)'

    # Plot one line per base, using every second entry of the palette
    for i, base in enumerate(allBases):
        plt.plot(range(0, numPos), compo[base], color=colors[2 * i + 1],
                 linewidth=3, label=base)

    # Add legend and axis labels, and change x tick labels
    setproperties(legend=True, legendloc=legendloc,
                  xlabel='Position', ylabel=ylabel)
    plt.xticks(range(0, numPos), xTickLabels)

    return plt.gca()