Beispiel #1
0
def uncommon_Kmers():
    """Print the most and least frequent k-mers found in ``arg.file``.

    Every record returned by ``seqlib.read_fasta(arg.file)`` is upper-cased
    and each overlapping substring of length ``arg.k`` is tallied.  The
    ``arg.n`` highest-count and the ``arg.n`` lowest-count k-mers are then
    printed as dictionaries mapping k-mer to count.

    Relies on the module-level ``arg`` namespace (attributes ``file``, ``k``
    and ``n``) and on ``seqlib``.  Returns nothing; output goes to stdout.
    If fewer than ``arg.n`` distinct k-mers exist, all of them are reported
    instead of raising ``IndexError``.
    """
    count = {}
    for _name, seq in seqlib.read_fasta(arg.file):
        seq = seq.upper()
        for i in range(len(seq) - arg.k + 1):
            kmer = seq[i:i + arg.k]  # slice once, reuse for lookup and update
            count[kmer] = count.get(kmer, 0) + 1

    # Keys ordered by ascending count (sorted() is stable, so ties keep
    # their first-seen order).
    sortedKmers = sorted(count, key=count.__getitem__)

    # Clamp the requested number to what is available; a negative request
    # yields empty results, as the original range() loop did.
    limit = max(0, min(arg.n, len(sortedKmers)))

    leastKmers = {kmer: count[kmer] for kmer in sortedKmers[:limit]}
    # Walk from the end backwards so the most frequent k-mer is listed first.
    top = sortedKmers[len(sortedKmers) - limit:]
    mostKmers = {kmer: count[kmer] for kmer in reversed(top)}

    print(f"The most common kmers are {mostKmers}")
    print(f"The least common kmers are {leastKmers}")
# Command-line options for the entropy filter.
# NOTE(review): `parser` is expected to be created above this point.
# --file: path handed to seqlib.read_fasta.
parser.add_argument("--file", required=True, type=str, metavar='<path>')
# --window: length of the sequence slice whose entropy is evaluated.
parser.add_argument("--window", required=True, type=int, metavar='<int>')
# --threshold: windows whose seqlib.entropy(...) value is below this are
# modified.  Parsed as float so fractional cut-offs such as 1.5 are accepted;
# integer input keeps working and the default is unchanged.
parser.add_argument("--threshold", required=False, default=1, type=float)
# --mod: the value "lower" lower-cases a low-entropy window.
parser.add_argument("--mod", required=False, type=str)
parser.add_argument("--step", required=False, type=int, default=1)
# --verbose: enables the status() messages written to stderr.
parser.add_argument("--verbose", action="store_true")
arg = parser.parse_args()


def status(s):
    """Write ``s`` followed by a newline to stderr when ``arg.verbose`` is set.

    Does nothing when verbose output was not requested.
    """
    if not arg.verbose:
        return
    sys.stderr.write(s + "\n")


# Records read from the path given by --file; iterated below as (name, seq).
myfasta = seqlib.read_fasta(arg.file)
window = arg.window

for name, seq in myfasta:
    seq = seq.upper()
    i = 0
    # Visit start positions that still leave room for a full window.
    while (i < (len(seq) - window + 1)):
        selection = seq[i:i + window]
        e = seqlib.entropy(selection)

        # Windows whose entropy falls below --threshold are modified.
        if e < arg.threshold:
            status("entering modifications")
            status(f"window:{selection} {e}")
            status(f"current seq {seq}")
            # --mod lower: lower-case the low-entropy window.
            if arg.mod == "lower": selection = selection.lower()
            else:
import seqlib

# Command-line interface: two sequence file paths plus the three integer
# scoring parameters, all mandatory.
parser = argparse.ArgumentParser(description="Global alignment using SW alg")
for flag in ("--s1", "--s2"):
    parser.add_argument(flag, type=str, metavar='<path>', required=True)
for flag in ("--match", "--mismatch", "--indel"):
    parser.add_argument(flag, type=int, required=True)
arg = parser.parse_args()
'''
for debugging, this example is from wikipedia
seqa = "TGTTACGG"
seqb = "GGTTGACTA"
'''

# Read both inputs; all records of a file are concatenated into one string.
file1 = seqlib.read_fasta(arg.s1)
file2 = seqlib.read_fasta(arg.s2)

# str.join assembles each sequence in a single pass instead of growing a
# string with repeated `+` inside a loop.
seqa = "".join(seq for _name, seq in file1)
seqb = "".join(seq for _name, seq in file2)
#make score and direction matrix

# A leading blank is prepended to both sequences, so cols/rows are one larger
# than the raw sequence lengths.
# NOTE(review): presumably the extra row/column is the matrix boundary - confirm.
seqa = " " + seqa
seqb = " " + seqb
cols = len(seqa)
rows = len(seqb)
Beispiel #4
0
                    metavar='<int>',
                    help="only show this many k-mers")
parser.add_argument(
    '--verbose',
    help='print some diagnostic messages to stderr',
    action='store_true',
)
arg = parser.parse_args()

# Announce the input file on stderr when diagnostics were requested.
if arg.verbose:
    print(f'Reading {arg.fasta}', file=sys.stderr)

# Tally the number of sequences, the total letters, and the occurrences of
# every overlapping k-mer of length arg.k.
seq_count = 0
nt_count = 0
k_count = {}

for seq_count, (name, seq) in enumerate(seqlib.read_fasta(arg.fasta), start=1):
    nt_count += len(seq)
    for i in range(len(seq) - arg.k + 1):
        kmer = seq[i:i + arg.k]
        k_count[kmer] = k_count.get(kmer, 0) + 1

# Summary of what was read, one figure per line, on stderr.
if arg.verbose:
    print(
        f'{seq_count} sequences',
        f'{nt_count} letters',
        f'{len(k_count)} kmers',
        sep='\n',
        file=sys.stderr,
    )

# output
n = 0
for kmer, count in sorted(k_count.items(),