def uncommon_Kmers():
    """Print the most and least frequent k-mers found in a FASTA file.

    Reads records with ``seqlib.read_fasta(arg.file)``, upper-cases each
    sequence, and counts every overlapping substring of length ``arg.k``
    across all records.  The ``arg.n`` highest-count and ``arg.n``
    lowest-count k-mers are then printed to stdout as dicts mapping
    k-mer -> count.

    Relies on the module-level ``arg`` namespace (attributes ``file``,
    ``k`` and ``n``) and on the ``seqlib`` module.  Returns ``None``.

    If fewer than ``arg.n`` distinct k-mers exist, all of them are
    reported instead of raising ``IndexError``.
    """
    k = arg.k
    count = {}
    for _name, seq in seqlib.read_fasta(arg.file):
        seq = seq.upper()
        # range() is empty when the sequence is shorter than k, so short
        # records simply contribute nothing.
        for i in range(len(seq) - k + 1):
            kmer = seq[i:i + k]
            count[kmer] = count.get(kmer, 0) + 1

    # Ascending by count; sorted() is stable, so k-mers with equal counts
    # stay in first-seen order.
    sortedKmers = sorted(count, key=count.__getitem__)

    # Clamp the request to what is available (and to zero for negative n)
    # so the selections below can never index past either end.
    n = max(0, min(arg.n, len(sortedKmers)))

    leastKmers = {kmer: count[kmer] for kmer in sortedKmers[:n]}
    # Walk the tail backwards so the most frequent k-mer is listed first.
    mostKmers = {
        kmer: count[kmer]
        for kmer in reversed(sortedKmers[len(sortedKmers) - n:])
    }

    print(f"The most common kmers are {mostKmers}")
    print(f"The least common kmers are {leastKmers}")
parser.add_argument("--file", required=True, type=str, metavar='<path>') parser.add_argument("--window", required=True, type=int, metavar='<int>') parser.add_argument("--threshold", required=False, default=1, type=int) parser.add_argument("--mod", required=False, type=str) parser.add_argument("--step", required=False, type=int, default=1) parser.add_argument("--verbose", action="store_true") arg = parser.parse_args() def status(s): if arg.verbose: sys.stderr.write(s) sys.stderr.write("\n") myfasta = seqlib.read_fasta(arg.file) window = arg.window for name, seq in myfasta: seq = seq.upper() i = 0 while (i < (len(seq) - window + 1)): selection = seq[i:i + window] e = seqlib.entropy(selection) if e < arg.threshold: status("entering modifications") status(f"window:{selection} {e}") status(f"current seq {seq}") if arg.mod == "lower": selection = selection.lower() else:
import seqlib

# Command-line interface: two FASTA paths and three integer scoring terms,
# all of them mandatory.
# NOTE(review): the description says "Global" but names "SW" -- presumably
# Smith-Waterman, which is a local-alignment method; confirm which is meant.
parser = argparse.ArgumentParser(description="Global alignment using SW alg")
for path_flag in ("--s1", "--s2"):
    parser.add_argument(path_flag, required=True, type=str, metavar='<path>')
for score_flag in ("--match", "--mismatch", "--indel"):
    parser.add_argument(score_flag, required=True, type=int)
arg = parser.parse_args()

# Disabled debug fixture, kept as a bare string literal that is never used.
''' for debugging, this example is from wikipedia seqa = "TGTTACGG" seqb = "GGTTGACTA" '''

# Open both inputs first, then concatenate every record of each file into a
# single sequence string per file.
file1 = seqlib.read_fasta(arg.s1)
file2 = seqlib.read_fasta(arg.s2)
seqa = ""
seqb = ""
for name, seq in file1:
    seqa += seq
for name, seq in file2:
    seqb += seq

# make score and direction matrix
# Each sequence gets a one-character blank prefix, so the dimensions below
# are one larger than the raw sequence lengths.
seqa = " " + seqa
seqb = " " + seqb
cols, rows = len(seqa), len(seqb)
metavar='<int>', help="only show this many k-mers") parser.add_argument('--verbose', action='store_true', help='print some diagnostic messages to stderr') arg = parser.parse_args() if arg.verbose: sys.stderr.write(f'Reading {arg.fasta}\n') #counting seq_count = 0 nt_count = 0 k_count = {} for name, seq in seqlib.read_fasta(arg.fasta): seq_count += 1 nt_count += len(seq) for i in range(len(seq) - arg.k + 1): kmer = seq[i:i + arg.k] if kmer not in k_count: k_count[kmer] = 0 k_count[kmer] += 1 if arg.verbose: sys.stderr.write(f'{seq_count} sequences\n') sys.stderr.write(f'{nt_count} letters\n') sys.stderr.write(f'{len(k_count)} kmers\n') # output n = 0 for kmer, count in sorted(k_count.items(),