"""Report summary statistics about the sequence lengths in a FASTA file."""

import argparse
import statistics


def parse_args(argv=None):
    """Parse the command line.

    argv: list of argument strings; None means use sys.argv[1:].
    Returns the argparse namespace, whose `file` attribute is the FASTA path.
    """
    parser = argparse.ArgumentParser(description='stats about sequence')
    # required arguments
    parser.add_argument('--file', required=True, type=str, metavar='<str>',
        help='required fasta file')
    return parser.parse_args(argv)


def sequence_lengths(records):
    """Return the sorted lengths of the sequences in `records`.

    records: iterable of (name, sequence) pairs.
    """
    return sorted(len(seq) for _, seq in records)


def summarize(lengths):
    """Return the (label, value) pairs to report for a non-empty list.

    Raises ValueError if `lengths` is empty (from min()/max()).
    """
    return [('min is', min(lengths)), ('max is', max(lengths))]


def main(argv=None):
    """Read the FASTA file named by --file and print the length statistics."""
    # Imported here so the helpers above stay importable without mcb185.
    import mcb185

    arg = parse_args(argv)
    length = sequence_lengths(mcb185.read_fasta(arg.file))
    if not length:
        # min()/max() of an empty list raise ValueError; report it cleanly.
        raise SystemExit(f'no sequences found in {arg.file}')
    for label, value in summarize(length):
        print(label, value)
    # TODO: also report the sum (and further statistics) of the lengths.


if __name__ == '__main__':
    main()
"""Report the amino acid composition of a FASTA file of proteins.

Usage:
    python3 aa_comp.py at_prots.fa | sort -nk2

Each output line is: residue, count, fraction of all residues counted.
Example output (abbreviated):
    W 528 0.012054244098442994
    C 801 0.018286836217524315
    H 1041 0.023766038080452946
    M 1097 0.025044518515136296
    Y 1281 0.02924523994338158
    Q 1509 0.03445048171316378
    F 1842 0.04205287429797726
    N 1884 0.04301173462398977
    P 2051 0.046824345920277614
    T 2153 0.04915300671202228
    R 2320 0.05296561800831012
"""

import sys
from collections import Counter


def count_residues(records):
    """Count every residue in `records`, skipping the '*' symbol.

    records: iterable of (name, sequence) pairs.
    Returns a Counter keyed by residue, in first-seen order.
    """
    counts = Counter()
    for _, seq in records:
        counts.update(aa for aa in seq if aa != '*')
    return counts


def composition(counts):
    """Return a list of (residue, count, fraction) tuples for `counts`.

    The fraction is count divided by the total of all counts. An empty
    mapping yields an empty list, so no division by zero occurs.
    """
    total = sum(counts.values())
    return [(aa, n, n / total) for aa, n in counts.items()]


def main(argv=None):
    """Print the composition of the FASTA file named by the first argument."""
    # Imported here so the helpers above stay importable without mcb185.
    import mcb185

    args = sys.argv[1:] if argv is None else argv
    if not args:
        # Without this check a missing argument surfaces as a bare IndexError.
        sys.exit(f'usage: {sys.argv[0]} <fasta file>')
    counts = count_residues(mcb185.read_fasta(args[0]))
    for aa, n, fraction in composition(counts):
        print(aa, n, fraction)


if __name__ == '__main__':
    main()
"""Translate nucleotide sequences, given on the command line or in FASTA files."""

import sys
import argparse


def parse_args(argv=None):
    """Parse the command line.

    argv: list of argument strings; None means use sys.argv[1:].
    Returns the argparse namespace; `seq` is a list of one or more strings.
    """
    parser = argparse.ArgumentParser(description = 'translate RNAs to proteins')
    parser.add_argument('--seq', required = True, type = str, nargs = '+',
        metavar = '<str>',
        help = 'DNA sequ (file or sequences sep by spaces))')
    return parser.parse_args(argv)


def is_fasta_path(item):
    """Return True when a --seq argument names a FASTA file.

    A '.fa' substring is the marker; this also matches names such as
    'x.fasta'.
    """
    return '.fa' in item


def output_lines(inputs, read_fasta, translate):
    """Yield the lines to print for each --seq argument, in order.

    inputs: the list of --seq arguments.
    read_fasta: callable taking a path and returning (name, seq) pairs.
    translate: callable taking a sequence and returning its translation.

    Each argument is classified on its own. arg.seq is a list (nargs='+'),
    so testing `'.fa' in arg.seq` only matched an argument that was
    literally '.fa', and the whole list was then passed to read_fasta.
    """
    for item in inputs:
        if is_fasta_path(item):
            for name, seq in read_fasta(item):
                yield f'>{name}'  # header line, so output looks like FASTA
                yield translate(seq)
        else:
            yield translate(item)


def main(argv=None):
    """Translate every --seq argument and print the results."""
    # Imported here so the helpers above stay importable without mcb185.
    import mcb185

    arg = parse_args(argv)
    for line in output_lines(arg.seq, mcb185.read_fasta, mcb185.translate):
        print(line)


if __name__ == '__main__':
    main()

"""
for seq in sys.argv[1:]:
    prot = ''
    for i in range(0, len(seq), 3):
        prot += gcode[seq[i:i+3]]
    print(prot)
"""
# You have been given the code above
# An example command line is
#     python3 translate.py ATGCGCCCGAACTAG ATGAAACCCGGGTTT
# Your task is to write a new program with the following features
# 1. Proper command line (argparse)
# 2. Reads sequences in from fasta format rather than sys.argv