def main():
    """Print a tab-separated hexamer frequency table for coding vs. noncoding input.

    Command-line options:
        -c / --cod     FASTA file of coding sequences.
        -n / --noncod  FASTA file of noncoding sequences.

    Both options are required; if either is missing the usage text is
    printed and the process exits with status 0.

    The per-file counts come from ``kmer_freq_file`` (word size 6; step 3
    for the coding file, step 1 for the noncoding file; frame 0 for both).
    Each count is divided by the sum of all counts for that file. One row
    is printed per key of the coding table, skipping any key that
    contains ``'N'``.
    """
    usage = "\n%prog [options]"
    parser = OptionParser(usage, version="%prog " + __version__)
    parser.add_option(
        "-c", "--cod", action="store", dest="coding_file",
        help="Coding sequence (must be CDS without UTR, i.e. from start coden to stop coden) in fasta format",
    )
    parser.add_option(
        "-n", "--noncod", action="store", dest="noncoding_file",
        help="Noncoding sequences in fasta format",
    )
    options, _ = parser.parse_args()

    # Both files are consumed unconditionally below, so bail out when
    # *either* one is missing (the previous `and` only caught the case
    # where both were absent and let None through to kmer_freq_file).
    if not options.coding_file or not options.noncoding_file:
        parser.print_help()
        sys.exit(0)

    cod = kmer_freq_file(fastafile=options.coding_file, word_size=6, step_size=3, frame=0)
    noncod = kmer_freq_file(fastafile=options.noncoding_file, word_size=6, step_size=1, frame=0)

    # Float totals keep the divisions below in floating point.
    cod_sum = float(sum(cod.values()))
    noncod_sum = float(sum(noncod.values()))

    print('hexamer' + '\t' + 'coding' + '\t' + 'noncoding')
    for kmer in cod:
        if 'N' in kmer:
            continue
        # NOTE(review): assumes kmer_freq_file returns a dict-like mapping;
        # .get() reports 0 for a hexamer absent from the noncoding table
        # instead of raising KeyError if the mapping is a plain dict.
        noncod_count = noncod.get(kmer, 0)
        print(
            kmer
            + '\t' + str(float(cod[kmer] / cod_sum))
            + '\t' + str(float(noncod_count / noncod_sum))
        )
def main():
    """Append a hexamer frequency table for one FASTA file to ``nue_hexamer.txt``.

    Command-line options:
        -f / --cod  Sequence file in FASTA format (required; when missing,
                    the usage text is printed and the process exits with
                    status 0).

    Counts come from ``kmer_freq_file`` (word size 6, step 1, frame 0).
    Each count is divided by the sum of all counts. Rows are written in
    descending order of count, skipping any key that contains ``'N'``.

    The output file is opened in append mode, so a header line and the rows
    are added after whatever the file already contains.
    """
    usage = "\n%prog [options]"
    parser = OptionParser(usage, version="%prog " + __version__)
    parser.add_option(
        "-f", "--cod", action="store", dest="coding_file",
        help="sequence in fasta format",
    )
    options, _ = parser.parse_args()

    if not options.coding_file:
        parser.print_help()
        sys.exit(0)

    cod = kmer_freq_file(fastafile=options.coding_file, word_size=6, step_size=1, frame=0)

    # Float total keeps the division below in floating point.
    cod_sum = float(sum(cod.values()))

    # Keys ordered from most to least frequent.
    ranked = sorted(cod, key=cod.get, reverse=True)

    # Open the output once and stream the header plus every row through the
    # same handle, instead of reopening the file for each hexamer.
    # NOTE(review): append mode is kept from the original; re-running the
    # script adds another header and table to the existing file.
    with open("nue_hexamer.txt", "a") as out:
        out.write('hexamer' + '\t' + 'coding' + '\n')
        out.writelines(
            kmer + '\t' + str(float(cod[kmer] / cod_sum)) + '\n'
            for kmer in ranked
            if 'N' not in kmer
        )