Example #1
0
def main():
	"""Print a tab-separated hexamer table for a coding and a noncoding FASTA file.

	Command-line options:
	    -c / --cod     path stored in ``options.coding_file``
	    -n / --noncod  path stored in ``options.noncoding_file``

	Each file is passed to ``kmer_freq_file`` with ``word_size=6`` and
	``frame=0`` (``step_size=3`` for the coding file, ``step_size=1`` for the
	noncoding file). The results are used as mappings from k-mer to a numeric
	count. For every k-mer of the coding table that does not contain ``'N'``,
	one row is printed: the k-mer, its count divided by the coding total, and
	its count divided by the noncoding total.

	Prints the option help and exits with status 0 when either input file
	is missing.
	"""
	usage = "\n%prog  [options]"
	parser = OptionParser(usage,version="%prog " + __version__)
	parser.add_option("-c","--cod",action="store",dest="coding_file",help="Coding sequence (must be CDS without UTR, i.e. from start coden to stop coden) in fasta format")
	parser.add_option("-n","--noncod",action="store",dest="noncoding_file",help="Noncoding sequences in fasta format")
	(options,args)=parser.parse_args()

	# Both files are consumed unconditionally below, so stop when EITHER one
	# is missing; testing with `and` only caught the case where both were absent
	# and let a None path through to kmer_freq_file.
	if not options.coding_file or not options.noncoding_file:
		parser.print_help()
		sys.exit(0)
	cod = kmer_freq_file(fastafile = options.coding_file, word_size = 6, step_size = 3, frame = 0)
	noncod = kmer_freq_file(fastafile = options.noncoding_file, word_size = 6, step_size = 1, frame = 0)

	# Totals are kept as floats so the divisions below are true divisions.
	cod_sum = float(sum(cod.values()))
	noncod_sum = float(sum(noncod.values()))

	print('hexamer' + '\t' + 'coding' + '\t' + 'noncoding')
	for kmer in cod:
		if 'N' in kmer:
			continue
		cod_freq = cod[kmer] / cod_sum
		# A k-mer absent from the noncoding table counts as zero occurrences
		# instead of aborting the whole table with a KeyError.
		noncod_freq = noncod.get(kmer, 0) / noncod_sum
		print(kmer + '\t' + str(cod_freq) + '\t' + str(noncod_freq))
Example #2
0
def main():
    """Append a sorted hexamer frequency table to ``nue_hexamer.txt``.

    Command-line options:
        -f / --cod  path stored in ``options.coding_file``

    The file is passed to ``kmer_freq_file`` with ``word_size=6``,
    ``step_size=1`` and ``frame=0``; the result is used as a mapping from
    k-mer to a numeric count. K-mers are written in descending order of
    count, one ``kmer<TAB>frequency`` row each, where frequency is the count
    divided by the sum of all counts. K-mers containing ``'N'`` are skipped.

    Prints the option help and exits with status 0 when no input file is
    given.
    """
    usage = "\n%prog  [options]"
    parser = OptionParser(usage, version="%prog " + __version__)
    parser.add_option("-f",
                      "--cod",
                      action="store",
                      dest="coding_file",
                      help="sequence in fasta format")
    (options, args) = parser.parse_args()

    if not options.coding_file:
        parser.print_help()
        sys.exit(0)
    cod = kmer_freq_file(fastafile=options.coding_file,
                         word_size=6,
                         step_size=1,
                         frame=0)

    # Kept as a float so the per-k-mer division is a true division.
    cod_sum = float(sum(cod.values()))

    # Most frequent k-mers first.
    cod_sorted = sorted(cod, key=cod.get, reverse=True)

    # Write header and rows through ONE handle. Re-opening the file for each
    # row while this handle was still open left the header sitting in this
    # handle's buffer until close, so it landed after all the data rows.
    # NOTE(review): append mode is preserved from the original, so repeated
    # runs accumulate tables in the same file -- confirm that is intended.
    with open("nue_hexamer.txt", "a") as f:
        f.write('hexamer' + '\t' + 'coding' + '\n')
        for kmer in cod_sorted:
            if 'N' in kmer:
                continue
            f.write(kmer + '\t' + str(cod[kmer] / cod_sum) + '\n')