def main(argv):
	inputfile = ''
	file_name = ''
	k_mer_arg=''
	default_file_name='input_DNA.txt'
	k_mer_ln=10 #default value for the length of the pattern
	try:
		opts, args = getopt.getopt(argv,"hi:k:")
	except getopt.GetoptError:
	    print 'Usage: '+os.path.basename(__file__)+ ' -i <inputfile> -k <k-mer length>'
	    sys.exit(2)
	for opt, arg in opts:
	  if opt == '-h':
		 print 'Usage: '+os.path.basename(__file__)+ ' -i <inputfile> -k <k-mer length>'
		 sys.exit()
	  elif opt in ("-i"):
		inputfile = arg
	  elif opt in ("-k"):
		k_mer_arg = arg

	if (len(inputfile))>0:
		file_name=inputfile
		print 'Using DNA inputfile: '+inputfile
	else:
		file_name=default_file_name
		print 'Using default inputfile: '+file_name
	
	try:
		int(k_mer_arg)>0
		k_mer_ln=int(k_mer_arg)
		print 'Length of k-mer is '+ str(k_mer_ln)+' bp'
	except:
		print 'Using default k-mer length of '+str(k_mer_ln) + ' bp'



	#print type(os.path.basename(__file__))

	
	#file_name='input_DNA.txt'
	try:
		hand=open(file_name)
	except:
		print "Error! The specified file could not be opened. Using the defile input file option: "+default_file_name
		hand=open(default_file_name)
	dataset=hand.read()
	hand.close()

	#Cleaning the data
	dataset_cln=dataset

	dataset_cln=dataset_cln.strip().upper()
	dataset_lst=dataset_cln.split('\n')
	dataset_cln=''.join(dataset_lst)
	
	#print len(dataset)
	print 'We are working with the dataset of '+ str(len(dataset_cln))+' characters....'
	#print k_mer_ln
	if len(dataset_cln) > k_mer_ln:
		results=frequent_patterns(dataset_cln,k_mer_ln)
	else:
		print "\nError! k-mer length should be less than number of characters in the DNA dataset"
		print "Please try changing the input parameters"		
		return
		
		
#	print results
	if max(results.values())==1:
		print "No motif occurs more than once. Please try changing the input parameters"
	elif len(results)==1:
		print '\n'+'Single most frequent pattern found'+'\n'
		print str(k_mer_ln)+" bp motif "+results.keys()[0]+" occurs "+str(results.values()[0])+" times in the input DNA sequence"
	elif len(results)>1:
		print '\n'+'No single most frequent pattern found'+'\n'
		for item in results:
			print str(k_mer_ln)+" bp motif "+item+" occurs "+str(results[item])+" times in the input DNA sequence"
	else:
		print "No results found. Please try changing the input parameters"
Exemple #2
0
def main(argv):
	inputfile = ''
	file_name = ''
	k_mer_arg=''
	default_file_name='input_DNA.txt'
	k_mer_ln=10 #default value for the length of the pattern
	try:
		opts, args = getopt.getopt(argv,"hi:k:")
	except getopt.GetoptError:
	    print 'test.py -i <inputfile> -k <k-mer length>'
	    sys.exit(2)
	for opt, arg in opts:
	  if opt == '-h':
		 print 'Usage: test.py -i <inputfile> -k <k-mer length>'
		 sys.exit()
	  elif opt in ("-i"):
		inputfile = arg
	  elif opt in ("-k"):
		k_mer_arg = arg


	
	if int(k_mer_arg)>0:
		k_mer_ln=int(k_mer_arg)
		print 'Length of k-mer is '+ str(k_mer_ln)+' bp'
	else:
		print 'Using default k-mer length of '+str(k_mer_ln) + ' bp'

	if (len(inputfile))>0:
		file_name=inputfile
		print 'Using DNA inputfile: '+inputfile
	else:
		file_name=default_file_name
		print 'Using default inputfile: '+file_name

#	print 'hehe'

	
	#file_name='input_DNA.txt'
	hand=open(file_name)
	dataset=hand.read()
	hand.close()

	#Cleaning the data
	dataset_cln=dataset

	dataset_cln=dataset_cln.strip().upper()
	dataset_lst=dataset_cln.split('\n')
	dataset_cln=''.join(dataset_lst)
	
	#print len(dataset)
	print 'We are working with the dataset of '+ str(len(dataset_cln))+' characters....'
	#print k_mer_ln
	
	results=frequent_patterns(dataset_cln,k_mer_ln)
	
	if len(results)==1:
		print '\n'+'Single most frequent pattern found'+'\n'
		print str(k_mer_ln)+" bp motif "+results.keys()[0]+" occurs "+str(results.values()[0])+" times in the input DNA sequence"
		
	elif len(results)>1:
		print '\n'+'No single most frequent pattern found'+'\n'
		for item in results:
			print str(k_mer_ln)+" bp motif "+item+" occurs "+str(results[item])+" times in the input DNA sequence"