def summarize(self, input, num_sentences):
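		"""Return an extractive summary of `input` at most `num_sentences`
		sentences long, built from sentences containing the input's highest
		TF-IDF-ranked words."""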
		# TODO: allow the caller to specify the word and sentence tokenizers they want
		
		tokenizer = RegexpTokenizer(r'\w+')
		
		# get the frequency of each word in the input, ignoring stop words
		base_words = [word.lower()
			for word in tokenizer.tokenize(input)]
		stop_words = set(stopwords.words('english'))
		words = [word for word in base_words if word not in stop_words]
		
		word_frequencies = FreqDist(words)
		
		# take the ten most frequent words; these drive the TF-IDF ranking below
		most_freq_w = word_frequencies.items()[:10]
		print "********************"
		print word_frequencies
		print "Most frequent words are......."
		print most_freq_w
		print "********************"
		
		# build a {word: frequency} dictionary from the most frequent words
		dict_most_freq_words = dict(most_freq_w)
		print dict_most_freq_words
		print "KEYS are:"
		print dict_most_freq_words.keys()
		
		# break the input up into sentences.  working_sentences is used 
		# for the analysis, but actual_sentences is used in the results
		# so capitalization will be correct.
		
		sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
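		# (the punkt model must be available locally, e.g. via nltk.download('punkt'))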
		actual_sentences = sent_detector.tokenize(input)
		working_sentences = [sentence.lower() 
			for sentence in actual_sentences]

		# iterate over the highest-ranked words, and add the first sentence
		# that includes each word to the result.
		output_sentences = []
		
		# generate TF-IDF scores for the ten most frequent words (generateTfidf
		# is expected to multiply each word's frequency by its IDF)
		TFIDF = generateTfidf(dict_most_freq_words)
		
		print "TFIDF is"
		print TFIDF
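		# generateTfidf is assumed to implement the standard TF-IDF weighting:
		#     tfidf(w) = tf(w) * idf(w),  with  idf(w) = log(N / df(w))
		# where tf(w) is w's count in the input, N is the number of documents in
		# the reference collection, and df(w) is how many of them contain w.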
		
		TFIDF_after_SORT = sorter(TFIDF)
		print "TFIDF after sorting is"
		print TFIDF_after_SORT
		
		# keep the words in descending TF-IDF order (a plain dict would not
		# preserve the ranking)
		highest_ranked_words = [word for word, score in TFIDF_after_SORT]
		
		for word in highest_ranked_words:
			for i in range(0, len(working_sentences)):
				if (word in working_sentences[i] 
				  and actual_sentences[i] not in output_sentences):
					output_sentences.append(actual_sentences[i])
					break
				if len(output_sentences) >= num_sentences: break
			if len(output_sentences) >= num_sentences: break
			
		# sort the output sentences back to their original order
		output_sentences = self.reorder_sentences(output_sentences, input)

		# concatenate the sentences into a single string
		return "  ".join(output_sentences)
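
# `self.reorder_sentences` is called above but is not defined in this section.
# The standalone sketch below (a hypothetical name, not wired into the class)
# illustrates the behaviour assumed by the comment "sort the output sentences
# back to their original order": order the selected sentences by where they
# appear in the input.
def reorder_sentences_sketch(output_sentences, input_text):
		# sort each selected sentence by its position in the original text
		return sorted(output_sentences, key=lambda sentence: input_text.find(sentence))
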
def summarize_timed(self, input, num_sentences):
		# Timing-instrumented variant of summarize(); kept under its own name so
		# it does not shadow the definition above, and prints how long each stage
		# of the summarization takes.
		# tokenize the input into lower-cased words
		tokenizer = RegexpTokenizer(r'\w+')
		
		time1 = time.time()
		tokenized_words = tokenizer.tokenize(input.lower())
		time2 = time.time()
		print "Total time taken for word tokenization is: " + str(time2 - time1)
		print "Number of tokenized words: " + str(len(tokenized_words))
		
		
		# compute the frequency of each word
		time1 = time.time()
		word_frequencies = FreqDist(tokenized_words)
		time2 = time.time()
		print "Total time taken for calculating word frequencies is: " + str(time2 - time1)
		
		
		# take the ten most frequent words, excluding stop words (and 'would')
		time1 = time.time()
		stop_words = set(stopwords.words('english'))
		word_lst = []
		for word, count in word_frequencies.items():
			if word not in stop_words and word != 'would':
				word_lst.append((word, count))
				if len(word_lst) >= 10:
					break
		time2 = time.time()
		print "Total time taken for stop words is: " + str(time2 - time1)
				
		
		
		
		# build a {word: frequency} dictionary from the most frequent words
		dict_most_freq_words = dict(word_lst)
		
		print "KEYS are:"
		print dict_most_freq_words.keys()
		
		
		# break the input up into sentences.  working_sentences is used 
		# for the analysis, but actual_sentences is used in the results
		# so capitalization will be correct.
		
		time1 = time.time()
		sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
		actual_sentences = sent_detector.tokenize(input)
		working_sentences = [sentence.lower() 
			for sentence in actual_sentences]
		time2 = time.time()	
		print "Time taken to tokenize sentences is: " +str(time2-time1)
		
		
		# iterate over the highest-ranked words, and add the first sentence
		# that includes each word to the result.
		output_sentences = []
		
		# generate TF-IDF scores for the ten most frequent words
		time1 = time.time()
		TFIDF = generateTfidf(dict_most_freq_words)
		time2 = time.time()
		print "Time taken in IDF function is: " + str(time2 - time1)
		
				
		print "TFIDF is"
		print TFIDF
		
		#Sort the TFIDF words
		time1 = time.time()
		TFIDF_after_SORT = sorter(TFIDF)
		time2 = time.time()
		print "Time taken in SORT function is: " +str(time2-time1)
		
		print "TFIDF after sorting is"
		print TFIDF_after_SORT
		
		# keep the words in descending TF-IDF order (a plain dict would not
		# preserve the ranking)
		highest_ranked_words = [word for word, score in TFIDF_after_SORT]
		
		# collect the sentences from the input that contain the highest-ranked words
		time1 = time.time()
		for word in highest_ranked_words:
			for i in range(0, len(working_sentences)):
				if (word in working_sentences[i] 
				  and actual_sentences[i] not in output_sentences):
					output_sentences.append(actual_sentences[i])
					break
				if len(output_sentences) >= num_sentences: break
			if len(output_sentences) >= num_sentences: break
		time2 = time.time()
		print "Time taken to generate OP sentences is: " +str(time2-time1)
		
		# sort the output sentences back to their original order
		time1 = time.time()
		output_sentences = self.reorder_sentences(output_sentences, input)
		time2 = time.time()
		print "Time taken to reorder OP sentences is: " +str(time2-time1)
		
		
		# concatenate the sentences into a single string
		return "  ".join(output_sentences)
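
# --- Reference sketches (assumptions, not part of the original implementation) ---
# `generateTfidf` and `sorter` are called in the methods above but are not
# defined in this section.  The functions below carry hypothetical "_sketch"
# names and show one plausible reading of their behaviour, inferred from how
# they are used: generateTfidf takes a {word: term_frequency} dict and returns
# {word: tf * idf}, and sorter returns the (word, score) pairs ordered by
# descending score.  The document collection used for the IDF statistics is an
# assumption here.
import math

def generate_tfidf_sketch(term_frequencies, documents=None):
		# `documents` is a hypothetical parameter: an iterable of lower-cased
		# reference documents used to estimate document frequencies.  With no
		# documents the smoothed IDF is constant and the scores reduce to
		# scaled term frequencies.
		documents = documents or []
		num_docs = len(documents)
		tfidf = {}
		for word, tf in term_frequencies.items():
			df = sum(1 for doc in documents if word in doc)
			idf = math.log(float(1 + num_docs) / (1 + df)) + 1.0
			tfidf[word] = tf * idf
		return tfidf

def sorter_sketch(tfidf):
		# highest TF-IDF score first
		return sorted(tfidf.items(), key=lambda pair: pair[1], reverse=True)

# Example usage of the summarizer (the class wrapping these methods is not
# shown in this section; `SimpleSummarizer` is an assumed name):
#     ss = SimpleSummarizer()
#     print ss.summarize(open('article.txt').read(), 3)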