Esempio n. 1
0
def generateSentencesAndCheckErrors(infile, K):
	"""Compare model-generated sentences against randomly drawn tweets.

	Runs 100 trials. Each trial picks a random tweet, takes its first
	K-1 space-separated words as the seed, asks the model to generate as
	many further words as the tweet has left, and scores the generated
	sentence against the tweet with compareSentences().

	Args:
		infile: passed through to getModelAndTweetsFromFile().
		K: k-mer size; passed to the model helpers and used to size the seed.

	Returns:
		The list of non-None comparison results (possibly empty). The list
		and its average are also printed to stdout.
	"""
	model, tweets = getModelAndTweetsFromFile(infile, K)
	res = []
	for _ in range(100):
		random_tweet = np.random.choice(tweets)
		# Split once and reuse for both the seed and the remaining length.
		words = random_tweet.split(" ")
		tweet_start = words[:K-1]
		num_words_wanted = len(words) - (K-1)
		generated_sentence = generateSentenceFromStartingKmer(tweet_start, K, num_words_wanted, model)
		updated_tweet, updated_generated_sentence = match_sentence_lengths(random_tweet, generated_sentence)
		result = compareSentences(updated_tweet, updated_generated_sentence)
		# compareSentences() may return None; such trials are not counted.
		if result is None:
			continue
		res.append(result)
		# NOTE(review): presumably flushes output written by the helpers
		# above -- nothing in this loop prints directly. Confirm.
		sys.stdout.flush()
	print(res)
	if res:
		print("Average: %s" % (sum(res) / float(len(res)),))
	else:
		# Every trial was skipped; averaging would divide by zero.
		print("Average: n/a (no comparable sentences)")
	return res
Esempio n. 2
0
def generateTweetsFromFile(infile, K):
	"""Print up to 100 generated tweets that are not in the source tweets.

	Builds the model and tweet list from `infile`, collects starting
	k-mers of size K-1, then repeatedly generates a sentence from a
	randomly chosen starting k-mer. A sentence is printed only when
	checkIfSentenceInDatabase() reports it is not among the tweets.
	Stops after 100 printed sentences or 1000 attempts, whichever comes
	first. Returns None.
	"""
	model, tweets = getModelAndTweetsFromFile(infile, K)
	seeds = getStartingKMers(K-1, tweets)

	printed = 0
	# At most 1000 attempts in total, counting the rejected ones.
	for _attempt in range(1000):
		if printed >= 100:
			break
		# Draw a random index into the list of starting k-mers.
		pick = np.random.choice(range(len(seeds)))
		candidate = generateSentenceFromStartingKmer(seeds[pick], K, 20, model)
		# Only emit sentences that are not already in the tweet list.
		if not checkIfSentenceInDatabase(candidate, tweets):
			print(candidate)
			printed += 1