# Build the Bloom filter from n and p, then report its parameters.
bloomfilter = BloomFilter(n, p)

# Each entry pairs a report template with the filter attribute it displays.
# The attribute is looked up right before its line is printed, so the
# reports come out one at a time, in this order.
for _template, _attribute in (
        ("Bit array size:{}", "size"),
        ("False positive probability is:{}", "probability_false_positive"),
        ("Number of hash functions used in the bloomfilter:{}",
         "number_of_hf"),
):
    print(_template.format(getattr(bloomfilter, _attribute)))

# Compute the root (stemmed) form of every word from the dictionary and
# add each original word to the Bloom filter.
# Non-ASCII characters are dropped from each stemmed word.
# Only root_words is filled in here; the other three lists start out empty.
root_words, false_positive, present_word, not_present_word = [], [], [], []
for w in words:
    # The filter stores the original word, not its stem.
    bloomfilter.adding_item_bf(w)
    stemmed_word = stemmer.stem(w)
    root_words.append(stemmed_word.encode("ascii", "ignore"))


# Stream listener class that receives the tweets from the public API.
# Each tweet is cleaned.
# The text of the tweet is tokenized to get the tokens.
# A false positive is determined by checking the token against the words.
class StdOutListener(StreamListener):
    def on_data(self, data):
        cnt = 0
        root_tokens = []
        try:
            data = json.loads(HTMLParser().unescape(data))
            cleaned_tweet = preprocessor.clean(data['text'].encode(
                "ascii", "ignore"))