# getting only the useful information
    # Kafka delivers (key, value) pairs; keep only the raw message payload
    raw_messages = kafkaStream.map(lambda pair: pair[1].encode('utf-8'))

    # parse each payload into a dict (literal_eval, not eval — safe on
    # untrusted data), then keep only English tweets that actually carry
    # text; the text check guards against empty records
    tweets = (raw_messages
              .map(ast.literal_eval)
              .filter(lambda tweet: tweet.get('lang', '') == 'en')
              .filter(lambda tweet: tweet.get('text', '') != ''))

    # turn each tweet's text into tokens, then into a term-frequency
    # sparse vector suitable for the model
    features = (tweets
                .map(lambda tweet: tokenize(text=tweet.get('text', ''),
                                            common_words=common_words))
                .map(lambda tokens: compute_tf(tokens,
                                               reference_table=reference_table)))

    # score each feature vector with the streaming logistic regression,
    # then count how many predictions fall into each class per batch
    prediction_counts = (lr.predictOn(features)
                         .map(lambda label: (label, 1))
                         .reduceByKey(lambda a, b: a + b))

    # show the per-batch class counts in the console
    prediction_counts.pprint()

    # persist each batch of counts into HBase
    prediction_counts.foreachRDD(put_data_into_hbase)

    # launch the streaming context and block until it is stopped
    ssc.start()
    ssc.awaitTermination()