from numpy.linalg import norm import os import os.path as op import itertools from gensim.models import FastText import pandas as pd # add path to utils module to python path basename = op.split(op.dirname(op.realpath(__file__)))[0] path_utils = op.join(basename, "utils") sys.path.insert(0, path_utils) from sys_utils import load_library, strInput2bool #from mongoDB_utils import connect_to_database load_library(op.join(basename, 'preprocess')) load_library(op.join(basename, 'readWrite')) load_library(op.join(basename, 'tweet_utils')) os.environ["HADOOP_HOME"] = "/space/hadoop/hadoop_home" from defines import ColumnNames as cn from defines import Patterns from readWrite import savePandasDFtoFile, readFile from tweet_utils import * from preprocess import Preprocess prep = Preprocess() # ---------------------------- def add_field_to_collection(collection,
from tweepy import Stream import time import argparse import sys import os import os.path as op # add path to utils module to python path basename = op.split(op.dirname(op.realpath(__file__)))[0] path_utils = op.join(basename, "utils") sys.path.insert(0, path_utils) from sys_utils import load_library load_library(op.join(basename, "readWrite")) from readWrite import * #This is a basic listener that just prints received tweets to stdout. class StdOutListener(StreamListener): def on_data(self, data): print(data) return True def on_error(self, status): print(status) if __name__ == '__main__':