Exemple #1
0
from numpy.linalg import norm
import os
import os.path as op
import itertools
from gensim.models import FastText
import pandas as pd

# Add the project's "utils" directory to the Python module search path.
# `sys` is imported here because nothing earlier in this script imports it;
# without it the sys.path.insert call below raises NameError.
import sys

# Parent of the directory that contains this (symlink-resolved) file.
basename = op.split(op.dirname(op.realpath(__file__)))[0]
path_utils = op.join(basename, "utils")
# Prepend so that modules found in utils take precedence on import.
sys.path.insert(0, path_utils)

from sys_utils import load_library, strInput2bool
#from mongoDB_utils import connect_to_database

# Hand each project package directory to load_library, in the same order as
# the original explicit calls, then set HADOOP_HOME to the fixed install path.
for _pkg_dir in ("preprocess", "readWrite", "tweet_utils"):
    load_library(op.join(basename, _pkg_dir))
del _pkg_dir  # keep the module namespace free of the loop variable
os.environ["HADOOP_HOME"] = "/space/hadoop/hadoop_home"

# Project-local imports. NOTE(review): these modules are presumably only
# importable after the sys.path / load_library setup earlier in this file —
# confirm before reordering them or hoisting them to the top.
from defines import ColumnNames as cn
from defines import Patterns
from readWrite import savePandasDFtoFile, readFile
# NOTE(review): star import hides which tweet_utils names this module uses;
# consider listing them explicitly once they are known.
from tweet_utils import *
from preprocess import Preprocess
# Module-level Preprocess instance, created once when this module is loaded.
prep = Preprocess()

# ----------------------------


def add_field_to_collection(collection,
from tweepy import Stream
import time

import argparse
import sys
import os
import os.path as op

# Make the project's "utils" directory importable: resolve this file's real
# location, step up to the parent of its containing directory, and prepend
# <parent>/utils to the module search path.
basename = op.dirname(op.dirname(op.realpath(__file__)))
path_utils = op.join(basename, "utils")
sys.path.insert(0, path_utils)

from sys_utils import load_library

# NOTE(review): load_library is a project helper from sys_utils; presumably it
# makes the "readWrite" directory importable so the import below resolves —
# confirm before changing the order of these statements.
load_library(op.join(basename, "readWrite"))

# NOTE(review): star import — the names taken from readWrite are not visible
# here; prefer explicit names once they are known.
from readWrite import *


class StdOutListener(StreamListener):
    """Basic listener that just prints received tweets to stdout."""

    def on_error(self, status):
        """Print the error ``status`` and return None.

        NOTE(review): tweepy 3.x documents that only an explicit ``False``
        from ``on_error`` disconnects the stream — confirm for the installed
        version before relying on the reconnect behaviour.
        """
        print(status)
        return None

    def on_data(self, data):
        """Print the received ``data`` payload and return True."""
        print(data)
        return True


if __name__ == '__main__':