def __init__(self):
    """Connect to the Twitter API and open a Kafka producer.

    Reads credentials and connection settings from config.json via
    ConfigReader, builds an authenticated tweepy API client that waits
    through rate limits, snapshots the current rate-limit status, and
    connects a SimpleProducer to the configured Kafka broker.

    Raises:
        KafkaUnavailableError: if the Kafka broker cannot be reached.
    """
    config = ConfigReader("config.json")

    # OAuth 1.0a: consumer key/secret identify the application,
    # access token/secret identify the acting user account.
    auth = OAuthHandler(config.get_key("CONSUMER_KEY"),
                        config.get_key("CONSUMER_SECRET"))
    auth.set_access_token(config.get_key("ACCESS_TOKEN_KEY"),
                          config.get_key("ACCESS_TOKEN_SECRET"))

    # wait_on_rate_limit makes tweepy sleep through rate-limit windows
    # instead of raising; *_notify logs when that happens (tweepy 3.x API).
    self.api = tweepy.API(auth_handler=auth,
                          wait_on_rate_limit=True,
                          wait_on_rate_limit_notify=True)

    # Snapshot the remaining per-endpoint call budget at startup.
    self.rate_limits = self.tweep_rate_limits_to_dictionary(
        self.api.rate_limit_status())
    self.scrapers = []

    kafka_url = "{:s}:{:s}".format(config.get_key("KAFKA_HOST"),
                                   config.get_key("KAFKA_PORT"))
    try:
        kafka = SimpleClient(kafka_url, timeout=60)
    except KafkaUnavailableError:
        # Fixed message typo ("Kafka2" -> "Kafka").
        logging.error("Could not connect to Kafka")
        # Bare raise preserves the original traceback; `raise e` would
        # re-anchor the raise location to this frame.
        raise
    self.producer = SimpleProducer(kafka)
# Load the AFINN-96 word -> sentiment-score lexicon (tab-separated rows of
# "<word>\t<score>"); scores are converted to int.
try:
    with open("data/AFINN-96.txt") as afinn_file:
        # Dict comprehension replaces the build-then-mutate loop; a row
        # with the wrong field count or a non-integer score raises
        # ValueError, handled below.
        sentiment_data = {word: int(score)
                          for word, score in csv.reader(afinn_file,
                                                        delimiter='\t')}
except IOError as e:
    # Lazy %-args: the message is only formatted if the record is emitted.
    logging.error("Could not open sentiment data file %s", str(e.args))
    exit(1)
except ValueError:
    logging.error("Sentiment data file not valid")
    exit(1)

config = ConfigReader("config.json")
zookeeper_url = "{:s}:{:s}".format(config.get_key("ZOOKEEPER_HOST"),
                                   config.get_key("ZOOKEEPER_PORT"))
kafka_topic = config.get_key("KAFKA_TOPIC")
output_topic = config.get_key("KAFKA_OUTPUT_TOPIC")

sc = SparkContext(appName="PythonTweetCleaner")
sc.setLogLevel("ERROR")
# 10-second micro-batches.
ssc = StreamingContext(sc, 10)

# NOTE(review): "startingOffsets" is a Structured Streaming option; the
# receiver-based KafkaUtils.createStream expects Kafka consumer properties,
# so this parameter is likely ignored -- confirm against the Spark version
# in use.
kafka_params = {"startingOffsets": "earliest"}
# One receiver thread on the input topic, consumer group 'spark-streaming'.
kafkaStream = KafkaUtils.createStream(ssc, zookeeper_url, 'spark-streaming',
                                      {kafka_topic: 1}, kafka_params)
# output stream