def __init__(self, search_terms):
    """Create the Twarc client and store the search terms on the instance.

    Args:
        search_terms: stored unchanged as ``self.search_terms``.

    The Twarc client is built from the four credentials held in
    ``localConfig`` and kept as ``self.t``.
    """
    logging.info("initializing TwitterStream Kafka")

    # per-instance state
    credentials = (
        localConfig.client_key,
        localConfig.client_secret,
        localConfig.access_token,
        localConfig.access_token_secret,
    )
    self.t = Twarc(*credentials)
    self.search_terms = search_terms
def __init__(self):
    """Create the KafkaConsumer for "betweezered" on localhost:9092.

    On success the consumer is stored as ``self.consumer``. A failed
    connection is best-effort: it is logged as a warning (with the
    traceback) and not re-raised, so ``self.consumer`` is left unset in
    that case.
    """
    try:
        self.consumer = KafkaConsumer(
            "betweezered",
            group_id="betweezered_consumer",
            metadata_broker_list=["localhost:9092"],
        )
        logging.info("Initialized Apache Kafka connection.")
    except Exception:
        # Deliberately not a bare ``except:``: KeyboardInterrupt and
        # SystemExit must still propagate. exc_info keeps the cause of the
        # failure in the log instead of discarding it.
        logging.warning("Could not initialize Apache Kafka connection.",
                        exc_info=True)
def __init__(self, search_terms):
    """Store the search terms and load tests/real_tweets.csv into memory.

    Args:
        search_terms: stored unchanged as ``self.search_terms``.

    Every line of ``tests/real_tweets.csv`` (path relative to the current
    working directory) is kept, line ending included, in the list
    ``self.lt``.

    Raises:
        IOError/OSError: if the file cannot be opened or read.
    """
    logging.info("initializing LocalStream Kafka")

    # per-instance state
    self.search_terms = search_terms

    # The context manager closes the handle even if reading fails; the
    # file was previously opened and never closed.
    with open('tests/real_tweets.csv', 'r') as fhand:
        self.lt = list(fhand)
def __init__(self, search_terms):
    """Store the search terms and load tests/real_tweets.csv into memory.

    Args:
        search_terms: stored unchanged as ``self.search_terms``.

    Every line of ``tests/real_tweets.csv`` (path relative to the current
    working directory) is kept, line ending included, in the list
    ``self.lt``.

    Raises:
        IOError/OSError: if the file cannot be opened or read.
    """
    logging.info("initializing LocalStream Kafka")

    # per-instance state
    self.search_terms = search_terms

    # The context manager closes the handle even if reading fails; the
    # file was previously opened and never closed.
    with open('tests/real_tweets.csv', 'r') as fhand:
        self.lt = list(fhand)
def processMessage(self, message):
    """Parse one message as a JSON tweet and save it as a MongoTweet.

    Args:
        message: object whose ``value`` attribute holds the JSON text.

    Best-effort: nothing is raised to the caller. A failure while saving
    is logged as "could not insert into db"; any other failure (bad JSON,
    missing 'text' key, ...) is logged as a plain warning. Always returns
    None.
    """
    try:
        # retrieve payload and parse
        tweet_data = json.loads(message.value)
        logging.info("tweet text: %s" % tweet_data['text'])

        # insert into MongoDB
        try:
            # convert id to string before building the document
            tweet_data['id'] = str(tweet_data['id'])
            record = models.MongoTweet(**tweet_data)
            record.save()
            logging.info("tweet inserted into db, id %s" % record.id)
        except Exception as db_error:
            logging.warning("could not insert into db. error: %s" % db_error)
    except Exception as error:
        logging.warning(error)
def search(collection):
    """Run the stored search for a named collection and archive the results.

    Args:
        collection: name of the ``models.Collection`` record to look up.

    Returns:
        ``jsonify({"status": False})`` when no such collection exists,
        otherwise ``jsonify({"archive_log": ...})`` carrying whatever
        ``utils.search_and_archive`` returned.
    """
    # NOTE: consider pushing to worker queue (celery, rq, etc.)
    logging.info("Performing search for %s" % collection)

    # retrieve collection mongo record
    try:
        record = models.Collection.objects.get(name=collection)
        logging.info("Retrieved %s %s" % (record.name, record.id))
    except DoesNotExist:
        logging.info("collection does not exist")
        return jsonify({"status": False})

    # run search; the archive directory is named after the collection
    path_parts = [localConfig.archive_directory, record.name]
    archive_dir = "/".join(path_parts)
    terms = record.search_terms
    logging.debug("Passing %s %s" % (terms, archive_dir))
    archive_log = utils.search_and_archive(collection, terms, archive_dir)

    response = {"archive_log": archive_log}
    return jsonify(response)


# # return json for tweet
# @app.route("/{prefix}/tweets/<limit>".format(prefix=localConfig.twitore_app_prefix), methods=['GET', 'POST'])
# def tweet(limit):
#     renderdict = {}
#     # return tweet json
#     renderdict['tweets'] = models.MongoTweet.objects().limit(int(limit))
#     renderdict['count'] = models.MongoTweet.objects.count()
#     renderdict['limit'] = limit
#     # add search terms
#     renderdict['search_terms'] = localConfig.search_terms
#     return render_template('tweets.htm',renderdict=renderdict)

# # cron
# @app.route("/{prefix}/jobs".format(prefix=localConfig.twitore_app_prefix), methods=['GET', 'POST'])
# def jobs():
#     # get crontab for current user
#     mycron = CronTab(user=True)
#     # get all jobs
#     jobs = mycron.crons
#     localConfig.logging.debug(jobs)
#     #render page
#     return render_template('jobs.html',jobs=jobs)