def update_timestamp_data(self, timestamp):
    """Set ``status=True`` on rows whose ``data_timestamp`` equals *timestamp*.

    Builds an UPDATE statement against ``self.data`` and executes it on
    ``self.con``; any failure is logged rather than raised.
    """
    try:
        stmt = (
            self.data.update()
            .where(self.data.c.data_timestamp == timestamp)
            .values(status=True)
        )
        self.con.execute(stmt)
    except Exception as err:
        log.error(err)
def on_status(self, status):
    """Handle logic when the data coming.

    Parses the raw *status* payload as JSON, attaches a sentiment score
    computed from the tweet's ``text`` field, and saves the tweet via
    ``self.tw_store``. Any error is logged and swallowed.
    """
    try:
        parsed = json.loads(status)
        # Update sentiment score
        parsed["sentiment"] = SentimentAnalysis.get_sentiment(
            tweet_text=parsed["text"])
        self.tw_store.save_tweet(parsed)
    except Exception as err:
        log.error(err)
def execute(self):
    """Execute the twitter crawler, loop into the keyword_list.

    Runs forever: crawls each keyword in ``self.keyword_list``, sleeping
    ``delay`` seconds after every keyword (rate limiting) and once more
    after a full pass before starting over. A failure while crawling one
    keyword is logged with its traceback and does not stop the loop.
    """
    while True:
        # Bug fix: log message typo "Star crawling" -> "Start crawling".
        log.info("Start crawling back....")
        delay = 600
        for keyword in self.keyword_list:
            log.info('Crawl data for %s', keyword["keyword"])
            try:
                self.crawl(keyword)
            except Exception:
                # exc_info=True already records the exception; the unused
                # "as e" binding was dropped.
                log.error('Error in Crawling process', exc_info=True)
            log.info("Sleeping for %ds", delay)
            sleep(delay)
        # Sleep for 10 minutes after finishing crawl all of the keyword,
        # and start over again
        log.info("Sleeping for %ds...", delay)
        sleep(delay)
def execute(self):
    """Execute the twitter crawler, loop into the keyword_list.

    Opens a tweepy stream filtered to ``AUS_GEO_CODE`` and keeps it
    alive: when the stream raises, the error is logged, the stream is
    disconnected, and a new attempt starts after a 10-minute pause.
    Returns once the stream finishes without error.
    """
    listener = TwitterStream(self.tw_store)
    stream = tweepy.Stream(self.auth, listener)
    running = True
    while running:
        try:
            log.info("Start stream tweets data")
            stream.filter(locations=AUS_GEO_CODE)
            running = False
            log.info("End stream tweets data")
        except Exception as err:
            log.error("There's an error, sleep for 10 minutes")
            log.error(err)
            running = True
            stream.disconnect()
            time.sleep(600)
def connect(self):
    '''Connect to database then set con and meta attributes.

    Builds a PostgreSQL URL from the instance credentials, creates a
    SQLAlchemy engine plus reflected MetaData, and stores them on
    ``self.con`` / ``self.meta``. Returns True on success; on failure
    the error is logged and False is returned without touching self.
    '''
    # URL shape, e.g.:
    # postgresql://federer:grandestslam@localhost:5432/tennis
    url = 'postgresql://{}:{}@{}:{}/{}'.format(
        self.user, self.password, self.host, self.port, self.database_name)
    try:
        # The return value of create_engine() is our connection object
        engine = sqlalchemy.create_engine(url, client_encoding='utf8')
        # Bind the connection to MetaData() and reflect existing tables
        metadata = sqlalchemy.MetaData(bind=engine, reflect=True)
    except Exception as ex:
        log.error(ex)
        return False
    self.con = engine
    self.meta = metadata
    return True
from app.logger import LOGGER as log
import settings

ALL_DOCS_VIEW = '_all_docs'

try:
    log.info("START db updater script")
    log.info("-----------------------")
    server = couchdb.Server(url=settings.COUCHDB_SERVER)
    db = server[settings.COUCHDB_DB]
    info = db.info()
    doc_count = info["doc_count"]
    num_per_request = 10000
    # Page through the whole database in fixed-size windows of the
    # _all_docs view, recomputing sentiment for every document.
    iteration = math.ceil(doc_count / num_per_request)
    for i in range(iteration):
        # Use logging's lazy %-args instead of eager "%" formatting.
        log.info('Run %d iteration', i)
        for row in db.view(ALL_DOCS_VIEW,
                           limit=num_per_request,
                           skip=i * num_per_request):
            data = db.get(row.id)
            data["sentiment"] = SentimentAnalysis.get_sentiment(data["text"])
            db.save(data)
        # Bug fix: the '%d' placeholder previously had no argument,
        # which triggers a logging formatting error at runtime.
        log.info('%d iteration success', i)
    log.info("FINISH db updater script")
except Exception as e:
    log.error(e)
def get_sentiment(tweet_text):
    """Compute polarity scores for *tweet_text*.

    Returns the dict produced by
    ``SentimentIntensityAnalyzer.polarity_scores``; on any failure the
    error is logged and None is returned implicitly.
    """
    try:
        return SentimentIntensityAnalyzer().polarity_scores(tweet_text)
    except Exception as err:
        log.error(err)