def count_keys(tweet):
    """Increment the counter of every stored keyword that occurs in *tweet*.

    All ``Keyword`` rows are loaded from the session.  For each one, the
    alternative spellings returned by ``get_alternative_keys(keyword.words)``
    are looked up among the tokens of the normalized tweet text.  On a match
    ``keyword.counter.counter`` is incremented and ``write_sentiment(tweet)``
    is called.  The session is committed once, after all keywords have been
    examined.

    A token is a run of word characters optionally followed by a single
    apostrophe (regex ``\\w+'?`` with ``re.UNICODE``).

    :param tweet: object exposing a ``text`` attribute.
    """
    keywords = session.query(Keyword).all()

    # The tweet text does not change between keywords, so normalize and
    # tokenize it once instead of once per alternative key.  A set gives
    # constant-time membership tests.
    tokens = set(
        re.findall(r'\w+\'?', normalize(tweet.text), flags=re.UNICODE)
    )

    for keyword in keywords:
        # Bug fix: the original passed ``keywords.words`` (the whole result
        # list, which has no ``words`` attribute) instead of the current
        # keyword's words.
        alternative_keys = get_alternative_keys(keyword.words)
        if any(key in tokens for key in alternative_keys):
            keyword.counter.counter += 1
            # NOTE(review): the original indentation was lost; this block is
            # assumed to belong to the matching branch (sentiment is written
            # for each keyword the tweet matches) -- confirm against history.
            try:
                write_sentiment(tweet)
            except (UnicodeDecodeError, UnicodeEncodeError):
                # Best effort: a badly encoded tweet must not abort counting.
                logger.warning("A tweet was not UTF8-encoded correctly.",
                               exc_info=True)

    session.commit()
def save_tweet(data):
    """Build a ``Tweet`` from a decoded status payload and persist it.

    The ``id`` and ``retweet_count`` entries of *data* are converted with
    ``int``; ``text`` is stored unchanged; ``created_at`` is parsed with a
    format that expects a literal ``+0000`` offset, so the resulting
    ``datetime`` is naive.

    The new object is added to the session and committed.  If the commit
    raises ``IntegrityError`` the failure is logged with its traceback and
    the session is rolled back.

    :param data: mapping providing the keys ``"id"``, ``"text"``,
        ``"retweet_count"`` and ``"created_at"``.
    :returns: the ``Tweet`` instance, whether or not it could be stored.
    """
    from sqlalchemy.exc import IntegrityError

    created_at_format = "%a %b %d %H:%M:%S +0000 %Y"

    record = Tweet()
    record.id = int(data["id"])
    record.text = data["text"]
    record.retweet_count = int(data["retweet_count"])
    record.date = datetime.strptime(data["created_at"], created_at_format)

    try:
        session.add(record)
        session.commit()
    except IntegrityError:
        # Log first, then restore the session to a usable state.
        message = "Error in adding tweet %r" % record
        logger.error(message, exc_info=True)
        session.rollback()

    return record