def tweet_to_Tweet(tweet, select_fields=tweet_fields):
    """
    Build a Tweet object out of a tweet json dict.

    Keys of `tweet` that appear in `select_fields` are copied as-is; the
    text, location, user and related-tweet entries are then filled in (or
    overwritten) from the corresponding parts of the json before the Tweet
    is constructed from the collected keyword arguments.
    """
    info = dict(
        (key, value)
        for key, value in tweet.items()
        if key in select_fields
    )

    # Characters beyond the basic multilingual plane are stripped because
    # pymssql cannot handle them yet:
    # https://github.com/pymssql/pymssql/issues/300
    stripped_text = strip_suppl_unicode(tweet['text'])
    info['text'] = xuni(stripped_text)

    info['location'] = place_to_Location(tweet['place'])
    info['user'] = user_to_TwitterUser(tweet['user'])

    # Tweets related to this one are represented by blank placeholder Tweet
    # objects carrying only the id. A later db.merge on this tweet fills them
    # in with whatever already exists in the db or session.
    # note: do NOT db.merge() here -- doing it early adds this tweet to the
    # session and leads to duplicates & key conflicts at commit time. The
    # same holds for the location & user assigned above.
    retweeted = tweet.get('retweeted_status')
    if retweeted is not None:
        info['retweeted_status'] = Tweet(id_str=retweeted['id_str'])

    reply_to_id = tweet.get('in_reply_to_status_id_str')
    if reply_to_id is not None:
        info['in_reply_to'] = Tweet(id_str=reply_to_id)

    return Tweet(**info)
def classify(db_session, request):
    """
    Classify reviews and return YelpClassify()

    Points the `db_session` mock at `get_db_session`, clears the tables,
    runs every line of `populate.sql` (located next to this file) through
    `db.execute`, commits, and then classifies all reviews.

    `request` is not used in the body.
    NOTE(review): presumably this is the pytest fixture request object --
    confirm against the decorators on this function.
    """
    logger.info("SETUP")

    # setup test database mock
    db_session.side_effect = get_db_session

    # populate database
    clear_tables()
    sql_path = os.path.join(__location__, "populate.sql")
    # Open via a context manager so the file handle is closed as soon as the
    # statements have been executed, even if one of them raises.
    with open(sql_path, "r") as sql_file:
        for line in sql_file:
            db.execute(xuni(line))
    db.commit()

    yc = YelpClassify()
    yc.classify_reviews(every=True, verbose=1)
    return yc
def test_xuni_blank():
    """ xuni maps both None and the empty string to an empty unicode string """
    for blank in (None, ''):
        assert xuni(blank) == u''