def get_user(screen_name):
    """
    Look up a single user row by its Twitter screen name.

    args:
        screen_name (str): Twitter user screen_name

    returns:
        The first models.User row whose screen_name equals the argument,
        as produced by the query's first() call.
    """
    db = models.load_session()
    matching = db.query(models.User).filter(
        models.User.screen_name == screen_name)
    return matching.first()
def get_user(screen_name):
    """
    Look up a single user row by its Twitter screen name.

    args:
        screen_name (str): Twitter user screen_name

    returns:
        The first models.User row whose screen_name equals the argument,
        as produced by the query's first() call.
    """
    db = models.load_session()
    matching = db.query(models.User).filter(
        models.User.screen_name == screen_name)
    return matching.first()
def _get_random_user(last_updated=None,
                     user_main_delta=None,
                     user_full_delta=None,
                     user_delta=None):
    """
    Select the users that are next in line to be scraped.

    Tiers are tried in priority order and the query of the first tier that
    has at least one row is returned. Check get_random_user() for more
    info.

    args:
        last_updated (str): Name of the User column that stores the
            "last updated" timestamp. Tweets, friendships and user info
            each have their own column.
        user_main_delta (int): Days a main user can go before it has to
            be updated again
        user_full_delta (int): Days a full user (high degree priority but
            not main) can go before it has to be updated again
        user_delta (int): Days a regular user can go before it has to be
            updated again

    returns:
        A query over models.User (not a single user) for the first tier
        that matches; the last tier's query is returned even when it has
        no rows. None when last_updated or user_main_delta is falsy, or
        when the delta required by the next tier is falsy.
    """
    db = models.load_session()
    user_cls = models.User

    if not (last_updated and user_main_delta):
        return

    # Column attribute that holds the timestamp this call cares about.
    stamp = getattr(user_cls, last_updated)

    def cutoff(days):
        # Rows stamped before this moment count as stale.
        return datetime.datetime.now() - datetime.timedelta(days=days)

    # The "== None" / "== True" comparisons below are intentional: they
    # build filter expressions for the query, they are not Python tests.

    # 1. Main user that was never scraped
    candidates = db.query(user_cls).filter(
        stamp == None,  # noqa: E711
        user_cls.main == True)  # noqa: E712
    if candidates.first():
        return candidates

    # 2. Main user that was scraped too long ago
    limit = cutoff(user_main_delta)
    candidates = db.query(user_cls).filter(
        stamp < limit,
        user_cls.main == True)  # noqa: E712
    if candidates.first():
        return candidates

    if not user_full_delta:
        return

    # 3. User with degree priority high enough for a full scrape that was
    #    never scraped
    candidates = db.query(user_cls).filter(
        user_cls.degree <= twitter_config.DEGREE_FULL_SCRAPE,
        stamp == None)  # noqa: E711
    if candidates.first():
        return candidates

    # 4. User with degree priority high enough for a full scrape that was
    #    updated too long ago
    limit = cutoff(user_full_delta)
    candidates = db.query(user_cls).filter(
        user_cls.degree <= twitter_config.DEGREE_FULL_SCRAPE,
        stamp < limit)
    if candidates.first():
        return candidates

    if not user_delta:
        return

    # 5. Any user that was never scraped
    candidates = db.query(user_cls).filter(stamp == None)  # noqa: E711
    if candidates.first():
        return candidates

    # 6. Any user that was updated too long ago (returned even if empty)
    limit = cutoff(user_delta)
    return db.query(user_cls).filter(stamp < limit)
def exist_tweet(tweet_id):
    """
    Check if a tweet exists in the db.

    args:
        tweet_id (str): twitter tweets real id

    returns:
        bool: True when a models.Tweets row with this id is found,
        False otherwise.
    """
    session = models.load_session()
    tweet = session.query(models.Tweets).filter_by(id=tweet_id).first()
    # Identity test instead of "!= None": it cannot be affected by a
    # custom __ne__ on the model and is the idiomatic None check.
    return tweet is not None
def exist_user(user):
    """
    Check if a user exists in the db.

    args:
        user (str): twitter user screen_name

    returns:
        bool: True when a models.User row with this screen_name is found,
        False otherwise.
    """
    session = models.load_session()
    row = session.query(models.User).filter_by(screen_name=user).first()
    # Identity test instead of "!= None": it cannot be affected by a
    # custom __ne__ on the model and is the idiomatic None check.
    return row is not None
def get_user_degree(user):
    """
    Fetch a user's degree of importance.

    args:
        user (str): Twitter user screen_name

    returns:
        The degree attribute of the matching models.User row, or None
        when the lookup yields nothing truthy.
    """
    db = models.load_session()
    record = db.query(models.User).filter_by(screen_name=user).first()
    return record.degree if record else None
def exist_friendship(following, follower):
    """
    Check if a friendship link exists in the db.

    args:
        following (str): user screen_name (it is the db user id) of the
            person being followed
        follower (str): user screen_name (it is the db user id) of the
            person following the other user

    returns:
        bool: True when a models.Friendship row with this
        follower_id/following_id pair is found, False otherwise.
    """
    session = models.load_session()
    link = session.query(models.Friendship).filter_by(
        follower_id=follower, following_id=following).first()
    # Identity test instead of "!= None": it cannot be affected by a
    # custom __ne__ on the model and is the idiomatic None check.
    return link is not None
def exist_friendship(following, follower):
    """
    Check if a friendship link exists in the db.

    args:
        following (str): user screen_name (it is the db user id) of the
            person being followed
        follower (str): user screen_name (it is the db user id) of the
            person following the other user

    returns:
        bool: True when a models.Friendship row with this
        follower_id/following_id pair is found, False otherwise.
    """
    session = models.load_session()
    link = session.query(models.Friendship).filter_by(
        follower_id=follower, following_id=following).first()
    # Identity test instead of "!= None": it cannot be affected by a
    # custom __ne__ on the model and is the idiomatic None check.
    return link is not None
def update_user(data):
    """
    Write new field values for an existing user to the db.

    args:
        data (dict): User fields to be updated. Must contain the key
            "user" (the screen_name used to select the row); the whole
            dict is handed to _user_prep() to build the values that are
            written.
    """
    # _user_prep is defined elsewhere in this module; presumably it maps
    # the scraped fields onto User column names - verify against it.
    values = _user_prep(data)
    db = models.load_session()
    target = db.query(models.User).filter_by(screen_name=data["user"])
    target.update(values)
    db.commit()
def add_new_user(user, degree=1, main=False):
    """
    Add a user; if the insert fails, update the existing row instead.

    args:
        user (str): twitter user screen_name
        degree (int): users degree of separation
        main (bool): is this a main user or not
    """
    session = models.load_session()
    friend = models.User(screen_name=user, degree=degree, main=main)
    try:
        session.add(friend)
        session.commit()
    except Exception:
        # Insert failed (most likely the screen_name already exists):
        # reset the session and fall back to updating degree/main on the
        # stored row. "except Exception" rather than a bare except so that
        # KeyboardInterrupt/SystemExit are not swallowed here.
        session.rollback()
        session.query(models.User).filter_by(screen_name=user).update({
            "degree": degree,
            "main": main,
        })
        session.commit()
def store_tweets(data, new=False):
    """
    Store a user's tweets.

    PRE: User must exist from before (the user row is loaded and its
    timestamp attributes are set without a None check).

    args:
        data (dict): {"user": screen_name, "data": iterable of tweet
            dicts}. Each tweet dict must provide the keys
            poster_screen_name, poster_display_name, text, text_html, id,
            posted_date, retweets, favorites and inline_media.
        new (bool): When true, a tweet that fails to be stored is skipped
            and the remaining ones are still tried, and the "full update"
            timestamp is refreshed. When false, storing stops at the
            first failure (presumably because the rest is already
            stored - same reasoning as for friendships).
    """
    user = data["user"]
    items = data["data"]
    session = models.load_session()

    # Update the user's tweet scrape timestamps
    now = datetime.datetime.now()
    muser = session.query(models.User).filter_by(screen_name=user).first()
    muser.tweets_last_updated = now
    if not muser.tweets_last_update_full or new:
        muser.tweets_last_update_full = now
    session.commit()

    for item in items:
        tweet = models.Tweets(screen_name=user,
                              poster_screen_name=item["poster_screen_name"],
                              poster_display_name=item["poster_display_name"],
                              text=item["text"],
                              text_html=item["text_html"],
                              id=item["id"],
                              posted_date=item["posted_date"],
                              retweets=item["retweets"],
                              favorites=item["favorites"],
                              inline_media=item["inline_media"])
        try:
            session.add(tweet)
            session.commit()
        except Exception:
            # A failed commit leaves the session unusable until it is
            # rolled back, so always reset it - also on the early exit.
            session.rollback()
            if not new:
                break
def add_new_user(user, degree=1, main=False):
    """
    Add a user; if the insert fails, update the existing row instead.

    args:
        user (str): twitter user screen_name
        degree (int): users degree of separation
        main (bool): is this a main user or not
    """
    session = models.load_session()
    friend = models.User(screen_name=user, degree=degree, main=main)
    try:
        session.add(friend)
        session.commit()
    except Exception:
        # Insert failed (most likely the screen_name already exists):
        # reset the session and fall back to updating degree/main on the
        # stored row. "except Exception" rather than a bare except so that
        # KeyboardInterrupt/SystemExit are not swallowed here.
        session.rollback()
        session.query(models.User).filter_by(screen_name=user).update({
            "degree": degree,
            "main": main,
        })
        session.commit()
def _store_friendships(data, followers=True, new=False):
    """
    Store a user's friendship links.

    PRE: User must exist from before (the user row is loaded and its
    attributes are used without a None check).

    args:
        data (dict): {"user": screen_name, "data": iterable of friend
            dicts}. Each friend dict must provide the keys screen_name,
            display_name and protected.
        followers (bool): True when the items are the user's followers,
            False when they are the accounts the user is following
        new (bool): Delete all friendship links in this direction and
            store them all again, or store only new ones. When false,
            storing stops at the first link that fails to be stored.
    """
    user = data["user"]
    items = data["data"]
    session = models.load_session()

    # Update the user's friendship scrape timestamps
    now = datetime.datetime.now()
    muser = session.query(models.User).filter_by(screen_name=user).first()
    muser.friendships_last_updated = now
    if not muser.friendships_last_update_full or new:
        muser.friendships_last_update_full = now
    session.commit()

    # If new then DELETE all friendships for this user. All will be
    # scraped again
    if new:
        if followers:
            session.query(models.Friendship).filter(
                models.Friendship.following_id == user).delete(
                    synchronize_session=False)
        else:
            session.query(models.Friendship).filter(
                models.Friendship.follower_id == user).delete(
                    synchronize_session=False)
        session.commit()

    # Friends are stored one degree further out than the user itself
    degree = session.query(
        models.User).filter_by(screen_name=user).first().degree + 1

    logging.debug(u"start storing friendships")
    for item in items:
        # Lazy %-formatting: no eager string building and no dependency
        # on the Python 2 only unicode() builtin.
        logging.debug(u"store friendship with: %s", item["screen_name"])
        friend = models.User(screen_name=item["screen_name"],
                             display_name=item["display_name"],
                             protected=item["protected"],
                             degree=degree)
        try:
            session.add(friend)
            session.commit()
        except Exception:
            # Most likely the friend is already stored as a user; reset
            # the session and carry on with the friendship link.
            logging.debug("ROLLBACK on storing friend")
            session.rollback()

        if followers:
            friendship = models.Friendship(following_id=user,
                                           follower_id=item["screen_name"],
                                           date_added=datetime.datetime.now())
        else:
            friendship = models.Friendship(following_id=item["screen_name"],
                                           follower_id=user,
                                           date_added=datetime.datetime.now())

        try:
            session.add(friendship)
            session.commit()
        except Exception:
            # A failed commit leaves the session unusable until it is
            # rolled back, so always reset it - also on the early exit.
            session.rollback()
            if new:
                logging.debug("ROLLBACK on storing friendship")
            else:
                # If this friendship exist, then the rest should exist too.
                logging.debug(
                    "Commiting friendship failed - friendship likely exist")
                break
def _store_friendships(data, followers=True, new=False):
    """
    Store a user's friendship links.

    PRE: User must exist from before (the user row is loaded and its
    attributes are used without a None check).

    args:
        data (dict): {"user": screen_name, "data": iterable of friend
            dicts}. Each friend dict must provide the keys screen_name,
            display_name and protected.
        followers (bool): True when the items are the user's followers,
            False when they are the accounts the user is following
        new (bool): Delete all friendship links in this direction and
            store them all again, or store only new ones. When false,
            storing stops at the first link that fails to be stored.
    """
    user = data["user"]
    items = data["data"]
    session = models.load_session()

    # Update the user's friendship scrape timestamps
    now = datetime.datetime.now()
    muser = session.query(models.User).filter_by(screen_name=user).first()
    muser.friendships_last_updated = now
    if not muser.friendships_last_update_full or new:
        muser.friendships_last_update_full = now
    session.commit()

    # If new then DELETE all friendships for this user. All will be
    # scraped again
    if new:
        if followers:
            session.query(models.Friendship).filter(
                models.Friendship.following_id == user).delete(
                    synchronize_session=False)
        else:
            session.query(models.Friendship).filter(
                models.Friendship.follower_id == user).delete(
                    synchronize_session=False)
        session.commit()

    # Friends are stored one degree further out than the user itself
    degree = session.query(
        models.User).filter_by(screen_name=user).first().degree + 1

    logging.debug(u"start storing friendships")
    for item in items:
        # Lazy %-formatting: no eager string building and no dependency
        # on the Python 2 only unicode() builtin.
        logging.debug(u"store friendship with: %s", item["screen_name"])
        friend = models.User(screen_name=item["screen_name"],
                             display_name=item["display_name"],
                             protected=item["protected"],
                             degree=degree)
        try:
            session.add(friend)
            session.commit()
        except Exception:
            # Most likely the friend is already stored as a user; reset
            # the session and carry on with the friendship link.
            logging.debug("ROLLBACK on storing friend")
            session.rollback()

        if followers:
            friendship = models.Friendship(following_id=user,
                                           follower_id=item["screen_name"],
                                           date_added=datetime.datetime.now())
        else:
            friendship = models.Friendship(following_id=item["screen_name"],
                                           follower_id=user,
                                           date_added=datetime.datetime.now())

        try:
            session.add(friendship)
            session.commit()
        except Exception:
            # A failed commit leaves the session unusable until it is
            # rolled back, so always reset it - also on the early exit.
            session.rollback()
            if new:
                logging.debug("ROLLBACK on storing friendship")
            else:
                # If this friendship exist, then the rest should exist too.
                logging.debug(
                    "Commiting friendship failed - friendship likely exist")
                break
def _get_random_user(last_updated=None,
                     user_main_delta=None,
                     user_full_delta=None,
                     user_delta=None):
    """
    Select the users that are next in line to be scraped.

    Tiers are tried in priority order and the query of the first tier that
    has at least one row is returned. Check get_random_user() for more
    info.

    args:
        last_updated (str): Name of the User column that stores the
            "last updated" timestamp. Tweets, friendships and user info
            each have their own column.
        user_main_delta (int): Days a main user can go before it has to
            be updated again
        user_full_delta (int): Days a full user (high degree priority but
            not main) can go before it has to be updated again
        user_delta (int): Days a regular user can go before it has to be
            updated again

    returns:
        A query over models.User (not a single user) for the first tier
        that matches; the last tier's query is returned even when it has
        no rows. None when last_updated or user_main_delta is falsy, or
        when the delta required by the next tier is falsy.
    """
    db = models.load_session()
    user_cls = models.User

    if not (last_updated and user_main_delta):
        return

    # Column attribute that holds the timestamp this call cares about.
    stamp = getattr(user_cls, last_updated)

    def cutoff(days):
        # Rows stamped before this moment count as stale.
        return datetime.datetime.now() - datetime.timedelta(days=days)

    # The "== None" / "== True" comparisons below are intentional: they
    # build filter expressions for the query, they are not Python tests.

    # 1. Main user that was never scraped
    candidates = db.query(user_cls).filter(
        stamp == None,  # noqa: E711
        user_cls.main == True)  # noqa: E712
    if candidates.first():
        return candidates

    # 2. Main user that was scraped too long ago
    limit = cutoff(user_main_delta)
    candidates = db.query(user_cls).filter(
        stamp < limit,
        user_cls.main == True)  # noqa: E712
    if candidates.first():
        return candidates

    if not user_full_delta:
        return

    # 3. User with degree priority high enough for a full scrape that was
    #    never scraped
    candidates = db.query(user_cls).filter(
        user_cls.degree <= twitter_config.DEGREE_FULL_SCRAPE,
        stamp == None)  # noqa: E711
    if candidates.first():
        return candidates

    # 4. User with degree priority high enough for a full scrape that was
    #    updated too long ago
    limit = cutoff(user_full_delta)
    candidates = db.query(user_cls).filter(
        user_cls.degree <= twitter_config.DEGREE_FULL_SCRAPE,
        stamp < limit)
    if candidates.first():
        return candidates

    if not user_delta:
        return

    # 5. Any user that was never scraped
    candidates = db.query(user_cls).filter(stamp == None)  # noqa: E711
    if candidates.first():
        return candidates

    # 6. Any user that was updated too long ago (returned even if empty)
    limit = cutoff(user_delta)
    return db.query(user_cls).filter(stamp < limit)