Example #1
0
def get_user(screen_name):
    """
        Look up a single user row by its screen name.

        args:
            screen_name (str): Twitter screen_name of the wanted user

        returns:
            The first models.User row whose screen_name matches, or None
            when the query yields no row.
    """
    session = models.load_session()
    name_matches = models.User.screen_name == screen_name
    candidates = session.query(models.User).filter(name_matches)
    return candidates.first()
Example #2
0
def get_user(screen_name):
    """
        Fetch one user from the db by screen name.

        args:
            screen_name (str): Twitter screen_name to search for

        returns:
            The first matching models.User row, or None when the query
            yields no row.
    """
    session = models.load_session()
    user_cls = models.User
    wanted = user_cls.screen_name == screen_name
    return session.query(user_cls).filter(wanted).first()
Example #3
0
def _get_random_user(last_updated=None, user_main_delta=None, user_full_delta=None, user_delta=None):
    """
        Select the group of users that should be scraped next.
        See get_random_user() for more info.

        Groups are tried in priority order and the first one that has at
        least one row is returned:
            1. main users never scraped
            2. main users scraped more than user_main_delta days ago
            3. users with degree <= twitter_config.DEGREE_FULL_SCRAPE never scraped
            4. those users scraped more than user_full_delta days ago
            5. any user never scraped
            6. any user scraped more than user_delta days ago

        args:
            last_updated (str): Name of the models.User column that stores the last update time.
                Tweets, friendships and user info each have their own column.
            user_main_delta (int): Days a main user can go before it has to be updated again
            user_full_delta (int): Days a full user (high degree but not main) can go before it has to be updated again
            user_delta (int): Days a regular user can go before it has to be updated again

        returns:
            A query over models.User for the first matching group. None when
            last_updated or user_main_delta is falsy, or when user_full_delta /
            user_delta is falsy and no earlier group matched. The query for
            group 6 is returned without checking that it has rows.
    """
    session = models.load_session()
    user_cls = models.User

    if not (last_updated and user_main_delta):
        return None

    stamp = getattr(user_cls, last_updated)

    def _cutoff(days):
        # Rows stamped before this moment count as too old.
        return datetime.datetime.now() - datetime.timedelta(days=days)

    # NOTE: `== None` and `== True` below build SQL expressions; they are
    # not ordinary Python comparisons and must not be rewritten with `is`.

    # 1. Main users that were never scraped
    candidates = session.query(user_cls).filter(stamp == None, user_cls.main == True)
    if candidates.first():
        return candidates

    # 2. Main users scraped too long ago
    main_cutoff = _cutoff(user_main_delta)
    candidates = session.query(user_cls).filter(stamp < main_cutoff, user_cls.main == True)
    if candidates.first():
        return candidates

    if not user_full_delta:
        return None

    # 3. Users with a degree that qualifies for a full scrape, never scraped
    candidates = session.query(user_cls).filter(
        user_cls.degree <= twitter_config.DEGREE_FULL_SCRAPE, stamp == None)
    if candidates.first():
        return candidates

    # 4. Users with a degree that qualifies for a full scrape, scraped too long ago
    full_cutoff = _cutoff(user_full_delta)
    candidates = session.query(user_cls).filter(
        user_cls.degree <= twitter_config.DEGREE_FULL_SCRAPE, stamp < full_cutoff)
    if candidates.first():
        return candidates

    if not user_delta:
        return None

    # 5. Any user that was never scraped
    candidates = session.query(user_cls).filter(stamp == None)
    if candidates.first():
        return candidates

    # 6. Any user scraped too long ago (returned even when it has no rows)
    return session.query(user_cls).filter(stamp < _cutoff(user_delta))
Example #4
0
def exist_tweet(tweet_id):
    """
        Check if a tweet exists in the db.

        args:
            tweet_id (str): Twitter's own id of the tweet

        returns:
            bool: True when a models.Tweets row has this id, otherwise False
    """
    session = models.load_session()
    match = session.query(models.Tweets).filter_by(id=tweet_id).first()
    # Identity test against None; `!= None` would go through the model's
    # comparison operators instead.
    return match is not None
Example #5
0
def exist_user(user):
    """
        Check if a user exists in the db.

        args:
            user (str): Twitter user screen_name

        returns:
            bool: True when a models.User row has this screen_name, otherwise False
    """
    session = models.load_session()
    match = session.query(models.User).filter_by(screen_name=user).first()
    # Identity test against None; `!= None` would go through the model's
    # comparison operators instead.
    return match is not None
Example #6
0
def exist_tweet(tweet_id):
    """
        Check if a tweet exists in the db.

        args:
            tweet_id (str): Twitter's own id of the tweet

        returns:
            bool: True when a models.Tweets row has this id, otherwise False
    """
    session = models.load_session()
    match = session.query(models.Tweets).filter_by(id=tweet_id).first()
    # Identity test against None; `!= None` would go through the model's
    # comparison operators instead.
    return match is not None
Example #7
0
def exist_user(user):
    """
        Check if a user exists in the db.

        args:
            user (str): Twitter user screen_name

        returns:
            bool: True when a models.User row has this screen_name, otherwise False
    """
    session = models.load_session()
    match = session.query(models.User).filter_by(screen_name=user).first()
    # Identity test against None; `!= None` would go through the model's
    # comparison operators instead.
    return match is not None
Example #8
0
def get_user_degree(user):
    """
        Return the degree stored for a user.

        args:
            user (str): Twitter user screen_name

        returns:
            The degree attribute of the first matching models.User row, or
            None when no row is found.
    """
    session = models.load_session()
    record = session.query(models.User).filter_by(screen_name=user).first()
    if not record:
        return None
    return record.degree
Example #9
0
def get_user_degree(user):
    """
        Look up a user's degree of importance.

        args:
            user (str): Twitter user screen_name

        returns:
            The degree attribute of the first matching models.User row, or
            None when no row is found.
    """
    session = models.load_session()
    by_name = session.query(models.User).filter_by(screen_name=user)
    found = by_name.first()
    return found.degree if found else None
Example #10
0
def exist_friendship(following, follower):
    """
        Check if a friendship exists in the db.

        args:
            following (str): user screen_name (it is the db user id). The person being followed
            follower (str): user screen_name (it is the db user id). The person following the other user

        returns:
            bool: True when a models.Friendship row links follower to following, otherwise False
    """
    session = models.load_session()
    match = session.query(models.Friendship).filter_by(
        follower_id=follower, following_id=following).first()
    # Identity test against None; `!= None` would go through the model's
    # comparison operators instead.
    return match is not None
Example #11
0
def exist_friendship(following, follower):
    """
        Check if a friendship exists in the db.

        args:
            following (str): user screen_name (it is the db user id). The person being followed
            follower (str): user screen_name (it is the db user id). The person following the other user

        returns:
            bool: True when a models.Friendship row links follower to following, otherwise False
    """
    session = models.load_session()
    match = session.query(models.Friendship).filter_by(
        follower_id=follower, following_id=following).first()
    # Identity test against None; `!= None` would go through the model's
    # comparison operators instead.
    return match is not None
Example #12
0
def update_user(data):
    """
        Update a user's stored info and commit the change.

        args:
            data (dict): User fields to be updated. data["user"] is the
                screen_name that selects the row(s) to update.
    """
    # _user_prep is defined elsewhere; presumably it turns data into the
    # column -> value mapping passed to update() -- TODO confirm.
    fields = _user_prep(data)

    session = models.load_session()
    target = session.query(models.User).filter_by(screen_name=data["user"])
    target.update(fields)
    session.commit()
Example #13
0
def update_user(data):
    """
        Write new field values for a user to the db and commit.

        args:
            data (dict): User fields to be updated. data["user"] is the
                screen_name that selects the row(s) to update.
    """
    # _user_prep is defined elsewhere; presumably it turns data into the
    # column -> value mapping passed to update() -- TODO confirm.
    new_values = _user_prep(data)

    session = models.load_session()
    rows = session.query(models.User).filter_by(screen_name=data["user"])
    rows.update(new_values)
    session.commit()
Example #14
0
def add_new_user(user, degree=1, main=False):
    """
        Add a user to the db. If storing the new row fails, update the
        existing user with the given degree and main instead.

        args:
            user (str): twitter user screen_name
            degree (int): user's degree of separation
            main (bool): is this a main user or not
    """
    session = models.load_session()
    friend = models.User(screen_name=user, degree=degree, main=main)
    try:
        session.add(friend)
        session.commit()
    except Exception:
        # The insert presumably fails because the user already exists.
        # `except Exception` instead of a bare except lets KeyboardInterrupt
        # and SystemExit propagate.
        session.rollback()
        session.query(models.User).filter_by(screen_name=user).update(
            {"degree": degree, "main": main})
        session.commit()
Example #15
0
def store_tweets(data, new=False):
    """
        Store a user's tweets.

        PRE: User must exist from before

        args:
            data (dict): data["user"] is the screen_name of the scraped user and
                data["data"] the scraped tweets. Each tweet is a dict with the keys
                "poster_screen_name", "poster_display_name", "text", "text_html",
                "id", "posted_date", "retweets", "favorites" and "inline_media".
            new (bool): True always refreshes tweets_last_update_full and skips
                tweets that fail to store. False stops at the first tweet that
                fails to store.
    """
    user = data["user"]
    items = data["data"]

    session = models.load_session()

    # Update the user's tweet scrape dates; one timestamp keeps both fields equal
    muser = session.query(models.User).filter_by(screen_name=user).first()
    now = datetime.datetime.now()
    muser.tweets_last_updated = now
    if not muser.tweets_last_update_full or new:
        muser.tweets_last_update_full = now
    session.commit()

    for item in items:
        tweet = models.Tweets(screen_name=user,
                              poster_screen_name=item["poster_screen_name"],
                              poster_display_name=item["poster_display_name"],
                              text=item["text"],
                              text_html=item["text_html"],
                              id=item["id"],
                              posted_date=item["posted_date"],
                              retweets=item["retweets"],
                              favorites=item["favorites"],
                              inline_media=item["inline_media"])
        try:
            session.add(tweet)
            session.commit()
        except Exception:
            # Always roll back: after a failed commit the session cannot be
            # used again until its transaction has been rolled back.
            session.rollback()
            if not new:
                # Presumably this tweet is already stored, and so are the rest.
                break
Example #16
0
def add_new_user(user, degree=1, main=False):
    """
        Add a user to the db. If storing the new row fails, update the
        existing user with the given degree and main instead.

        args:
            user (str): twitter user screen_name
            degree (int): user's degree of separation
            main (bool): is this a main user or not
    """
    session = models.load_session()
    friend = models.User(screen_name=user, degree=degree, main=main)
    try:
        session.add(friend)
        session.commit()
    except Exception:
        # The insert presumably fails because the user already exists.
        # `except Exception` instead of a bare except lets KeyboardInterrupt
        # and SystemExit propagate.
        session.rollback()
        session.query(models.User).filter_by(screen_name=user).update(
            {"degree": degree, "main": main})
        session.commit()
Example #17
0
def store_tweets(data, new=False):
    """
        Store a user's tweets.

        PRE: User must exist from before

        args:
            data (dict): data["user"] is the screen_name of the scraped user and
                data["data"] the scraped tweets. Each tweet is a dict with the keys
                "poster_screen_name", "poster_display_name", "text", "text_html",
                "id", "posted_date", "retweets", "favorites" and "inline_media".
            new (bool): True always refreshes tweets_last_update_full and skips
                tweets that fail to store. False stops at the first tweet that
                fails to store.
    """
    user = data["user"]
    items = data["data"]

    session = models.load_session()

    # Update the user's tweet scrape dates; one timestamp keeps both fields equal
    muser = session.query(models.User).filter_by(screen_name=user).first()
    now = datetime.datetime.now()
    muser.tweets_last_updated = now
    if not muser.tweets_last_update_full or new:
        muser.tweets_last_update_full = now
    session.commit()

    for item in items:
        tweet = models.Tweets(screen_name=user,
                              poster_screen_name=item["poster_screen_name"],
                              poster_display_name=item["poster_display_name"],
                              text=item["text"],
                              text_html=item["text_html"],
                              id=item["id"],
                              posted_date=item["posted_date"],
                              retweets=item["retweets"],
                              favorites=item["favorites"],
                              inline_media=item["inline_media"])
        try:
            session.add(tweet)
            session.commit()
        except Exception:
            # Always roll back: after a failed commit the session cannot be
            # used again until its transaction has been rolled back.
            session.rollback()
            if not new:
                # Presumably this tweet is already stored, and so are the rest.
                break
Example #18
0
def _store_friendships(data, followers=True, new=False):
    """
        Store a user's friendship links and the users at the other end of them.

        PRE: User must exist from before

        args:
            data (dict): data["user"] is the screen_name of the scraped user and
                data["data"] the scraped accounts. Each account is a dict with the
                keys "screen_name", "display_name" and "protected".
            followers (bool): True when the accounts follow the user, False when
                the user follows the accounts
            new (bool): True deletes the user's stored links in that direction first
                and skips links that fail to store. False stops at the first link
                that fails to store.
    """
    user = data["user"]
    items = data["data"]

    session = models.load_session()

    # Update the user's friendship scrape dates; one timestamp keeps both fields equal
    muser = session.query(models.User).filter_by(screen_name=user).first()
    now = datetime.datetime.now()
    muser.friendships_last_updated = now
    if not muser.friendships_last_update_full or new:
        muser.friendships_last_update_full = now
    session.commit()

    # If new then DELETE all friendships for this user. All will be scraped again
    if new:
        if followers:
            stale_links = models.Friendship.following_id == user
        else:
            stale_links = models.Friendship.follower_id == user
        session.query(models.Friendship).filter(stale_links).delete(
            synchronize_session=False)
        session.commit()

    # Accounts linked to this user get a degree one higher than the user's own.
    # muser is the same row the original code queried a second time.
    degree = muser.degree + 1

    logging.debug(u"start storing friendships")
    for item in items:
        friend_name = item["screen_name"]
        # Lazy %s argument: no eager string building and no unicode() call
        logging.debug(u"store friendship with: %s", friend_name)
        friend = models.User(screen_name=friend_name,
                             display_name=item["display_name"],
                             protected=item["protected"],
                             degree=degree)
        try:
            session.add(friend)
            session.commit()
        except Exception:
            # Presumably the account is already stored; the existing row is kept
            logging.debug("ROLLBACK on storing friend")
            session.rollback()

        if followers:
            following_id, follower_id = user, friend_name
        else:
            following_id, follower_id = friend_name, user
        friendship = models.Friendship(following_id=following_id,
                                       follower_id=follower_id,
                                       date_added=datetime.datetime.now())

        try:
            session.add(friendship)
            session.commit()
        except Exception:
            # Always roll back: after a failed commit the session cannot be
            # used again until its transaction has been rolled back.
            session.rollback()
            if new:
                logging.debug("ROLLBACK on storing friendship")
            else:
                # If this friendship exists, then the rest should exist too.
                logging.debug("Commiting friendship failed - friendship likely exist")
                break
Example #19
0
def _store_friendships(data, followers=True, new=False):
    """
        Store a user's friendship links and the users at the other end of them.

        PRE: User must exist from before

        args:
            data (dict): data["user"] is the screen_name of the scraped user and
                data["data"] the scraped accounts. Each account is a dict with the
                keys "screen_name", "display_name" and "protected".
            followers (bool): True when the accounts follow the user, False when
                the user follows the accounts
            new (bool): True deletes the user's stored links in that direction first
                and skips links that fail to store. False stops at the first link
                that fails to store.
    """
    user = data["user"]
    items = data["data"]

    session = models.load_session()

    # Update the user's friendship scrape dates; one timestamp keeps both fields equal
    muser = session.query(models.User).filter_by(screen_name=user).first()
    now = datetime.datetime.now()
    muser.friendships_last_updated = now
    if not muser.friendships_last_update_full or new:
        muser.friendships_last_update_full = now
    session.commit()

    # If new then DELETE all friendships for this user. All will be scraped again
    if new:
        if followers:
            stale_links = models.Friendship.following_id == user
        else:
            stale_links = models.Friendship.follower_id == user
        session.query(models.Friendship).filter(stale_links).delete(
            synchronize_session=False)
        session.commit()

    # Accounts linked to this user get a degree one higher than the user's own.
    # muser is the same row the original code queried a second time.
    degree = muser.degree + 1

    logging.debug(u"start storing friendships")
    for item in items:
        friend_name = item["screen_name"]
        # Lazy %s argument: no eager string building and no unicode() call
        logging.debug(u"store friendship with: %s", friend_name)
        friend = models.User(screen_name=friend_name,
                             display_name=item["display_name"],
                             protected=item["protected"],
                             degree=degree)
        try:
            session.add(friend)
            session.commit()
        except Exception:
            # Presumably the account is already stored; the existing row is kept
            logging.debug("ROLLBACK on storing friend")
            session.rollback()

        if followers:
            following_id, follower_id = user, friend_name
        else:
            following_id, follower_id = friend_name, user
        friendship = models.Friendship(following_id=following_id,
                                       follower_id=follower_id,
                                       date_added=datetime.datetime.now())

        try:
            session.add(friendship)
            session.commit()
        except Exception:
            # Always roll back: after a failed commit the session cannot be
            # used again until its transaction has been rolled back.
            session.rollback()
            if new:
                logging.debug("ROLLBACK on storing friendship")
            else:
                # If this friendship exists, then the rest should exist too.
                logging.debug(
                    "Commiting friendship failed - friendship likely exist")
                break
Example #20
0
def _get_random_user(last_updated=None,
                     user_main_delta=None,
                     user_full_delta=None,
                     user_delta=None):
    """
        Select the group of users that should be scraped next.
        See get_random_user() for more info.

        Groups are tried in priority order and the first one that has at
        least one row is returned:
            1. main users never scraped
            2. main users scraped more than user_main_delta days ago
            3. users with degree <= twitter_config.DEGREE_FULL_SCRAPE never scraped
            4. those users scraped more than user_full_delta days ago
            5. any user never scraped
            6. any user scraped more than user_delta days ago

        args:
            last_updated (str): Name of the models.User column that stores the last update time.
                Tweets, friendships and user info each have their own column.
            user_main_delta (int): Days a main user can go before it has to be updated again
            user_full_delta (int): Days a full user (high degree but not main) can go before it has to be updated again
            user_delta (int): Days a regular user can go before it has to be updated again

        returns:
            A query over models.User for the first matching group. None when
            last_updated or user_main_delta is falsy, or when user_full_delta /
            user_delta is falsy and no earlier group matched. The query for
            group 6 is returned without checking that it has rows.
    """
    session = models.load_session()
    user_cls = models.User

    if not (last_updated and user_main_delta):
        return None

    stamp = getattr(user_cls, last_updated)

    def _cutoff(days):
        # Rows stamped before this moment count as too old.
        return datetime.datetime.now() - datetime.timedelta(days=days)

    # NOTE: `== None` and `== True` below build SQL expressions; they are
    # not ordinary Python comparisons and must not be rewritten with `is`.

    # 1. Main users that were never scraped
    candidates = session.query(user_cls).filter(
        stamp == None, user_cls.main == True)
    if candidates.first():
        return candidates

    # 2. Main users scraped too long ago
    main_cutoff = _cutoff(user_main_delta)
    candidates = session.query(user_cls).filter(
        stamp < main_cutoff, user_cls.main == True)
    if candidates.first():
        return candidates

    if not user_full_delta:
        return None

    # 3. Users with a degree that qualifies for a full scrape, never scraped
    candidates = session.query(user_cls).filter(
        user_cls.degree <= twitter_config.DEGREE_FULL_SCRAPE,
        stamp == None)
    if candidates.first():
        return candidates

    # 4. Users with a degree that qualifies for a full scrape, scraped too long ago
    full_cutoff = _cutoff(user_full_delta)
    candidates = session.query(user_cls).filter(
        user_cls.degree <= twitter_config.DEGREE_FULL_SCRAPE,
        stamp < full_cutoff)
    if candidates.first():
        return candidates

    if not user_delta:
        return None

    # 5. Any user that was never scraped
    candidates = session.query(user_cls).filter(stamp == None)
    if candidates.first():
        return candidates

    # 6. Any user scraped too long ago (returned even when it has no rows)
    return session.query(user_cls).filter(stamp < _cutoff(user_delta))