Beispiel #1
0
def get_tweets_for_nonprofit(nonprofits_id):
    """Retrieve new tweets for the given nonprofit and store them in the DB.

    Looks up the nonprofit by primary key, finds the highest tweet ID already
    stored for its Twitter name, requests tweets newer than that ID (by
    Twitter ID when available, otherwise by Twitter name), adds one Tweet row
    per result, and commits.

    Args:
        nonprofits_id: primary key of the Nonprofit row to fetch tweets for.

    Returns:
        None. If no nonprofit has the given ID, nothing is fetched or stored.
    """
    logger.debug('Inside get_tweets_for_nonprofit(nonprofit) for nonprofits_id {0}'.format(nonprofits_id))
    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)
    if nonprofit is None:
        # Query.get() yields None for an unknown primary key; bail out instead
        # of failing with AttributeError on the attribute accesses below.
        logger.debug('No nonprofit found for nonprofits_id {0}; skipping tweet retrieval'.format(nonprofits_id))
        return

    # Highest tweet ID already stored for this Twitter name, used as since_id.
    # NOTE(review): the lookup is keyed on twitter_name even when tweets are
    # fetched by twitter_id -- confirm this is intended when the name is unset.
    newest_stored = DBSession.query(func.max(cast(Tweet.tweet_id, Integer)).label('max_tweet_id')).filter(Tweet.twitter_name == nonprofit.twitter_name).first()
    if newest_stored is None or newest_stored.max_tweet_id is None:
        max_tweet_id = 1  # nothing stored yet
    else:
        max_tweet_id = newest_stored.max_tweet_id

    tweets = []
    if nonprofit.twitter_id is not None:
        tweets = givinggraph.twitter.tweets.get_tweets_by_id(nonprofit.twitter_id, True, since_id=max_tweet_id)
    elif nonprofit.twitter_name is not None:
        tweets = givinggraph.twitter.tweets.get_tweets_by_name(nonprofit.twitter_name, True, since_id=max_tweet_id)
    # With neither identifier set there is nothing to fetch; tweets stays empty.

    for tweet in tweets:
        entities = tweet['entities']
        mentions = entities['user_mentions']
        DBSession.add(Tweet(tweet['user']['screen_name'],
                            tweet['id_str'],
                            tweet['created_at'],
                            tweet['text'].encode('utf-8'),
                            tweet['lang'],
                            tweet['retweet_count'],
                            tweet['favorite_count'],
                            # Multi-valued entities are flattened to comma-separated strings.
                            ', '.join([mention['id_str'] for mention in mentions]),
                            ', '.join([mention['screen_name'] for mention in mentions]),
                            ', '.join([hashtag['text'] for hashtag in entities['hashtags']]),
                            ', '.join([url['expanded_url'] for url in entities['urls']]),
                            tweet['in_reply_to_screen_name'],
                            tweet['in_reply_to_user_id_str'],
                            tweet['in_reply_to_status_id_str']))
    DBSession.commit()
Beispiel #2
0
def add_guidestar_info_to_db(ein):
    """Insert or refresh the Nonprofit row for the given EIN from GuideStar.

    The DB is queried for an existing row with this EIN and GuideStar is asked
    for the nonprofit's details. An existing row has its fields overwritten;
    otherwise a new row is added. The change is committed.

    Returns the Nonprofit row, or None (with nothing committed) when the
    GuideStar lookup returns None.
    """
    logger.debug('Inside add_guidestar_info_to_db({0})'.format(ein))

    existing = DBSession.query(Nonprofit).filter(Nonprofit.ein == ein).first()
    fetched = givinggraph.guidestar.search.get_nonprofit(ein)
    if fetched is None:
        return None

    if existing is not None:
        # Already known: overwrite the stored fields with the fetched ones.
        record = existing
        record.name = fetched.name
        record.ntee_code = fetched.ntee_code
        record.mission = fetched.mission
        # The mission text is stored as the description as well.
        record.description = fetched.mission
        record.city = fetched.city
        record.state = fetched.state
        record.ZIP = fetched.zip
    else:
        # Not in the DB yet: build a new row; the mission is passed twice and
        # two positional slots are left as None, exactly as before.
        record = Nonprofit(
            fetched.name,
            fetched.ein,
            fetched.ntee_code,
            fetched.mission,
            fetched.mission,
            None,
            None,
            fetched.city,
            fetched.state,
            fetched.zip,
        )
        DBSession.add(record)

    DBSession.commit()
    return record
Beispiel #3
0
def add_guidestar_info_to_db(ein):
    """Create or update the DB record of the nonprofit identified by ``ein``.

    GuideStar is the source of the data. When GuideStar returns None for the
    EIN, None is returned and nothing is committed. Otherwise the matching
    Nonprofit row is updated in place, or a new one is added if none exists,
    and the row is returned after the commit.
    """
    logger.debug('Inside add_guidestar_info_to_db({0})'.format(ein))

    stored = DBSession.query(Nonprofit).filter(Nonprofit.ein == ein).first()
    guidestar_data = givinggraph.guidestar.search.get_nonprofit(ein)
    if guidestar_data is None:
        return None

    if stored is None:
        # The mission fills two consecutive positional slots, as in the
        # update path below where it feeds both mission and description.
        stored = Nonprofit(guidestar_data.name, guidestar_data.ein,
                           guidestar_data.ntee_code, guidestar_data.mission,
                           guidestar_data.mission, None, None,
                           guidestar_data.city, guidestar_data.state,
                           guidestar_data.zip)
        DBSession.add(stored)
    else:
        # (attribute on the DB row, attribute on the GuideStar result)
        field_map = (
            ('name', 'name'),
            ('ntee_code', 'ntee_code'),
            ('mission', 'mission'),
            ('description', 'mission'),
            ('city', 'city'),
            ('state', 'state'),
            ('ZIP', 'zip'),
        )
        for db_field, gs_field in field_map:
            setattr(stored, db_field, getattr(guidestar_data, gs_field))
    DBSession.commit()
    return stored
Beispiel #4
0
def add_similarity_scores_for_nonprofit_tweets():
    """Recompute tweet-based similarity between every pair of Twitter names.

    All stored tweet texts are concatenated per Twitter name, the combined
    texts are scored against each other, the previously stored scores are
    deleted, and one row per pair (first index < second index) is added
    before committing.
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_tweets()')

    # NOTE(review): on MySQL, GROUP_CONCAT output is capped by
    # group_concat_max_len -- confirm the setting if that backend is in use.
    combined_text = func.group_concat(Tweet.text).label('text')
    accounts = DBSession.query(Tweet.twitter_name, combined_text).group_by(Tweet.twitter_name).all()

    corpus = [account.text for account in accounts]
    scores = similarity.get_similarity_scores_all_pairs(corpus)

    # Old scores are replaced wholesale rather than updated row by row.
    DBSession.query(Nonprofits_Similarity_By_Tweets).delete()
    size = len(scores)
    for first in xrange(size - 1):
        for second in xrange(first + 1, size):
            pair_score = Nonprofits_Similarity_By_Tweets(accounts[first].twitter_name, accounts[second].twitter_name, scores[first][second])
            DBSession.add(pair_score)
    DBSession.commit()
Beispiel #5
0
def add_similarity_scores_for_nonprofit_descriptions():
    """Recompute description-based similarity between every pair of nonprofits.

    Loads the nonprofits that have a description, scores all descriptions
    against each other, deletes the previously stored scores, and adds one
    row per pair (first index < second index) before committing.
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_descriptions()')

    # "!= None" is deliberate: it builds a SQL expression, not a Python test.
    has_description = Nonprofit.description != None  # nopep8
    described = DBSession.query(Nonprofit).filter(has_description).all()
    descriptions = [org.description for org in described]
    scores = similarity.get_similarity_scores_all_pairs(descriptions)

    # Old scores are replaced wholesale rather than updated row by row.
    DBSession.query(Nonprofits_Similarity_By_Description).delete()
    count = len(scores)
    for first in xrange(count - 1):
        for second in xrange(first + 1, count):
            pair_score = Nonprofits_Similarity_By_Description(described[first].nonprofits_id, described[second].nonprofits_id, scores[first][second])
            DBSession.add(pair_score)
    DBSession.commit()
Beispiel #6
0
def add_similarity_scores_for_nonprofit_descriptions():
    """Rebuild the table of pairwise nonprofit description similarities.

    Only nonprofits with a non-NULL description take part. Existing
    similarity rows are deleted and a fresh row is added for each pair of
    nonprofits, with the lower list index first; the session is committed at
    the end.
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_descriptions()')

    candidates = DBSession.query(Nonprofit).filter(
        Nonprofit.description != None).all()  # nopep8
    texts = [candidate.description for candidate in candidates]
    matrix = similarity.get_similarity_scores_all_pairs(texts)

    DBSession.query(Nonprofits_Similarity_By_Description).delete()

    # Walk the strict upper triangle of the matrix: each pair exactly once.
    dimension = len(matrix)
    index_pairs = ((m, n)
                   for m in xrange(dimension - 1)
                   for n in xrange(m + 1, dimension))
    for m, n in index_pairs:
        DBSession.add(
            Nonprofits_Similarity_By_Description(
                candidates[m].nonprofits_id, candidates[n].nonprofits_id,
                matrix[m][n]))
    DBSession.commit()
Beispiel #7
0
def get_tweets_for_nonprofit(nonprofits_id):
    """Retrieve new tweets for the given nonprofit and store them in the DB.

    The nonprofit is loaded by primary key. The highest tweet ID already
    stored for its Twitter name is passed as ``since_id`` when requesting
    tweets (by Twitter ID when set, otherwise by Twitter name). One Tweet row
    is added per returned tweet, then the session is committed.

    Args:
        nonprofits_id: primary key of the Nonprofit row to fetch tweets for.

    Returns:
        None. If no nonprofit has the given ID, nothing is fetched or stored.
    """
    logger.debug(
        'Inside get_tweets_for_nonprofit(nonprofit) for nonprofits_id {0}'.
        format(nonprofits_id))
    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)
    if nonprofit is None:
        # Query.get() yields None for an unknown primary key; return early
        # rather than raising AttributeError on the attribute accesses below.
        logger.debug(
            'No nonprofit found for nonprofits_id {0}; skipping tweet '
            'retrieval'.format(nonprofits_id))
        return

    # NOTE(review): the stored maximum is keyed on twitter_name even when
    # tweets are fetched by twitter_id -- confirm this is intended when the
    # name is unset.
    max_tweet = DBSession.query(
        func.max(cast(Tweet.tweet_id, Integer)).label('max_tweet_id')).filter(
            Tweet.twitter_name == nonprofit.twitter_name).first()
    if max_tweet is None or max_tweet.max_tweet_id is None:
        max_tweet_id = 1  # nothing stored yet
    else:
        max_tweet_id = max_tweet.max_tweet_id

    tweets = []
    if nonprofit.twitter_id is not None:
        tweets = givinggraph.twitter.tweets.get_tweets_by_id(
            nonprofit.twitter_id, True, since_id=max_tweet_id)
    elif nonprofit.twitter_name is not None:
        tweets = givinggraph.twitter.tweets.get_tweets_by_name(
            nonprofit.twitter_name, True, since_id=max_tweet_id)
    # With neither identifier set there is nothing to fetch; tweets stays [].

    for tweet in tweets:
        entities = tweet['entities']
        user_mentions = entities['user_mentions']
        # Multi-valued entities are flattened to comma-separated strings.
        mention_ids = ', '.join(
            [mention['id_str'] for mention in user_mentions])
        mention_names = ', '.join(
            [mention['screen_name'] for mention in user_mentions])
        hashtags = ', '.join(
            [hashtag['text'] for hashtag in entities['hashtags']])
        urls = ', '.join([url['expanded_url'] for url in entities['urls']])
        DBSession.add(
            Tweet(
                tweet['user']['screen_name'], tweet['id_str'],
                tweet['created_at'], tweet['text'].encode('utf-8'),
                tweet['lang'], tweet['retweet_count'], tweet['favorite_count'],
                mention_ids, mention_names, hashtags, urls,
                tweet['in_reply_to_screen_name'],
                tweet['in_reply_to_user_id_str'],
                tweet['in_reply_to_status_id_str']))
    DBSession.commit()
Beispiel #8
0
def add_similarity_scores_for_nonprofit_tweets():
    """Rebuild the table of pairwise tweet similarities between Twitter names.

    Tweets are grouped by Twitter name with their texts concatenated, the
    combined texts are scored pairwise, existing similarity rows are deleted,
    and a fresh row is added for each pair (lower list index first). The
    session is committed at the end.
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_tweets()')

    # NOTE(review): on MySQL, GROUP_CONCAT output is capped by
    # group_concat_max_len -- confirm the setting if that backend is in use.
    grouped = DBSession.query(
        Tweet.twitter_name,
        func.group_concat(Tweet.text).label('text'))
    per_account = grouped.group_by(Tweet.twitter_name).all()
    texts = [entry.text for entry in per_account]
    matrix = similarity.get_similarity_scores_all_pairs(texts)

    DBSession.query(Nonprofits_Similarity_By_Tweets).delete()

    # Walk the strict upper triangle of the matrix: each pair exactly once.
    dimension = len(matrix)
    index_pairs = ((m, n)
                   for m in xrange(dimension - 1)
                   for n in xrange(m + 1, dimension))
    for m, n in index_pairs:
        DBSession.add(
            Nonprofits_Similarity_By_Tweets(per_account[m].twitter_name,
                                            per_account[n].twitter_name,
                                            matrix[m][n]))
    DBSession.commit()