Example #1
0
def add_guidestar_info_to_db(ein):
    """Insert or refresh the DB row of the nonprofit identified by ``ein``.

    The nonprofit's details are fetched through
    ``givinggraph.guidestar.search.get_nonprofit``. If a row with this EIN
    already exists, its name, NTEE code, mission, description, city, state
    and ZIP are overwritten; otherwise a new ``Nonprofit`` is added. The
    fetched mission text is used for both the mission and the description.

    Returns:
        The inserted or updated ``Nonprofit``, or None when the lookup
        returns nothing (the DB is then left untouched).
    """
    logger.debug('Inside add_guidestar_info_to_db({0})'.format(ein))

    existing = DBSession.query(Nonprofit).filter(Nonprofit.ein == ein).first()
    fetched = givinggraph.guidestar.search.get_nonprofit(ein)
    if fetched is None:
        return None

    if existing is not None:
        # Known nonprofit: refresh every field the lookup provides.
        existing.name = fetched.name
        existing.ntee_code = fetched.ntee_code
        existing.mission = fetched.mission
        existing.description = fetched.mission
        existing.city = fetched.city
        existing.state = fetched.state
        existing.ZIP = fetched.zip
        record = existing
    else:
        # The mission is passed twice (presumably mission and description,
        # mirroring the update branch); the two None arguments are fields
        # the lookup does not supply.
        record = Nonprofit(fetched.name, fetched.ein, fetched.ntee_code,
                           fetched.mission, fetched.mission,
                           None, None,
                           fetched.city, fetched.state, fetched.zip)
        DBSession.add(record)
    DBSession.commit()
    return record
Example #2
0
def add_nonprofit_company_news_article_connections(article_ids, companies):
    """Link news articles to the companies they mention supportively.

    Each article is loaded by ID and scanned for mentions of every company
    name. As soon as one mention of a company contains supportive wording
    (per ``news_parser.contains_supportive_wording``), the company is appended
    to ``article.companies`` and the scan moves on to the next company. All
    links are committed once at the end.

    Args:
        article_ids: iterable of ``News_Article`` primary keys.
        companies: list of ``Company`` objects to look for in each article.

    Article IDs that do not resolve to a row are logged and skipped.
    """
    logger.debug(
        'Inside add_nonprofit_company_news_article_connections(news_articles, companies)'
    )
    company_count = len(companies)
    for article_id in article_ids:
        article = DBSession.query(News_Article).get(article_id)
        if article is None:
            # Without a row there is no text to scan; skip this ID rather
            # than fail on ``article.text`` below.
            logger.warning(
                'News article {0} was not found in the DB, skipping.'.format(
                    article_id))
            continue

        for position, company in enumerate(companies, 1):
            if position % 100 == 0:
                print('Processing article {0} for company {1} of {2}...'.format(
                    article_id, position, company_count))
            mentions = news_parser.get_company_mentions_in_text(
                article.text, company.name.encode('utf-8'))
            # any() stops at the first supportive mention, so a company is
            # linked at most once per article in this pass.
            if any(news_parser.contains_supportive_wording(mention)
                   for mention in mentions):
                article.companies.append(company)
    DBSession.commit()
Example #3
0
def add_new_nonprofit(ein):
    """Add the nonprofit with the given EIN together with its news and Twitter data.

    Nothing happens when a nonprofit with this EIN is already stored, or when
    ``add_guidestar_info_to_db`` returns nothing for it. Otherwise the
    nonprofit's news articles are stored and linked to companies, and the
    Twitter name, tweets and followers steps are run. Always returns None.
    """
    already_stored = DBSession.query(Nonprofit).filter(Nonprofit.ein == ein).first()
    if already_stored is not None:
        return

    # The guidestar lookup comes first: every later step needs the stored row.
    nonprofit = add_guidestar_info_to_db(ein)
    if nonprofit is None:
        print('Guidestar returned nothing for EIN {0}, exiting.'.format(ein))
        return None

    logger.debug('Getting companies...')
    companies = DBSession.query(Company).all()
    logger.debug('Companies retrieved.')

    # NOTE: all steps below run synchronously, one after another. Running them
    # as task chains (news chain / twitter chain) was disabled because of a
    # synchronization issue: articles passed to
    # add_nonprofit_company_news_article_connections(...) were in the DB, but
    # SQLAlchemy did not see them.
    article_ids = add_news_articles_to_db_for_nonprofit(nonprofit.nonprofits_id)
    add_nonprofit_company_news_article_connections(article_ids, companies)

    twitter_steps = (update_nonprofit_twitter_name,
                     get_tweets_for_nonprofit,
                     get_followers_for_nonprofit)
    for step in twitter_steps:
        step(nonprofit.nonprofits_id)
Example #4
0
def add_new_nonprofit(ein):
    """Register a nonprofit by EIN and collect its news and Twitter data.

    Returns early (None) when the EIN is already in the DB or when
    ``add_guidestar_info_to_db`` yields no record. Otherwise it stores the
    nonprofit's news articles, links them to companies, and then runs the
    Twitter name, tweets and followers steps in that order.
    """
    lookup = DBSession.query(Nonprofit).filter(Nonprofit.ein == ein)
    if lookup.first() is not None:
        return

    # Guidestar info is fetched before anything else.
    nonprofit = add_guidestar_info_to_db(ein)
    if nonprofit is None:
        print('Guidestar returned nothing for EIN {0}, exiting.'.format(ein))
        return None

    logger.debug('Getting companies...')
    companies = DBSession.query(Company).all()
    logger.debug('Companies retrieved.')

    # NOTE: the news and Twitter steps are plain sequential calls. Chaining
    # them as tasks was switched off because of a synchronization issue:
    # articles handed to add_nonprofit_company_news_article_connections(...)
    # were in the DB, but SQLAlchemy did not see them.
    add_nonprofit_company_news_article_connections(
        add_news_articles_to_db_for_nonprofit(nonprofit.nonprofits_id),
        companies)

    update_nonprofit_twitter_name(nonprofit.nonprofits_id)
    get_tweets_for_nonprofit(nonprofit.nonprofits_id)
    get_followers_for_nonprofit(nonprofit.nonprofits_id)
Example #5
0
def add_guidestar_info_to_db(ein):
    """Create or update the stored nonprofit for ``ein`` from a guidestar lookup.

    When the lookup (``givinggraph.guidestar.search.get_nonprofit``) returns
    nothing, None is returned and the DB is not modified. Otherwise an
    existing row with this EIN is updated in place, or a new ``Nonprofit`` is
    added, and the resulting object is returned after committing.
    """
    logger.debug('Inside add_guidestar_info_to_db({0})'.format(ein))

    nonprofit_db = DBSession.query(Nonprofit).filter(Nonprofit.ein == ein).first()
    nonprofit_gs = givinggraph.guidestar.search.get_nonprofit(ein)
    if nonprofit_gs is None:
        return None

    if nonprofit_db is None:
        # The mission is supplied twice (presumably as mission and as
        # description, like the update path below); the None arguments are
        # values the lookup does not provide.
        constructor_args = (
            nonprofit_gs.name,
            nonprofit_gs.ein,
            nonprofit_gs.ntee_code,
            nonprofit_gs.mission,
            nonprofit_gs.mission,
            None,
            None,
            nonprofit_gs.city,
            nonprofit_gs.state,
            nonprofit_gs.zip,
        )
        nonprofit_db = Nonprofit(*constructor_args)
        DBSession.add(nonprofit_db)
    else:
        # (DB attribute, lookup attribute) pairs, copied in this order.
        field_pairs = (
            ('name', 'name'),
            ('ntee_code', 'ntee_code'),
            ('mission', 'mission'),
            ('description', 'mission'),
            ('city', 'city'),
            ('state', 'state'),
            ('ZIP', 'zip'),
        )
        for db_field, gs_field in field_pairs:
            setattr(nonprofit_db, db_field, getattr(nonprofit_gs, gs_field))
    DBSession.commit()
    return nonprofit_db
Example #6
0
def add_news_articles_to_db_for_nonprofits():
    """Look up news articles for every nonprofit in the DB, and store any news articles containing company names.

    All companies are loaded once up front; then, for each nonprofit, its
    news articles are stored and linked to the companies they mention.
    """
    logger.debug('Inside add_news_articles_to_db_for_nonprofits()')

    logger.debug('Getting companies...')
    companies = DBSession.query(Company).all()
    logger.debug('Done loading companies...')
    for nonprofit in DBSession.query(Nonprofit).all():
        # add_news_articles_to_db_for_nonprofit expects the nonprofit's ID
        # (not the object) and returns the IDs of the articles it stored.
        article_ids = add_news_articles_to_db_for_nonprofit(nonprofit.nonprofits_id)
        # Argument order is (article_ids, companies).
        add_nonprofit_company_news_article_connections(article_ids, companies)
Example #7
0
def add_news_articles_to_db_for_nonprofits():
    """Look up news articles for every nonprofit in the DB, and store any news articles containing company names.

    The company list is fetched once and reused for every nonprofit.
    """
    logger.debug('Inside add_news_articles_to_db_for_nonprofits()')

    logger.debug('Getting companies...')
    companies = DBSession.query(Company).all()
    logger.debug('Done loading companies...')
    for nonprofit in DBSession.query(Nonprofit).all():
        # The per-nonprofit helper takes a nonprofits_id and returns article
        # IDs; the linking helper takes (article_ids, companies) in that order.
        article_ids = add_news_articles_to_db_for_nonprofit(
            nonprofit.nonprofits_id)
        add_nonprofit_company_news_article_connections(article_ids, companies)
Example #8
0
def show_topics_for_tweets():
    """Experimental: feed the stored tweets to the topic model.

    One document is built per distinct Twitter name (all of that name's tweet
    texts joined with newlines) and the list of documents is passed to
    ``lda.get_topics``. Nothing is returned.
    """
    name_rows = DBSession.query(Tweet.twitter_name).group_by(Tweet.twitter_name).all()
    twitter_names = [name_row.twitter_name for name_row in name_rows]

    print('Retrieving tweets...')
    documents = []
    for twitter_name in twitter_names:
        # One query per account; each row carries a single tweet's text.
        text_rows = DBSession.query(Tweet.text).filter(Tweet.twitter_name == twitter_name).all()
        documents.append('\n'.join([text_row.text for text_row in text_rows]))

    print('Getting topics...')
    lda.get_topics(documents)
Example #9
0
def sector_stats(ntee):
    """
    Average the Twitter graph metrics of the nonprofits in one NTEE sector.

    Nonprofits whose NTEE code starts with ``ntee`` and that have a Twitter
    name are looked up in ``Nonprofit_Twitter_Attributes`` (by
    ``id == twitter_name``). For those with a row, the clustering coefficient,
    hub and authority values are collected and averaged with ``np.mean``.

    Returns a dict mapping metric name to its mean; the dict is empty when no
    nonprofit contributed values.
    """
    in_sector = DBSession.query(Nonprofit).filter(Nonprofit.ntee_code.like(ntee + '%')).all()
    with_twitter = [nonprofit for nonprofit in in_sector if nonprofit.twitter_name]

    collected = defaultdict(list)
    for nonprofit in with_twitter:
        attributes = DBSession.query(Nonprofit_Twitter_Attributes).filter(
            Nonprofit_Twitter_Attributes.id == nonprofit.twitter_name).first()
        if not attributes:
            continue
        collected['clustering_coefficient'].append(float(attributes.clustering_coefficient))
        collected['hub'].append(float(attributes.hub))
        collected['authority'].append(float(attributes.authority))

    averages = {}
    for metric, values in collected.items():
        averages[metric] = np.mean(values)
    return averages
Example #10
0
def update_null_nonprofit_twitter_ids():
    """Fill in missing Twitter user IDs for nonprofits that have a Twitter name.

    Selects the nonprofits whose ``twitter_id`` is null but whose
    ``twitter_name`` is set, resolves all their screen names in one call to
    ``givinggraph.twitter.users.get_screen_name_to_id_map``, and stores each
    ID found. Names missing from the map are reported on stdout.
    """
    logger.debug('Inside update_null_nonprofit_twitter_ids()')

    # ``== None`` / ``!= None`` are intentional: these are SQLAlchemy column
    # comparisons that become part of the query, not Python identity tests.
    id_is_null = Nonprofit.twitter_id == None  # nopep8
    name_is_set = Nonprofit.twitter_name != None  # nopep8
    nonprofits = DBSession.query(Nonprofit).filter(id_is_null).filter(name_is_set).all()

    id_by_screen_name = givinggraph.twitter.users.get_screen_name_to_id_map(
        [nonprofit.twitter_name for nonprofit in nonprofits])
    for nonprofit in nonprofits:
        # The map is looked up with the lower-cased screen name.
        lookup_key = nonprofit.twitter_name.lower()
        if lookup_key not in id_by_screen_name:
            print('"{0}" was not found, the account may have been deleted or the screen name may have changed.'.format(nonprofit.twitter_name))
            continue
        nonprofit.twitter_id = id_by_screen_name[lookup_key]
    DBSession.commit()
Example #11
0
def update_nonprofit_twitter_name(nonprofits_id):
    """Takes the ID of a nonprofit and uses Yahoo to try to find the Twitter name for that nonprofit.

    Searches for ``'twitter <nonprofit name>'`` and inspects only the first
    result. If that result is a ``twitter.com/<handle>`` URL (with or without
    a scheme or a leading ``www.``), the handle is stored as the nonprofit's
    ``twitter_name`` and committed.

    The DB is left unchanged when the nonprofit ID is unknown, the search
    returns nothing, or the first result is not a Twitter profile URL.
    """
    logger.debug('Inside update_nonprofit_twitter_name(nonprofits_id) for nonprofits_id {0}'.format(nonprofits_id))
    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)
    if nonprofit is None:
        logger.warning('No nonprofit with nonprofits_id {0}, cannot look up its Twitter name.'.format(nonprofits_id))
        return

    search_results = givinggraph.yahoo.search.get_search_results('twitter ' + nonprofit.name)
    if not search_results:
        return

    url = search_results[0].replace('http://', '').replace('https://', '')
    if url.startswith('www.'):
        url = url[len('www.'):]

    # Require the slash after the host so that hosts merely starting with
    # "twitter.com" (or the bare site URL) are not mistaken for a profile.
    profile_prefix = 'twitter.com/'
    if not url.startswith(profile_prefix):
        return

    # Keep only the handle: drop any deeper path, query string or fragment.
    twitter_name = url[len(profile_prefix):]
    for separator in ('/', '?', '#'):
        twitter_name = twitter_name.split(separator, 1)[0]
    if not twitter_name:
        return

    nonprofit.twitter_name = twitter_name
    DBSession.commit()
Example #12
0
def show_topics_for_tweets():
    """Experimental code: run topic modeling over tweets grouped by account.

    For every distinct ``twitter_name`` in the tweets table, that account's
    tweet texts are joined with newlines into one document; the documents are
    then handed to ``lda.get_topics``. The function returns nothing.
    """
    distinct_names = DBSession.query(Tweet.twitter_name).group_by(Tweet.twitter_name)
    twitter_names = [row.twitter_name for row in distinct_names.all()]

    def texts_for(account):
        # All tweet texts stored for one account, in query order.
        rows = DBSession.query(Tweet.text).filter(Tweet.twitter_name == account).all()
        return [row.text for row in rows]

    print('Retrieving tweets...')
    tweets = []
    for account in twitter_names:
        tweets.append('\n'.join(texts_for(account)))

    print('Getting topics...')
    lda.get_topics(tweets)
Example #13
0
def sector_stats(ntee):
    """
    Return the mean Twitter graph metrics for nonprofits with this NTEE code.

    Considers nonprofits whose ``ntee_code`` starts with ``ntee`` and whose
    ``twitter_name`` is set; each is looked up in
    ``Nonprofit_Twitter_Attributes`` by ``id == twitter_name``. The result
    maps ``clustering_coefficient``, ``hub`` and ``authority`` to their
    ``np.mean`` over the rows found, and is empty if no row was found.
    """
    metric_names = ("clustering_coefficient", "hub", "authority")
    sector_query = DBSession.query(Nonprofit).filter(Nonprofit.ntee_code.like(ntee + "%"))

    samples = defaultdict(list)
    for candidate in sector_query.all():
        if not candidate.twitter_name:
            continue
        attributes_query = DBSession.query(Nonprofit_Twitter_Attributes).filter(
            Nonprofit_Twitter_Attributes.id == candidate.twitter_name
        )
        nta = attributes_query.first()
        if nta:
            for metric in metric_names:
                samples[metric].append(float(getattr(nta, metric)))
    return dict((metric, np.mean(values)) for metric, values in samples.items())
Example #14
0
def update_nonprofit_twitter_name(nonprofits_id):
    """Takes the ID of a nonprofit and uses Yahoo to try to find the Twitter name for that nonprofit.

    Only the first result of the search ``'twitter <nonprofit name>'`` is
    considered. When it is a ``twitter.com/<handle>`` URL (scheme and a
    leading ``www.`` are ignored), ``<handle>`` is saved as the nonprofit's
    ``twitter_name``.

    Nothing is written when the nonprofit does not exist, the search has no
    results, or the first result is not a Twitter profile URL.
    """
    logger.debug(
        'Inside update_nonprofit_twitter_name(nonprofits_id) for nonprofits_id {0}'
        .format(nonprofits_id))
    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)
    if nonprofit is None:
        logger.warning(
            'No nonprofit with nonprofits_id {0}, cannot look up its Twitter name.'
            .format(nonprofits_id))
        return

    search_results = givinggraph.yahoo.search.get_search_results(
        'twitter ' + nonprofit.name)
    if not search_results:
        return

    twitter_url = search_results[0]
    twitter_url = twitter_url.replace('http://', '').replace('https://', '')
    if twitter_url.startswith('www.'):
        twitter_url = twitter_url[len('www.'):]

    # The trailing slash is part of the prefix so that the bare site URL and
    # look-alike hosts starting with "twitter.com" are rejected.
    prefix = 'twitter.com/'
    if not twitter_url.startswith(prefix):
        return

    # Strip everything after the handle (sub-paths, query string, fragment).
    twitter_name = twitter_url[len(prefix):]
    for separator in ('/', '?', '#'):
        twitter_name = twitter_name.split(separator, 1)[0]
    if not twitter_name:
        return

    nonprofit.twitter_name = twitter_name
    DBSession.commit()
Example #15
0
def update_null_nonprofit_twitter_ids():
    """Give a Twitter user ID to nonprofits that have a Twitter name but no ID.

    The affected nonprofits' screen names are resolved together through
    ``givinggraph.twitter.users.get_screen_name_to_id_map``. Each nonprofit
    whose lower-cased name appears in the returned map gets its
    ``twitter_id`` set; the others are reported on stdout. Changes are
    committed once at the end.
    """
    logger.debug('Inside update_null_nonprofit_twitter_ids()')

    # The None comparisons below are SQLAlchemy column expressions, which is
    # why ``==`` / ``!=`` are used instead of ``is`` / ``is not``.
    pending = DBSession.query(Nonprofit)
    pending = pending.filter(Nonprofit.twitter_id == None)  # nopep8
    pending = pending.filter(Nonprofit.twitter_name != None)  # nopep8
    nonprofits = pending.all()

    screen_names = []
    for nonprofit in nonprofits:
        screen_names.append(nonprofit.twitter_name)
    screen_name_to_id_map = givinggraph.twitter.users.get_screen_name_to_id_map(
        screen_names)

    for nonprofit in nonprofits:
        normalized = nonprofit.twitter_name.lower()
        if normalized in screen_name_to_id_map:
            nonprofit.twitter_id = screen_name_to_id_map[normalized]
            continue
        print('"{0}" was not found, the account may have been deleted or the screen name may have changed.'.format(
            nonprofit.twitter_name))
    DBSession.commit()
Example #16
0
def possible_partners():
    """Return the possible donors given a nonprofit, as a JSON string.

    Request args:
        attr: which stored procedure to call -- ``'description'``,
            ``'homepage'`` or ``'tweets'``.
        id: integer ID passed to the stored procedure.

    Raises:
        ValueError: if ``attr`` is missing or unsupported, or if ``id`` is
            missing or not an integer.
    """
    procedures = {
        'description': 'from_id_to_companies_by_desc',
        'homepage': 'from_id_to_companies_by_home',
        'tweets': 'from_id_to_companies_by_tweets',
    }
    attr = request.args.get('attr')
    if attr not in procedures:
        raise ValueError('Unsupported attr {0!r}; expected one of: {1}'.format(
            attr, ', '.join(sorted(procedures))))

    raw_id = request.args.get('id')
    if raw_id is None:
        raise ValueError("Missing required 'id' request argument")

    # The procedure name comes from the fixed table above and the ID is
    # forced through int(), so no raw request text reaches the SQL string.
    query = "call  %s('%d')" % (procedures[attr], int(raw_id))
    result = DBSession.execute(query)
    return json.dumps(procedure_to_json(result))
Example #17
0
def get_tweets_for_nonprofit(nonprofits_id):
    """Fetch tweets for the given nonprofit and store them in the DB.

    The largest stored tweet ID for the nonprofit's Twitter name (1 when
    there is none) is passed as ``since_id``. Tweets are fetched by Twitter
    user ID when it is set, otherwise by Twitter name; with neither, nothing
    is fetched. Every returned tweet becomes a ``Tweet`` row, and the session
    is committed at the end.
    """
    logger.debug('Inside get_tweets_for_nonprofit(nonprofit) for nonprofits_id {0}'.format(nonprofits_id))
    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)

    # Tweet IDs are cast to Integer before taking the maximum.
    newest_id = func.max(cast(Tweet.tweet_id, Integer)).label('max_tweet_id')
    newest = DBSession.query(newest_id).filter(Tweet.twitter_name == nonprofit.twitter_name).first()
    since_id = 1
    if newest is not None and newest.max_tweet_id is not None:
        since_id = newest.max_tweet_id

    if nonprofit.twitter_id is not None:
        tweets = givinggraph.twitter.tweets.get_tweets_by_id(nonprofit.twitter_id, True, since_id=since_id)
    elif nonprofit.twitter_name is not None:
        tweets = givinggraph.twitter.tweets.get_tweets_by_name(nonprofit.twitter_name, True, since_id=since_id)
    else:
        tweets = []

    def joined(entries, key):
        # Comma-separated list of one field taken from each entity entry.
        return ', '.join([entry[key] for entry in entries])

    for tweet in tweets:
        stored = Tweet(tweet['user']['screen_name'],
                       tweet['id_str'],
                       tweet['created_at'],
                       tweet['text'].encode('utf-8'),
                       tweet['lang'],
                       tweet['retweet_count'],
                       tweet['favorite_count'],
                       joined(tweet['entities']['user_mentions'], 'id_str'),
                       joined(tweet['entities']['user_mentions'], 'screen_name'),
                       joined(tweet['entities']['hashtags'], 'text'),
                       joined(tweet['entities']['urls'], 'expanded_url'),
                       tweet['in_reply_to_screen_name'],
                       tweet['in_reply_to_user_id_str'],
                       tweet['in_reply_to_status_id_str'])
        DBSession.add(stored)
    DBSession.commit()
Example #18
0
def sector_summary():
    """Return the summary of a given NTEE code, as a JSON string.

    Calls the ``sector_summary`` stored procedure with the ``ntee`` request
    argument and aggregates the returned nonprofits into:

    * ``avg_<metric>`` -- the mean of closeness_centrality,
      clustering_coefficient, degree, hubAuth, weighted_degree and
      eccentricity (0 when no nonprofit is returned);
    * ``tw_communities``, ``web_communities``, ``desc_communities`` -- for
      each community value, the number of nonprofits that belong to it.
    """
    # sqlalchemy is imported locally so this block brings its own dependency.
    from sqlalchemy import text

    # ``ntee`` comes straight from the request, so it is bound as a parameter
    # rather than interpolated into the SQL text.
    rows = DBSession.execute(text('call  sector_summary(:ntee)'),
                             {'ntee': request.args.get('ntee')})
    nonprofits = procedure_to_json(rows)['results']

    metrics = ('closeness_centrality', 'clustering_coefficient', 'degree',
               'hubAuth', 'weighted_degree', 'eccentricity')
    community_kinds = ('tw', 'web', 'desc')

    result = dict(('avg_' + metric, 0) for metric in metrics)
    community_counts = dict((kind, {}) for kind in community_kinds)

    total = float(len(nonprofits))
    for nonprofit in nonprofits:
        # Each metric is accumulated exactly once per nonprofit.
        for metric in metrics:
            result['avg_' + metric] += nonprofit[metric] / total
        # Every occurrence counts, including the first one.
        for kind in community_kinds:
            community = nonprofit[kind + '_community']
            counts = community_counts[kind]
            counts[community] = counts.get(community, 0) + 1

    for kind in community_kinds:
        result[kind + '_communities'] = community_counts[kind]

    return json.dumps(result)
Example #19
0
def get_followers_for_nonprofit(nonprofits_id):
    """Placeholder for retrieving a nonprofit's followers and storing them in the DB.

    At present the function only logs the call and loads the nonprofit. The
    follower retrieval itself is disabled (kept below as commented-out code),
    so nothing is fetched or written.
    """
    logger.debug('Inside get_followers_for_nonprofit(nonprofit) for nonprofits_id {0}'.format(nonprofits_id))
    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)
    if nonprofit.twitter_id is None:
        return

    # Disabled follower sync, kept for reference:
    # follower_ids = givinggraph.twitter.users.get_followers(nonprofit.twitter_id)
    # DBSession.query(Nonprofits_Follower).filter(Nonprofits_Follower.nonprofit_handle == nonprofit.twitter_name).delete()
    # for follower_id in follower_ids:
    #     DBSession.add(Nonprofits_Follower(nonprofit.twitter_name, follower_id))
    # DBSession.commit()
Example #20
0
def add_similarity_scores_for_nonprofit_descriptions():
    """Recompute the pairwise similarity of nonprofit descriptions and store it.

    Loads every nonprofit with a non-null description, obtains the all-pairs
    score matrix from ``similarity.get_similarity_scores_all_pairs``, deletes
    the existing ``Nonprofits_Similarity_By_Description`` rows, and inserts
    one row per unordered pair (m < n).
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_descriptions()')

    has_description = Nonprofit.description != None  # nopep8 -- SQLAlchemy column comparison
    nonprofits = DBSession.query(Nonprofit).filter(has_description).all()
    descriptions = [nonprofit.description for nonprofit in nonprofits]
    scores = similarity.get_similarity_scores_all_pairs(descriptions)

    DBSession.query(Nonprofits_Similarity_By_Description).delete()
    size = len(scores)
    # Only the cells above the diagonal are stored: each pair once.
    for m in xrange(size - 1):
        for n in xrange(m + 1, size):
            pair = Nonprofits_Similarity_By_Description(nonprofits[m].nonprofits_id,
                                                        nonprofits[n].nonprofits_id,
                                                        scores[m][n])
            DBSession.add(pair)
    DBSession.commit()
Example #21
0
def add_similarity_scores_for_nonprofit_tweets():
    """Recompute the pairwise similarity of accounts' tweets and store it.

    Tweets are grouped by ``twitter_name`` with their texts concatenated via
    ``group_concat``; the per-account texts are scored with
    ``similarity.get_similarity_scores_all_pairs``. Existing
    ``Nonprofits_Similarity_By_Tweets`` rows are deleted and one row is added
    per unordered pair of accounts (m < n).
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_tweets()')

    combined_text = func.group_concat(Tweet.text).label('text')
    accounts = DBSession.query(Tweet.twitter_name, combined_text).group_by(Tweet.twitter_name).all()
    scores = similarity.get_similarity_scores_all_pairs([account.text for account in accounts])

    DBSession.query(Nonprofits_Similarity_By_Tweets).delete()
    size = len(scores)
    # Walk the upper triangle of the matrix so each pair is stored once.
    for m in xrange(size - 1):
        for n in xrange(m + 1, size):
            pair = Nonprofits_Similarity_By_Tweets(accounts[m].twitter_name,
                                                   accounts[n].twitter_name,
                                                   scores[m][n])
            DBSession.add(pair)
    DBSession.commit()
Example #22
0
def possible_partners():
    """Return the possible donors given a nonprofit, serialized as JSON.

    Request args:
        attr: ``'description'``, ``'homepage'`` or ``'tweets'``; selects the
            stored procedure to call.
        id: integer ID handed to that procedure.

    Raises:
        ValueError: when ``attr`` is absent or not one of the supported
            values, or when ``id`` is absent or not an integer.
    """
    procedure_by_attr = {
        'description': 'from_id_to_companies_by_desc',
        'homepage': 'from_id_to_companies_by_home',
        'tweets': 'from_id_to_companies_by_tweets',
    }
    attr = request.args.get('attr')
    procedure = procedure_by_attr.get(attr)
    if procedure is None:
        raise ValueError('Unsupported attr {0!r}; expected one of: {1}'.format(
            attr, ', '.join(sorted(procedure_by_attr))))

    raw_id = request.args.get('id')
    if raw_id is None:
        raise ValueError("Missing required 'id' request argument")

    # Only a whitelisted procedure name and an int() value are formatted in.
    query = "call  %s('%d')" % (procedure, int(raw_id))
    result = DBSession.execute(query)
    return json.dumps(procedure_to_json(result))
Example #23
0
def add_nonprofit_company_news_article_connections(article_ids, companies):
    """Takes a list of IDs of news articles and a list of Company objects as input. If any of
    the articles contain a company name, a link is made in the DB between the article and the company.

    A company is linked to an article when at least one of its mentions in the
    article text contains supportive wording (per
    ``news_parser.contains_supportive_wording``). Article IDs with no matching
    row are logged and skipped. Links are committed once, after all articles
    have been processed.
    """
    logger.debug('Inside add_nonprofit_company_news_article_connections(news_articles, companies)')
    total_companies = len(companies)
    for article_id in article_ids:
        article = DBSession.query(News_Article).get(article_id)
        if article is None:
            # No row means no text to search; move on to the next article
            # instead of dereferencing None below.
            logger.warning('News article {0} was not found in the DB, skipping.'.format(article_id))
            continue

        for counter, company in enumerate(companies, 1):
            if counter % 100 == 0:
                print('Processing article {0} for company {1} of {2}...'.format(article_id, counter, total_companies))
            mentions = news_parser.get_company_mentions_in_text(article.text, company.name.encode('utf-8'))
            # Stop at the first supportive mention: one link per company.
            if any(news_parser.contains_supportive_wording(mention) for mention in mentions):
                article.companies.append(company)
    DBSession.commit()
Example #24
0
def similarity():
    """Return the most similar nonprofits given a nonprofit and a metric, as JSON.

    Request args:
        attr: similarity metric -- ``'description'``, ``'homepage'``,
            ``'tweets'`` or ``'followers'``; selects the stored procedure.
        id: integer nonprofit ID passed to the procedure.
        top: optional integer passed as the procedure's second argument
            (defaults to 10).

    Raises:
        ValueError: if ``attr`` is missing or unsupported, if ``id`` is
            missing, or if ``id``/``top`` is not an integer.
    """
    procedures = {
        'description': 'from_nonprofit_id_to_similar_charities_by_description',
        'homepage': 'from_nonprofit_id_to_similar_charities_by_homepage',
        'tweets': 'from_nonprofit_id_to_similar_charities_by_tweets',
        'followers': 'from_nonprofit_id_to_similar_charities_by_followers',
    }
    top = int(request.args.get('top', 10))

    attr = request.args.get('attr')
    if attr not in procedures:
        raise ValueError('Unsupported attr {0!r}; expected one of: {1}'.format(
            attr, ', '.join(sorted(procedures))))

    raw_id = request.args.get('id')
    if raw_id is None:
        raise ValueError("Missing required 'id' request argument")

    # The procedure name is taken from the fixed table and both arguments go
    # through int(), so no raw request text ends up in the SQL string.
    query = 'call  %s(%d, %d)' % (procedures[attr], int(raw_id), top)
    result = DBSession.execute(query)
    return json.dumps(procedure_to_json(result))
Example #25
0
def get_followers_for_nonprofit(nonprofits_id):
    """Stub for retrieving followers of the given nonprofit and storing them in the DB.

    The call is logged and the nonprofit is loaded, but the actual follower
    retrieval is commented out below, so no followers are fetched or stored.
    """
    message = 'Inside get_followers_for_nonprofit(nonprofit) for nonprofits_id {0}'
    logger.debug(message.format(nonprofits_id))

    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)
    has_twitter_id = nonprofit.twitter_id is not None
    if not has_twitter_id:
        return

    # Follower retrieval is currently switched off; original steps:
    # follower_ids = givinggraph.twitter.users.get_followers(nonprofit.twitter_id)
    # DBSession.query(Nonprofits_Follower).filter(Nonprofits_Follower.nonprofit_handle == nonprofit.twitter_name).delete()
    # for follower_id in follower_ids:
    #     DBSession.add(Nonprofits_Follower(nonprofit.twitter_name, follower_id))
    # DBSession.commit()
Example #26
0
def sector_summary():
    """Return the summary of a given NTEE code, as a JSON string.

    The ``ntee`` request argument is passed to the ``sector_summary`` stored
    procedure. The nonprofits it returns are reduced to:

    * ``avg_<metric>`` for closeness_centrality, clustering_coefficient,
      degree, hubAuth, weighted_degree and eccentricity -- the mean over the
      nonprofits (0 when there are none);
    * ``tw_communities``, ``web_communities`` and ``desc_communities`` --
      how many nonprofits fall into each community value.
    """
    # Local import so this block carries its own dependency.
    from sqlalchemy import text

    # The NTEE code is user input: pass it as a bound parameter instead of
    # formatting it into the statement.
    statement = text('call  sector_summary(:ntee)')
    rows = DBSession.execute(statement, {'ntee': request.args.get('ntee')})
    nonprofits = procedure_to_json(rows)['results']

    metrics = ('closeness_centrality', 'clustering_coefficient', 'degree',
               'hubAuth', 'weighted_degree', 'eccentricity')
    result = {}
    for metric in metrics:
        result['avg_' + metric] = 0

    tw_communities = {}
    web_communities = {}
    desc_communities = {}
    community_tables = (('tw_community', tw_communities),
                        ('web_community', web_communities),
                        ('desc_community', desc_communities))

    nonprofit_count = float(len(nonprofits))
    for nonprofit in nonprofits:
        # One contribution per metric per nonprofit.
        for metric in metrics:
            result['avg_' + metric] += nonprofit[metric] / nonprofit_count
        # The first occurrence of a community counts as 1, not 0.
        for field, counts in community_tables:
            community = nonprofit[field]
            counts[community] = counts.get(community, 0) + 1

    result['tw_communities'] = tw_communities
    result['web_communities'] = web_communities
    result['desc_communities'] = desc_communities

    return json.dumps(result)
Example #27
0
def add_similarity_scores_for_nonprofit_descriptions():
    """Rebuild the description-similarity table for all nonprofit pairs.

    Nonprofits with a non-null description are scored against each other via
    ``similarity.get_similarity_scores_all_pairs``. All existing
    ``Nonprofits_Similarity_By_Description`` rows are deleted and a row is
    added for every pair (m, n) with m < n, then the session is committed.
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_descriptions()')

    described = DBSession.query(Nonprofit).filter(
        Nonprofit.description != None)  # nopep8 -- SQLAlchemy column comparison
    nonprofits = described.all()

    texts = []
    for nonprofit in nonprofits:
        texts.append(nonprofit.description)
    similarity_matrix = similarity.get_similarity_scores_all_pairs(texts)

    DBSession.query(Nonprofits_Similarity_By_Description).delete()
    dimension = len(similarity_matrix)
    for m in xrange(dimension - 1):
        # Start at m + 1: skip the diagonal and the mirrored lower half.
        for n in xrange(m + 1, dimension):
            score_row = Nonprofits_Similarity_By_Description(
                nonprofits[m].nonprofits_id,
                nonprofits[n].nonprofits_id,
                similarity_matrix[m][n])
            DBSession.add(score_row)
    DBSession.commit()
Example #28
0
def add_news_articles_to_db_for_nonprofit(nonprofits_id):
    """Find news articles about a nonprofit, store them, and return their IDs.

    Articles are searched by the nonprofit's name through
    ``news_searcher.find_news_articles``; the URLs of articles already stored
    for this nonprofit are passed as ``urls_to_ignore``. Every article found
    is saved as a ``News_Article``.

    Returns:
        List of ``news_articles_id`` values of the articles added by this call.
    """
    logger.debug('Inside add_news_articles_to_db_for_nonprofit(nonprofit) for nonprofits_id {0}'.format(nonprofits_id))
    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)

    stored_articles = DBSession.query(News_Article).filter(News_Article.nonprofits_id == nonprofits_id).all()
    known_urls = [stored.url for stored in stored_articles]

    found = news_searcher.find_news_articles(nonprofit.name, urls_to_ignore=known_urls)
    new_articles = [
        News_Article(nonprofit.nonprofits_id, hit.url, hit.headline, hit.body)
        for hit in found
    ]
    DBSession.add_all(new_articles)
    # The IDs are read from the objects only after the commit.
    DBSession.commit()

    article_ids = []
    for created in new_articles:
        article_ids.append(created.news_articles_id)
    return article_ids
Example #29
0
def similarity():
    """Return the most similar nonprofits given a nonprofit and a metric, as JSON.

    Request args:
        attr: ``'description'``, ``'homepage'``, ``'tweets'`` or
            ``'followers'``; picks the stored procedure to call.
        id: integer nonprofit ID (first procedure argument).
        top: optional integer (second procedure argument), default 10.

    Raises:
        ValueError: when ``attr`` is absent or unsupported, when ``id`` is
            absent, or when ``id``/``top`` cannot be parsed as an integer.
    """
    procedure_by_attr = {
        'description': 'from_nonprofit_id_to_similar_charities_by_description',
        'homepage': 'from_nonprofit_id_to_similar_charities_by_homepage',
        'tweets': 'from_nonprofit_id_to_similar_charities_by_tweets',
        'followers': 'from_nonprofit_id_to_similar_charities_by_followers',
    }
    top = int(request.args.get('top', 10))

    attr = request.args.get('attr')
    procedure = procedure_by_attr.get(attr)
    if procedure is None:
        raise ValueError('Unsupported attr {0!r}; expected one of: {1}'.format(
            attr, ', '.join(sorted(procedure_by_attr))))

    raw_id = request.args.get('id')
    if raw_id is None:
        raise ValueError("Missing required 'id' request argument")

    # Whitelisted procedure name plus two int() values: nothing from the
    # request is copied into the SQL verbatim.
    query = 'call  %s(%d, %d)' % (procedure, int(raw_id), top)
    result = DBSession.execute(query)
    return json.dumps(procedure_to_json(result))
Example #30
0
def add_similarity_scores_for_nonprofit_tweets():
    """Rebuild the tweet-similarity table for every pair of Twitter accounts.

    One text per ``twitter_name`` is produced with ``group_concat`` over the
    account's tweets and scored with
    ``similarity.get_similarity_scores_all_pairs``. The
    ``Nonprofits_Similarity_By_Tweets`` table is emptied and refilled with one
    row per pair (m, n), m < n, before committing.
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_tweets()')

    per_account = DBSession.query(
        Tweet.twitter_name,
        func.group_concat(Tweet.text).label('text'))
    tweets = per_account.group_by(Tweet.twitter_name).all()

    account_texts = []
    for grouped in tweets:
        account_texts.append(grouped.text)
    similarity_matrix = similarity.get_similarity_scores_all_pairs(account_texts)

    DBSession.query(Nonprofits_Similarity_By_Tweets).delete()
    dimension = len(similarity_matrix)
    for m in xrange(dimension - 1):
        # n starts after m, so each unordered pair is written exactly once.
        for n in xrange(m + 1, dimension):
            score_row = Nonprofits_Similarity_By_Tweets(
                tweets[m].twitter_name,
                tweets[n].twitter_name,
                similarity_matrix[m][n])
            DBSession.add(score_row)
    DBSession.commit()
Example #31
0
def get_tweets_for_nonprofit(nonprofits_id):
    """Retrieve tweets for the given nonprofit and store them in the DB.

    Only tweets after the highest tweet id already stored for the
    nonprofit's twitter name are requested (passed as ``since_id``). The
    nonprofit's twitter id is used for the lookup when present, otherwise
    its twitter name; when it has neither, no tweets are fetched.

    If no nonprofit exists for ``nonprofits_id`` the function logs and
    returns without touching the DB, instead of failing with an
    AttributeError.
    """
    logger.debug(
        'Inside get_tweets_for_nonprofit(nonprofit) for nonprofits_id {0}'.
        format(nonprofits_id))
    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)
    if nonprofit is None:
        logger.debug(
            'No nonprofit found for nonprofits_id {0}; no tweets retrieved'.
            format(nonprofits_id))
        return

    max_tweet = DBSession.query(
        func.max(cast(Tweet.tweet_id, Integer)).label('max_tweet_id')).filter(
            Tweet.twitter_name == nonprofit.twitter_name).first()
    if max_tweet is None or max_tweet.max_tweet_id is None:
        # Nothing stored yet for this twitter name: start from the lowest id.
        max_tweet_id = 1
    else:
        max_tweet_id = max_tweet.max_tweet_id

    tweets = []
    if nonprofit.twitter_id is not None:
        tweets = givinggraph.twitter.tweets.get_tweets_by_id(
            nonprofit.twitter_id, True, since_id=max_tweet_id)
    elif nonprofit.twitter_name is not None:
        tweets = givinggraph.twitter.tweets.get_tweets_by_name(
            nonprofit.twitter_name, True, since_id=max_tweet_id)

    for tweet in tweets:
        entities = tweet['entities']
        mentions = entities['user_mentions']
        # Multi-valued entity fields are flattened into ', '-joined strings.
        DBSession.add(
            Tweet(
                tweet['user']['screen_name'],
                tweet['id_str'],
                tweet['created_at'],
                tweet['text'].encode('utf-8'),
                tweet['lang'],
                tweet['retweet_count'],
                tweet['favorite_count'],
                ', '.join([mention['id_str'] for mention in mentions]),
                ', '.join([mention['screen_name'] for mention in mentions]),
                ', '.join(
                    [hashtag['text'] for hashtag in entities['hashtags']]),
                ', '.join([url['expanded_url'] for url in entities['urls']]),
                tweet['in_reply_to_screen_name'],
                tweet['in_reply_to_user_id_str'],
                tweet['in_reply_to_status_id_str']))
    DBSession.commit()
Example #32
0
def add_news_articles_to_db_for_nonprofit(nonprofits_id):
    """Search for news articles about the nonprofit and store them in the DB.

    URLs already stored for this nonprofit are passed to the searcher as
    ``urls_to_ignore``. Each article the searcher returns is saved as a
    News_Article linked to the nonprofit.

    Returns:
        A list with the IDs of the newly stored news articles. The list is
        empty when no nonprofit exists for ``nonprofits_id``; previously that
        case failed with an AttributeError.
    """
    logger.debug(
        'Inside add_news_articles_to_db_for_nonprofit(nonprofit) for nonprofits_id {0}'
        .format(nonprofits_id))
    nonprofit = DBSession.query(Nonprofit).get(nonprofits_id)
    if nonprofit is None:
        logger.debug(
            'No nonprofit found for nonprofits_id {0}; no news articles added'
            .format(nonprofits_id))
        return []

    query = DBSession.query(News_Article).filter(
        News_Article.nonprofits_id == nonprofits_id)
    already_retrieved_urls = [news_article.url for news_article in query.all()]

    news_articles = [
        News_Article(nonprofit.nonprofits_id, article.url, article.headline,
                     article.body)
        for article in news_searcher.find_news_articles(
            nonprofit.name, urls_to_ignore=already_retrieved_urls)
    ]
    DBSession.add_all(news_articles)
    # The IDs are read from the objects only after the commit.
    DBSession.commit()
    return [news_article.news_articles_id for news_article in news_articles]
Example #33
0
def related_companies():
    """Return the companies mentioned alongside a nonprofit in news articles.

    The nonprofit id comes from the ``id`` request argument and is passed to
    the ``related_companies`` stored procedure; the result is returned as a
    JSON string.
    """
    nonprofit_id = int(request.args.get('id'))
    rows = DBSession.execute('call  related_companies(%d)' % nonprofit_id)
    return json.dumps(procedure_to_json(rows))
Example #34
0
def twitter():
    """Return twitter-related information for a nonprofit.

    The nonprofit id comes from the ``id`` request argument and is passed to
    the ``from_nonprofit_id_to_twitter`` stored procedure; the result is
    returned as a JSON string.
    """
    nonprofit_id = int(request.args.get('id'))
    rows = DBSession.execute(
        'call  from_nonprofit_id_to_twitter(%d)' % nonprofit_id)
    return json.dumps(procedure_to_json(rows))
Example #35
0
def graph_stats():
    """Return the SNA indexes for a nonprofit.

    The nonprofit id comes from the ``id`` request argument and is passed to
    the ``from_nonprofit_id_to_sna`` stored procedure; the result is returned
    as a JSON string.
    """
    nonprofit_id = int(request.args.get('id'))
    rows = DBSession.execute('call  from_nonprofit_id_to_sna(%d)' % nonprofit_id)
    return json.dumps(procedure_to_json(rows))
Example #36
0
def by_ein(ein_id):
    """Look up a nonprofit by EIN and return it via result2json.

    Only the first Nonprofit row whose ``ein`` equals ``ein_id`` is used.
    """
    matches = DBSession.query(Nonprofit).filter(Nonprofit.ein == ein_id)
    return result2json(matches.first())
Example #37
0
def by_id(nonprofit_id):
    """Look up a nonprofit by our internal id and return it via result2json.

    Only the first Nonprofit row whose ``nonprofits_id`` equals
    ``nonprofit_id`` is used.
    """
    matches = DBSession.query(Nonprofit).filter(
        Nonprofit.nonprofits_id == nonprofit_id)
    return result2json(matches.first())
Example #38
0
def by_ein(ein_id):
    """Return the result2json form of the first nonprofit with the given EIN."""
    ein_matches = Nonprofit.ein == ein_id
    first_match = DBSession.query(Nonprofit).filter(ein_matches).first()
    return result2json(first_match)
Example #39
0
def related_companies():
    """Return, as JSON, the companies mentioned with a nonprofit in news articles.

    Calls the ``related_companies`` stored procedure with the integer value
    of the ``id`` request argument.
    """
    procedure_call = 'call  related_companies(%d)' % int(
        request.args.get('id'))
    return json.dumps(procedure_to_json(DBSession.execute(procedure_call)))
Example #40
0
def by_id(nonprofit_id):
    """Return the result2json form of the first nonprofit with this internal id."""
    id_matches = Nonprofit.nonprofits_id == nonprofit_id
    first_match = DBSession.query(Nonprofit).filter(id_matches).first()
    return result2json(first_match)
Example #41
0
def twitter():
    """Return, as JSON, the twitter-related information for a nonprofit.

    Calls the ``from_nonprofit_id_to_twitter`` stored procedure with the
    integer value of the ``id`` request argument.
    """
    procedure_call = 'call  from_nonprofit_id_to_twitter(%d)' % int(
        request.args.get('id'))
    return json.dumps(procedure_to_json(DBSession.execute(procedure_call)))
Example #42
0
def graph_stats():
    """Return, as JSON, the SNA indexes for a nonprofit.

    Calls the ``from_nonprofit_id_to_sna`` stored procedure with the integer
    value of the ``id`` request argument.
    """
    procedure_call = 'call  from_nonprofit_id_to_sna(%d)' % int(
        request.args.get('id'))
    return json.dumps(procedure_to_json(DBSession.execute(procedure_call)))