def add_similarity_scores_for_nonprofit_tweets():
    """Rebuild the stored tweet-similarity scores for every account pair.

    The tweets of each Twitter account are concatenated into a single text
    (``group_concat`` grouped by ``Tweet.twitter_name``), the resulting texts
    are scored against each other, all existing
    ``Nonprofits_Similarity_By_Tweets`` rows are deleted, and one new row is
    added per unordered pair of accounts before a single commit.
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_tweets()')

    combined_text = func.group_concat(Tweet.text).label('text')
    grouped_query = DBSession.query(Tweet.twitter_name, combined_text)
    accounts = grouped_query.group_by(Tweet.twitter_name).all()

    texts = [account.text for account in accounts]
    scores = similarity.get_similarity_scores_all_pairs(texts)

    # Old results are discarded wholesale; the new rows below replace them.
    DBSession.query(Nonprofits_Similarity_By_Tweets).delete()

    # Only index pairs with first < second are visited, so each unordered
    # pair of accounts is stored exactly once (scores[first][second]).
    size = len(scores)
    index_pairs = ((first, second)
                   for first in xrange(size - 1)
                   for second in xrange(first + 1, size))
    for first, second in index_pairs:
        row = Nonprofits_Similarity_By_Tweets(
            accounts[first].twitter_name,
            accounts[second].twitter_name,
            scores[first][second])
        DBSession.add(row)

    DBSession.commit()
def add_similarity_scores_for_nonprofit_descriptions():
    """Rebuild the stored description-similarity scores for nonprofit pairs.

    Nonprofits with a non-NULL description are loaded, their descriptions
    are scored against each other, all existing
    ``Nonprofits_Similarity_By_Description`` rows are deleted, and one new
    row is added per unordered pair of nonprofits before a single commit.
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_descriptions()')

    # "!= None" is intentional: it builds a SQL "IS NOT NULL" expression.
    has_description = Nonprofit.description != None  # nopep8
    described = DBSession.query(Nonprofit).filter(has_description).all()

    descriptions = [entry.description for entry in described]
    scores = similarity.get_similarity_scores_all_pairs(descriptions)

    # Old results are discarded wholesale; the new rows below replace them.
    DBSession.query(Nonprofits_Similarity_By_Description).delete()

    # Only index pairs with first < second are visited, so each unordered
    # pair of nonprofits is stored exactly once (scores[first][second]).
    size = len(scores)
    index_pairs = ((first, second)
                   for first in xrange(size - 1)
                   for second in xrange(first + 1, size))
    for first, second in index_pairs:
        row = Nonprofits_Similarity_By_Description(
            described[first].nonprofits_id,
            described[second].nonprofits_id,
            scores[first][second])
        DBSession.add(row)

    DBSession.commit()
def add_similarity_scores_for_nonprofit_descriptions():
    """Rebuild the stored description-similarity scores for nonprofit pairs.

    Nonprofits with a non-NULL description are loaded, their descriptions
    are scored against each other, all existing
    ``Nonprofits_Similarity_By_Description`` rows are deleted, and one new
    row is added per unordered pair of nonprofits before a single commit.
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_descriptions()')

    # "!= None" is intentional: it builds a SQL "IS NOT NULL" expression.
    has_description = Nonprofit.description != None  # nopep8
    described = DBSession.query(Nonprofit).filter(has_description).all()

    descriptions = [entry.description for entry in described]
    scores = similarity.get_similarity_scores_all_pairs(descriptions)

    # Old results are discarded wholesale; the new rows below replace them.
    DBSession.query(Nonprofits_Similarity_By_Description).delete()

    # Only index pairs with first < second are visited, so each unordered
    # pair of nonprofits is stored exactly once (scores[first][second]).
    size = len(scores)
    index_pairs = ((first, second)
                   for first in xrange(size - 1)
                   for second in xrange(first + 1, size))
    for first, second in index_pairs:
        row = Nonprofits_Similarity_By_Description(
            described[first].nonprofits_id,
            described[second].nonprofits_id,
            scores[first][second])
        DBSession.add(row)

    DBSession.commit()
def add_similarity_scores_for_nonprofit_tweets():
    """Rebuild the stored tweet-similarity scores for every account pair.

    The tweets of each Twitter account are concatenated into a single text
    (``group_concat`` grouped by ``Tweet.twitter_name``), the resulting texts
    are scored against each other, all existing
    ``Nonprofits_Similarity_By_Tweets`` rows are deleted, and one new row is
    added per unordered pair of accounts before a single commit.
    """
    logger.debug('Inside add_similarity_scores_for_nonprofit_tweets()')

    combined_text = func.group_concat(Tweet.text).label('text')
    grouped_query = DBSession.query(Tweet.twitter_name, combined_text)
    accounts = grouped_query.group_by(Tweet.twitter_name).all()

    texts = [account.text for account in accounts]
    scores = similarity.get_similarity_scores_all_pairs(texts)

    # Old results are discarded wholesale; the new rows below replace them.
    DBSession.query(Nonprofits_Similarity_By_Tweets).delete()

    # Only index pairs with first < second are visited, so each unordered
    # pair of accounts is stored exactly once (scores[first][second]).
    size = len(scores)
    index_pairs = ((first, second)
                   for first in xrange(size - 1)
                   for second in xrange(first + 1, size))
    for first, second in index_pairs:
        row = Nonprofits_Similarity_By_Tweets(
            accounts[first].twitter_name,
            accounts[second].twitter_name,
            scores[first][second])
        DBSession.add(row)

    DBSession.commit()