def compute_similarities(self, candidate_ids):
    '''
    Computes a cosine similarity between the session user's read set and each
    candidate's share set (binary incidence vectors). This function determines
    the definition of similarity: if the vectors ever get too long, edit this.
    '''
    reader_id = self.usr.usr_id
    info = dict()
    for candidate_id in candidate_ids:
        info[int(candidate_id)] = {"sims": 0, "total": 1, "similarity": 0}
    dbi = DatabaseInterface.get_shared_instance()
    # Do two queries. One to find the intersection...
    in_placeholder = ",".join(["%s"] * len(candidate_ids))
    cursor = dbi.execute(
        "SELECT analytics_usr_shares.usr as candidate, COUNT(*) as share_count FROM "
        " analytics_usr_markread JOIN analytics_usr_shares ON "
        " analytics_usr_markread.content = analytics_usr_shares.content "
        " WHERE analytics_usr_markread.usr=%s AND analytics_usr_shares.usr IN "
        "( " + in_placeholder + " ) "
        " GROUP BY candidate",
        tuple([reader_id] + list(candidate_ids)))
    for row in cursor.fetchall():
        if row["share_count"] != 0:
            info[int(row["candidate"])]["sims"] = row["share_count"]
    # ...and one to find the total set sizes.
    cursor = dbi.execute(
        "SELECT usr as candidate, COUNT(*) as total_share_count FROM "
        " analytics_usr_shares "
        " WHERE usr IN "
        "( " + in_placeholder + " ) "
        " GROUP BY usr ",
        tuple(candidate_ids))
    for row in cursor.fetchall():
        info[int(row["candidate"])]["total"] = row["total_share_count"]
    cursor = dbi.execute(
        "SELECT COUNT(*) as total_markread_count FROM "
        " analytics_usr_markread "
        " WHERE usr=%s ", (reader_id,))
    usr_total = float(cursor.fetchall()[0]["total_markread_count"])
    # Now compute and return.
    for candidate_id in candidate_ids:
        c_id = int(candidate_id)
        num = float(info[c_id]["sims"])
        denom = math.sqrt(info[c_id]["total"] * usr_total)  # Cosine
        # denom = info[c_id]["total"] + usr_total - num  # Jaccard-style alternative
        info[c_id]["similarity"] = num / max(1, denom)
    return info
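# A minimal worked example of the similarity above (a standalone sketch, not
# part of the class): with binary read/share vectors, cosine similarity reduces
# to |intersection| / sqrt(|candidate shares| * |reader reads|).
import math

def cosine_similarity_sketch(shared_and_read, candidate_total, reader_total):
    # shared_and_read: count of items the candidate shared AND the reader read
    return float(shared_and_read) / max(1, math.sqrt(candidate_total * reader_total))

# e.g. 3 overlapping items, candidate shared 9 items, reader read 16:
# 3 / sqrt(9 * 16) = 3 / 12 = 0.25
assert abs(cosine_similarity_sketch(3, 9, 16) - 0.25) < 1e-9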
def _load_feed_statistics(self):
    self._feed_statistics = dict()
    #return self._mock_load_feed_statistics()
    dbi = DatabaseInterface.get_shared_instance()
    dbcursor = dbi.execute(
        "SELECT feed_id, average_peak_score, std_deviation FROM news_social_score_feed_statistics",
        None)
    avg_sum = float(0)
    std_dev_sum = float(0)
    i = 0
    for row in dbcursor.fetchall():
        self._feed_statistics[row["feed_id"]] = (row["average_peak_score"],
                                                 row["std_deviation"])
        avg_sum += row["average_peak_score"]
        # Accumulate the coefficient of variation (std_dev relative to the mean).
        std_dev_sum += (float(row["std_deviation"]) / max(1, row["average_peak_score"]))
        i += 1
    if i == 0:
        i = 1
    # Key 0 holds global fallback statistics for feeds that have none of their own.
    avg_avg = avg_sum / i
    avg_std_dev = (std_dev_sum / i) * avg_avg
    if avg_std_dev == 0:
        avg_std_dev = 1
    self._feed_statistics[0] = (avg_avg, avg_std_dev)
    # This is a rough heuristic rather than properly pooled statistics :p
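# A standalone sketch of how the fallback entry at feed_id 0 comes out. Assume
# two feeds with (average_peak_score, std_deviation) of (10, 2) and (20, 8):
#   avg_avg     = (10 + 20) / 2           = 15.0
#   mean CV     = ((2/10) + (8/20)) / 2   = 0.3
#   avg_std_dev = 0.3 * 15.0              = 4.5
def _fallback_stats_sketch(stats):
    avg_sum = sum(avg for avg, _ in stats)
    cv_sum = sum(float(sd) / max(1, avg) for avg, sd in stats)
    n = max(1, len(stats))
    avg_avg = avg_sum / float(n)
    fallback_std = (cv_sum / n) * avg_avg
    return (avg_avg, fallback_std if fallback_std != 0 else 1)

_avg, _std = _fallback_stats_sketch([(10, 2), (20, 8)])
assert _avg == 15.0 and abs(_std - 4.5) < 1e-9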
def _prepare_set(self):
    dbi = DatabaseInterface.get_shared_instance()
    dbi.execute("TRUNCATE news_social_score_update", None)
    # Chunking by story_chunk_size is an assumption here; prepare_update()
    # below is the fuller, tested version of this staging step.
    if self.update_all:
        dbi.execute(
            "INSERT INTO news_social_score_update ( SELECT * FROM news_social_score_all LIMIT %s )",
            (SocialScoreUpdater.story_chunk_size,))
    else:
        dbi.execute(
            "INSERT INTO news_social_score_update ( SELECT * FROM news_social_score_active LIMIT %s )",
            (SocialScoreUpdater.story_chunk_size,))
def _load_from_db(self):
    database_interface = DatabaseInterface.get_shared_instance()
    dbi = database_interface.dbi
    dbi.commit()
    dbi.autocommit(True)
def prepare_update(self):
    dbi = DatabaseInterface.get_shared_instance()
    dbi.execute('TRUNCATE news_social_score_update', None)
    dbi.autocommit(False)
    if self.update_all:
        # Tested. Copies the whole global set in chunks, resuming after the
        # highest story_id already staged.
        rowcount = 1
        while rowcount > 0:
            cursor = dbi.execute(
                "INSERT INTO news_social_score_update (story_id,last_update,old_raw_score,total_shares,created,state) "
                "( SELECT story_id,last_update,raw_score,total_shares,created,'READY' FROM news_social_score_all "
                "WHERE story_id>( SELECT IFNULL(MAX(story_id), 0) FROM news_social_score_update ) LIMIT %s)",
                (SocialScoreUpdater.story_chunk_size,))
            rowcount = cursor.rowcount
            dbi.commit()
    else:
        # Tested. The active set is assumed small enough for the MySQL server
        # to handle in one statement; this only needs read access to the active
        # set, so concurrent SELECTs on it should be unaffected.
        dbi.execute(
            "INSERT INTO news_social_score_update (story_id,old_raw_score,state) "
            "( SELECT story_id,raw_score,'READY' FROM news_social_score_active) ",
            None)
        dbi.execute(
            "UPDATE news_social_score_update JOIN news_social_score_all USING(story_id) SET "
            "news_social_score_update.total_shares = news_social_score_all.total_shares, "
            "news_social_score_update.last_update=news_social_score_all.last_update, "
            "news_social_score_update.created=news_social_score_all.created",
            None)
        dbi.commit()
    dbi.autocommit(True)
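# The chunked copy above is keyset pagination: each INSERT ... SELECT resumes
# after the highest story_id already copied, so no OFFSET scan is needed and
# each chunk commits independently. A standalone sketch of the same pattern
# over a plain sorted list (chunk size is illustrative):
def copy_in_chunks(source_rows, chunk_size=3):
    copied = []
    while True:
        last_id = copied[-1][0] if copied else 0
        # In SQL: INSERT ... SELECT ... WHERE id > last_id LIMIT chunk_size
        chunk = [r for r in source_rows if r[0] > last_id][:chunk_size]
        if not chunk:
            break
        copied.extend(chunk)
    return copied

rows = [(i, 'story-%d' % i) for i in range(1, 8)]
assert copy_in_chunks(rows) == rows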
def add_new_story(story_id):
    ''' Adds a story to both the active and global set '''
    dbi = DatabaseInterface.get_shared_instance()
    dbi.autocommit(False)
    dbi.execute(
        "INSERT INTO news_social_score_all ( story_id, created ) (SELECT story_id,created FROM news_stories WHERE story_id=%s)",
        (story_id,))
    dbi.execute(
        "INSERT INTO news_social_score_active ( normalized_score, story_id, raw_score ) (SELECT 0,story_id,0 FROM news_stories WHERE story_id=%s)",
        (story_id,))
    dbi.commit()
    dbi.autocommit(True)
def rotate_active_set(self):
    # Swap the tables by renaming; a multi-table RENAME TABLE is atomic in MySQL.
    dbi = DatabaseInterface.get_shared_instance()
    dbi.autocommit(False)
    dbi.execute("DROP TABLE IF EXISTS news_social_score_active_old", None)
    # Keep the previous set around just in case.
    dbi.execute(
        "RENAME TABLE news_social_score_active TO news_social_score_active_old, news_social_score_active_new TO news_social_score_active",
        None)
    dbi.commit()
    dbi.autocommit(True)
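# How the pieces above appear to fit together for one refresh cycle. The call
# order is an assumption inferred from the state transitions
# (READY -> SCORE_COMPUTED -> CONSIDERED_IN_SET -> UPDATED_STORIES), not a
# driver that exists in this file.
def _run_update_cycle_sketch(updater):
    updater.prepare_update()           # stage stories into news_social_score_update
    updater.update_scores()            # fetch share counts, compute new scores
    updater.build_new_active_set()     # pick stories above threshold or young enough
    updater.update_permanent_scores()  # fold results back into news_social_score_all
    updater.rotate_active_set()        # atomically swap in the new active table
    updater.update_statistics()        # refresh per-feed mean / std-dev stats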
def _load_from_cache(self):
    ''' Implements very simple caching over the whole interest set. '''
    dbi = DatabaseInterface.get_shared_instance()
    cursor = dbi.execute(
        "SELECT category_id, read_count, unnormalized_interest, interest, cached_time FROM cache_news_user_interests WHERE user_id=%s",
        (self.user_id,))
    for row in cursor.fetchall():
        self.categories[row['category_id']] = UserCategoryInterests.CategoryInterest(
            row['category_id'], row['read_count'],
            row['unnormalized_interest'], row['interest'])
    return True
def _store_in_db(self):
    database_interface = DatabaseInterface.get_shared_instance()
    dbi = database_interface.dbi
    dbi.autocommit(False)
    for url in self.og_shares:
        # Assumed schema (url, share_count); adjust to the real table definition.
        dbi.execute(
            "INSERT INTO news_socialscore_opengraphgshares (url, share_count) VALUES (%s, %s)",
            (url, self.og_shares[url]))
    dbi.commit()
    dbi.autocommit(True)
def load_feed_ids(self):
    dbi = DatabaseInterface.get_shared_instance()
    cursor = dbi.execute(
        "SELECT feed_id,facebook_page_id FROM news_feeds WHERE facebook_page_id IS NOT NULL",
        None)
    self.feed_ids = dict()
    for row in cursor.fetchall():
        if row['facebook_page_id'] is None:
            continue
        self.feed_ids[row['facebook_page_id']] = row['feed_id']
def update_scores(self):
    '''
    Computes and updates the fields new_raw_score, new_normalized_score,
    last_update, total_shares and state in news_social_score_update,
    <story_chunk_size> stories at a time.
    '''
    dbi = DatabaseInterface.get_shared_instance()
    dbi.autocommit(False)
    self._load_feed_statistics()
    while True:
        cursor = dbi.execute(
            "SELECT story_id, feed_id, news_social_score_update.created, url, old_raw_score, total_shares, last_update FROM "
            "news_social_score_update JOIN news_stories USING(story_id) "
            "WHERE news_social_score_update.state='READY' LIMIT %s",
            (SocialScoreUpdater.story_chunk_size,))
        if cursor.rowcount == 0:
            break
        stories = list()
        urls = list()
        for row in cursor.fetchall():
            stories.append(SocialScoreUpdater.SocialScoreStory(
                row['story_id'], row['url'], row['feed_id'],
                row['old_raw_score'], row['total_shares'], row['last_update']))
            urls.append(row['url'])
        # Get the share counts from Facebook's Open Graph.
        ogscraper = OpenGraphShareScraper()
        ogscraper.set_urls(urls)
        ogscraper.update_og_shares()
        og_shares = ogscraper.get_result()
        # Compute the new scores and update each row, but commit all at once.
        time_now = int(time.time())
        for story in stories:
            if story.url not in og_shares:
                new_total_shares = 0  # Nothing we can do; Facebook has no record of it.
            else:
                new_total_shares = og_shares[story.url]
            shares_since = new_total_shares - story.total_shares
            story.total_shares = new_total_shares
            story.updated_raw_score = self._compute_updated_raw_score(
                story, shares_since, time_now)
            story.normalized_score = self._normalize_across_feeds(
                story.updated_raw_score, story.feed_id)
            query_params = (story.updated_raw_score, story.total_shares, time_now,
                            story.normalized_score, story.story_id)
            dbi.execute(
                "UPDATE news_social_score_update SET new_raw_score=%s , total_shares=%s, last_update=%s, new_normalized_score=%s,state='SCORE_COMPUTED' WHERE story_id=%s",
                query_params)
        dbi.commit()
    dbi.autocommit(True)
    # Works correctly; efficiency untested.
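# _normalize_across_feeds is not shown in this file; given that each feed keeps
# (average_peak_score, std_deviation), a z-score is the natural reading. This
# is an assumed sketch, not the actual implementation:
def _normalize_across_feeds_sketch(feed_statistics, raw_score, feed_id):
    # Key 0 is the global fallback built in _load_feed_statistics().
    avg, std_dev = feed_statistics.get(feed_id, feed_statistics[0])
    return (raw_score - avg) / float(std_dev)

# A feed whose stories peak at 100 +/- 20: a raw score of 140 normalizes to 2.0.
assert _normalize_across_feeds_sketch({0: (15.0, 4.5), 7: (100, 20)}, 140, 7) == 2.0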
def _check_cache(self):
    ''' Checks whether the cache is stale; purges stale rows and returns False if so. '''
    dbi = DatabaseInterface.get_shared_instance()
    cursor = dbi.execute(
        "SELECT MIN(cached_time) as oldest_cached FROM cache_news_user_interests WHERE user_id=%s",
        (self.user_id,))
    oldest_acceptable = int(time.time()) - UserCategoryInterests.acceptable_cache_age
    for row in cursor.fetchall():
        # MIN() returns NULL when the user has no cached rows; treat that as stale.
        if row['oldest_cached'] is None or row['oldest_cached'] < oldest_acceptable:
            dbi.execute(
                "DELETE FROM cache_news_user_interests WHERE user_id=%s AND cached_time < %s",
                (self.user_id, oldest_acceptable))
            return False
    return True
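# The staleness rule above in isolation: a cached row is acceptable while
# cached_time >= now - acceptable_cache_age. A sketch with an assumed 1-hour TTL:
import time

def is_fresh_sketch(cached_time, acceptable_cache_age=3600, now=None):
    now = int(time.time()) if now is None else now
    return cached_time >= now - acceptable_cache_age

assert is_fresh_sketch(1000, acceptable_cache_age=3600, now=4000)      # 3000s old: fresh
assert not is_fresh_sketch(1000, acceptable_cache_age=3600, now=5000)  # 4000s old: stale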
def _cache_interests(self):
    dbi = DatabaseInterface.get_shared_instance()
    dbi.autocommit(False)
    time_now = int(time.time())
    for category_id in self.categories:
        category = self.categories[category_id]
        query_params = (self.user_id, category.category_id, category.read_count,
                        category.unnormalized_interest, category.interest, time_now)
        dbi.execute(
            "REPLACE INTO cache_news_user_interests (user_id, category_id, read_count, unnormalized_interest, interest, cached_time) VALUES(%s,%s,%s,%s,%s,%s)",
            query_params)
    dbi.commit()
    dbi.autocommit(True)
def load_candidate_set(self):
    dbi = DatabaseInterface.get_shared_instance()
    cursor = dbi.execute(
        "SELECT * FROM algos_followsuggestions_candidateset WHERE session_usr=%s",
        (self.usr.usr_id,))  # note the trailing comma: params must be a tuple
    self.candidate_set = [
        FollowCandidate(r["candidate_usr"], r["similarity"],
                        r["session_similarity"], r["follower_count"])
        for r in cursor.fetchall()
    ]
    return self.candidate_set
def update_permanent_scores(self):
    dbi = DatabaseInterface.get_shared_instance()
    rowcount = 1
    while rowcount > 0:
        cursor = dbi.execute(
            "UPDATE ( SELECT story_id FROM news_social_score_update WHERE state='CONSIDERED_IN_SET' LIMIT %s) t_dummy "
            "JOIN news_social_score_update t_ud JOIN news_social_score_all t_all "
            "ON t_dummy.story_id = t_ud.story_id AND t_dummy.story_id=t_all.story_id SET "
            "t_all.reflected_in_stats = IF( (t_ud.new_raw_score > t_all.peak_score),0,t_all.reflected_in_stats) , "
            "t_ud.state='UPDATED_STORIES', t_all.total_shares=t_ud.total_shares, t_all.raw_score = t_ud.new_raw_score, "
            "t_all.peak_score=GREATEST(t_all.peak_score, t_ud.new_raw_score), t_all.last_update = t_ud.last_update",
            (SocialScoreUpdater.story_chunk_size,))
        rowcount = cursor.rowcount
        dbi.commit()
def add_read_count(self, read_count, category_ids):
    # Seems to work. Upsert: insert a row per category, or bump the counter if
    # one already exists.
    dbi = DatabaseInterface.get_shared_instance()
    dbi.autocommit(False)
    for category_id in category_ids:
        dbi.execute(
            "INSERT INTO news_user_interests_read_count (user_id,category_id,read_count) VALUES( %s,%s, %s ) "
            "ON DUPLICATE KEY UPDATE read_count= read_count+%s",
            (self.user_id, category_id, read_count, read_count))
    dbi.commit()
    dbi.autocommit(True)
def _dynamically_add_feed_id_to_statistics(self, feed_id):
    # Seed a brand-new feed with the global fallback statistics (key 0).
    dbi = DatabaseInterface.get_shared_instance()
    if self._feed_statistics is None:
        self._load_feed_statistics()
    global_average_peak_score = self._feed_statistics[0][0]
    global_average_std_deviation = self._feed_statistics[0][1]
    dbi.execute(
        "INSERT INTO news_social_score_feed_statistics (feed_id, average_peak_score, std_deviation, feed_n) VALUES( %s,%s,%s,%s)",
        (feed_id, global_average_peak_score, global_average_std_deviation, 0))
    dbi.commit()
    self._feed_statistics[feed_id] = (global_average_peak_score,
                                      global_average_std_deviation)
def build_new_active_set(self):
    dbi = DatabaseInterface.get_shared_instance()
    dbi.execute("DROP TABLE IF EXISTS news_social_score_active_new", None)
    # Create an empty copy of the active table's schema.
    dbi.execute(
        "CREATE TABLE news_social_score_active_new SELECT * FROM news_social_score_active LIMIT 0",
        None)
    dbi.autocommit(False)
    # Add the highest-scored articles into the new active set.
    rowcount = 1
    while rowcount > 0:
        cursor = dbi.execute(
            "UPDATE news_social_score_update SET state='ABOVE_TRESHOLD' WHERE new_normalized_score > %s AND state='SCORE_COMPUTED' ORDER BY new_normalized_score DESC LIMIT %s",
            (SocialScoreUpdater.treshold_normalized_score,
             SocialScoreUpdater.story_chunk_size))
        rowcount = cursor.rowcount
        if rowcount == 0:
            break
        dbi.execute(
            "INSERT INTO news_social_score_active_new (normalized_score, story_id, raw_score ) "
            "( SELECT new_normalized_score,story_id,new_raw_score FROM news_social_score_update WHERE state='ABOVE_TRESHOLD' ORDER BY new_normalized_score DESC)",
            None)
        dbi.execute(
            "UPDATE news_social_score_update SET state='CONSIDERED_IN_SET' WHERE state='ABOVE_TRESHOLD'",
            None)
        dbi.commit()
    # Keep stories that lie below the threshold but are young enough to stay.
    treshold_created_time = int(time.time()) - SocialScoreUpdater.treshold_age
    rowcount = 1
    while rowcount > 0:
        cursor = dbi.execute(
            "UPDATE news_social_score_update SET state='ABOVE_TRESHOLD' WHERE created > %s AND state='SCORE_COMPUTED' LIMIT %s",
            (treshold_created_time, SocialScoreUpdater.story_chunk_size))
        rowcount = cursor.rowcount
        if rowcount == 0:
            break
        dbi.execute(
            "INSERT INTO news_social_score_active_new (normalized_score, story_id, raw_score ) "
            "( SELECT new_normalized_score,story_id,new_raw_score FROM news_social_score_update WHERE state='ABOVE_TRESHOLD' ORDER BY new_normalized_score DESC)",
            None)
        dbi.execute(
            "UPDATE news_social_score_update SET state='CONSIDERED_IN_SET' WHERE state='ABOVE_TRESHOLD'",
            None)
        dbi.commit()
    # Mark the rest as considered without adding them to the new set.
    rowcount = 1
    while rowcount > 0:
        cursor = dbi.execute(
            "UPDATE news_social_score_update SET state='CONSIDERED_IN_SET' WHERE state='SCORE_COMPUTED' LIMIT %s",
            (SocialScoreUpdater.story_chunk_size,))
        rowcount = cursor.rowcount
        dbi.commit()
    dbi.autocommit(True)
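# The selection policy above, in miniature: a story enters the new active set
# if its normalized score clears the threshold OR it is still young enough.
# The threshold and max age here are illustrative stand-ins for
# treshold_normalized_score and treshold_age.
def belongs_in_active_set_sketch(normalized_score, created, now,
                                 score_threshold=1.0, max_age=86400):
    return normalized_score > score_threshold or (now - created) < max_age

now = 1000000
assert belongs_in_active_set_sketch(2.5, now - 200000, now)       # high score, old: stays
assert belongs_in_active_set_sketch(0.1, now - 3600, now)         # low score, young: stays
assert not belongs_in_active_set_sketch(0.1, now - 200000, now)   # low score, old: dropped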
def load_category_interests(self):
    ''' Loads per-category interests, via the cache when it is fresh. '''
    self.categories = dict()
    dbi = DatabaseInterface.get_shared_instance()
    if UserCategoryInterests.use_caching and self._check_cache():
        self._load_from_cache()
    else:
        cursor = dbi.execute(
            "SELECT category_id,read_count FROM news_user_interests_read_count ",
            None)
        rows = cursor.fetchall()
        self._process_db_rows(rows)
        self._compute_interests()
        if UserCategoryInterests.use_caching:
            self._cache_interests()
    return self.categories
def expand_candidate_set(self, candidate_ids):
    # Skip candidates already in the set, because what follows is expensive.
    dbi = DatabaseInterface.get_shared_instance()
    in_placeholder = ",".join(["%s"] * len(candidate_ids))
    cursor = dbi.execute(
        "SELECT candidate_usr FROM algos_followsuggestions_candidateset "
        "WHERE session_usr=%s AND candidate_usr IN ( " + in_placeholder + ") ",
        tuple([self.usr.usr_id] + list(candidate_ids)))
    existing_ids = set(r["candidate_usr"] for r in cursor.fetchall())
    add_ids = set(candidate_ids).difference(existing_ids)
    if len(add_ids) == 0:
        return
    # Load follower counts.
    in_placeholder = ",".join(["%s"] * len(add_ids))
    cursor = dbi.execute(
        "SELECT following, COUNT(*) AS follower_count FROM analytics_follow "  # Blame django. I didn't normalize it.
        " WHERE following IN "
        "(" + in_placeholder + ") "
        " GROUP BY following",
        tuple(add_ids))
    follower_count = {}
    for r in cursor.fetchall():
        follower_count[int(r["following"])] = r["follower_count"]
    similarities = self.compute_similarities(add_ids)
    follow_candidates = []
    for uid in add_ids:
        uid = int(uid)
        # compute_similarities seeds an entry for every id passed in, so
        # direct indexing is safe here.
        follow_candidates.append(
            FollowCandidate(uid, similarities[uid], 1, follower_count.get(uid, 0)))
    self.update_candidate_set(follow_candidates)
def update_candidate_set(self, candidates):
    dbi = DatabaseInterface.get_shared_instance()
    query_str = (
        "INSERT INTO algos_followsuggestions_candidateset (session_usr,candidate_usr, similarity, follower_count) "
        " VALUES (%s, %s, %s, %s )"
        " ON DUPLICATE KEY UPDATE session_similarity=session_similarity+1")
    for candidate in candidates:
        # candidate.similarity is the per-candidate info dict returned by
        # compute_similarities(), so pull out the scalar score.
        dbi.execute(query_str,
                    (self.usr.usr_id, candidate.usr_id,
                     candidate.similarity["similarity"],
                     candidate.follower_count))
    # Continuation of a loop probing candidate capture resolutions (rows with "W"/"H").
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, row["H"])
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    resolutions[str(width) + "x" + str(height)] = "OK"
print(resolutions)

if __name__ == '__main__':
    """ Create root user with admin rights from the provided root.jpg """
    from databaseinterface import DatabaseInterface
    from databasequeries import DatabaseQueries as Dbq
    dbi = DatabaseInterface(config.get('DATABASE', 'database'),
                            config.get('DATABASE', 'username'),
                            config.get('DATABASE', 'password'),
                            config.get('DATABASE', 'ip_address'))
    username = config.get('BAARIMIKKO', 'root_name')
    img_path = config.get('BAARIMIKKO', 'root_photo')
    print(f'Create {username} user from faces/{img_path}')
    ask = input('Y/N?')
    if ask in ('Y', 'y', 'yes', 'Yes', 'YES'):
        try:
            pickled = learn(username, img_path)
            args = (username, pickled, img_path, True)
            dbi.execute_query(Dbq.CREATE_USER, args)
        except Exception as e:
            print(f"Error {e} while creating {username} user. "
                  f"Did you provide faces/{img_path}, and is the database set up correctly?")
    else:
        print('Cancelled by user')
        exit(0)
def update_statistics(self):
    dbi = DatabaseInterface.get_shared_instance()
    # Let's use some Python here. Collect the peak scores that haven't been
    # folded into the stats yet.
    cursor = dbi.execute(
        "SELECT feed_id,peak_score FROM news_social_score_all JOIN news_stories USING(story_id) WHERE reflected_in_stats='0'",
        None)
    peak_scores = dict()
    for row in cursor.fetchall():
        peak_scores.setdefault(row['feed_id'], []).append(row['peak_score'])
    # Get the existing stats for each feed.
    cursor = dbi.execute(
        "SELECT feed_id,feed_n,average_peak_score,std_deviation FROM news_social_score_feed_statistics",
        None)
    stats = dict()
    for row in cursor.fetchall():
        stats[row['feed_id']] = row
    # Merge the new scores into the running statistics, feed by feed.
    dbi.autocommit(False)
    for feed_id in peak_scores:
        total = 0
        squares = 0  # Sum of squares
        for pscore in peak_scores[feed_id]:
            total += pscore
            squares += pscore * pscore
        # Var(X) = E[X^2] - E[X]^2, so we only need to track sums and sums of squares.
        added_n = len(peak_scores[feed_id])
        added_sum = total
        added_squares = squares
        stat = stats[feed_id]
        existing_n = stat['feed_n']
        existing_avg = stat['average_peak_score']
        existing_sum = existing_n * existing_avg
        existing_variance = stat['std_deviation'] * stat['std_deviation']
        # Recover the old sum of squares from E[X^2] = Var(X) + E[X]^2.
        existing_squares = (existing_variance + (existing_avg * existing_avg)) * existing_n
        new_avg = float(added_sum + existing_sum) / max(1, (added_n + existing_n))
        new_squares = added_squares + existing_squares
        # Clamp at 0 to guard against tiny negative values from float error.
        new_variance = max(0, new_squares / (existing_n + added_n) - (new_avg * new_avg))
        new_std_dev = math.sqrt(new_variance)
        new_n = existing_n + added_n
        # Adapt to the changing popularity of feeds: once feed_n grows past the
        # upper bound, reset it to the lower bound so ancient stats lose weight
        # and recent scores count for more. This is deliberately not a true
        # average of everything.
        if new_n > SocialScoreUpdater.stat_update_upper:
            new_n = SocialScoreUpdater.stat_update_lower
        update_params = (new_avg, new_std_dev, new_n, feed_id)
        dbi.execute(
            'UPDATE news_social_score_feed_statistics SET average_peak_score=%s, std_deviation=%s,feed_n=%s WHERE feed_id=%s',
            update_params)
    dbi.execute(
        "UPDATE news_social_score_all SET reflected_in_stats='1' WHERE reflected_in_stats='0'",
        None)
    dbi.commit()
    dbi.autocommit(True)
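# A standalone check of the merge math above: combine an existing batch with a
# new one via sums and sums of squares, then compare against computing the
# (population) statistics over all points directly.
import math

def merge_stats(n1, avg1, std1, batch):
    n2 = len(batch)
    s = n1 * avg1 + sum(batch)
    sq = (std1 * std1 + avg1 * avg1) * n1 + sum(x * x for x in batch)
    n = n1 + n2
    avg = float(s) / n
    return avg, math.sqrt(max(0, sq / float(n) - avg * avg)), n

old, new = [2.0, 4.0, 6.0], [8.0, 10.0]
n1 = len(old)
avg1 = sum(old) / n1
std1 = math.sqrt(sum(x * x for x in old) / n1 - avg1 * avg1)
avg, std, n = merge_stats(n1, avg1, std1, new)
all_pts = old + new
expect_avg = sum(all_pts) / len(all_pts)
expect_std = math.sqrt(sum(x * x for x in all_pts) / len(all_pts) - expect_avg ** 2)
assert abs(avg - expect_avg) < 1e-9 and abs(std - expect_std) < 1e-9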
from databaseinterface import DatabaseInterface
import json

dbi = DatabaseInterface.get_shared_instance()
cursor = dbi.execute(
    "SELECT categories.name as category, title, brief, link FROM content JOIN feeds USING(feed_id) JOIN categories USING(category_id)",
    None)
f = open("dump.json", "w")
f.write(json.dumps(cursor.fetchall(), indent=4, sort_keys=True))
f.close()
# That was simple.
from databaseinterface import DatabaseInterface
from databasequeries import DatabaseQueries as Dbq
import configparser

config = configparser.ConfigParser()
config.read('config.ini')
dbi = DatabaseInterface(config.get('DATABASE', 'database'),
                        config.get('DATABASE', 'username'),
                        config.get('DATABASE', 'password'),
                        config.get('DATABASE', 'ip_address'))


def store_recipe(data):
    # Actual functionality to transplant into the app.py function.
    print(data)
    # Check drink names and look for a match.
    drink = data["name"]
    drinks = dict(dbi.read_query(Dbq.ALL_DRINKS))
    print(drinks)
    # Select the right method.
    # TODO: remove duplicate lines
    if drink in drinks.values():
        print('Update Recipe')
        # Nuke the old recipe.
        update_id = dbi.read_query(Dbq.GET_DRINK_ID, (data["name"],))[0][0]
        dbi.execute_query(Dbq.DELETE_RECIPE, (update_id,))
        # Iterate over the recipe and store it to the database.
        for ingredient, quantity in data["ingredients"].items():
            pass  # TODO: insert each (ingredient, quantity) row
from databaseinterface import DatabaseInterface

dbi = DatabaseInterface.get_shared_instance()

#'''
values = [(6, "Deshpande"), (7, "Nikhil Jain"), (8, "Krishnan"),
          (9, "Ishan"), (10, "Banerjee")]
for val in values:
    dbi.execute("INSERT INTO test (id,name) VALUES (%s,%s)", val)
#'''

selected = dbi.execute("SELECT * FROM test", None)
#print selected.fetchall()
cursor = dbi.execute("INSERT INTO test (id,name) VALUES (%s,%s)", (69, "Nikhil Iyer"))
print cursor.lastrowid
exit(0)

from socialscoreupdater import SocialScoreUpdater as SSU

ssu = SSU()
ssu._load_feed_statistics()
print ssu._feed_statistics