def _load_feed_statistics(self):
		self._feed_statistics = dict()
		#return self._mock_load_feed_statistics()
		dbi = DatabaseInterface.get_shared_instance()
		dbcursor = dbi.execute("SELECT feed_id, average_peak_score, std_deviation FROM news_social_score_feed_statistics",None)
		for row in dbcursor.fetchall():
			self._feed_statistics[row["feed_id"]] = (row["average_peak_score"],row["std_deviation"] )
Example #2
	def compute_similarities(self, candidate_ids ):
		""" Currently uses cosine similarity ( or i think it does )"""
		# This function determines the definition of similarity. 
		#If the length of the vectors ever get out of hand, edit this.
		reader_id = self.usr.usr_id
		info = dict()
		for candidate_id in candidate_ids:
			info[ int(candidate_id) ] = {"sims": 0, "total":1, "similarity":0 }
		
		
		
		dbi = DatabaseInterface.get_shared_instance()
		# Do two queries. One to find the intersection... 
		in_placeholder = ",".join( ["%s"]* len(candidate_ids) )
		cursor = dbi.execute(
			"SELECT analytics_usr_shares.usr as candidate, COUNT(*) as share_count FROM "
			" analytics_usr_markread JOIN analytics_usr_shares ON "
				" analytics_usr_markread.content = analytics_usr_shares.content "
			" WHERE analytics_usr_markread.usr=%s AND analytics_usr_shares.usr IN "
				"( " + in_placeholder + " ) "
			" GROUP BY candidate",
			tuple( [reader_id] + list(candidate_ids) )
		)
		
		
		for row in cursor.fetchall():
			if row["share_count"]!=0:	
				info[ int(row["candidate"]) ]["sims"] = row["share_count"]
		
		# And , one to find the total size
		cursor = dbi.execute(
			"SELECT usr as candidate, COUNT(*) as total_share_count FROM "
			" analytics_usr_shares "
			" WHERE usr IN "
				"( " + in_placeholder + " ) "
			" GROUP BY usr ",
			tuple( candidate_ids )
		)
		for row in cursor.fetchall():
			info[ int(row["candidate"]) ]["total"] = row["total_share_count"]
		
		cursor = dbi.execute(
			"SELECT COUNT(*) as total_markread_count FROM "
			" analytics_usr_markread "
			" WHERE usr=%s ",
			(reader_id,)
		)
		usr_total = float(cursor.fetchall()[0]["total_markread_count"])
		
		# Now let's compute and return.
		for candidate_id in candidate_ids:
			c_id = int(candidate_id)
			
			num = float( info[c_id]["sims"] )
			denom = math.sqrt( info[c_id]["total"] * usr_total )	#Cosine
			# denom = info[c_id]["total"] + usr_total - num # Some other distance
			
			info[c_id]["similarity"] = num/max(1,denom)	
			
		return info
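
A worked instance of the cosine formula above (counts are made up; only the arithmetic mirrors the query results):

import math

# Toy counts standing in for the two queries above.
sims = 12              # candidate shares that the reader also marked read
candidate_total = 40   # the candidate's total share count
usr_total = 90.0       # the reader's total mark-read count

# Treat both users as binary vectors over stories: the dot product is the
# overlap count, and each norm is the square root of total activity.
similarity = sims / max(1, math.sqrt(candidate_total * usr_total))
print(similarity)  # 0.2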
Example #3
 def _load_feed_statistics(self):
     self._feed_statistics = dict()
     #return self._mock_load_feed_statistics()
     dbi = DatabaseInterface.get_shared_instance()
     dbcursor = dbi.execute(
         "SELECT feed_id, average_peak_score, std_deviation FROM news_social_score_feed_statistics",
         None)
     avg_sum = float(0)
     std_dev_sum = float(0)
     i = 0
     for row in dbcursor.fetchall():
         self._feed_statistics[row["feed_id"]] = (row["average_peak_score"],
                                                  row["std_deviation"])
         avg_sum += row["average_peak_score"]
         std_dev_sum += (float(row["std_deviation"]) /
                         max(1, row["average_peak_score"]))
         i += 1
     if i == 0:
         i = 1
     avg_avg = avg_sum / i
     avg_std_dev = (std_dev_sum / i) * avg_avg
     if avg_std_dev == 0:
         avg_std_dev = 1
     self._feed_statistics[0] = (
         avg_avg, avg_std_dev
     )  #Key 0 is a crude global fallback for feeds with no stats of their own
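
A numeric sketch of that key-0 fallback (feed values are invented): each feed's std deviation is averaged as a fraction of its mean, then rescaled by the global mean.

# Two hypothetical feeds as (average_peak_score, std_deviation) pairs.
feeds = [(100.0, 20.0), (300.0, 30.0)]

avg_avg = sum(a for a, _ in feeds) / len(feeds)            # (100 + 300) / 2 = 200.0
ratio = sum(s / max(1, a) for a, s in feeds) / len(feeds)  # (0.2 + 0.1) / 2 = 0.15
avg_std_dev = ratio * avg_avg                              # 0.15 * 200 = 30.0
print(avg_avg, avg_std_dev)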
	def _prepare_set(self):
		dbi = DatabaseInterface.get_shared_instance()
		dbi.execute("TRUNCATE news_social_score_update",None)
		if self.update_all:
			# The original left the column list and LIMIT blank; the chunk size is assumed here.
			dbi.execute("INSERT INTO news_social_score_update ( SELECT * FROM news_social_score_all LIMIT %s )",(SocialScoreUpdater.story_chunk_size,))
		else:
			dbi.execute("INSERT INTO news_social_score_update ( SELECT * FROM news_social_score_active LIMIT %s )",(SocialScoreUpdater.story_chunk_size,))
Example #5
	def _load_from_db(self):
		database_interface = DatabaseInterface.get_shared_instance()
		dbi = database_interface.dbi
		
		dbi.commit()
		
		dbi.autocommit(True)
	def prepare_update(self):
		dbi = DatabaseInterface.get_shared_instance()
		dbi.execute('TRUNCATE news_social_score_update',None)
		dbi.autocommit(False)
		if self.update_all:
			''' Tested. Seems to work. '''
			rowcount = 1
			while rowcount>0:
				cursor = dbi.execute("\
					INSERT INTO news_social_score_update (story_id,last_update,old_raw_score,total_shares,created,state) \
					( SELECT story_id,last_update,raw_score,total_shares,created,'READY' FROM news_social_score_all \
						WHERE story_id>( SELECT IFNULL(MAX(story_id), 0) FROM news_social_score_update ) LIMIT %s)",(SocialScoreUpdater.story_chunk_size,))
				rowcount = cursor.rowcount
				#print rowcount
				dbi.commit()
			#dbi.execute("INSERT INTO news_social_score_update (story_id,last_update,old_raw_score,total_shares,state) ( SELECT story_id,last_update,raw_score,total_shares,'READY' FROM news_social_score_all) ",None)
			
		else:
			''' Tested. Seems to work. '''
			''' Would the below query affect select queries on the active set? I don't need a write lock so it shouldn't '''
			''' The active set is assumed to be small enough for the mysql server to handle by itself. '''
			dbi.execute("INSERT INTO news_social_score_update (story_id,old_raw_score,state) ( SELECT story_id,raw_score,'READY' FROM news_social_score_active) ",None)
			dbi.execute("\
				UPDATE news_social_score_update JOIN news_social_score_all USING(story_id) SET\
				news_social_score_update.total_shares = news_social_score_all.total_shares,\
				news_social_score_update.last_update=news_social_score_all.last_update, \
				news_social_score_update.created=news_social_score_all.created\
				",None
			)
			dbi.commit()
		
		dbi.autocommit(True)
	@staticmethod
	def add_new_story(story_id):
		''' Adds a story to both the active and global set '''
		dbi = DatabaseInterface.get_shared_instance()
		dbi.autocommit(False)
		dbi.execute("INSERT INTO news_social_score_all ( story_id, created ) (SELECT story_id,created FROM news_stories WHERE story_id=%s)",(story_id,))
		dbi.execute("INSERT INTO news_social_score_active ( normalized_score, story_id, raw_score ) (SELECT 0,story_id,0 FROM news_stories WHERE story_id=%s)", (story_id,))
		dbi.commit()
		dbi.autocommit(True)
	def rotate_active_set(self):
		#Swap the tables by renaming
		dbi = DatabaseInterface.get_shared_instance()
		dbi.autocommit(False)
		dbi.execute("DROP TABLE IF EXISTS news_social_score_active_old",None)	#Let's keep this just incase
		dbi.execute("RENAME TABLE news_social_score_active TO news_social_score_active_old, news_social_score_active_new TO news_social_score_active",None)
		dbi.commit()
		dbi.autocommit(True)
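
RENAME TABLE swaps both names in a single atomic statement, so readers always see either the old or the new active set, never a half-built one. A sketch of a plausible call order, inferred from the state transitions ('READY' -> 'SCORE_COMPUTED' -> 'CONSIDERED_IN_SET'); the no-argument constructor is an assumption:

updater = SocialScoreUpdater()
updater.prepare_update()           # snapshot the stories to rescore
updater.update_scores()            # fetch share counts, recompute scores
updater.build_new_active_set()     # fill news_social_score_active_new
updater.rotate_active_set()        # atomic RENAME TABLE swap shown above
updater.update_permanent_scores()  # fold the new scores into the global set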
	def _load_from_cache(self):	
		''' Very simple whole-set caching: load this user's cached interests. '''
		dbi = DatabaseInterface.get_shared_instance()
		cursor = dbi.execute( "SELECT category_id, read_count, unnormalized_interest, interest, cached_time FROM cache_news_user_interests WHERE user_id=%s",(self.user_id,) )
		rows = cursor.fetchall()
		for row in rows:
			self.categories[ row['category_id'] ] = UserCategoryInterests.CategoryInterest(row['category_id'], row['read_count'], row['unnormalized_interest'], row['interest'])
		
		return True
Example #10
	def _store_in_db(self):
		database_interface = DatabaseInterface.get_shared_instance()
		dbi = database_interface.dbi
		dbi.autocommit(False)
		
		for url in self.og_shares:
			dbi.execute("INSERT INTO news_socialscore_opengraphgshares",)
		dbi.commit()
		
		dbi.autocommit(True)
Example #11
 def _load_feed_statistics(self):
     self._feed_statistics = dict()
     #return self._mock_load_feed_statistics()
     dbi = DatabaseInterface.get_shared_instance()
     dbcursor = dbi.execute(
         "SELECT feed_id, average_peak_score, std_deviation FROM news_social_score_feed_statistics",
         None)
     for row in dbcursor.fetchall():
         self._feed_statistics[row["feed_id"]] = (row["average_peak_score"],
                                                  row["std_deviation"])
Example #12
 def load_feed_ids(self):
     dbi = DatabaseInterface.get_shared_instance()
     cursor = dbi.execute(
         "SELECT feed_id,facebook_page_id FROM news_feeds WHERE facebook_page_id IS NOT NULL",
         None)
     rows = cursor.fetchall()
     self.feed_ids = dict()
     for row in rows:
         if row['facebook_page_id'] is None:
             continue
         self.feed_ids[row['facebook_page_id']] = row['feed_id']
Example #13
 def _prepare_set(self):
     dbi = DatabaseInterface.get_shared_instance()
     dbi.execute("TRUNCATE news_social_score_update", None)
     if self.update_all:
         # The original left the column list and LIMIT blank; the chunk size is assumed here.
         dbi.execute(
             "INSERT INTO news_social_score_update ( SELECT * FROM news_social_score_all LIMIT %s )",
             (SocialScoreUpdater.story_chunk_size, ))
     else:
         dbi.execute(
             "INSERT INTO news_social_score_update ( SELECT * FROM news_social_score_active LIMIT %s )",
             (SocialScoreUpdater.story_chunk_size, ))
Example #14
 def rotate_active_set(self):
     #Swap the tables by renaming
     dbi = DatabaseInterface.get_shared_instance()
     dbi.autocommit(False)
     dbi.execute("DROP TABLE IF EXISTS news_social_score_active_old",
                 None)  #Keep the old set around just in case
     dbi.execute(
         "RENAME TABLE news_social_score_active TO news_social_score_active_old, news_social_score_active_new TO news_social_score_active",
         None)
     dbi.commit()
     dbi.autocommit(True)
	def load_category_interests(self):
		''' Loads per-category read counts and computes interests. Open question: restrict this to the active set? '''
		self.categories = dict()
		dbi = DatabaseInterface.get_shared_instance()
		cursor = dbi.execute("SELECT category_id,read_count FROM news_user_interests_read_count ",None)
		rows = cursor.fetchall()
		print "ROWS",rows
		self._process_db_rows(rows)
		self._compute_interests()
		
		return self.categories
Example #16
	def update_scores(self):	#Do it in chunks of story_chunk_size
		''' Computes and updates new_raw_score, new_normalized_score, last_update, total_shares and state in news_social_score_update, <story_chunk_size> stories at a time '''
		dbi = DatabaseInterface.get_shared_instance()
		dbi.autocommit(False)
		
		self._load_feed_statistics()
		#print self._feed_statistics
		
		row_count = 1
		while row_count>0 :	
			cursor = dbi.execute("\
				SELECT story_id, feed_id, news_social_score_update.created, url, old_raw_score, total_shares, last_update FROM\
				news_social_score_update JOIN news_stories USING(story_id)\
				WHERE news_social_score_update.state='READY' LIMIT %s",
				(SocialScoreUpdater.story_chunk_size,)
			) #OMG A JOIN!
			if cursor.rowcount==0:
				break
			row_count = cursor.rowcount
	
			stories = list()
			urls = list()
			for row in cursor.fetchall():
				stories.append( SocialScoreUpdater.SocialScoreStory(row['story_id'],row['url'],row['feed_id'],row['old_raw_score'],row['total_shares'],row['last_update']) ) #story_id,url,feed_id,previous_score=None,last_updated=None,
				urls.append( row['url'] )
			
			#Get the score from Facebook's opengraph
			ogscraper = OpenGraphShareScraper()
			ogscraper.set_urls( urls )
			ogscraper.update_og_shares()
			og_shares = ogscraper.get_result()
			
			#Compute the new scores for each and update in DB ( But commit all at once )
			time_now = int(time.time())
			for story in stories:
				if story.url not in og_shares:
					new_total_shares = 0			#Nothing we can do; Facebook has no record of it
				else:
					new_total_shares = og_shares[story.url]
				shares_since = new_total_shares - story.total_shares
				story.total_shares = new_total_shares
				story_id = story.story_id 
				story.updated_raw_score = self._compute_updated_raw_score(story,shares_since,time_now)
				story.normalized_score = self._normalize_across_feeds(story.updated_raw_score,story.feed_id)
				
				query_params = ( story.updated_raw_score, story.total_shares,time_now, story.normalized_score, story.story_id )
				
				dbi.execute("UPDATE news_social_score_update SET new_raw_score=%s , total_shares=%s, last_update=%s, new_normalized_score=%s,state='SCORE_COMPUTED' WHERE story_id=%s",query_params )
				
			dbi.commit()
		
		dbi.autocommit(True)
		''' Tested and appears to work correctly; efficiency has not been measured. '''
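
The LIMIT-%s-then-commit loop used throughout keeps each transaction small on big tables. A stripped-down sketch of the pattern (the table and column names here are hypothetical):

def process_in_chunks(dbi, chunk_size):
	# Claim up to chunk_size rows per pass, commit each batch,
	# and stop once the UPDATE matches nothing.
	processed = 1
	while processed > 0:
		cursor = dbi.execute(
			"UPDATE work_queue SET state='DONE' WHERE state='READY' LIMIT %s",
			(chunk_size,))
		processed = cursor.rowcount
		dbi.commit()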
	def _check_cache(self):	
		''' Checks if the cache is stale '''
		dbi = DatabaseInterface.get_shared_instance()
		cursor = dbi.execute( "SELECT MIN(cached_time) as oldest_cached FROM cache_news_user_interests WHERE user_id=%s",(self.user_id,) )
		rows = cursor.fetchall()
		oldest_acceptable = int(time.time()) - UserCategoryInterests.acceptable_cache_age
		for row in rows:
			if row['oldest_cached'] < oldest_acceptable:
				dbi.execute("DELETE FROM cache_news_user_interests WHERE user_id=%s AND cached_time < %s", (self.user_id,oldest_acceptable) )
				return False
		
		return True
Example #18
 @staticmethod
 def add_new_story(story_id):
     ''' Adds a story to both the active and global set '''
     dbi = DatabaseInterface.get_shared_instance()
     dbi.autocommit(False)
     dbi.execute(
         "INSERT INTO news_social_score_all ( story_id, created ) (SELECT story_id,created FROM news_stories WHERE story_id=%s)",
         (story_id, ))
     dbi.execute(
         "INSERT INTO news_social_score_active ( normalized_score, story_id, raw_score ) (SELECT 0,story_id,0 FROM news_stories WHERE story_id=%s)",
         (story_id, ))
     dbi.commit()
     dbi.autocommit(True)
	def _cache_interests(self):
		dbi = DatabaseInterface.get_shared_instance()
		dbi.autocommit(False)
		
		time_now = int(time.time())
		for category_id in self.categories:
			category = self.categories[category_id]
			query_params = (self.user_id,category.category_id, category.read_count, category.unnormalized_interest, category.interest, time_now)
			dbi.execute("REPLACE INTO cache_news_user_interests (user_id, category_id, read_count, unnormalized_interest, interest, cached_time) VALUES(%s,%s,%s,%s,%s,%s)", query_params )
		dbi.commit()
		
		dbi.autocommit(True)
		
Example #20
	def load_candidate_set(self):
		dbi = DatabaseInterface.get_shared_instance()
		cursor = dbi.execute(
			"SELECT * FROM algos_followsuggestions_candidateset WHERE session_usr=%s"
			, (self.usr.usr_id,)
		)
		self.candidate_set = [ 
			FollowCandidate(
				r["candidate_usr"], r["similarity"]
				, r["session_similarity"], r["follower_count"]
			) for r in cursor.fetchall()
		]
		return self.candidate_set
Example #21
	def update_permanent_scores(self):
		dbi = DatabaseInterface.get_shared_instance()
		rowcount=1
		while rowcount > 0:
			cursor = dbi.execute("\
				UPDATE  ( SELECT story_id FROM news_social_score_update WHERE state='CONSIDERED_IN_SET' LIMIT %s)t_dummy JOIN news_social_score_update t_ud JOIN news_social_score_all t_all\
				ON t_dummy.story_id = t_ud.story_id  AND t_dummy.story_id=t_all.story_id SET\
				t_all.reflected_in_stats = IF( (t_ud.new_raw_score  > t_all.peak_score),0,t_all.reflected_in_stats) , \
				t_ud.state='UPDATED_STORIES', t_all.total_shares=t_ud.total_shares, t_all.raw_score = t_ud.new_raw_score,\
				t_all.peak_score=GREATEST(t_all.peak_score, t_ud.new_raw_score), t_all.last_update = t_ud.last_update \
				", (SocialScoreUpdater.story_chunk_size,) )
			rowcount = cursor.rowcount
			dbi.commit()
	def add_read_count(self,read_count,category_ids):	#Seems to work
		dbi = DatabaseInterface.get_shared_instance()
		dbi.autocommit(False)
		
		for category_id in category_ids:
			dbi.execute("\
				INSERT INTO news_user_interests_read_count (user_id,category_id,read_count) VALUES( %s,%s, %s )\
				ON DUPLICATE KEY UPDATE read_count= read_count+%s\
				", ( self.user_id, category_id, read_count, read_count )
			)
		dbi.commit()
		
		dbi.autocommit(True)
Example #23
    def add_read_count(self, read_count, category_ids):  #Seems to work
        dbi = DatabaseInterface.get_shared_instance()
        dbi.autocommit(False)

        for category_id in category_ids:
            dbi.execute(
                "\
				INSERT INTO news_user_interests_read_count (user_id,category_id,read_count) VALUES( %s,%s, %s )\
				ON DUPLICATE KEY UPDATE read_count= read_count+%s\
				", (self.user_id, category_id, read_count, read_count))
        dbi.commit()

        dbi.autocommit(True)
Example #24
    def load_category_interests(self):
        ''' Loads per-category read counts and computes interests. Open question: restrict this to the active set? '''
        self.categories = dict()
        dbi = DatabaseInterface.get_shared_instance()
        cursor = dbi.execute(
            "SELECT category_id,read_count FROM news_user_interests_read_count ",
            None)
        rows = cursor.fetchall()
        print "ROWS", rows
        self._process_db_rows(rows)
        self._compute_interests()

        return self.categories
Example #25
    def _dynamically_add_feed_id_to_statistics(self, feed_id):
        dbi = DatabaseInterface.get_shared_instance()
        if self._feed_statistics is None:
            self._load_feed_statistics()
        global_average_peak_score = self._feed_statistics[0][0]
        global_average_std_deviation = self._feed_statistics[0][1]
        dbi.execute(
            "INSERT INTO news_social_score_feed_statistics (feed_id, average_peak_score, std_deviation, feed_n) VALUES( %s,%s,%s,%s)",
            (feed_id, global_average_peak_score, global_average_std_deviation,
             0))
        dbi.commit()

        self._feed_statistics[feed_id] = (global_average_peak_score,
                                          global_average_std_deviation)
Example #26
    def update_permanent_scores(self):
        dbi = DatabaseInterface.get_shared_instance()
        rowcount = 1
        while rowcount > 0:
            cursor = dbi.execute(
                "\
				UPDATE  ( SELECT story_id FROM news_social_score_update WHERE state='CONSIDERED_IN_SET' LIMIT %s)t_dummy JOIN news_social_score_update t_ud JOIN news_social_score_all t_all\
				ON t_dummy.story_id = t_ud.story_id  AND t_dummy.story_id=t_all.story_id SET\
				t_all.reflected_in_stats = IF( (t_ud.new_raw_score  > t_all.peak_score),0,t_all.reflected_in_stats) , \
				t_ud.state='UPDATED_STORIES', t_all.total_shares=t_ud.total_shares, t_all.raw_score = t_ud.new_raw_score,\
				t_all.peak_score=GREATEST(t_all.peak_score, t_ud.new_raw_score), t_all.last_update = t_ud.last_update \
				", (SocialScoreUpdater.story_chunk_size, ))
            rowcount = cursor.rowcount
            dbi.commit()
Example #27
	def _dynamically_add_feed_id_to_statistics(self,feed_id):
		dbi = DatabaseInterface.get_shared_instance()
		if self._feed_statistics is None:
			self._load_feed_statistics()
		global_average_peak_score = self._feed_statistics[0][0]
		global_average_std_deviation = self._feed_statistics[0][1]
		dbi.execute(
			"INSERT INTO news_social_score_feed_statistics (feed_id, average_peak_score, std_deviation, feed_n) VALUES( %s,%s,%s,%s)", 
			(feed_id, global_average_peak_score, global_average_std_deviation, 0) 
		)
		dbi.commit()
		
		self._feed_statistics[feed_id] = (global_average_peak_score,global_average_std_deviation)
		
Example #28
	def build_new_active_set(self):
		dbi = DatabaseInterface.get_shared_instance()
		#No read
		dbi.execute("DROP TABLE IF EXISTS news_social_score_active_new",None)	#Creates a new table
		dbi.execute("CREATE TABLE news_social_score_active_new SELECT * FROM news_social_score_active LIMIT 0",None)	#Creates a new table
		
		dbi.autocommit(False)
		
		''' Add the highest scored articles into the new active set '''
		rowcount = 1
		while  rowcount > 0:
			cursor = dbi.execute("UPDATE news_social_score_update SET state='ABOVE_TRESHOLD'  WHERE new_normalized_score > %s AND state='SCORE_COMPUTED' ORDER BY new_normalized_score DESC LIMIT %s", (SocialScoreUpdater.treshold_normalized_score, SocialScoreUpdater.story_chunk_size) )
			rowcount = cursor.rowcount
			if rowcount==0:
				break
			dbi.execute("\
				INSERT INTO news_social_score_active_new (normalized_score, story_id, raw_score ) \
				( SELECT new_normalized_score,story_id,new_raw_score FROM news_social_score_update WHERE state='ABOVE_TRESHOLD' ORDER BY new_normalized_score DESC)\
				", None)
			dbi.execute("UPDATE news_social_score_update SET state='CONSIDERED_IN_SET' WHERE state='ABOVE_TRESHOLD'",None)
			dbi.commit()
		
		#Get those that lie below the threshold but are young enough to stay
		treshold_created_time = int(time.time()) - SocialScoreUpdater.treshold_age
		rowcount = 1
		
		while rowcount > 0:
			cursor = dbi.execute("UPDATE news_social_score_update SET state='ABOVE_TRESHOLD' WHERE created > %s AND state='SCORE_COMPUTED' LIMIT %s", (treshold_created_time,SocialScoreUpdater.story_chunk_size) )
			rowcount = cursor.rowcount
			if rowcount==0:
				break
			
			dbi.execute("\
				INSERT INTO news_social_score_active_new (normalized_score, story_id, raw_score ) \
				( SELECT new_normalized_score,story_id,new_raw_score FROM news_social_score_update WHERE state='ABOVE_TRESHOLD' ORDER BY new_normalized_score DESC)\
				", None)
			dbi.execute("UPDATE news_social_score_update SET state='CONSIDERED_IN_SET' WHERE state='ABOVE_TRESHOLD'",None)
			dbi.commit()
		
		#Update the rest
		rowcount = 1
		while rowcount > 0:
			cursor = dbi.execute("UPDATE news_social_score_update SET state='CONSIDERED_IN_SET' WHERE state='SCORE_COMPUTED' LIMIT %s", (SocialScoreUpdater.story_chunk_size,) )
			rowcount = cursor.rowcount
			dbi.commit()
		
		dbi.commit()
		dbi.autocommit(True)
	def load_category_interests(self):
		''' Loads per-category read counts and computes interests. Open question: restrict this to the active set? '''
		self.categories = dict()
		dbi = DatabaseInterface.get_shared_instance()
		if UserCategoryInterests.use_caching and self._check_cache():
			self._load_from_cache()
		else:
			cursor = dbi.execute("SELECT category_id,read_count FROM news_user_interests_read_count ",None)
			rows = cursor.fetchall()
			
			self._process_db_rows(rows)
			self._compute_interests()
		
		if UserCategoryInterests.use_caching:
			self._cache_interests()
		
		return self.categories
Example #30
	def expand_candidate_set(self, candidate_ids):
		# Ignore those that are already in the candidate set because what we're doing is expensive
		dbi = DatabaseInterface.get_shared_instance()
		in_placeholder = ",".join( ["%s"]* len(candidate_ids) )
		cursor = dbi.execute(
			"SELECT candidate_usr FROM algos_followsuggestions_candidateset "
			"WHERE session_usr=%s AND candidate_usr IN ( " + in_placeholder + ") "
			, tuple( [self.usr.usr_id] + candidate_ids )
		)
		existing_ids = set(r["candidate_usr"] for r in cursor.fetchall())
		candidate_id_set = set(candidate_ids)
		add_ids = candidate_id_set.difference( existing_ids )
		
		# Load follower count:
		if len(add_ids)==0:
			return
		
		in_placeholder = ",".join( ["%s"] * len(add_ids) )
		cursor = dbi.execute(
			"SELECT following, COUNT(*) AS follower_count FROM analytics_follow "	# Blame django. I didn't normalize it.
			" WHERE following IN "
			"(" + in_placeholder + ") "
			" GROUP BY following"
			, tuple(add_ids)
		)
		follower_count = {}
		for r in cursor.fetchall():
			follower_count[ int(r["following"]) ] = r["follower_count"]
		similarities = self.compute_similarities( add_ids )
		follow_candidates = []
		for uid in add_ids:
			uid = int(uid)
			follow_candidates.append( 
				FollowCandidate( 
					uid,
					similarities.get(uid, 0), 
					1, 
					follower_count.get(uid, 0) 
				)
			)
		
		self.update_candidate_set(follow_candidates)
Example #31
	def _load_feed_statistics(self):
		self._feed_statistics = dict()
		#return self._mock_load_feed_statistics()
		dbi = DatabaseInterface.get_shared_instance()
		dbcursor = dbi.execute("SELECT feed_id, average_peak_score, std_deviation FROM news_social_score_feed_statistics",None)
		avg_sum = float(0)
		std_dev_sum = float(0)
		i = 0
		for row in dbcursor.fetchall():
			self._feed_statistics[row["feed_id"]] = (row["average_peak_score"],row["std_deviation"] )
			avg_sum += row["average_peak_score"]
			std_dev_sum += (float(row["std_deviation"])/max(1,row["average_peak_score"]))
			i+=1
		if i==0:
			i=1
		avg_avg = avg_sum / i
		avg_std_dev = (std_dev_sum/i) * avg_avg
		if avg_std_dev == 0:
			avg_std_dev = 1
		self._feed_statistics[0] = ( avg_avg, avg_std_dev ) #Key 0 is a crude global fallback for feeds with no stats of their own
Example #32
	def update_candidate_set(self, candidates):
		dbi = DatabaseInterface.get_shared_instance()
		query_str = (
			"INSERT INTO algos_followsuggestions_candidateset (session_usr,candidate_usr, similarity, follower_count) "
			" VALUES (%s, %s, %s, %s )"
			" ON DUPLICATE KEY UPDATE session_similarity=session_similarity+1"
		)
		
		for candidate in candidates:
			dbi.execute(
				query_str,
				(self.usr.usr_id, candidate.usr_id, candidate.similarity["similarity"], candidate.follower_count )
			)
Example #33
    def prepare_update(self):
        dbi = DatabaseInterface.get_shared_instance()
        dbi.execute('TRUNCATE news_social_score_update', None)
        dbi.autocommit(False)
        if self.update_all:
            ''' Tested. Seems to work. '''
            rowcount = 1
            while rowcount > 0:
                cursor = dbi.execute(
                    "\
					INSERT INTO news_social_score_update (story_id,last_update,old_raw_score,total_shares,created,state) \
					( SELECT story_id,last_update,raw_score,total_shares,created,'READY' FROM news_social_score_all \
						WHERE story_id>( SELECT IFNULL(MAX(story_id), 0) FROM news_social_score_update ) LIMIT %s)",
                    (SocialScoreUpdater.story_chunk_size, ))
                rowcount = cursor.rowcount
                #print rowcount
                dbi.commit()
            #dbi.execute("INSERT INTO news_social_score_update (story_id,last_update,old_raw_score,total_shares,state) ( SELECT story_id,last_update,raw_score,total_shares,'READY' FROM news_social_score_all) ",None)

        else:
            ''' Tested. Seems to work. '''
            ''' Would the below query affect select queries on the active set? I don't need a write lock so it shouldn't '''
            ''' The active set is assumed to be small enough for the mysql server to handle by itself. '''
            dbi.execute(
                "INSERT INTO news_social_score_update (story_id,old_raw_score,state) ( SELECT story_id,raw_score,'READY' FROM news_social_score_active) ",
                None)
            dbi.execute(
                "\
				UPDATE news_social_score_update JOIN news_social_score_all USING(story_id) SET\
				news_social_score_update.total_shares = news_social_score_all.total_shares,\
				news_social_score_update.last_update=news_social_score_all.last_update, \
				news_social_score_update.created=news_social_score_all.created\
				", None)
            dbi.commit()

        dbi.autocommit(True)
Example #34
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, row["H"])
        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        resolutions[str(width)+"x"+str(height)] = "OK"

    print(resolutions)


if __name__ == '__main__':
    """
    Create root user with admin rights from provided root.jpg
    """
    from databaseinterface import DatabaseInterface
    from databasequeries import DatabaseQueries as Dbq
    dbi = DatabaseInterface(config.get('DATABASE', 'database'),
                            config.get('DATABASE', 'username'),
                            config.get('DATABASE', 'password'),
                            config.get('DATABASE', 'ip_address'))
    username = config.get('BAARIMIKKO', 'root_name')
    img_path = config.get('BAARIMIKKO', 'root_photo')
    print(f'Create {username} user from faces/{img_path}')
    ask = input('Y/N?')
    if ask in ('Y', 'y', 'yes', 'Yes', 'YES'):
        try:
            pickled = learn(username, img_path)
            args = (username, pickled, img_path, True)
            dbi.execute_query(Dbq.CREATE_USER, args)
        except Exception as e:
            print(f"Error {e} while creating {username} user. Did you provide faces/{img_path} and is database set correctly?")
    else:
        print('Cancelled by user')
        exit(0)
Example #35
    def update_statistics(self):
        dbi = DatabaseInterface.get_shared_instance()
        #Let's use some python here.
        cursor = dbi.execute(
            "SELECT feed_id,peak_score FROM news_social_score_all JOIN news_stories USING(story_id) WHERE reflected_in_stats='0'",
            None)
        rows = cursor.fetchall()
        peak_scores = dict()
        for row in rows:
            if row['feed_id'] not in peak_scores:
                peak_scores[row['feed_id']] = [row['peak_score']]
            else:
                peak_scores[row['feed_id']].append(row['peak_score'])

        #Let's get stats for each feed
        cursor = dbi.execute(
            "SELECT feed_id,feed_n,average_peak_score,std_deviation FROM news_social_score_feed_statistics",
            None)
        rows = cursor.fetchall()
        stats = dict()
        for row in rows:
            stats[row['feed_id']] = row

        #Now we compute stuff one by one
        dbi.autocommit(False)

        for feed_id in peak_scores:
            score_sum = 0  #renamed from `sum` to avoid shadowing the builtin
            squares = 0  #Sum of squares
            for pscore in peak_scores[feed_id]:
                score_sum += pscore
                squares += pscore * pscore
            ''' Var(X) = E[X^2] - (E[X])^2 '''

            added_n = len(peak_scores[feed_id])
            added_sum = score_sum
            added_squares = squares  # raw sum of squares; the mean is subtracted below

            stat = stats[feed_id]
            existing_n = stat['feed_n']
            existing_avg = stat['average_peak_score']
            existing_sum = existing_n * existing_avg

            existing_variance = (stat['std_deviation'] * stat['std_deviation']
                                 )  #std_deviation squared gives the variance
            existing_squares = (existing_variance +
                                (existing_avg * existing_avg)) * existing_n

            new_avg = float(added_sum + existing_sum) / max(
                1, (added_n + existing_n))
            new_squares = added_squares + existing_squares

            new_variance = float(new_squares) / max(1, existing_n + added_n) - (
                new_avg * new_avg)
            new_std_dev = math.sqrt(max(0.0, new_variance))  #clamp tiny float drift below zero

            new_n = existing_n + added_n
            #Now we want to be able to adapt to the changing popularity of feeds. We'll reduce the weights of ancient stats so that they don't keep us down.
            if new_n > SocialScoreUpdater.stat_update_upper:
                new_n = SocialScoreUpdater.stat_update_lower
            update_params = (new_avg, new_std_dev, new_n, feed_id)
            dbi.execute(
                'UPDATE news_social_score_feed_statistics SET average_peak_score=%s, std_deviation=%s,feed_n=%s WHERE feed_id=%s',
                update_params)
            ''' Capping feed_n down-weights old statistics so the feed can
            adapt quickly to changes in popularity: recent scores get extra
            weight instead of a true all-time average. '''

        dbi.execute(
            "UPDATE news_social_score_all SET reflected_in_stats='1' WHERE reflected_in_stats='0'",
            None)
        dbi.commit()
        dbi.autocommit(True)
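
The merge above rests on the identity Var(X) = E[X^2] - (E[X])^2, which lets two samples be combined from just their counts, means and standard deviations. A standalone sketch of the same math:

import math

def merge_stats(n1, mean1, std1, n2, mean2, std2):
    # Recover each sample's sum and sum of squares, then recombine.
    sum1, sum2 = n1 * mean1, n2 * mean2
    sq1 = (std1 * std1 + mean1 * mean1) * n1
    sq2 = (std2 * std2 + mean2 * mean2) * n2
    mean = (sum1 + sum2) / float(max(1, n1 + n2))
    variance = (sq1 + sq2) / float(max(1, n1 + n2)) - mean * mean
    return mean, math.sqrt(max(0.0, variance))  # clamp tiny float drift

# {2, 4} has mean 3 and std 1; merging in {6} gives mean 4, std sqrt(8/3).
print(merge_stats(2, 3.0, 1.0, 1, 6.0, 0.0))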
Example #36
    def build_new_active_set(self):
        dbi = DatabaseInterface.get_shared_instance()
        #No read
        dbi.execute("DROP TABLE IF EXISTS news_social_score_active_new",
                    None)  #Creates a new table
        dbi.execute(
            "CREATE TABLE news_social_score_active_new SELECT * FROM news_social_score_active LIMIT 0",
            None)  #Creates a new table

        dbi.autocommit(False)
        ''' Add the highest scored articles into the new active set '''
        rowcount = 1
        while rowcount > 0:
            cursor = dbi.execute(
                "UPDATE news_social_score_update SET state='ABOVE_TRESHOLD'  WHERE new_normalized_score > %s AND state='SCORE_COMPUTED' ORDER BY new_normalized_score DESC LIMIT %s",
                (SocialScoreUpdater.treshold_normalized_score,
                 SocialScoreUpdater.story_chunk_size))
            rowcount = cursor.rowcount
            if rowcount == 0:
                break
            dbi.execute(
                "\
				INSERT INTO news_social_score_active_new (normalized_score, story_id, raw_score ) \
				( SELECT new_normalized_score,story_id,new_raw_score FROM news_social_score_update WHERE state='ABOVE_TRESHOLD' ORDER BY new_normalized_score DESC)\
				", None)
            dbi.execute(
                "UPDATE news_social_score_update SET state='CONSIDERED_IN_SET' WHERE state='ABOVE_TRESHOLD'",
                None)
            dbi.commit()

        #Get those that lie below the threshold but are young enough to stay
        treshold_created_time = int(
            time.time()) - SocialScoreUpdater.treshold_age
        rowcount = 1

        while rowcount > 0:
            cursor = dbi.execute(
                "UPDATE news_social_score_update SET state='ABOVE_TRESHOLD' WHERE created > %s AND state='SCORE_COMPUTED' LIMIT %s",
                (treshold_created_time, SocialScoreUpdater.story_chunk_size))
            rowcount = cursor.rowcount
            if rowcount == 0:
                break

            dbi.execute(
                "\
				INSERT INTO news_social_score_active_new (normalized_score, story_id, raw_score ) \
				( SELECT new_normalized_score,story_id,new_raw_score FROM news_social_score_update WHERE state='ABOVE_TRESHOLD' ORDER BY new_normalized_score DESC)\
				", None)
            dbi.execute(
                "UPDATE news_social_score_update SET state='CONSIDERED_IN_SET' WHERE state='ABOVE_TRESHOLD'",
                None)
            dbi.commit()

        #Update the rest
        rowcount = 1
        while rowcount > 0:
            cursor = dbi.execute(
                "UPDATE news_social_score_update SET state='CONSIDERED_IN_SET' WHERE state='SCORE_COMPUTED' LIMIT %s",
                (SocialScoreUpdater.story_chunk_size, ))
            rowcount = cursor.rowcount
            dbi.commit()

        dbi.commit()
        dbi.autocommit(True)
Example #37
from databaseinterface import DatabaseInterface
import json 

dbi = DatabaseInterface.get_shared_instance()

cursor = dbi.execute("SELECT categories.name as category, title, brief, link FROM content JOIN feeds USING(feed_id) JOIN categories USING(category_id)",None)

f = open("dump.json","w")
f.write( json.dumps( cursor.fetchall(),indent=4, sort_keys=True ) )
f.close()

#That was simple.
Example #38
    def update_scores(self):  #Do it in chunks of story_chunk_size
        ''' Computes and updates new_raw_score, new_normalized_score, last_update, total_shares and state in news_social_score_update, <story_chunk_size> stories at a time '''
        dbi = DatabaseInterface.get_shared_instance()
        dbi.autocommit(False)

        self._load_feed_statistics()
        #print self._feed_statistics

        row_count = 1
        while row_count > 0:
            cursor = dbi.execute(
                "\
				SELECT story_id, feed_id, news_social_score_update.created, url, old_raw_score, total_shares, last_update FROM\
				news_social_score_update JOIN news_stories USING(story_id)\
				WHERE news_social_score_update.state='READY' LIMIT %s",
                (SocialScoreUpdater.story_chunk_size, ))  #OMG A JOIN!
            if cursor.rowcount == 0:
                break
            row_count = cursor.rowcount

            stories = list()
            urls = list()
            for row in cursor.fetchall():
                stories.append(
                    SocialScoreUpdater.SocialScoreStory(
                        row['story_id'], row['url'], row['feed_id'],
                        row['old_raw_score'], row['total_shares'],
                        row['last_update'])
                )  #story_id,url,feed_id,previous_score=None,last_updated=None,
                urls.append(row['url'])

            #Get the score from Facebook's opengraph
            ogscraper = OpenGraphShareScraper()
            ogscraper.set_urls(urls)
            ogscraper.update_og_shares()
            og_shares = ogscraper.get_result()

            #Compute the new scores for each and update in DB ( But commit all at once )
            time_now = int(time.time())
            for story in stories:
                if story.url not in og_shares:
                    new_total_shares = 0  #Nothing we can do; Facebook has no record of it
                else:
                    new_total_shares = og_shares[story.url]
                shares_since = new_total_shares - story.total_shares
                story.total_shares = new_total_shares
                story_id = story.story_id
                story.updated_raw_score = self._compute_updated_raw_score(
                    story, shares_since, time_now)
                story.normalized_score = self._normalize_across_feeds(
                    story.updated_raw_score, story.feed_id)

                query_params = (story.updated_raw_score, story.total_shares,
                                time_now, story.normalized_score,
                                story.story_id)

                dbi.execute(
                    "UPDATE news_social_score_update SET new_raw_score=%s , total_shares=%s, last_update=%s, new_normalized_score=%s,state='SCORE_COMPUTED' WHERE story_id=%s",
                    query_params)

            dbi.commit()

        dbi.autocommit(True)
        ''' Tested and appears to work correctly; efficiency has not been measured. '''
Example #39
	def update_statistics(self):
		dbi = DatabaseInterface.get_shared_instance()
		#Let's use some python here.
		cursor = dbi.execute("SELECT feed_id,peak_score FROM news_social_score_all JOIN news_stories USING(story_id) WHERE reflected_in_stats='0'",None)
		rows = cursor.fetchall()
		peak_scores = dict()
		for row in rows:
			if row['feed_id'] not in peak_scores:
				peak_scores[row['feed_id']] = [ row['peak_score'] ]
			else:
				peak_scores[row['feed_id']].append(row['peak_score'])
		
		#Let's get stats for each feed
		cursor = dbi.execute("SELECT feed_id,feed_n,average_peak_score,std_deviation FROM news_social_score_feed_statistics",None)
		rows = cursor.fetchall()
		stats= dict()
		for row in rows:
			stats[row['feed_id']] = row
		
		#Now we compute stuff one by one
		dbi.autocommit(False)
		
		for feed_id in peak_scores:
			score_sum = 0	#renamed from `sum` to avoid shadowing the builtin
			squares = 0	#Sum of squares
			for pscore in peak_scores[feed_id]:
				score_sum += pscore
				squares += pscore * pscore
			
			''' Var(X) = E[X^2] - (E[X])^2 '''
			
			added_n = len(peak_scores[feed_id])
			added_sum = score_sum
			added_squares = squares												# raw sum of squares; the mean is subtracted below
			
			stat = stats[feed_id]
			existing_n = stat['feed_n']
			existing_avg = stat['average_peak_score']
			existing_sum = existing_n * existing_avg
			
			existing_variance = (stat['std_deviation'] * stat['std_deviation']) 	#std_deviation squared gives the variance
			existing_squares= (existing_variance + (existing_avg*existing_avg)) * existing_n
			
			
			new_avg = float(added_sum + existing_sum ) / max(1,(added_n + existing_n))
			new_squares = added_squares + existing_squares
			
			new_variance = float(new_squares)/max(1,existing_n+added_n) - (new_avg*new_avg)
			new_std_dev = math.sqrt( max(0.0, new_variance) )	#clamp tiny float drift below zero
			
			
			new_n = existing_n + added_n
			#Now we want to be able to adapt to the changing popularity of feeds. We'll reduce the weights of ancient stats so that they don't keep us down.
			if new_n > SocialScoreUpdater.stat_update_upper:
				new_n = SocialScoreUpdater.stat_update_lower
			update_params = (new_avg,new_std_dev,new_n,feed_id)
			dbi.execute('UPDATE news_social_score_feed_statistics SET average_peak_score=%s, std_deviation=%s,feed_n=%s WHERE feed_id=%s', update_params)
			''' Capping feed_n down-weights old statistics so the feed can adapt
			quickly to changes in popularity: recent scores get extra weight
			instead of a true all-time average. '''
			
		dbi.execute("UPDATE news_social_score_all SET reflected_in_stats='1' WHERE reflected_in_stats='0'",None)
		dbi.commit()
		dbi.autocommit(True)
Example #40
from databaseinterface import DatabaseInterface
from databasequeries import DatabaseQueries as Dbq
import configparser

config = configparser.ConfigParser()
config.read('config.ini')

dbi = DatabaseInterface(config.get('DATABASE', 'database'),
                        config.get('DATABASE', 'username'),
                        config.get('DATABASE', 'password'),
                        config.get('DATABASE', 'ip_address'))


def store_recipe(data):
    print(data)
    # actual functionality to transplant to app.py function

    # check drink names and look for match
    drink = data["name"]
    drinks = dict(dbi.read_query(Dbq.ALL_DRINKS))
    print(drinks)
    # select right method
    # TODO: remove duplicate lines
    if drink in drinks.values():
        print('Update Recipe')
        # nuke old recipe
        update_id = dbi.read_query(Dbq.GET_DRINK_ID, (data["name"],))[0][0]
        dbi.execute_query(Dbq.DELETE_RECIPE, (update_id,))

        # iterate over recipe and store to database
        for ingredient, quantity in data["ingredients"].items():
Example #41
from databaseinterface import DatabaseInterface

dbi = DatabaseInterface.get_shared_instance()

#
'''
values = [ (6,"Deshpande"),(7,"Nikhil Jain") , (8,"Krishnan"), (9,"Ishan"), (10,"Banerjee") ]
for val in values:
	dbi.execute( "INSERT INTO test (id,name) VALUES (%s,%s)",val )
#'''

selected = dbi.execute("SELECT * FROM test", None)

#print selected.fetchall()
cursor = dbi.execute("INSERT INTO TEST (id,name) VALUES (%s,%s)",
                     (69, "Nikhil Iyer"))
print cursor.lastrowid
#print selected.fetchall()

exit(0)

from socialscoreupdater import SocialScoreUpdater as SSU

ssu = SSU()
ssu._load_feed_statistics()

print ssu._feed_statistics