class StreamerDB:
    """SQLite database for a single streamer of a single game.

    The database holds a ``streams`` table (one summary row per stream), an
    ``overview`` table (one aggregate row per run) and one ``stream_<n>``
    table per imported stream.

    :param game: name of the game; used as a directory name under ``data``.
    :param streamer_name: name of the streamer; used as the database file name.
    :param stream_dicts: iterable of dicts, each read with the keys
        ``raw_data``, ``start_timestamp`` and ``duration``. Each ``raw_data``
        row is indexed as NAME, VIEWERS, FOLLOWERS, PARTNERSHIP, TIMESTAMP.
    """

    # Number of tables in the file that are not ``stream_<n>`` tables.
    # NOTE(review): presumably `streams`, `overview` and SQLite's own
    # `sqlite_sequence` - confirm against Pysqlite.get_table_names().
    _NON_STREAM_TABLE_COUNT = 3

    def __init__(self, game, streamer_name, stream_dicts):
        self.path = os.path.join(os.getcwd(), 'data', game, 'streamers', '{}.db'.format(streamer_name))
        self.game = game
        self.streamer_name = streamer_name
        self.next_stream_count = 0

        already_existed = self.db_exists()
        if not already_existed:
            self.create_db()
        self.db = Pysqlite('{} {} Stream Database'.format(game, streamer_name), self.path, verbose=False)
        if not already_existed:
            # A freshly created database still needs its two summary tables.
            self.create_streams_table()
            self.create_overview_table()

        stored = self._count_stream_tables()
        if already_existed:
            print('DB for: {} already exists and already holds {} stream tables'.format(streamer_name, stored))
        self.next_stream_count = stored
        self.last_stream_stored = stored
        self.stream_dicts = stream_dicts

    def _count_stream_tables(self):
        """Return the number of ``stream_<n>`` tables currently in the database."""
        return len(self.db.get_table_names()) - self._NON_STREAM_TABLE_COUNT

    @staticmethod
    def _column_as_ints(rows, index):
        """Return column ``index`` of ``rows`` with every value converted to int."""
        return [int(row[index]) for row in rows]

    def run(self):
        """Import every supplied stream, then append a new overview row."""
        self.import_csv_data()
        self.generate_overview_for_all_streams()

    def db_exists(self):
        """Return True when the database file is present on disk."""
        return os.path.isfile(self.path)

    def create_db(self):
        """Create the database file by copying the base streamer database."""
        if not self.db_exists():
            print('Database for {} does not exist. Creating DB now.'.format(self.streamer_name))
            copy_file(
                src=os.path.join(os.getcwd(), 'data', self.game, 'streamers', 'base', 'test_streamer.db'),
                dst=self.path
            )
        else:
            print('Database for {} already exists'.format(self.streamer_name))

    def create_overview_table(self):
        """Create the ``overview`` table."""
        print('Creating the overview table for: {}'.format(self.streamer_name))
        time.sleep(1)
        create_statement = (
            'CREATE TABLE `overview` (`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,'
            '`timestamp` TEXT NOT NULL,'
            '`viewers_average` INTEGER NOT NULL,'
            '`viewers_peak` INTEGER NOT NULL,'
            '`followers` INTEGER NOT NULL,'
            '`average_time_streamed` INTEGER,'
            '`total_time_streamed` INTEGER NOT NULL,'
            '`partnership` INTEGER NOT NULL DEFAULT 0);'
        )
        self.db.execute_sql(create_statement)

    def create_streams_table(self):
        """Create the ``streams`` table (one summary row per stream)."""
        print('Creating the streams table for: {}'.format(self.streamer_name))
        time.sleep(1)
        create_statement = (
            'CREATE TABLE `streams` (`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, '
            '`timestamp` TEXT NOT NULL, `duration` INTEGER NOT NULL, `viewers_average` '
            'INTEGER NOT NULL, `viewers_peak` INTEGER NOT NULL, `follower_increase` INTEGER NOT NULL)'
        )
        self.db.execute_sql(create_statement)

    def create_stream_table(self):
        """Create the ``stream_<next_stream_count>`` table for the next stream."""
        print('Creating stream_{} table for: {}'.format(self.next_stream_count, self.streamer_name))
        time.sleep(1)
        create_statement = (
            'CREATE TABLE "stream_{}" (`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,'
            '`timestamp` TEXT NOT NULL, `viewers` INTEGER NOT NULL, `followers` INTEGER NOT NULL, '
            '`partnership`INTEGER NOT NULL DEFAULT 0)'
        ).format(self.next_stream_count)
        self.db.execute_sql(create_statement)

    def import_csv_data(self):
        """Store every stream in ``self.stream_dicts`` in its own table.

        For each stream a ``stream_<n>`` table is created and filled, and a
        summary row is added to ``streams``. A stream without any raw rows is
        skipped: it has no viewer or follower data to summarise, and creating
        its table first would leave an orphan table behind.
        """
        print('Importing CSV data into stream tables for: {}'.format(self.streamer_name))
        for stream_dict in self.stream_dicts:
            raw_data_list = stream_dict['raw_data']
            if not raw_data_list:
                print('Skipping a stream without data for: {}'.format(self.streamer_name))
                continue
            # create a table for each CSV
            self.create_stream_table()
            # CSV schema is NAME, VIEWERS, FOLLOWERS, PARTNERSHIP, TIMESTAMP
            # DB schema is ID, TIMESTAMP, VIEWERS, FOLLOWERS, PARTNERSHIP
            fixed_schema_list = [[row[4], row[1], row[2], row[3]] for row in raw_data_list]
            table_name = 'stream_{}'.format(self.next_stream_count)
            for row in tqdm(fixed_schema_list):
                self.db.insert_row(
                    table=table_name,
                    row_string='(NULL, ?, ?, ?, ?)',
                    row_data=row)
            # generate a stream data row for the streams table
            self.generate_stream_data_row(stream_dict=stream_dict)
            # iterate the stream counter
            self.next_stream_count += 1
        # update the number of streams stored
        self.last_stream_stored = self._count_stream_tables()

    def generate_stream_data_row(self, stream_dict):
        """Insert the summary row of one stream into the ``streams`` table.

        :param stream_dict: dict with ``start_timestamp``, ``duration`` and a
            non-empty ``raw_data`` list.
        :raises ValueError: if ``raw_data`` is empty or a viewer/follower
            value is not an integer.
        """
        # Streams table schema:
        # ID, Date + start time, duration (seconds), average viewership, peak viewership, follower differential
        raw_data = stream_dict['raw_data']
        timestamp = stream_dict['start_timestamp']
        duration = stream_dict['duration']
        # Convert before aggregating: max() over numeric strings compares
        # them lexicographically ('9' > '10') and reports the wrong peak.
        viewers_list = self._column_as_ints(raw_data, 1)
        viewers_average = calculate_average_from_list(viewers_list)
        viewers_peak = max(viewers_list)
        # last follower count - first follower count
        follower_delta = int(raw_data[-1][2]) - int(raw_data[0][2])
        self.db.insert_row(
            table='streams',
            row_string='(NULL, ?, ?, ?, ?, ?)',
            row_data=[timestamp, duration, viewers_average, viewers_peak, follower_delta]
        )

    def generate_overview_for_all_streams(self):
        """Append an aggregate row over all stored streams to ``overview``.

        Nothing is written when no stream table is stored yet, or when the
        latest stream table is empty, because the follower count and
        partnership flag are read from the last row of that table.
        """
        print('Generating overview for all streams so far')
        if self.last_stream_stored < 1:
            print('No streams stored for: {}. Skipping overview.'.format(self.streamer_name))
            return
        # Streams table schema:
        # ID, Date + start time, duration (seconds), average viewership, peak viewership, follower differential
        stream_rows = self.db.get_all_rows('streams')
        # get the duration data
        durations = [int(field[2]) for field in stream_rows]
        total_duration = sum(durations)
        total_average_duration = calculate_average_from_list(durations)
        # get the viewer data
        average_viewers_list = [int(field[3]) for field in stream_rows]
        total_average_viewers = calculate_average_from_list(average_viewers_list)
        highest_peak_viewers = max((int(field[4]) for field in stream_rows), default=0)
        # get the follower data from the latest stream table and not the overview data
        latest_rows = self.db.get_all_rows('stream_{}'.format(self.last_stream_stored - 1))
        if not latest_rows:
            print('Latest stream table for: {} is empty. Skipping overview.'.format(self.streamer_name))
            return
        last_follower_count = latest_rows[-1][3]
        # get last partnership data from the latest stream table too
        partnered = latest_rows[-1][4]
        # Overview table schema:
        # ID, CURRENT TIMESTAMP, AVERAGE VIEWERS, PEAK VIEWERS, FOLLOWERS, AVERAGE STREAM DURATION,
        # TOTAL STREAM DURATION, PARTNERSHIP
        self.db.insert_row(
            table='overview',
            row_string='(NULL, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?)',
            row_data=[total_average_viewers, highest_peak_viewers, last_follower_count,
                      total_average_duration, total_duration, partnered])

    def return_last_overview(self):
        """Return the newest ``overview`` row, or ``[]`` when there is none."""
        overviews = self.db.get_all_rows(table='overview')
        if len(overviews) == 0:
            return []
        return overviews[-1]

    def return_stream_count(self):
        """Return the index that the next imported stream table will get."""
        return self.next_stream_count
class GameDB:
    """SQLite database holding the aggregated data of every streamer of a game.

    The database contains the tables ``global_data``, ``streamers_data``,
    ``tier_bounds`` and ``tier_data``.

    :param game: name of the game; used for the directory and file name.
    :param streamer_dicts: optional iterable of dicts, each read with the keys
        ``name``, ``last_update``, ``viewers_average``, ``viewers_peak``,
        ``followers``, ``stream_count``, ``average_duration``,
        ``total_duration``, ``percentage_duration`` and ``partnership``.
    """

    def __init__(self, game, streamer_dicts=None):
        self.path = os.path.join(os.getcwd(), 'data', game, '{}_data.db'.format(game))
        self.game = game
        already_existed = self.db_exists()
        if not already_existed:
            self.create_db()
        self.db = Pysqlite('{} Stream Database'.format(game), self.path, verbose=False)
        if not already_existed:
            # If the DB does not exist, then create the tables
            self.create_global_data_table()
            self.create_streamers_data_table()
            self.create_tier_bounds_table()
            self.create_tier_data_table()
        self.streamer_dicts = streamer_dicts

    def run(self):
        """Insert or update every supplied streamer, then refresh global data.

        Streamers whose name is already in ``streamers_data`` are updated;
        all others are inserted. Afterwards a new ``global_data`` row is
        written and the database is vacuumed.
        """
        # ``streamer_dicts`` defaults to None; treat that as an empty batch.
        streamer_dicts = self.streamer_dicts or []
        # A set makes the per-streamer membership test constant time.
        stored_names = set(self.get_streamers_already_stored())
        # Count against the batch itself so both numbers describe this run.
        update_count = sum(1 for entry in streamer_dicts if entry['name'] in stored_names)
        print('Additions: {}'.format(len(streamer_dicts) - update_count))
        print('Updates: {}'.format(update_count))
        time.sleep(0.1)  # avoids same line progress bar
        for streamer_dict in tqdm(streamer_dicts):
            if streamer_dict['name'] in stored_names:
                self.update_streamer_data(streamer_dict)
                self.update_streamer_tier(streamer_dict)
            else:
                self.insert_streamer_data(streamer_dict)
                self.add_streamer_tier(streamer_dict)
        # commit the data after updating as it does not do so itself
        self.db.dbcon.commit()
        # update the global data
        self.update_global_data()
        print('Vacuuming Database to retrieve space')
        # vacuum the old space now
        self.db.execute_sql('VACUUM')
        # commit the vacuum
        self.db.dbcon.commit()

    def db_exists(self):
        """Return True when the database file is present on disk."""
        return os.path.isfile(self.path)

    def create_db(self):
        """Create the database file by copying the base game database."""
        if not self.db_exists():
            print('Database for the game: {} does not exist. Creating DB now.'.format(self.game))
            copy_file(
                src=os.path.join(os.getcwd(), 'data', 'base', 'test_game.db'),
                dst=self.path
            )
        else:
            print('Database for game: {} already exists'.format(self.game))

    def create_global_data_table(self):
        """Create the ``global_data`` table."""
        print('Creating global data table for: {}'.format(self.game))
        time.sleep(1)
        create_statement = (
            'CREATE TABLE "global_data" ('
            '`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,'
            '`timestamp` TEXT NOT NULL,'
            '`streamer_count` INTEGER NOT NULL,'
            '`stream_count` INTEGER NOT NULL,'
            '`average_time_streamed` INTEGER NOT NULL,'
            '`total_time_streamed` INTEGER NOT NULL,'
            '`longest_stream` INTEGER NOT NULL)'
        )
        self.db.execute_sql(create_statement)

    def create_streamers_data_table(self):
        """Create the ``streamers_data`` table."""
        print('Creating streamers data table for: {}'.format(self.game))
        time.sleep(1)
        create_statement = (
            'CREATE TABLE "streamers_data" (`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, '
            '`name` TEXT NOT NULL, '
            '`last_updated` TEXT NOT NULL, '
            '`viewers_average` INTEGER NOT NULL, '
            '`viewers_peak` INTEGER NOT NULL, '
            '`followers` INTEGER NOT NULL, '
            '`stream_count` INTEGER NOT NULL, '
            '`average_time_streamed` INTEGER NOT NULL, '
            '`total_time_streamed` INTEGER NOT NULL, '
            '`percentage_duration` REAL NOT NULL,'
            '`partnership` INTEGER NOT NULL)'
        )
        self.db.execute_sql(create_statement)

    def create_tier_bounds_table(self):
        """Create ``tier_bounds`` and fill it from interactive console input.

        :raises ValueError: if an entered value is not an integer.
        """
        print('Creating tier bounds table for: {}'.format(self.game))
        time.sleep(1)
        create_statement = (
            'CREATE TABLE "tier_bounds" (`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,'
            '`number` INTEGER NOT NULL, '
            '`upper_bound` INTEGER NOT NULL, '
            '`lower_bound` INTEGER NOT NULL)'
        )
        self.db.execute_sql(create_statement)
        time.sleep(1)
        tier_amount = int(input('Please enter the number of tiers that will be present: '))
        # Tiers are numbered from 1.
        for tier_number in range(1, tier_amount + 1):
            print('BOUND NUMBERS ARE BOTH INCLUSIVE. FOR 100 TO 50, ENTER 100 AS UPPER AND 50 AS LOWER')
            upper_bound = int(input('Please enter the upper bound for tier {}: '.format(tier_number)))
            lower_bound = int(input('Please enter the lower bound for tier {}: '.format(tier_number)))
            self.db.insert_row(
                table='tier_bounds',
                row_string='(NULL, ?, ?, ?)',
                row_data=[tier_number, upper_bound, lower_bound])

    def create_tier_data_table(self):
        """Create the ``tier_data`` table."""
        print('Creating tier data table for: {}'.format(self.game))
        time.sleep(1)
        create_statement = (
            'CREATE TABLE `tier_data` (`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, '
            '`streamer_name` TEXT NOT NULL, '
            '`streamer_tier` INTEGER NOT NULL)'
        )
        self.db.execute_sql(create_statement)

    def return_streamer_tier(self, average_viewers):
        """Return the tier number whose inclusive bounds contain ``average_viewers``.

        Every row of ``tier_bounds`` is checked in stored order and the first
        match wins. Returns 0 when no tier matches or no tiers are defined.
        """
        bounds = self.db.get_all_rows('tier_bounds')
        for _row_id, tier, upper, lower in bounds:
            if upper >= average_viewers >= lower:
                return tier
        # Only reached after every tier has been checked.
        return 0

    # return the names of the streamers already stored
    def get_streamers_already_stored(self):
        """Return the ``name`` column of every ``streamers_data`` row as a list."""
        streamers = self.db.get_all_rows('streamers_data')
        return [row[1] for row in streamers]

    def insert_streamer_data(self, streamer_dict):
        """Insert a new ``streamers_data`` row built from ``streamer_dict``."""
        self.db.insert_row(
            table='streamers_data',
            row_string='(NULL, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
            row_data=[
                streamer_dict['name'],
                streamer_dict['last_update'],
                streamer_dict['viewers_average'],
                streamer_dict['viewers_peak'],
                streamer_dict['followers'],
                streamer_dict['stream_count'],
                streamer_dict['average_duration'],
                streamer_dict['total_duration'],
                streamer_dict['percentage_duration'],
                streamer_dict['partnership']
            ]
        )

    def update_streamer_data(self, streamer_dict):
        """Update the ``streamers_data`` row whose name is ``streamer_dict['name']``.

        The statement is executed on the raw cursor and is not committed here.
        """
        # no neopysqlite method for updating rows yet :(
        # UPDATE table_name SET column1 = value1, columnN = valueN... WHERE name = `streamer_name`
        self.db.dbcur.execute(
            'UPDATE streamers_data SET '
            'last_updated = ?,'
            'viewers_average = ?,'
            'viewers_peak = ?,'
            'followers = ?,'
            'stream_count = ?,'
            'total_time_streamed = ?,'
            'average_time_streamed = ?,'
            'percentage_duration = ?,'
            # The trailing space keeps the placeholder and WHERE apart.
            'partnership = ? '
            'WHERE name = ?',
            (
                streamer_dict['last_update'],
                streamer_dict['viewers_average'],
                streamer_dict['viewers_peak'],
                streamer_dict['followers'],
                streamer_dict['stream_count'],
                streamer_dict['total_duration'],
                streamer_dict['average_duration'],
                streamer_dict['percentage_duration'],
                streamer_dict['partnership'],
                streamer_dict['name']
            ))

    def add_streamer_tier(self, streamer_dict):
        """Insert a ``tier_data`` row with the streamer's name and computed tier."""
        self.db.insert_row(
            table='tier_data',
            row_string='(NULL, ?, ?)',
            row_data=[
                streamer_dict['name'],
                self.return_streamer_tier(average_viewers=streamer_dict['viewers_average'])
            ])

    def update_streamer_tier(self, streamer_dict):
        """Recompute the streamer's tier and store it in their ``tier_data`` row.

        The statement is executed on the raw cursor and is not committed here.
        """
        self.db.dbcur.execute(
            'UPDATE tier_data SET '
            'streamer_tier = ? '
            'WHERE streamer_name = ?',
            (
                self.return_streamer_tier(average_viewers=streamer_dict['viewers_average']),
                streamer_dict['name']
            ))

    def update_global_data(self):
        """Append a ``global_data`` row aggregated over all ``streamers_data`` rows."""
        # update the global data table from all the new streamer data
        streamers_data = self.db.get_all_rows(table='streamers_data')
        # GLOBAL DATA SCHEMA:
        # ID, TIMESTAMP, STREAMER COUNT, STREAM COUNT, AVERAGE GLOBAL DURATION, TOTAL TIME STREAMED, LONGEST STREAM
        streamer_count = len(streamers_data)
        stream_count = sum(int(row[6]) for row in streamers_data)
        # Column 8 is `total_time_streamed` in the streamers_data schema.
        durations = [int(row[8]) for row in streamers_data]
        total_global_duration = sum(durations)
        average_global_duration = calculate_average_from_list(durations)
        # default=0 keeps an empty streamers table from raising ValueError.
        longest_stream = max(durations, default=0)
        self.db.insert_row(
            table='global_data',
            row_string='(NULL, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?)',
            row_data=[
                streamer_count,
                stream_count,
                average_global_duration,
                total_global_duration,
                longest_stream
            ]
        )