class StreamData:
    """Read-only accessor for a single recorded stream inside a streamer's
    per-game sqlite database.

    Frontend stream ids are 1-based; the backend tables are 0-based, so the
    id is shifted down by one on construction and shifted back up when
    reported outward.
    """

    def __init__(self, streamer_name, game_name, stream_id):
        self.streamer_name = streamer_name
        self.game_name = game_name
        # Backend is zero indexed, frontend is not
        self.stream_id = int(stream_id) - 1
        self.max_stream_id = 0
        self.db = None

    def run(self):
        """Open the streamer's DB and work out how many streams it holds."""
        database_path = os.path.join(
            os.getcwd(), 'data', self.game_name, 'streamers',
            '{}.db'.format(self.streamer_name))
        self.db = Pysqlite(
            database_name='{} {} DB'.format(self.game_name, self.streamer_name),
            database_file=database_path)
        # Three tables are bookkeeping (overview/streams/etc.); the rest are
        # per-stream tables, so this yields the highest usable stream id.
        self.max_stream_id = len(self.db.get_table_names()) - 3

    def get_stream_data(self):
        """Return the overview row for this stream as a dictionary."""
        # the db index is also not zero indexed... an oversight I know
        rows = self.db.get_specific_rows(
            table='streams',
            filter_string='id IS {}'.format(self.stream_id + 1))
        overview = rows[0]
        return {
            'id': self.stream_id + 1,
            'max_id': self.max_stream_id,
            'time_start': overview[1],
            'duration': convert_to_hours(overview[2]),
            'viewers_average': overview[3],
            'viewers_peak': overview[4],
            'follower_delta': overview[5],
        }

    def get_stream_raw_data(self):
        """Return every raw sample row recorded for this stream."""
        return self.db.get_all_rows(table='stream_{}'.format(self.stream_id))

    def get_stream_viewer_data_json(self):
        """Return [timestamp, viewers] pairs as a JSON string.

        Timestamp in the X axis, viewer count in the Y axis.
        """
        points = [[sample[1], sample[2]] for sample in self.get_stream_raw_data()]
        return json.dumps(points)
# One-off maintenance script: rewrite timestamp strings in every streamer DB
# for the listed games so they share one format.
from tqdm import tqdm

# feature switches for the two fix passes
fix_stream_tables = True
fix_game_tables = True
# game shorthand codes whose data directories get processed
games = ['ED', 'PC']
for game in games:
    streamers = os.listdir(os.path.join(os.getcwd(), 'data', game, 'streamers'))
    # 'base' is the template DB directory, not a real streamer
    streamers.remove('base')
    print('Processing timestamps for streamers of game: {}'.format(game))
    if fix_stream_tables:
        for streamer in tqdm(streamers):
            streamer_db_path = os.path.join(os.getcwd(), 'data', game, 'streamers', streamer)
            db = Pysqlite(database_name='{} DB'.format(streamer), database_file=streamer_db_path)
            # 3 bookkeeping tables are assumed; the remainder are stream_N tables
            table_count = len(db.get_table_names()) - 3
            # print('{} has {} stream tables'.format(streamer, table_count))
            table_names = ['stream_{}'.format(number) for number in range(0, table_count)]
            table_names.append('overview')
            table_names.append('streams')
            for table_name in table_names:
                rows = db.get_all_rows(table=table_name)
                for row in tqdm(rows):
                    # convert anything in DD-MM-YYYY HH:MM:SS to YYYY-MM-DD HH:MM:SS
                    old_timestamp = row[1]
                    split_string = old_timestamp.split(' ')
                    date_part = split_string[0].split('-')
                    time_part = split_string[1].split(':')
                    # NOTE(review): the variable names assume YYYY-MM-DD input,
                    # while the comment above says input may be DD-MM-YYYY, and
                    # the 'day == 2016' test below implies the year can land in
                    # the 'day' slot -- confirm the actual input formats.
                    year, month, day = int(date_part[0]), int(date_part[1]), int(date_part[2])
                    hour, minute, second = int(time_part[0]), int(time_part[1]), int(time_part[2])
                    # NOTE(review): SOURCE is truncated here -- the body of this
                    # conditional is not visible in this chunk.
                    if day == 2016:
class TwitchStatisticsOutput:
    """Aggregates per-streamer viewing statistics from a game's stats DB and
    writes a plain-text summary report to data/<shorthand>_Twitch_Stats.txt."""

    # bounds for the tiers of streamers (inclusive average-viewer ranges)
    tier_one_bounds = {'upper': 999999, 'lower': 100}
    tier_two_bounds = {'upper': 99, 'lower': 50}
    tier_three_bounds = {'upper': 49, 'lower': 15}
    tier_four_bounds = {'upper': 14, 'lower': 0}

    def __init__(self, game_name, game_shorthand, db_mid_directory, db_name_format='{}_stats.db', verbose=False):
        self.name = game_name
        self.shorthand = game_shorthand
        self.db_file_path = os.path.join(os.getcwd(), db_mid_directory, db_name_format.format(game_shorthand))
        self.db = Pysqlite(database_name='twitch_stats', database_file=self.db_file_path)
        self.verbose = verbose

    def run(self):
        """Collect a stats dictionary for every streamer table, then write the report."""
        if self.verbose:
            print('Processing data for game: {}'.format(self.name))
        tables = self.db.get_table_names()
        tables = [table for table in tables if table not in ['test', 'sqlite_sequence']]
        # get the table names which do not start with a number
        # (table[0] is the first character; the old table[0][0] was redundant)
        valid_named_tables = [table for table in tables if not table[0].isdigit()]
        # get the table names which start with a number
        number_start_tables = [table for table in tables if table[0].isdigit()]
        # redo the original table names: digit-led names carry a leading underscore
        valid_named_tables.extend(['_' + table for table in number_start_tables])
        # initialise list for all the data
        all_streamer_data = []
        # list any streamers to ignore
        streamers_to_ignore = ['legenddolby1986']
        for streamer in tqdm(valid_named_tables):
            if streamer in streamers_to_ignore:
                # skip if it's on the ignore list
                continue
            # get the db data from the table of the same name as the streamer
            all_streamer_data.append(self.get_streamer_dict(streamer))
        # write the data to the text file
        self.write_text_file(streamer_data=all_streamer_data)

    def return_streamer_tier(self, average_viewers):
        """Map an average-viewer count to a tier 1-4; 0 if outside all bounds."""
        if self.tier_one_bounds['upper'] >= average_viewers >= self.tier_one_bounds['lower']:
            return 1
        if self.tier_two_bounds['upper'] >= average_viewers >= self.tier_two_bounds['lower']:
            return 2
        if self.tier_three_bounds['upper'] >= average_viewers >= self.tier_three_bounds['lower']:
            return 3
        if self.tier_four_bounds['upper'] >= average_viewers >= self.tier_four_bounds['lower']:
            return 4
        return 0

    def get_streamer_dict(self, streamer):
        """Build the statistics dictionary for one streamer table.

        Returns a zeroed-out dictionary if the table cannot be read.
        """
        streamer_dict = dict()
        streamer_dict['name'] = streamer
        # catch an exception where the table cannot be found and return an empty dictionary instead
        try:
            data = self.db.get_all_rows(table=streamer)
        except PysqliteCouldNotRetrieveData:
            streamer_dict['partnership'] = False
            streamer_dict['tier'] = 4
            streamer_dict['viewers'] = []
            streamer_dict['viewers_max'] = 0
            streamer_dict['viewers_average'] = 0.0
            streamer_dict['followers'] = []
            streamer_dict['followers_max'] = 0
            streamer_dict['times'] = []
            streamer_dict['durations'] = []
            streamer_dict['durations_max'] = 0
            streamer_dict['durations_average'] = 0.0
            streamer_dict['durations_total'] = 0.0
            streamer_dict['stream_count'] = 0
            return streamer_dict
        # partnership flag comes from the most recent row
        streamer_dict['partnership'] = data[-1][3] == 1
        viewers = [field[1] for field in data]
        streamer_dict['viewers'] = viewers
        streamer_dict['viewers_max'] = max(viewers)
        streamer_dict['viewers_average'] = sum(viewers) // len(viewers)
        streamer_dict['tier'] = self.return_streamer_tier(streamer_dict['viewers_average'])
        followers = [field[2] for field in data]
        streamer_dict['followers'] = followers
        # follower counts are cumulative, so the latest value is the max
        streamer_dict['followers_max'] = followers[-1]
        streamer_dict['times'] = [field[4] for field in data]
        durations = get_stream_durations(streamer_dict['times'])
        streamer_dict['durations'] = durations
        # guard against an empty duration list so max()/division cannot raise
        streamer_dict['durations_max'] = max(durations) if durations else 0
        # BUG FIX: this previously stored round(sum(durations), 2) -- identical
        # to durations_total -- so the reported "average" was really the total.
        # The average must divide by the number of streams.
        streamer_dict['durations_average'] = round(sum(durations) / len(durations), 2) if durations else 0.0
        streamer_dict['durations_total'] = round(sum(durations), 2)
        streamer_dict['stream_count'] = len(durations)
        return streamer_dict

    def write_text_file(self, streamer_data):
        """Write the aggregated statistics report for all streamers."""
        durations = [streamer['durations'] for streamer in streamer_data]
        # get the longest consecutive stream; guard the all-empty case
        non_empty_durations = [duration for duration in durations if not duration == []]
        longest_stream = max([max(duration_set) for duration_set in non_empty_durations]) if non_empty_durations else 0
        # calculate total time streamed over all streamers
        total_duration_sums = sum([sum(duration_set) for duration_set in non_empty_durations])
        total_duration = round(total_duration_sums, 2)
        # calculate the total number of discrete streams
        total_streams = 0
        for streamer in streamer_data:
            total_streams += streamer['stream_count']
        text_file_path = os.path.join(os.getcwd(), 'data', '{}_Twitch_Stats.txt'.format(self.shorthand))
        with open(text_file_path, mode='w', encoding='utf-8') as file:
            file.write('{} Twitch Streamer Statistics\n'.format(self.name))
            file.write('Data recorded 24/7 via twitch\'s public API every ~20 seconds\n')
            file.write('Script written by Simon Agius Muscat / CMDR Purrcat\n')
            file.write('More information can be found at: https://github.com/purrcat259/twitch-statistics\n')
            file.write('Total streamers recorded: {}\n'.format(len(streamer_data)))
            file.write('Total streams recorded: {}\n'.format(total_streams))
            file.write('Total time streamed: {} hours\n'.format(total_duration))
            file.write('Longest single stream: {} hours\n'.format(round(longest_stream, 2)))
            file.write('Tier One Bounds: {} >= Average Viewers >= {}\n'.format(self.tier_one_bounds['upper'], self.tier_one_bounds['lower']))
            file.write('Tier One Streamers: {}\n'.format(len([s for s in streamer_data if self.return_streamer_tier(s['viewers_average']) == 1])))
            file.write('Tier Two Bounds: {} >= Average Viewers >= {}\n'.format(self.tier_two_bounds['upper'], self.tier_two_bounds['lower']))
            file.write('Tier Two Streamers: {}\n'.format(len([s for s in streamer_data if self.return_streamer_tier(s['viewers_average']) == 2])))
            file.write('Tier Three Bounds: {} >= Average Viewers >= {}\n'.format(self.tier_three_bounds['upper'], self.tier_three_bounds['lower']))
            file.write('Tier Three Streamers: {}\n'.format(len([s for s in streamer_data if self.return_streamer_tier(s['viewers_average']) == 3])))
            file.write('Tier Four Bounds: {} >= Average Viewers >= {}\n'.format(self.tier_four_bounds['upper'], self.tier_four_bounds['lower']))
            file.write('Tier Four Streamers: {}\n'.format(len([s for s in streamer_data if self.return_streamer_tier(s['viewers_average']) == 4])))
            for streamer in streamer_data:
                # skip streamers with total durations less than 10 minutes
                if streamer['durations_total'] < 0.2:
                    continue
                file.write('\nStreamer: {} (T{})\n'.format(streamer['name'], streamer['tier']))
                file.write('Partnered: {} \n'.format(streamer['partnership']))
                file.write('Average Viewers: {}\n'.format(streamer['viewers_average']))
                file.write('Peak Viewers: {}\n'.format(streamer['viewers_max']))
                file.write('Followers: {}\n'.format(streamer['followers_max']))
                file.write('Stream count: {}\n'.format(streamer['stream_count']))
                file.write('Average Stream duration: {} hours\n'.format(streamer['durations_average']))
                file.write('Longest Stream duration: {} hours\n'.format(streamer['durations_max']))
                file.write('Total time streamed: {} hours\n'.format(streamer['durations_total']))
                time_percentage = round((streamer['durations_total'] / total_duration) * 100, 3)
                file.write('Percentage streamed of total duration: {}%\n'.format(time_percentage))
                file.write('Stream durations:\n')
                for duration in streamer['durations']:
                    if duration < 1.0:
                        # report sub-hour streams in minutes instead of hours
                        duration = round(duration * 60, 2)
                        # skip stream durations less than 5 minutes
                        if duration < 5.0:
                            continue
                        file.write('\t{} minutes\n'.format(duration))
                    else:
                        file.write('\t{} hours\n'.format(duration))
class StreamerDB:
    # Persists per-stream CSV data for one streamer into their sqlite DB and
    # maintains the 'streams' and 'overview' summary tables.
    # NOTE(review): the recurring 'len(get_table_names()) - 3' assumes exactly
    # three non-stream tables (presumably overview, streams and sqlite's own
    # bookkeeping table) -- confirm against the base template DB.
    def __init__(self, game, streamer_name, stream_dicts):
        # path to data/<game>/streamers/<streamer>.db
        self.path = os.path.join(os.getcwd(), 'data', game, 'streamers', '{}.db'.format(streamer_name))
        self.game = game
        self.streamer_name = streamer_name
        # index of the next 'stream_N' table to create
        self.next_stream_count = 0
        if self.db_exists():
            self.db = Pysqlite('{} {} Stream Database'.format(game, streamer_name), self.path, verbose=False)
            self.next_stream_count = len(self.db.get_table_names()) - 3
            print('DB for: {} already exists and already holds {} stream tables'.format(streamer_name, self.next_stream_count))
        else:
            # seed a fresh DB file from the template, then open it
            self.create_db()
            self.db = Pysqlite('{} {} Stream Database'.format(game, streamer_name), self.path, verbose=False)
            # This means that the overview and the streams table need to be created
            self.create_streams_table()
            self.create_overview_table()
            self.next_stream_count = len(self.db.get_table_names()) - 3
        # count of stream tables currently stored; refreshed after imports
        self.last_stream_stored = len(self.db.get_table_names()) - 3
        self.stream_dicts = stream_dicts

    def run(self):
        # Import all pending CSV data, then regenerate the overview row.
        self.import_csv_data()
        self.generate_overview_for_all_streams()

    def db_exists(self):
        # True if the per-streamer DB file already exists on disk.
        return os.path.isfile(self.path)

    def create_db(self):
        # Create the DB file by copying the bundled template DB so the new
        # file starts with the expected schema; no-op if it already exists.
        if not self.db_exists():
            print('Database for {} does not exist. Creating DB now.'.format(self.streamer_name))
            copy_file(
                src=os.path.join(os.getcwd(), 'data', self.game, 'streamers', 'base', 'test_streamer.db'),
                dst=self.path
            )
        else:
            print('Database for {} already exists'.format(self.streamer_name))

    def create_overview_table(self):
        # One-row-per-run summary table for the whole streamer history.
        print('Creating the overview table for: {}'.format(self.streamer_name))
        # NOTE(review): purpose of the 1s pause before DDL is not evident here
        time.sleep(1)
        create_statement = 'CREATE TABLE `overview` (`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,' \
                           '`timestamp` TEXT NOT NULL,' \
                           '`viewers_average` INTEGER NOT NULL,' \
                           '`viewers_peak` INTEGER NOT NULL,' \
                           '`followers` INTEGER NOT NULL,' \
                           '`average_time_streamed` INTEGER,' \
                           '`total_time_streamed` INTEGER NOT NULL,' \
                           '`partnership` INTEGER NOT NULL DEFAULT 0);' \

        self.db.execute_sql(create_statement)

    def create_streams_table(self):
        # One-row-per-stream summary table.
        print('Creating the streams table for: {}'.format(self.streamer_name))
        time.sleep(1)
        create_statement = 'CREATE TABLE `streams` (`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, ' \
                           '`timestamp` TEXT NOT NULL, `duration` INTEGER NOT NULL, `viewers_average` ' \
                           'INTEGER NOT NULL, `viewers_peak` INTEGER NOT NULL, `follower_increase` INTEGER NOT NULL)'
        self.db.execute_sql(create_statement)

    def create_stream_table(self):
        # Raw-sample table for the next stream, named stream_<next_stream_count>.
        print('Creating stream_{} table for: {}'.format(self.next_stream_count, self.streamer_name))
        time.sleep(1)
        create_statement = 'CREATE TABLE "stream_{}" (`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,' \
                           '`timestamp` TEXT NOT NULL, `viewers` INTEGER NOT NULL, `followers` INTEGER NOT NULL, ' \
                           '`partnership`INTEGER NOT NULL DEFAULT 0)'.format(self.next_stream_count)
        self.db.execute_sql(create_statement)

    def import_csv_data(self):
        # For each pending stream dict: create its raw table, insert the rows
        # (reordered to the DB column order), and add a 'streams' summary row.
        print('Importing CSV data into stream tables for: {}'.format(self.streamer_name))
        for stream_dict in self.stream_dicts:
            # create a table for each CSV
            self.create_stream_table()
            # CSV schema is NAME, VIEWERS, FOLLOWERS, PARTNERSHIP, TIMESTAMP
            # DB schema is ID, TIMESTAMP, VIEWERS, FOLLOWERS, PARTNERSHIP
            raw_data_list = stream_dict['raw_data']
            fixed_schema_list = [[row[4], row[1], row[2], row[3]] for row in raw_data_list]
            """
            self.db.insert_rows(
                table='stream_{}'.format(self.next_stream_count),
                row_string='(NULL, ?, ?, ?, ?)',
                row_data_list=fixed_schema_list
            )
            """
            for row in tqdm(fixed_schema_list):
                self.db.insert_row(
                    table='stream_{}'.format(self.next_stream_count),
                    row_string='(NULL, ?, ?, ?, ?)',
                    row_data=row)
            # generate a stream data row for the streams table
            self.generate_stream_data_row(stream_dict=stream_dict)
            # iterate the stream counter
            self.next_stream_count += 1
        # update the number of streams stored
        self.last_stream_stored = len(self.db.get_table_names()) - 3

    def generate_stream_data_row(self, stream_dict):
        # print('Generating stream overview')
        # Streams table schema:
        # ID, Date + start time, duration (seconds), average viewership, peak viewership, follower differential
        timestamp = stream_dict['start_timestamp']
        duration = stream_dict['duration']
        viewers_list = [row[1] for row in stream_dict['raw_data']]
        viewers_average = calculate_average_from_list(viewers_list)
        viewers_peak = max(viewers_list)
        # last follower count - first follower count
        follower_delta = int(stream_dict['raw_data'][-1][2]) - int(stream_dict['raw_data'][0][2])
        self.db.insert_row(
            table='streams',
            row_string='(NULL, ?, ?, ?, ?, ?)',
            row_data=[timestamp, duration, viewers_average, viewers_peak, follower_delta]
        )

    def generate_overview_for_all_streams(self):
        # Aggregate every 'streams' row into a single new 'overview' row.
        print('Generating overview for all streams so far')
        # Streams table schema:
        # ID, Date + start time, duration (seconds), average viewership, peak viewership, follower differential
        data = self.db.get_all_rows('streams')
        # get the duration data
        durations = [int(field[2]) for field in data]
        total_duration = sum(durations)
        total_average_duration = calculate_average_from_list(durations)
        # get the viewer data
        average_viewers_list = [int(field[3]) for field in data]
        total_average_viewers = calculate_average_from_list(average_viewers_list)
        peak_viewers_list = [int(field[4]) for field in data]
        try:
            highest_peak_viewers = max(peak_viewers_list)
        except ValueError:
            # max() on an empty list: no streams recorded yet
            highest_peak_viewers = 0
        # get the follower data from the latest stream table and not the overview data
        data = self.db.get_all_rows('stream_{}'.format(self.last_stream_stored - 1))
        last_follower_count = data[-1][3]
        # get last partnership data from the latest stream table too
        partnered = data[-1][4]
        # Overview table schema:
        # ID, CURRENT TIMESTAMP, AVERAGE VIEWERS, PEAK VIEWERS, FOLLOWERS, AVERAGE STREAM DURATION,
        # TOTAL STREAM DURATION, PARTNERSHIP
        self.db.insert_row(
            table='overview',
            row_string='(NULL, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?)',
            row_data=[total_average_viewers, highest_peak_viewers, last_follower_count, total_average_duration, total_duration, partnered])

    def return_last_overview(self):
        # Return the most recent overview row, or [] if none exist yet.
        overviews = self.db.get_all_rows(table='overview')
        if len(overviews) == 0:
            return []
        return overviews[-1]

    def return_stream_count(self):
        # Number of stream tables after any imports performed by this instance.
        return self.next_stream_count