def main():
    """Parse SSIM scores from frame-stats.dat files and write CDF plot data.

    Expects sys.argv[1] to name a directory of frame-stats.dat files.  Writes
    three .dat files named after the directory: the SSIM CDF, the
    inverse-complement SSIM CDF, and the per-trial minimum-SSIM CDF.

    Raises:
        ValueError: wrong number of command-line arguments.
        Exception: no SSIM values could be parsed from the directory.
    """
    # NOTE: original used `is not 2`, which compares identity, not value.
    if len(sys.argv) != 2:
        raise ValueError("Usage: python plot-ssim-stats.py frame_stats_directory")
    frame_stats_directory = sys.argv[1]
    # Use the directory's basename as the dataset title for output filenames.
    _, dataset_title = os.path.split(os.path.abspath(frame_stats_directory))

    per_trial_min_ssim_scores = []
    all_ssim_scores = []
    for f, _ in directory_traversal_helper.get_files_matching_regex(frame_stats_directory, "frame-stats.dat"):
        print("parsing " + f)
        with open(f) as frame_stats_file:
            trial_ssim_scores = []
            for line in frame_stats_file:
                if re.search("first chunk request logged on server at ", line):
                    continue  # ignore first line
                string_match = re.search(r"ssim score ([0-9\.]+)", line)
                if string_match is None:
                    # Skip unparseable lines; original fell through and
                    # crashed on string_match.group(1) (AttributeError).
                    print("Failed to parse ssim from line: " + line)
                    continue
                trial_ssim_scores.append(float(string_match.group(1)))
            # Guard: min() raises ValueError on an empty sequence, so skip
            # trials where no SSIM line parsed.
            if trial_ssim_scores:
                per_trial_min_ssim_scores.append(min(trial_ssim_scores))
                all_ssim_scores += trial_ssim_scores

    if not all_ssim_scores:
        raise Exception("Couldn't parse any ssim values from " + frame_stats_directory)

    filename = dataset_title + "-ssim-cdf.dat"
    (xvals, yvals) = plotting_helper.downsample_cumulative_y(plotting_helper.get_cdf(all_ssim_scores))
    plotting_helper.write_points_to_file(xvals, yvals, dataset_title, filename)

    (xvals, yvals) = plotting_helper.downsample_cumulative_y(
        plotting_helper.get_cdf(get_inverse_complement(all_ssim_scores)))
    filename = dataset_title + "-inverse-complement-ssim-cdf.dat"
    plotting_helper.write_points_to_file(xvals, yvals, dataset_title, filename)

    (xvals, yvals) = plotting_helper.get_cdf(per_trial_min_ssim_scores)
    filename = dataset_title + "-min-ssim-cdf.dat"
    plotting_helper.write_points_to_file(xvals, yvals, dataset_title, filename)
def main():
    """Parse frame display timings from frame-stats.dat files and write plot data.

    Expects sys.argv[1] to name a directory of frame-stats.dat files.  Writes
    four .dat files named after the directory: proportional playback,
    inter-frame-delay CCDF, resume-delay CDF, and rebuffering-ratio CDF.

    Raises:
        ValueError: wrong argument count, or no frame-stats.dat files found.
    """
    # NOTE: original used `is not 2`, which compares identity, not value.
    if len(sys.argv) != 2:
        raise ValueError("Usage: python plot-inter-frame-delay.py frame_stats_directory")
    frame_stats_directory = sys.argv[1]
    # Use the directory's basename as the dataset title for output filenames.
    _, dataset_title = os.path.split(os.path.abspath(frame_stats_directory))

    inter_frame_delays_list = []
    resume_delays_list = []
    rebuffering_ratios = []
    num_files_parsed = 0
    for f, _ in directory_traversal_helper.get_files_matching_regex(frame_stats_directory, "frame-stats.dat"):
        print("parsing " + f)
        num_files_parsed += 1
        with open(f) as frame_stats_file:
            total_playback_time = 0
            rebuffering_time = 0
            first_line = True
            for line in frame_stats_file:
                if first_line:
                    string_match = re.search(r"first chunk request logged on server at ([0-9]+\.[0-9]+)", line)
                    if string_match is None:
                        # Skip malformed lines; original fell through and
                        # crashed on string_match.group(1) (AttributeError).
                        print("Failed to parse chunk request time from first line: " + line)
                        continue
                    previous_system_time = float(string_match.group(1))
                    # Sentinel so the first displayed frame is classified as a
                    # seek/resume (frame gap > 24) rather than normal playback.
                    previous_frame_shown = -100
                    first_line = False
                    continue

                string_match = re.search(r"displayed at system time ([0-9\.]+) ", line)
                if string_match is None:
                    print("Failed to parse system time from: " + line)
                    continue  # skip unparseable line instead of crashing
                system_time = float(string_match.group(1))

                string_match = re.search(r"which is frame ([0-9]+)", line)
                if string_match is None:
                    print("Failed to parse frame number from: " + line)
                    continue  # skip unparseable line instead of crashing
                frame_shown = int(string_match.group(1))

                time_since_last_frame = system_time - previous_system_time
                assert(time_since_last_frame > -1)
                # make sure time is non-decreasing (this shows up a couple
                # times probably because of NTP, also for first frame display
                # time with margin of error from first chunk request)
                if time_since_last_frame < 0:
                    time_since_last_frame = 0

                if (frame_shown - previous_frame_shown) > 24:
                    # consider a seek if move forward >1s in video, this also
                    # includes first frame played
                    resume_delays_list.append(time_since_last_frame)
                else:
                    inter_frame_delays_list.append(time_since_last_frame)
                    total_playback_time += time_since_last_frame
                    # Treat any inter-frame gap over 100 ms as a stall.
                    if time_since_last_frame > .1:
                        rebuffering_time += time_since_last_frame

                previous_system_time = system_time
                previous_frame_shown = frame_shown

        # Guard: a trial with no continuous playback would divide by zero.
        if total_playback_time > 0:
            rebuffering_ratios.append(rebuffering_time / total_playback_time)

    # NOTE: original used `is 0`, which compares identity, not value.
    if num_files_parsed == 0:
        raise ValueError("Found no frame-stats.dat files to parse")
    else:
        print("Finished parsing " + str(num_files_parsed) + " files")

    # Proportional playback: for delay x, the fraction of total playback time
    # spent in frames whose inter-frame delay exceeds x.
    total_playback_time = sum(inter_frame_delays_list)
    xvals = np.sort(inter_frame_delays_list)
    yvals = []
    subtotal_playback_time = total_playback_time
    for ifd in xvals:
        subtotal_playback_time -= ifd
        yvals.append(subtotal_playback_time / total_playback_time)
    (xvals, yvals) = plotting_helper.downsample_cumulative_y((xvals, yvals))
    filename = dataset_title + "-proportional-playback.dat"
    plotting_helper.write_points_to_file(xvals, yvals, dataset_title, filename)

    (xvals, yvals) = plotting_helper.downsample_cumulative_y(plotting_helper.get_cdf(inter_frame_delays_list))
    yvals = 1 - np.array(yvals)  # CCDF
    filename = dataset_title + "-inter-frame-delays-ccdf.dat"
    plotting_helper.write_points_to_file(xvals, yvals, dataset_title, filename)

    (xvals, yvals) = plotting_helper.get_cdf(resume_delays_list)
    filename = dataset_title + "-resume-delays-cdf.dat"
    plotting_helper.write_points_to_file(xvals, yvals, dataset_title, filename)

    (xvals, yvals) = plotting_helper.get_cdf(rebuffering_ratios)
    filename = dataset_title + "-rebuffering-ratios-cdf.dat"
    plotting_helper.write_points_to_file(xvals, yvals, dataset_title, filename)
def main():
    """Parse SSIM index files and YouTube playback/stall logs into per-trial stats.

    Usage: parse-data.py SSIM_index_directory youtube_logs_directory output_directory

    Builds per-resolution lookup maps (byte offset -> frame index, frame index
    -> SSIM score) from the SSIM index files, pairs each youtube log with its
    stall log by trial id, then fans parse_logs_for_trial out over a process
    pool.  The output directory is deleted and recreated if it already exists.

    Raises:
        ValueError: wrong number of command-line arguments.
        Exception: a trial is missing one of its two log files.
    """
    # NOTE: original used `is not 4`, which compares identity, not value.
    if len(sys.argv) != 4:
        raise ValueError("Usage: python parse-data.py SSIM_index_directory youtube_logs_directory output_directory")
    SSIM_index_directory = sys.argv[1]
    youtube_logs_directory = sys.argv[2]

    # First parse data from ssim indices into map used by playback log parser.
    # resolution -> (byte_offset_to_frame_index, frame_index_to_ssim)
    video_stats_lookup_maps = dict()
    for filename, resolution in directory_traversal_helper.get_files_matching_regex(SSIM_index_directory, "[0-9]+x([0-9]+)"):
        with open(filename) as SSIM_index_file:
            for line in SSIM_index_file:
                # Fields: frame_index ssim_score frame_type size byte_offset.
                # The decimal point is now escaped; the original unescaped `.`
                # matched any character between the digit runs.
                match_object = re.search(r"([0-9]+) ([0-9]+\.[0-9]+) [A-Z] [0-9]+ ([0-9]+)", line)
                if match_object:
                    displayed_frame_index = int(match_object.group(1))
                    SSIM_score = float(match_object.group(2))
                    byte_offset = int(match_object.group(3))
                    if resolution not in video_stats_lookup_maps:
                        video_stats_lookup_maps[resolution] = (defaultdict(int), defaultdict(float))
                    (byte_offset_to_frame_index, frame_index_to_ssim) = video_stats_lookup_maps[resolution]
                    byte_offset_to_frame_index[byte_offset] = displayed_frame_index
                    frame_index_to_ssim[displayed_frame_index] = SSIM_score

    output_directory = sys.argv[3]
    if os.path.exists(output_directory):
        print("Removing existing contents of " + output_directory)
        shutil.rmtree(output_directory)
    os.mkdir(output_directory)

    # Index both kinds of logs by trial id; duplicates indicate corrupt input.
    trial_id_to_youtube_logs = dict()
    trial_id_to_stall_logs = dict()
    for f, trial_id in directory_traversal_helper.get_files_matching_regex(youtube_logs_directory, "stall-log-(.+).txt"):
        assert(trial_id not in trial_id_to_stall_logs)
        trial_id_to_stall_logs[trial_id] = f
    for f, trial_id in directory_traversal_helper.get_files_matching_regex(youtube_logs_directory, "^log-(.+).txt"):
        assert(trial_id not in trial_id_to_youtube_logs)
        trial_id_to_youtube_logs[trial_id] = f

    # check for missing log files, every stall log should have corresponding
    # youtube log and vice versa
    missing_logs = False
    for trial_id in trial_id_to_stall_logs.keys():
        if trial_id not in trial_id_to_youtube_logs:
            print("Missing youtube log for " + trial_id)
            missing_logs = True
    for trial_id in trial_id_to_youtube_logs.keys():
        if trial_id not in trial_id_to_stall_logs:
            print("Missing stall log for " + trial_id)
            missing_logs = True
    if missing_logs:
        raise Exception("Logs missing")

    args_list = []
    # .items() replaces the Python-2-only .iteritems(); the file otherwise
    # uses Python-3 syntax, so iteritems() would raise AttributeError.
    for trial_id, youtube_log in trial_id_to_youtube_logs.items():
        if trial_id not in trial_id_to_stall_logs:
            raise Exception("Missing stall log file for trial " + trial_id)
        # TODO maybe don't delete existing directory here
        os.mkdir(output_directory + "/" + trial_id)
        args_list.append((trial_id, youtube_log, trial_id_to_stall_logs[trial_id],
                          video_stats_lookup_maps, output_directory))

    # NOTE: original used `is 0`, which compares identity, not value.
    if len(args_list) == 0:
        print("No log files found in " + youtube_logs_directory)
    else:
        print("Processing " + str(2 * len(args_list)) + " log files...")
        # Use process pool to parallelize calling parse_logs_for_trial
        Pool(processes=multiprocessing.cpu_count()).map(parse_logs_for_trial, args_list)