Beispiel #1
0
def main():
    """Parse per-frame SSIM scores from frame-stats.dat files and write
    CDF data files (overall SSIM CDF, inverse-complement CDF, per-trial
    minimum SSIM CDF) named after the stats directory.

    Raises:
        ValueError: if the required directory argument is missing.
        Exception: if no SSIM values could be parsed at all.
    """
    # Bug fix: `is not 2` compared int identity, not value; use `!=`.
    if len(sys.argv) != 2:
        raise ValueError("Usage: python plot-ssim-stats.py frame_stats_directory")
    frame_stats_directory = sys.argv[1]
    _, dataset_title  = os.path.split(os.path.abspath(frame_stats_directory))

    per_trial_min_ssim_scores = []
    all_ssim_scores = []

    for f, _ in directory_traversal_helper.get_files_matching_regex(frame_stats_directory, "frame-stats.dat"):
        print("parsing " + f)
        with open(f) as frame_stats_file:
            trial_ssim_scores = []
            for line in frame_stats_file:
                if re.search("first chunk request logged on server at ", line):
                    continue #ignore first line
                string_match = re.search(r"ssim score ([0-9\.]+)", line)
                if string_match is None:
                    print("Failed to parse ssim from line: " + line)
                    # Bug fix: previously fell through and crashed on
                    # string_match.group(1); skip unparseable lines instead.
                    continue
                ssim_of_frame = float(string_match.group(1))
                trial_ssim_scores.append(ssim_of_frame)

            # Guard: min() on an empty list raises ValueError; a file with
            # no parseable lines contributes nothing to the per-trial stats.
            if trial_ssim_scores:
                per_trial_min_ssim_scores.append(min(trial_ssim_scores))
                all_ssim_scores += trial_ssim_scores
    if not all_ssim_scores:
        raise Exception("Couldn't parse any ssim values from " + frame_stats_directory)


    filename = dataset_title + "-ssim-cdf.dat"
    (xvals, yvals) = plotting_helper.downsample_cumulative_y(plotting_helper.get_cdf(all_ssim_scores))
    plotting_helper.write_points_to_file(xvals, yvals, dataset_title, filename)

    (xvals, yvals) = plotting_helper.downsample_cumulative_y(plotting_helper.get_cdf(get_inverse_complement(all_ssim_scores)))
    filename = dataset_title + "-inverse-complement-ssim-cdf.dat"
    plotting_helper.write_points_to_file(xvals, yvals, dataset_title, filename)

    (xvals, yvals) = plotting_helper.get_cdf( per_trial_min_ssim_scores )
    filename = dataset_title + "-min-ssim-cdf.dat"
    plotting_helper.write_points_to_file(xvals, yvals, dataset_title, filename)
Beispiel #2
0
def main():
    """Parse frame display timestamps from frame-stats.dat files and write
    inter-frame delay, resume delay, rebuffering ratio, and proportional
    playback data files named after the stats directory.

    Raises:
        ValueError: if the required directory argument is missing, if a
            first line cannot be parsed, or if no files were found.
    """
    # Bug fix: `is not 2` compared int identity, not value; use `!=`.
    if len(sys.argv) != 2:
        raise ValueError("Usage: python plot-inter-frame-delay.py frame_stats_directory")
    frame_stats_directory = sys.argv[1]
    _, dataset_title  = os.path.split(os.path.abspath(frame_stats_directory))

    inter_frame_delays_list = []
    resume_delays_list = []
    rebuffering_ratios = []
    num_files_parsed = 0
    for f, _ in directory_traversal_helper.get_files_matching_regex(frame_stats_directory, "frame-stats.dat"):
        print("parsing " + f)
        num_files_parsed += 1
        with open(f) as frame_stats_file:
            total_playback_time = 0
            rebuffering_time = 0
            first_line = True
            for line in frame_stats_file:
                if first_line:
                    string_match = re.search(r"first chunk request logged on server at ([0-9]+\.[0-9]+)", line)
                    if string_match is None:
                        # Bug fix: previously printed and then crashed on
                        # string_match.group(1); raise with context instead.
                        raise ValueError("Failed to parse chunk request time from first line: " + line)
                    previous_system_time = float(string_match.group(1))
                    previous_frame_shown = -100
                    first_line = False
                    continue

                string_match = re.search(r"displayed at system time ([0-9\.]+) ", line)
                if string_match is None:
                    print("Failed to parse system time from: " + line)
                    # Bug fix: skip unparseable lines instead of crashing.
                    continue
                system_time = float(string_match.group(1))

                string_match = re.search(r"which is frame ([0-9]+)", line)

                if string_match is None:
                    print("Failed to parse frame number from: " + line)
                    continue
                frame_shown = int(string_match.group(1))

                time_since_last_frame = system_time - previous_system_time
                assert(time_since_last_frame > -1)

                # make sure time is non-decreasing (this shows up a couple times probably because of NTP,
                # also for first frame display time with margin of error from first chunk request)
                if time_since_last_frame < 0:
                    time_since_last_frame = 0

                if (frame_shown - previous_frame_shown) > 24: # consider a seek if move forward >1s in video, this also includes first frame played
                    resume_delays_list.append(time_since_last_frame)
                else:
                    inter_frame_delays_list.append(time_since_last_frame)
                    total_playback_time += time_since_last_frame
                    if time_since_last_frame > .1:
                        rebuffering_time += time_since_last_frame


                previous_system_time = system_time
                previous_frame_shown = frame_shown

            # Guard: a file whose frames were all seeks (or empty) has zero
            # playback time; avoid ZeroDivisionError.
            if total_playback_time > 0:
                rebuffering_ratios.append(rebuffering_time / total_playback_time)

    # Bug fix: `is 0` compared int identity, not value; use `==`.
    if num_files_parsed == 0:
        raise ValueError("Found no frame-stats.dat files to parse")
    else:
        print("Finished parsing " + str(num_files_parsed) + " files")

    # Proportional-playback curve: fraction of total playback time spent in
    # inter-frame gaps longer than each observed delay value.
    total_playback_time = sum( inter_frame_delays_list )
    xvals = np.sort( inter_frame_delays_list )
    yvals = []
    subtotal_playback_time = total_playback_time
    for ifd in xvals:
        subtotal_playback_time -= ifd
        yvals.append(subtotal_playback_time / total_playback_time)

    (xvals, yvals) = plotting_helper.downsample_cumulative_y((xvals, yvals))
    filename = dataset_title + "-proportional-playback.dat"
    plotting_helper.write_points_to_file(xvals, yvals, dataset_title, filename)

    (xvals, yvals) = plotting_helper.downsample_cumulative_y(plotting_helper.get_cdf(inter_frame_delays_list))
    yvals = 1-np.array(yvals) # CCDF
    filename = dataset_title + "-inter-frame-delays-ccdf.dat"
    plotting_helper.write_points_to_file(xvals, yvals, dataset_title, filename)

    (xvals, yvals) = plotting_helper.get_cdf( resume_delays_list )
    filename = dataset_title + "-resume-delays-cdf.dat"
    plotting_helper.write_points_to_file(xvals, yvals, dataset_title, filename)

    (xvals, yvals) = plotting_helper.get_cdf( rebuffering_ratios )
    filename = dataset_title + "-rebuffering-ratios-cdf.dat"
    plotting_helper.write_points_to_file(xvals, yvals, dataset_title, filename)
Beispiel #3
0
def main():
    """Build per-resolution SSIM/byte-offset lookup maps from SSIM index
    files, pair up youtube and stall logs by trial id, and parse each
    trial's logs in parallel into the output directory.

    Raises:
        ValueError: if the three required arguments are missing.
        Exception: if any trial is missing one of its two log files.
    """
    # Bug fix: `is not 4` compared int identity, not value; use `!=`.
    if len(sys.argv) != 4:
        raise ValueError("Usage: python parse-data.py SSIM_index_directory youtube_logs_directory output_directory")
    SSIM_index_directory = sys.argv[1]
    youtube_logs_directory = sys.argv[2]

    # First parse data from ssim indices into map used by playback log parser
    video_stats_lookup_maps = dict()
    for filename, resolution in directory_traversal_helper.get_files_matching_regex(SSIM_index_directory, "[0-9]+x([0-9]+)"):
        with open(filename) as SSIM_index_file:
            for line in SSIM_index_file:
                # groups: frame index, SSIM score, byte offset
                match_object = re.search("([0-9]+) ([0-9]+.[0-9]+) [A-Z] [0-9]+ ([0-9]+)", line)
                if match_object:
                    displayed_frame_index = int(match_object.group(1))
                    SSIM_score = float(match_object.group(2))
                    byte_offset = int(match_object.group(3))

                    if resolution not in video_stats_lookup_maps:
                        video_stats_lookup_maps[resolution] = ( defaultdict(int), defaultdict(float) )

                    (byte_offset_to_frame_index, frame_index_to_ssim) = video_stats_lookup_maps[resolution]
                    byte_offset_to_frame_index[byte_offset] = displayed_frame_index
                    frame_index_to_ssim[displayed_frame_index] = SSIM_score

    output_directory = sys.argv[3]
    if os.path.exists(output_directory):
        print("Removing existing contents of " + output_directory)
        shutil.rmtree(output_directory)
    os.mkdir(output_directory)

    trial_id_to_youtube_logs = dict()
    trial_id_to_stall_logs = dict()

    for f, trial_id in directory_traversal_helper.get_files_matching_regex(youtube_logs_directory, "stall-log-(.+).txt"):
        assert(trial_id not in trial_id_to_stall_logs)
        trial_id_to_stall_logs[trial_id] = f

    for f, trial_id in directory_traversal_helper.get_files_matching_regex(youtube_logs_directory, "^log-(.+).txt"):
        assert(trial_id not in trial_id_to_youtube_logs)
        trial_id_to_youtube_logs[trial_id] = f

    # check for missing log files, every stall log should have corresponding youtube log and vice versa
    missing_logs = False
    for trial_id in trial_id_to_stall_logs.keys():
        if trial_id not in trial_id_to_youtube_logs:
            print("Missing youtube log for " + trial_id)
            missing_logs = True

    for trial_id in trial_id_to_youtube_logs.keys():
        if trial_id not in trial_id_to_stall_logs:
            print("Missing stall log for " + trial_id)
            missing_logs = True

    if missing_logs:
        raise Exception("Logs missing")

    args_list = []
    # Bug fix: dict.iteritems() is Python 2 only and raises AttributeError
    # on Python 3 (which the print() calls imply); use .items().
    for trial_id, youtube_log in trial_id_to_youtube_logs.items():
        if trial_id not in trial_id_to_stall_logs:
            raise Exception("Missing stall log file for trial " + trial_id)

        # TODO maybe don't delete existing directory here
        os.mkdir(output_directory + "/" + trial_id)

        args_list.append((trial_id, youtube_log, trial_id_to_stall_logs[trial_id], video_stats_lookup_maps, output_directory))

    # Bug fix: `is 0` compared int identity, not value; use `==`.
    if len(args_list) == 0:
        print("No log files found in " + youtube_logs_directory )
    else:
        print("Processing " + str(2*len(args_list)) + " log files...")

    # Use process pool to parallelize calling get_inter_frame_delay
    Pool(processes=multiprocessing.cpu_count()).map(parse_logs_for_trial, args_list)