Example #1
 def save_data(self, start_end_times):
     """
     Calculates the actual request rate and prints it as the first line. Saves
     the CDFs of latency and bandwidth between 1-2 minutes to disk. Assumes
     that all Redis client hosts report correct times.
     """
     # Save the CDF of the start times. TODO: Debug.
     with open('data/distr_redis_raw_start_time_cdf.txt', 'w') as f:
         start_time_list = [t for (t, _, _) in start_end_times]
         for (t, p) in util.make_cdf_table(start_time_list):
             print >> f, '%.5f' % t, p
             
     # Save the start end times list. TODO: Debug.
     with open('data/distr_redis_raw_start_end_times.txt', 'w') as f:
         for (start_time, end_time, data_file) in start_end_times:
             print >> f, '%.5f' % start_time, end_time, data_file
     
     # Filter out irrelevant time values. Focus on 60th-120th seconds.
     min_time = min([start_time for (start_time, _, _) in start_end_times])
     def is_steady_state(start_end_time_tuple):
         (start_time, _, _) = start_end_time_tuple
         return min_time + 60 <= start_time <= min_time + 120
     filtered_times = filter(is_steady_state, start_end_times)
     filtered_times.sort()
     
     print 'Raw data size:', len(start_end_times),
     print 'Data between 60-120th seconds:', len(filtered_times)
     
     # Figure out the actual gaps in milliseconds. 
     start_time_list = [start for (start, _, _) in filtered_times]
     gap_list = []
     for index in range(0, len(start_time_list) - 1):
         gap = start_time_list[index + 1] - start_time_list[index]
         gap_list.append(gap * 1000.0)
     gap_list.sort()
     print 'Client gap: (mean, stdev) =', util.get_mean_and_stdev(gap_list),
     print 'median =', gap_list[len(gap_list)/2]
     
     # Calculate latency and bandwidth.
     latency_list = []
     bandwidth_list = []
     for (start_time, end_time, _) in filtered_times:
         if end_time is None:
             # Request never completed; record sentinel values.
             latency = -1
             bandwidth = 0
         else:
             latency = end_time - start_time  # seconds
             bandwidth = DATA_LENGTH / latency  # Bytes/s
         latency_list.append(latency * 1000.0)  # milliseconds
         bandwidth_list.append(bandwidth * 8.0 / 1000000.0)  # Mbps
     
     # Write to file.
     with open('data/distr_redis_latency.txt', 'w') as f:
         for (v, p) in util.make_cdf_table(latency_list):
             print >> f, v, p
     with open('data/distr_redis_bw.txt', 'w') as f:
         for (v, p) in util.make_cdf_table(bandwidth_list):
             print >> f, v, p
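
The examples on this page rely on a util module that is not shown here (Example #3 calls make_cdf_table directly, presumably from inside that same module). Based purely on how the functions are used above, a minimal sketch of what util.make_cdf_table and util.get_mean_and_stdev are assumed to do; these implementations are assumptions, not the actual module:

def make_cdf_table(value_list):
    """Returns sorted (value, cumulative_probability) pairs for plotting a CDF."""
    sorted_values = sorted(value_list)
    total = len(sorted_values)
    return [(v, float(index + 1) / total)
            for (index, v) in enumerate(sorted_values)]

def get_mean_and_stdev(value_list):
    """Returns (mean, population standard deviation) of a list of numbers."""
    mean = sum(value_list) / float(len(value_list))
    variance = sum((v - mean) ** 2 for v in value_list) / float(len(value_list))
    return (mean, variance ** 0.5)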
Example #2
def data_analysis():

    start_end_times = []

    for path in os.listdir('.'):
        if path.startswith('simplified-') and path.endswith('.tmp'):
            with open(path) as f:
                start_end_times += pickle.load(f)
            print 'Loaded', path

    # Extract steady state.
    min_time = min([start_time for (start_time, _) in start_end_times])

    def is_steady_state(start_end_time_tuple):
        (start_time, _) = start_end_time_tuple
        return min_time + INTERESTING_TIME_START <= start_time <= min_time + INTERESTING_TIME_END

    filtered_times = filter(is_steady_state, start_end_times)
    filtered_times.sort()

    print 'Raw data size:', len(start_end_times),
    print 'Data at steady state:', len(filtered_times)

    # Figure out the actual gaps in milliseconds.
    start_time_list = [start for (start, _) in filtered_times]
    gap_list = []
    for index in range(0, len(start_time_list) - 1):
        gap = start_time_list[index + 1] - start_time_list[index]
        gap_list.append(gap * 1000.0)
    gap_list.sort()
    print 'Client gap: (mean, stdev) =', util.get_mean_and_stdev(gap_list),
    print 'median =', gap_list[len(gap_list) / 2]

    # Calculate latency and bandwidth.
    latency_list = []
    bandwidth_list = []
    for (start_time, end_time) in filtered_times:
        if end_time is None:
            latency = -1
            bandwidth = 0
        else:
            latency = end_time - start_time  # seconds
            bandwidth = DATA_LENGTH / latency  # Bytes/s
        latency_list.append(latency * 1000.0)  # milliseconds
        bandwidth_list.append(bandwidth * 8.0 / 1000000.0)  # Mbps

    # Write to file.
    with open('data/simplified_redis_latency.csv', 'w') as f:
        for (v, p) in util.make_cdf_table(latency_list):
            print >> f, '%.10f,%.10f' % (v, p)
    with open('data/simplified_redis_bw.csv', 'w') as f:
        for (v, p) in util.make_cdf_table(bandwidth_list):
            print >> f, '%.10f,%.10f' % (v, p)
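
data_analysis() above expects each simplified-*.tmp file to hold a pickled list of (start_time, end_time) tuples, with end_time set to None for requests that never completed. A minimal sketch of how a client process might write such a file; the function name and the host_id naming scheme are assumptions:

import pickle

def save_start_end_times(start_end_times, host_id):
    # start_end_times: list of (start_time, end_time) tuples collected by this client.
    with open('simplified-%s.tmp' % host_id, 'w') as f:
        pickle.dump(start_end_times, f)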
Example #3
def _make_cdf(csv_file, column=0, entry_count=None):
    """Reads one column from a comma-, tab-, or whitespace-separated file and
    returns its CDF table. If entry_count is given, the input is truncated or
    zero-padded to exactly that many entries."""
    inlist = []
    with open(csv_file) as csv_f:
        for line in csv_f:
            line = line.strip()
            if line:
                if ',' in line:
                    v = line.split(',')[column]
                elif '\t' in line:
                    v = line.split('\t')[column]
                else:
                    v = line.split()[column]
                inlist += [float(v)]
    
    if entry_count:
        inlist = inlist[0 : entry_count]
        inlist += [0] * (entry_count - len(inlist))
    
    return make_cdf_table(inlist)
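
For instance, _make_cdf can be pointed at any one-column file written by the analysis scripts; the file name below is only illustrative:

# CDF of request start times, assuming a one-value-per-line file such as
# the data/start_times.csv written by the async data_analysis() below.
for (value, probability) in _make_cdf('data/start_times.csv'):
    print value, probability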
Example #4
def data_analysis():

    start_end_times = []
    
    # Load experiment data file
    try:
        data_file = 'async-' + os.environ['EXP_NAME']
    except KeyError:
        data_file = None
    for path in os.listdir('data'):
        if (path == data_file) or \
            (data_file is None and path.startswith('async-') and path.endswith('.tmp')):
            with open('data/' + path) as f:
                start_end_times += pickle.load(f)
            print 'Loaded', path
            
    # Extract steady state.
    min_time = min([start_time for (start_time, _) in start_end_times])
    def is_steady_state(start_end_time_tuple):
        (start_time, _) = start_end_time_tuple
        return min_time + INTERESTING_TIME_START <= start_time <= min_time + INTERESTING_TIME_END
    filtered_times = filter(is_steady_state, start_end_times)
    filtered_times.sort()    
    
    print 'Raw data size:', len(start_end_times),
    print 'Data at steady state:', len(filtered_times)
    
    # Figure out the actual gaps in milliseconds. 
    start_time_list = [start for (start, _) in filtered_times]
    gap_list = []
    for index in range(0, len(start_time_list) - 1):
        gap = start_time_list[index + 1] - start_time_list[index]
        gap_list.append(gap * 1000.0)
    gap_list.sort()
    print 'Client gap: (mean, stdev) =', util.get_mean_and_stdev(gap_list),
    print 'median =', gap_list[len(gap_list)/2]
    
    # Save start_time list and gap list.
    with open('data/start_times.csv', 'w') as start_time_f:
        for start_time_v in start_time_list:
            print >> start_time_f, '%.8f' % start_time_v
    with open('data/gaps.csv', 'w') as gap_f:
        for (v, p) in util.make_cdf_table(gap_list):
            print >> gap_f, '%f,%f' % (v, p)
    
    # Calculate latency and bandwidth.
    latency_list = []
    bandwidth_list = []
    for (start_time, end_time) in filtered_times:
        if end_time is None:
            # Request never completed; charge a large sentinel latency of 1000 s.
            latency = 1000
            bandwidth = 0
        else:
            latency = end_time - start_time  # seconds
            bandwidth = DATA_LENGTH / latency  # Bytes/s
        latency_list.append(latency * 1000.0)  # milliseconds
        bandwidth_list.append(bandwidth * 8.0 / 1000000.0)  # Mbps
    
    # Write to file.
    print 'Writing to data/async_redis_latency.csv...'
    with open('data/async_redis_latency.csv', 'w') as f:
        for (v, p) in util.make_cdf_table(latency_list):
            print >> f, '%.10f,%.10f' % (v, p)
            
    print 'Writing to data/async_redis_bw.csv...'
    with open('data/async_redis_bw.csv', 'w') as f:
        for (v, p) in util.make_cdf_table(bandwidth_list):
            print >> f, '%.10f,%.10f' % (v, p)
    
    # Analyze timings of OF events.
    subprocess.call('cp of_timings.csv data/; cp /tmp/client.pcap /tmp/server.pcap data/', shell=True)

    import timing_analysis
    timing_analysis.main('data/client.pcap', 'data/of_timings.csv', 'data/server.pcap')
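
The latency and bandwidth CSVs written by these scripts are plain (value, cumulative probability) pairs, so they can be plotted directly. A minimal sketch, assuming matplotlib is available; the plotting step is not part of the original scripts:

import matplotlib.pyplot as plt

def plot_cdf(csv_path, label):
    # Read (value, probability) pairs from one of the CDF CSVs written above.
    values, probabilities = [], []
    with open(csv_path) as f:
        for line in f:
            (v, p) = line.strip().split(',')
            values.append(float(v))
            probabilities.append(float(p))
    plt.plot(values, probabilities, label=label)

plot_cdf('data/async_redis_latency.csv', 'latency (ms)')
plt.xlabel('latency (ms)')
plt.ylabel('CDF')
plt.legend()
plt.savefig('data/async_redis_latency_cdf.png')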