import json
import re

import numpy as np

import taskset_ids as ts


def parseProcessLog(processLog, output_path, start_date='', end_date='', generic=False):

    drink_metric_sets_pattern = r'''
        ProcessMetrics\ -\ Start:.*?
        ProcessID:\ (?P<process_id>[0-9]*)            # ProcessID
        .*?DrinkID:\ (?P<drink_id>[0-9]*)             # drinkID
        .*?Tasksets:\ (?P<tasksets_raw>.*?)           # tasksets
        Queued:\ (?P<queued_time>[0-9.]*)             # queued time
        .*?Processing:\ (?P<processing_time>[0-9.]*)  # processing time
        .*?ProcessMetrics\ -\ End:
        '''

    taskset_pattern = r'''
        For\ taskset\ ID:\ (?P<taskset_id>[0-9]*)  # tasksetID
        .*?Transport:\ (?P<trans_time>[0-9.]*),    # transport time
        \ Resource:\ (?P<res_time>[0-9.]*)         # Resource time
        .*?activity:\ (?P<activity_time>[0-9.]*),  # activity time
        \ slack:\ (?P<slack_time>[0-9.]*)          # slack time
        '''

    PID_Row_pattern = r'''
        :\ PID:(?P<process_id>[0-9]*)           # ProcessID
        .*?,\ Row:(?P<row_id>[0-9])             # row
        '''
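
    # For reference, a hypothetical log fragment of the shape the patterns
    # above are written against (all field values below are invented for
    # illustration):
    #
    #   ... ProcessMetrics - Start: ... ProcessID: 7 ... DrinkID: 42 ...
    #   Tasksets: For taskset ID: 3 ... Transport: 1.25, Resource: 4.50
    #   ... activity: 3.75, slack: 0.75 ... Queued: 2.10 ... Processing: 8.30
    #   ... ProcessMetrics - End: ...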

    #
    # Use start_date and (possibly) end_date to limit the amount
    # of the log that we parse.  Read the log once and locate the
    # date strings in the buffer instead of re-reading the file
    # for each search.
    #
    with open(processLog, 'r') as f:
        data = f.read()

    start_offset = 0
    if start_date:
        match = re.search(start_date, data)
        if match:
            start_offset = match.span()[0]
            print "Start date found...parsing from byte offset %d" % (start_offset)
        else:
            print "!! Start date NOT found...reading from beginning of log"

    end_offset = 0
    if end_date:
        match = re.search(end_date, data)
        if match:
            end_offset = match.span()[0]
            if end_offset > start_offset:
                print "End date found...truncating at byte offset %d" % (end_offset)
            else:
                print "End date found, but it's before the start date.  No truncating..."
                end_offset = 0
        else:
            print "!! End date NOT found...reading to end of log"

    if end_offset > start_offset:
        buf = data[start_offset:end_offset]
    else:
        buf = data[start_offset:]
    print "start_offset = %d, end_offset = %d, length of buf: %d" % (start_offset, end_offset, len(buf))

    drink_metric_sets = [x.groupdict() for x in re.finditer(drink_metric_sets_pattern, buf, re.DOTALL|re.X)]
    # PID_Row_pattern is defined above but not currently used:
    #PID_Row_sets      = [x.groupdict() for x in re.finditer(PID_Row_pattern, buf, re.X)]

    drink_stats_list = []
    tasksetGroup_stats = {}
    taskset_stats = {}

    if generic:
        print "Treating esp and froth resources as generic locations"

    for idx, drink_metric_set in enumerate(drink_metric_sets):
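        # Parse the per-taskset metrics embedded in this drink's raw
        # "Tasksets:" capture with a second regex pass.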
        drink_metric_set['taskset_metrics'] = [x.groupdict() for x in re.finditer(taskset_pattern, drink_metric_set['tasksets_raw'], re.DOTALL|re.X)]

        drink_stats = {}
        drink_stats['taskset_ids'] = []
        drink_stats['processing_time'] = drink_metric_set['processing_time']
        drink_stats['queued_time']     = drink_metric_set['queued_time']
        drink_stats['drink_id']        = drink_metric_set['drink_id']

        tasksetGroup = []

        for taskset_metric in drink_metric_set['taskset_metrics']:

            taskset_id = taskset_metric['taskset_id']
            tasksetGroup.append(int(taskset_id))

            ts_id_dict = {taskset_id: ts.stringifyTasksetID(int(taskset_id), False)}
            drink_stats['taskset_ids'].append(ts_id_dict)

            if taskset_id not in taskset_stats:
                taskset_stats[taskset_id] = {}
                for metric in ('activity_time', 'res_time', 'slack_time', 'trans_time'):
                    taskset_stats[taskset_id][metric] = {'raw': []}
            for metric in ('activity_time', 'res_time', 'slack_time', 'trans_time'):
                taskset_stats[taskset_id][metric]['raw'].append(float(taskset_metric[metric]))

        drink_stats_list.append(drink_stats)

        tasksetGroupID = str(ts.encodeTasksetGroup(tasksetGroup, generic=generic))
        if tasksetGroupID not in tasksetGroup_stats:
            tasksetGroup_stats[tasksetGroupID] = {}
            tasksetGroup_stats[tasksetGroupID]['tasksetGroup'] = tasksetGroup
            tasksetGroup_stats[tasksetGroupID]['info'] = ts.stringifyTasksetGroup(tasksetGroup, generic=generic)
            tasksetGroup_stats[tasksetGroupID]['processing_time'] = {}
            tasksetGroup_stats[tasksetGroupID]['processing_time']['raw'] = []
            tasksetGroup_stats[tasksetGroupID]['queued_time'] = {}
            tasksetGroup_stats[tasksetGroupID]['queued_time']['raw'] = []
        tasksetGroup_stats[tasksetGroupID]['processing_time']['raw'].append(float(drink_stats['processing_time']))
        tasksetGroup_stats[tasksetGroupID]['queued_time']['raw'].append(float(drink_stats['queued_time']))

    print "Parsing complete.  Found %d drink metrics" % (len(drink_metric_sets))
    print "Now doing statistics on raw taskset stats..."

    for taskset_id in taskset_stats.keys():
        taskset_stats[taskset_id]['info'] = ts.stringifyTasksetID(int(taskset_id), False)

        # Reduce each raw sample list to summary statistics, then drop the
        # raw samples to keep the JSON output compact.
        for metric in ('activity_time', 'res_time', 'slack_time', 'trans_time'):
            samples = np.array(taskset_stats[taskset_id][metric]['raw'])
            taskset_stats[taskset_id][metric]['max']  = samples.max()
            taskset_stats[taskset_id][metric]['min']  = samples.min()
            taskset_stats[taskset_id][metric]['mean'] = samples.mean()
            taskset_stats[taskset_id][metric]['std']  = samples.std()
            taskset_stats[taskset_id][metric]['n']    = samples.size
            del taskset_stats[taskset_id][metric]['raw']

    short_list = []
    plot_data_list = []
    for tasksetGroupID in tasksetGroup_stats.keys():
        for metric in ('processing_time', 'queued_time'):
            samples = np.array(tasksetGroup_stats[tasksetGroupID][metric]['raw'])
            tasksetGroup_stats[tasksetGroupID][metric]['max']  = samples.max()
            tasksetGroup_stats[tasksetGroupID][metric]['min']  = samples.min()
            tasksetGroup_stats[tasksetGroupID][metric]['mean'] = samples.mean()
            tasksetGroup_stats[tasksetGroupID][metric]['std']  = samples.std()
            tasksetGroup_stats[tasksetGroupID][metric]['n']    = samples.size
            del tasksetGroup_stats[tasksetGroupID][metric]['raw']

        processing_time = tasksetGroup_stats[tasksetGroupID]['processing_time']
        short_list.append("%s, num: %d, mean_time: %d (%s:[%s])" %
            (tasksetGroup_stats[tasksetGroupID]['info'], processing_time['n'], processing_time['mean'],
            tasksetGroupID, ', '.join(str(e) for e in tasksetGroup_stats[tasksetGroupID]['tasksetGroup'])))

        plot_data = {}
        plot_data['TasksetGroupID'] = tasksetGroupID
        plot_data['info'] = tasksetGroup_stats[tasksetGroupID]['info']
        plot_data['queued_time_mean'] = tasksetGroup_stats[tasksetGroupID]['queued_time']['mean']
        plot_data['processing_time_mean'] = tasksetGroup_stats[tasksetGroupID]['processing_time']['mean']
        plot_data['n1'] = tasksetGroup_stats[tasksetGroupID]['processing_time']['n']

        activity_time_mean, res_time_mean, slack_time_mean, trans_time_mean = 0.0, 0.0, 0.0, 0.0

        for tasksetID in tasksetGroup_stats[tasksetGroupID]['tasksetGroup']:
            taskset_stat = taskset_stats[str(tasksetID)]
            activity_time_mean += taskset_stat['activity_time']['mean']
            res_time_mean      += taskset_stat['res_time']['mean']
            slack_time_mean    += taskset_stat['slack_time']['mean']
            trans_time_mean    += taskset_stat['trans_time']['mean']
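        # Note: taskset_stat is whatever the loop above last assigned, so
        # 'n2' is the sample count of the final taskset in the group.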
        plot_data['n2'] = taskset_stat['trans_time']['n']

        plot_data['processing_time_calc'] = res_time_mean + trans_time_mean
        plot_data['activity_time_mean'] = activity_time_mean
        plot_data['res_time_mean'] = res_time_mean
        plot_data['slack_time_mean'] = slack_time_mean
        plot_data['trans_time_mean'] = trans_time_mean
        plot_data['num_tasksets'] = len(tasksetGroup_stats[tasksetGroupID]['tasksetGroup'])

        # Break up the info string to get extra data for plotting...hack!
        # Expected format, e.g.:
        #   Denest->Esp->WIP, urn/h20: N, ice: N, lid: Y, NFADDs: 0, RFADDs: 1
        #
        info_list = tasksetGroup_stats[tasksetGroupID]['info'].split(',')
        plot_data['locs']    = info_list[0]
        plot_data['urn_h20'] = (1 if info_list[1].split(':')[1].strip() == 'Y' else 0)
        plot_data['ice']     = (1 if info_list[2].split(':')[1].strip() == 'Y' else 0)
        plot_data['lid']     = (1 if info_list[3].split(':')[1].strip() == 'Y' else 0)
        plot_data['NFADDs']  = int(info_list[4].split(':')[1].strip())
        plot_data['RFADDs']  = int(info_list[5].split(':')[1].strip())

        plot_data_list.append(plot_data)

    print "Calculated stats on %d taskset IDs" % (len(taskset_stats.keys()))

    # Dump each result set to its own JSON file under output_path.
    short_list.sort()
    output_files = (
        ('taskset_ids.json', taskset_stats),
        ('drink_stats.json', drink_stats_list),
        ('tasksetGroup_stats.json', tasksetGroup_stats),
        ('short_list.json', short_list),
        ('plot_data.json', plot_data_list),
    )
    for filename, data in output_files:
        with open('%s/%s' % (output_path, filename), 'w') as fout:
            json.dump(data, fout, sort_keys=True, indent=4, separators=(',', ': '))
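
# A minimal usage sketch (the log path, output directory, and date strings
# below are hypothetical; the date arguments are plain regex fragments
# searched for in the log text):
#
#   parseProcessLog('process.log', './stats',
#                   start_date='2016-01-01', end_date='2016-01-02')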
Example #2
#!/usr/bin/env python

import argparse
import sys

import taskset_ids as ts

if __name__ == "__main__":
    '''
    print_taskset_id.py
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument("taskset_id")
    parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
                        default=False)

    args = parser.parse_args()

    if args.taskset_id:
        print ts.stringifyTasksetID(int(args.taskset_id), args.verbose)
    else:
        parser.print_help()
        sys.exit(1)
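
# Example invocation (the taskset ID below is made up for illustration):
#
#   $ python print_taskset_id.py 42 --verbose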