Beispiel #1
0
 def add_file_node(path):
     """Add a graph node for `path` if it is one of the step's known paths.

     An entry that carries a 'real_path' is resolved to that path first.
     The node is stored in hash['nodes'] under misc.str_to_sha1(path).  Its
     fill colour depends on the entry's 'type'; for a 'step_file' entry with
     a 'size', the formatted size is appended to the label.
     """
     known_paths = log['step']['known_paths']
     if path not in known_paths:
         return

     if 'real_path' in known_paths[path]:
         path = known_paths[path]['real_path']
     entry = known_paths[path]
     label = entry['label']
     file_type = entry['type']

     # white is the fallback for any type not listed below
     color = '#ffffff'
     if file_type == 'fifo':
         color = '#c4f099'
     elif file_type == 'file':
         color = '#8ae234'
     elif file_type == 'step_file':
         color = '#97b7c8'
         if 'size' in entry:
             label += "\\n%s" % misc.bytes_to_str(entry['size'])

     node_id = misc.str_to_sha1(path)
     hash['nodes'][node_id] = dict(label=label, fillcolor=color)
Beispiel #2
0
    def add_file_node(path):
        """Add a graph node for `path` if it is one of the step's known paths.

        An entry that carries a 'real_path' is resolved to that path first.
        The label defaults to the path's basename; a 'step_file' entry uses
        its own 'label' instead, extended by the formatted 'size' when one is
        recorded.  The node is stored in pipe_hash['nodes'] under
        misc.str_to_sha1(path).
        """
        known_paths = log['step']['known_paths']
        if path not in known_paths:
            return

        if 'real_path' in known_paths[path]:
            path = known_paths[path]['real_path']
        label = os.path.basename(path)
        entry = known_paths[path]
        file_type = entry['type']

        # white is the fallback for any type not listed below
        color = '#ffffff'
        if file_type in ['fifo', 'directory']:
            color = '#c4f099'
        elif file_type == 'file':
            color = '#8ae234'
        elif file_type == 'step_file':
            color = '#97b7c8'
            label = entry['label']
            if 'size' in entry:
                label += "\\n%s" % misc.bytes_to_str(entry['size'])

        node_id = misc.str_to_sha1(path)
        pipe_hash['nodes'][node_id] = dict(label=label, fillcolor=color)
Beispiel #3
0
 def file_hash(path):
     """Return misc.str_to_sha1() of `path`, or of its 'real_path' if known.

     A path that is not listed in the step's known paths is hashed as given.
     """
     known_paths = log['step']['known_paths']
     entry = known_paths[path] if path in known_paths else {}
     resolved = entry['real_path'] if 'real_path' in entry else path
     return misc.str_to_sha1(resolved)
Beispiel #4
0
 def pid_hash(pid, suffix = ''):
     """Return the id for process `pid` within the current step and run.

     The id is misc.str_to_sha1() of "<step name>/<run id>/<pid>/<suffix>".
     """
     parts = (log['step']['name'], log['run']['run_id'], pid, suffix)
     return misc.str_to_sha1("%s/%s/%d/%s" % parts)
Beispiel #5
0
def render_pipeline_hash(log):
    """Build a graph description (nodes, edges, cluster, label) from a task log.

    Args:
        log: mapping read with these keys:
            log['step']['name'], log['step']['cores'],
            log['step']['known_paths'] (path -> info with 'type', 'label',
            'designation' and optionally 'real_path' / 'size'),
            log['run']['run_id'],
            log['pipeline_log']['processes'] (list of per-process dicts),
            log['pipeline_log']['ok_to_fail'],
            log['pipeline_log']['process_watcher'],
            log['start_time'], log['end_time'] and optionally log['signal'].

    Returns:
        A dict with four entries:
        'nodes' maps a node id (from misc.str_to_sha1) to a dict with
        'label' and 'fillcolor'; 'edges' maps (source id, target id) tuples
        to an empty dict; 'clusters' holds one cluster for the task that
        lists every node which is not a step file; 'graph_labels' maps the
        task name to a summary text.

    Labels contain literal backslash escapes (backslash-n, backslash-l);
    presumably they are consumed by Graphviz -- confirm against the caller.

    A process without 'name', 'hints' or 'args' is tolerated: it is labelled
    "PID <pid>" and simply contributes no file edges.
    """
    known_paths = log['step']['known_paths']
    pipeline_log = log['pipeline_log']

    def pid_hash(pid, suffix=''):
        """Return the node id of process `pid` (or of one of its streams)."""
        hashtag = "%s/%s/%d/%s" % (log['step']['name'],
                                   log['run']['run_id'],
                                   pid, suffix)
        return misc.str_to_sha1(hashtag)

    def file_hash(path):
        """Return the node id of `path`, resolving a recorded 'real_path'."""
        if path in known_paths and 'real_path' in known_paths[path]:
            path = known_paths[path]['real_path']
        return misc.str_to_sha1(path)

    # Called pipe_hash (not `hash`) so the builtin is not shadowed.
    pipe_hash = {
        'nodes': dict(),
        'edges': dict(),
        'clusters': dict(),
        'graph_labels': dict(),
    }

    def add_file_node(path):
        """Add a node for `path` if it is one of the step's known paths."""
        if path not in known_paths:
            return

        if 'real_path' in known_paths[path]:
            path = known_paths[path]['real_path']
        path_info = known_paths[path]
        label = path_info['label']
        color = '#ffffff'
        if path_info['type'] == 'fifo':
            color = '#c4f099'
        elif path_info['type'] == 'file':
            color = '#8ae234'
        elif path_info['type'] == 'step_file':
            color = '#97b7c8'
            if 'size' in path_info:
                label += "\\n%s" % misc.bytes_to_str(path_info['size'])
        pipe_hash['nodes'][misc.str_to_sha1(path)] = {
            'label': label,
            'fillcolor': color
        }

    def describe_outcome(info, pid, label, ok_color):
        """Return (label, color) for a process or stream record `info`.

        A record counts as failed if it has a 'signal', a non-zero
        'exit_code', or no 'exit_code' at all.  Failures are coloured red
        unless `pid` is listed in ok_to_fail, and the reason is appended to
        the label.
        """
        if 'signal' in info:
            failed = True
        elif 'exit_code' in info:
            failed = info['exit_code'] != 0
        else:
            failed = True
        if not failed:
            return label, ok_color

        friendly = pid in pipeline_log['ok_to_fail']
        color = ok_color if friendly else "#d5291a"
        if 'signal' in info:
            if 'signal_name' in info:
                signal_text = info['signal_name']
            else:
                signal_text = 'signal %d' % info['signal']
            label = "%s\\n(received %s%s)" % (
                label, 'friendly ' if friendly else '', signal_text)
        elif 'exit_code' in info:
            label = "%s\\n(failed with exit code %d)" % (
                label, info['exit_code'])
        else:
            label = "%s\\n(no exit code)" % label
        return label, color

    for proc_info in pipeline_log['processes']:
        pid = proc_info['pid']
        # Fall back to the PID when the process name was not recorded.
        if 'name' in proc_info:
            name = proc_info['name']
            label = "%s" % name
        else:
            name = '(unknown)'
            label = "PID %d" % pid

        # 'hints' and its 'reads'/'writes' lists are optional.
        hints = proc_info.get('hints') or {}
        reads = hints.get('reads') or ()
        writes = hints.get('writes') or ()

        for path in writes:
            add_file_node(path)

        if 'args' in proc_info:
            args = proc_info['args']

            # First pass: build the label from shortened arguments.
            stripped_args = []
            for arg in args:
                if arg in known_paths:
                    add_file_node(arg)
                    if known_paths[arg]['type'] != 'step_file':
                        arg = known_paths[arg]['label']
                    else:
                        arg = os.path.basename(arg)
                elif arg[0:4] != '/dev':
                    arg = os.path.basename(arg)
                    # shorten long all-uppercase arguments
                    if (len(arg) > 16) and re.match('^[A-Z]+$', arg):
                        arg = "%s[...]" % arg[:16]
                stripped_args.append(arg.replace('\t', '\\t').replace(
                    '\\', '\\\\'))
            tw = textwrap.TextWrapper(
                width=50,
                break_long_words=False,
                break_on_hyphens=False)
            label = "%s" % ("\\n".join(tw.wrap(' '.join(stripped_args))))

            # Second pass: connect the process to the known paths it names.
            output_fifo = False
            for arg in args:
                if name == 'cat4m' and arg == '-o':
                    output_fifo = True
                elif name == 'dd' and arg.startswith('of='):
                    output_fifo = True
                for known_path in known_paths.keys():
                    if known_path not in arg:
                        continue
                    add_file_node(known_path)
                    # Decide the direction per matched path so one match
                    # cannot leak its direction into the next one.
                    fifo_type = None
                    if name in ['cat4m', 'dd']:
                        fifo_type = 'output' if output_fifo else 'input'
                    else:
                        # we can't know whether the fifo is for input or
                        # output, first look at the hints, then use the
                        # designation (if any was given)
                        if arg in reads or known_path in reads:
                            fifo_type = 'input'
                        if arg in writes or known_path in writes:
                            fifo_type = 'output'
                        if fifo_type is None:
                            fifo_type = known_paths[known_path].get(
                                'designation')
                    # Edges use known_path: `arg` may merely contain the
                    # path (e.g. "of=<path>") and would hash to a node that
                    # does not exist.
                    if fifo_type == 'input':
                        # add edge from file to proc
                        pipe_hash['edges'][
                            (file_hash(known_path), pid_hash(pid))] = dict()
                    elif fifo_type == 'output':
                        # add edge from proc to file
                        pipe_hash['edges'][
                            (pid_hash(pid), file_hash(known_path))] = dict()

        for path in writes:
            pipe_hash['edges'][(pid_hash(pid), file_hash(path))] = dict()

        # add proc
        label, color = describe_outcome(proc_info, pid, label, "#fce94f")

        watcher = pipeline_log['process_watcher']
        if 'max' in watcher and pid in watcher['max']:
            usage = watcher['max'][pid]
            label += "\\n%1.1f%% CPU, %s RAM (%1.1f%%)" % (
                usage['cpu_percent'],
                misc.bytes_to_str(usage['rss']),
                usage['memory_percent'])

        pipe_hash['nodes'][pid_hash(pid)] = {
            'label': label,
            'fillcolor': color
        }

        for which in ['stdout', 'stderr']:
            key = "%s_copy" % which
            if key not in proc_info:
                continue
            stream = proc_info[key]
            if ('exit_code' in stream) and \
               (stream['exit_code'] == 0) and \
               ('length' in stream) and \
               (stream['length'] == 0) and \
               ('sink_full_path' not in stream):
                # skip this stdout/stderr box if it leads to nothing
                continue
            size_label = '(empty)'
            if ('length' in stream) and (stream['length'] > 0):
                elapsed = (stream['end_time'] -
                           stream['start_time']).total_seconds()
                # A stream may start and end within the same timestamp;
                # avoid dividing by zero in that case.
                if elapsed > 0:
                    speed_label = "%s/s" % misc.bytes_to_str(
                        float(stream['length']) / elapsed)
                else:
                    speed_label = 'n/a'
                size_label = "%s / %s lines (%s)" % (
                    misc.bytes_to_str(stream['length']),
                    "{:,}".format(stream['lines']),
                    speed_label)

            stream_label, stream_color = describe_outcome(
                stream, pid, "%s\\n%s" % (which, size_label), "#fdf3a7")

            # add proc_which
            pipe_hash['nodes'][pid_hash(pid, which)] = {
                'label': stream_label,
                'fillcolor': stream_color
            }
            if 'sink_full_path' in stream:
                add_file_node(stream['sink_full_path'])

    # Connect processes to each other and to their stream boxes.
    for proc_info in pipeline_log['processes']:
        pid = proc_info['pid']
        if 'use_stdin_of' in proc_info:
            other_pid = proc_info['use_stdin_of']
            pipe_hash['edges'][
                (pid_hash(other_pid, 'stdout'), pid_hash(pid))] = dict()
        for which in ['stdout', 'stderr']:
            key = "%s_copy" % which
            if key not in proc_info:
                continue
            stream = proc_info[key]
            pipe_hash['edges'][(pid_hash(pid), pid_hash(pid, which))] = dict()
            if 'sink_full_path' in stream:
                pipe_hash['edges'][(
                    pid_hash(pid, which),
                    file_hash(stream['sink_full_path']))] = dict()

    # define nodes which go into subgraph
    step_file_nodes = dict()
    for path, path_info in known_paths.items():
        if path_info['type'] == 'step_file':
            step_file_nodes[file_hash(path)] = path_info['designation']

    task_name = "%s/%s" % (log['step']['name'], log['run']['run_id'])
    cluster_hash = misc.str_to_sha1(task_name)
    pipe_hash['clusters'][cluster_hash] = {
        'task_name': task_name,
        # every node except the step files belongs to the task's cluster
        'group': [node for node in pipe_hash['nodes']
                  if node not in step_file_nodes],
    }

    duration = log['end_time'] - log['start_time']

    text = "Task: %s\\lHost: %s\\lDuration: %s\\l" % (
        task_name, socket.gethostname(),
        misc.duration_to_str(duration, long=True)
    )
    pipe_hash['graph_labels'][task_name] = text
    if 'max' in pipeline_log['process_watcher']:
        total = pipeline_log['process_watcher']['max']['sum']
        text = "CPU: %1.1f%%, %d CORES_Requested , RAM: %s (%1.1f%%)\\l" % (
            total['cpu_percent'],
            log['step']['cores'],
            misc.bytes_to_str(total['rss']),
            total['memory_percent'])
        pipe_hash['graph_labels'][task_name] += text
    if 'signal' in log:
        pipe_hash['graph_labels'][task_name] += "Caught signal: %s\\l" % (
            process_pool.ProcessPool.SIGNAL_NAMES[log['signal']])
    pipe_hash['graph_labels'][task_name] += "\\l"
    return pipe_hash
Beispiel #6
0
def create_hash_from_annotation(log):
    """Build a graph description (nodes, edges, cluster, label) from a task log.

    Args:
        log: mapping read with these keys:
            log['step']['name'], log['step']['cores'],
            log['step']['known_paths'] (path -> info with 'type',
            'designation' and optionally 'label' / 'real_path' / 'size'),
            log['run']['run_id'],
            log['pipeline_log']['processes'] (list of per-process dicts,
            each with at least 'pid' and 'start_time'),
            log['pipeline_log']['ok_to_fail'],
            log['pipeline_log']['process_watcher'],
            log['start_time'], log['end_time'] and optionally log['signal'].

    Returns:
        A dict with four entries:
        'nodes' maps a node id (from misc.str_to_sha1) to a dict with
        'label' and 'fillcolor' (process nodes also carry 'start_time');
        'edges' maps (source id, target id) tuples to an empty dict;
        'clusters' holds one cluster for the task that lists every node
        which is not a step file; 'graph_labels' maps the task name to a
        summary text.

    Labels contain literal backslash escapes (backslash-n, backslash-l);
    presumably they are consumed by Graphviz -- confirm against the caller.

    The optional per-process keys 'name', 'hints' and 'args' are checked
    explicitly instead of being wrapped in a blanket `except KeyError`, so
    genuine lookup errors are no longer swallowed silently.
    """
    known_paths = log['step']['known_paths']
    pipeline_log = log['pipeline_log']

    def pid_hash(pid, suffix=''):
        """Return the node id of process `pid` (or of one of its streams)."""
        hashtag = "%s/%s/%d/%s" % (log['step']['name'],
                                   log['run']['run_id'],
                                   pid, suffix)
        return misc.str_to_sha1(hashtag)

    def file_hash(path):
        """Return the node id of `path`, resolving a recorded 'real_path'."""
        if path in known_paths and 'real_path' in known_paths[path]:
            path = known_paths[path]['real_path']
        return misc.str_to_sha1(path)

    pipe_hash = {
        'nodes': dict(),
        'edges': dict(),
        'clusters': dict(),
        'graph_labels': dict(),
    }

    def add_file_node(path):
        """Add a node for `path` if it is one of the step's known paths."""
        if path not in known_paths:
            return

        if 'real_path' in known_paths[path]:
            path = known_paths[path]['real_path']
        path_info = known_paths[path]
        label = os.path.basename(path)
        color = '#ffffff'
        if path_info['type'] in ['fifo', 'directory']:
            color = '#c4f099'
        elif path_info['type'] == 'file':
            color = '#8ae234'
        elif path_info['type'] == 'step_file':
            color = '#97b7c8'
            label = path_info['label']
            if 'size' in path_info:
                label += "\\n%s" % misc.bytes_to_str(path_info['size'])
        pipe_hash['nodes'][misc.str_to_sha1(path)] = {
            'label': label,
            'fillcolor': color
        }

    def describe_outcome(info, pid, label, ok_color):
        """Return (label, color) for a process or stream record `info`.

        A record counts as failed if it has a 'signal', a non-zero
        'exit_code', or no 'exit_code' at all.  Failures are coloured red
        unless `pid` is listed in ok_to_fail, and the reason is appended to
        the label.
        """
        if 'signal' in info:
            failed = True
        elif 'exit_code' in info:
            failed = info['exit_code'] != 0
        else:
            failed = True
        if not failed:
            return label, ok_color

        friendly = pid in pipeline_log['ok_to_fail']
        color = ok_color if friendly else "#d5291a"
        if 'signal' in info:
            if 'signal_name' in info:
                signal_text = info['signal_name']
            else:
                signal_text = 'signal %d' % info['signal']
            label = "%s\\n(received %s%s)" % (
                label, 'friendly ' if friendly else '', signal_text)
        elif 'exit_code' in info:
            label = "%s\\n(failed with exit code %d)" % (
                label, info['exit_code'])
        else:
            label = "%s\\n(no exit code)" % label
        return label, color

    for proc_info in pipeline_log['processes']:
        pid = proc_info['pid']
        # Set name and label variable
        if 'name' in proc_info:
            name = proc_info['name']
            label = "%s" % name
        else:
            name = '(unknown)'
            label = "PID %d" % pid

        # 'hints' and its 'reads'/'writes' lists are optional.
        hints = proc_info.get('hints') or {}
        reads = hints.get('reads') or ()
        writes = hints.get('writes') or ()

        # Add file nodes for every file in hints
        for path in writes:
            add_file_node(path)
            pipe_hash['edges'][(pid_hash(pid), file_hash(path))] = dict()

        if 'args' in proc_info:
            # Add all the info for each process to pipe_hash
            args = proc_info['args']
            last_index = len(args) - 1
            stripped_args = []
            is_output_file = False
            for arg_index, raw_arg in enumerate(args):
                # `arg` is the display form and is shortened below;
                # `raw_arg` keeps the argument as it was passed.
                arg = raw_arg
                # This is where we decide whether a file is used for input
                # or for output.
                if name == 'cat':
                    is_output_file = False
                elif name == 'dd' and arg.startswith('of='):
                    is_output_file = True
                elif name == 'dd' and arg.startswith('if='):
                    is_output_file = False
                elif name in ['mkdir', 'mkfifo']:
                    is_output_file = True
                for known_path in known_paths.keys():
                    # Check if arg contains a known path ...
                    if known_path not in arg:
                        continue
                    # ... if so add this file to the graph
                    add_file_node(known_path)
                    # Decide the direction per matched path so one match
                    # cannot leak its direction into the next one.
                    io_type = None
                    if name in ['cat', 'dd', 'mkdir', 'mkfifo']:
                        io_type = 'output' if is_output_file else 'input'
                    elif name == 'fix_cutadapt.py':
                        # The last two arguments are input and output; other
                        # files are classified by the option preceding them.
                        # Positions are compared by index, which also works
                        # after `arg` was rewritten for display.
                        if arg_index == last_index - 1:
                            io_type = 'input'
                        elif arg_index == last_index:
                            io_type = 'output'
                        elif arg_index > 0 and \
                                args[arg_index - 1] == '--R2-in':
                            io_type = 'input'
                        elif arg_index > 0 and \
                                args[arg_index - 1] == '--R2-out':
                            io_type = 'output'
                    else:
                        # we can't know whether the fifo is for input or
                        # output, first look at the hints, then use the
                        # designation (if any was given)
                        if raw_arg in reads or known_path in reads:
                            io_type = 'input'
                        if raw_arg in writes or known_path in writes:
                            io_type = 'output'
                        if io_type is None:
                            io_type = known_paths[known_path].get(
                                'designation')
                            if io_type is None:
                                io_type = 'input'

                    if io_type == 'input':
                        # add edge from file to proc
                        pipe_hash['edges'][
                            (file_hash(known_path), pid_hash(pid))] = dict()
                    elif io_type == 'output':
                        # add edge from proc to file
                        pipe_hash['edges'][
                            (pid_hash(pid), file_hash(known_path))] = dict()

                    # show only the basename of known paths in the label
                    arg = arg.replace(known_path,
                                      os.path.basename(known_path))

                # shorten long all-uppercase arguments
                if (len(arg) > 16) and re.match('^[A-Z]+$', arg):
                    arg = "%s[...]" % arg[:16]
                stripped_args.append(arg.replace('\t', '\\t').replace(
                    '\\', '\\\\'))

            tw = textwrap.TextWrapper(
                width=50,
                break_long_words=False,
                break_on_hyphens=False)
            label = "%s" % ("\\n".join(tw.wrap(' '.join(stripped_args))))

        # add proc
        label, color = describe_outcome(proc_info, pid, label, "#fce94f")

        watcher = pipeline_log['process_watcher']
        if 'max' in watcher and pid in watcher['max']:
            usage = watcher['max'][pid]
            label += "\\n%1.1f%% CPU, %s RAM (%1.1f%%)" % (
                usage['cpu_percent'],
                misc.bytes_to_str(usage['rss']),
                usage['memory_percent'])

        pipe_hash['nodes'][pid_hash(pid)] = {
            'label': label,
            'fillcolor': color,
            'start_time': proc_info['start_time']
        }

        for which in ['stdout', 'stderr']:
            key = "%s_copy" % which
            if key not in proc_info:
                continue
            stream = proc_info[key]
            if ('exit_code' in stream) and \
               (stream['exit_code'] == 0) and \
               ('length' in stream) and \
               (stream['length'] == 0) and \
               ('sink_full_path' not in stream):
                # skip this stdout/stderr box if it leads to nothing
                continue
            size_label = '(empty)'
            if ('length' in stream) and (stream['length'] > 0):
                elapsed = (stream['end_time'] -
                           stream['start_time']).total_seconds()
                # A stream may start and end within the same timestamp;
                # avoid dividing by zero in that case.
                if elapsed > 0:
                    speed_label = "%s/s" % misc.bytes_to_str(
                        float(stream['length']) / elapsed)
                else:
                    speed_label = 'n/a'
                size_label = "%s / %s lines (%s)" % (
                    misc.bytes_to_str(stream['length']),
                    "{:,}".format(stream['lines']),
                    speed_label)

            stream_label, stream_color = describe_outcome(
                stream, pid, "%s\\n%s" % (which, size_label), "#fdf3a7")

            # add proc_which
            pipe_hash['nodes'][pid_hash(pid, which)] = {
                'label': stream_label,
                'fillcolor': stream_color
            }
            if 'sink_full_path' in stream:
                add_file_node(stream['sink_full_path'])

    # Connect processes to each other and to their stream boxes.
    for proc_info in pipeline_log['processes']:
        pid = proc_info['pid']
        if 'use_stdin_of' in proc_info:
            other_pid = proc_info['use_stdin_of']
            pipe_hash['edges'][
                (pid_hash(other_pid, 'stdout'), pid_hash(pid))] = dict()
        for which in ['stdout', 'stderr']:
            key = "%s_copy" % which
            if key not in proc_info:
                continue
            stream = proc_info[key]
            pipe_hash['edges'][(pid_hash(pid), pid_hash(pid, which))] = dict()
            if 'sink_full_path' in stream:
                pipe_hash['edges'][(
                    pid_hash(pid, which),
                    file_hash(stream['sink_full_path']))] = dict()

    # define nodes which go into subgraph
    step_file_nodes = dict()
    for path, path_info in known_paths.items():
        if path_info['type'] == 'step_file':
            step_file_nodes[file_hash(path)] = path_info['designation']

    task_name = "%s/%s" % (log['step']['name'], log['run']['run_id'])
    cluster_hash = misc.str_to_sha1(task_name)
    pipe_hash['clusters'][cluster_hash] = {
        'task_name': task_name,
        # every node except the step files belongs to the task's cluster
        'group': [node for node in pipe_hash['nodes']
                  if node not in step_file_nodes],
    }

    duration = log['end_time'] - log['start_time']

    text = "Task: %s\\lHost: %s\\lDuration: %s\\l" % (
        task_name, socket.gethostname(),
        misc.duration_to_str(duration, long=True)
    )
    pipe_hash['graph_labels'][task_name] = text
    if 'max' in pipeline_log['process_watcher']:
        total = pipeline_log['process_watcher']['max']['sum']
        text = "CPU: %1.1f%%, %d CORES_Requested , RAM: %s (%1.1f%%)\\l" % (
            total['cpu_percent'],
            log['step']['cores'],
            misc.bytes_to_str(total['rss']),
            total['memory_percent'])
        pipe_hash['graph_labels'][task_name] += text
    if 'signal' in log:
        pipe_hash['graph_labels'][task_name] += "Caught signal: %s\\l" % (
            process_pool.ProcessPool.SIGNAL_NAMES[log['signal']])
    pipe_hash['graph_labels'][task_name] += "\\l"
    return pipe_hash
Beispiel #7
0
 def file_hash(path):
     """Hash `path` with misc.str_to_sha1(), preferring a known 'real_path'.

     If `path` is listed in the step's known paths and its entry records a
     'real_path', that path is hashed instead of the one passed in.
     """
     registry = log['step']['known_paths']
     if path not in registry:
         return misc.str_to_sha1(path)
     if 'real_path' not in registry[path]:
         return misc.str_to_sha1(path)
     return misc.str_to_sha1(registry[path]['real_path'])
Beispiel #8
0
 def pid_hash(pid, suffix=''):
     """Return misc.str_to_sha1() of "<step name>/<run id>/<pid>/<suffix>".

     `suffix` distinguishes several ids that belong to the same process.
     """
     step_name = log['step']['name']
     run_id = log['run']['run_id']
     return misc.str_to_sha1(
         "%s/%s/%d/%s" % (step_name, run_id, pid, suffix))
Beispiel #9
0
def create_hash_from_annotation(log):
    """Build a graph description of one task run from its annotation log.

    Every entry of ``log['pipeline_log']['processes']`` becomes a node, as
    do its captured stdout/stderr streams and every known path
    (``log['step']['known_paths']``) that appears in its write hints, its
    arguments or as a stream sink. Edges point from a file to the process
    for input and from the process to the file for output.

    Args:
        log: Annotation mapping. Keys read here: ``log['step']`` (``name``,
            ``cores``, ``known_paths``), ``log['run']['run_id']``,
            ``log['pipeline_log']`` (``processes``, ``process_watcher``,
            ``ok_to_fail``), ``log['start_time']``, ``log['end_time']``
            and, optionally, ``log['signal']``.

    Returns:
        A dict with four entries:
        ``nodes`` (node id -> attribute dict with label and fill colour),
        ``edges`` ((source id, target id) -> empty dict),
        ``clusters`` (cluster id -> ``task_name`` plus ``group``, the list
        of node ids that are not step files) and
        ``graph_labels`` (task name -> summary text).
    """
    def pid_hash(pid, suffix=''):
        """Return the node id of process ``pid`` (``suffix`` names a stream)."""
        hashtag = "%s/%s/%d/%s" % (log['step']['name'], log['run']['run_id'],
                                   pid, suffix)
        return misc.str_to_sha1(hashtag)

    def file_hash(path):
        """Return the node id of ``path``, following a known ``real_path``."""
        if path in log['step']['known_paths']:
            if 'real_path' in log['step']['known_paths'][path]:
                path = log['step']['known_paths'][path]['real_path']
        return misc.str_to_sha1(path)

    pipe_hash = dict()
    pipe_hash['nodes'] = dict()
    pipe_hash['edges'] = dict()
    pipe_hash['clusters'] = dict()
    pipe_hash['graph_labels'] = dict()

    def add_file_node(path):
        """Add a node for ``path``; paths that are not known are ignored."""
        if path not in log['step']['known_paths']:
            return

        if 'real_path' in log['step']['known_paths'][path]:
            path = log['step']['known_paths'][path]['real_path']
        label = os.path.basename(path)
        color = '#ffffff'
        path_info = log['step']['known_paths'][path]
        if path_info['type'] in ['fifo', 'directory']:
            color = '#c4f099'
        elif path_info['type'] == 'file':
            color = '#8ae234'
        elif path_info['type'] == 'step_file':
            color = '#97b7c8'
            label = path_info['label']
            if 'size' in path_info:
                label += "\\n%s" % misc.bytes_to_str(path_info['size'])
        pipe_hash['nodes'][misc.str_to_sha1(path)] = {
            'label': label,
            'fillcolor': color
        }

    for proc_info in log['pipeline_log']['processes']:
        pid = proc_info['pid']
        # Set name and label variable
        try:
            name = proc_info['name']
            label = "%s" % (proc_info['name'])
        except KeyError:
            name = '(unknown)'
            label = "PID %d" % pid

        try:
            # Add file nodes for every file in hints
            for path in proc_info['hints']['writes']:
                add_file_node(path)
                pipe_hash['edges'][(pid_hash(pid), file_hash(path))] = dict()
        except KeyError:
            pass

        try:
            # Add all the info for each process to pipe_hash
            stripped_args = []
            is_output_file = False
            args = proc_info['args']
            # Hints are optional; without them we fall back to the path's
            # designation instead of abandoning the whole command line.
            hints = proc_info['hints'] if 'hints' in proc_info else {}
            for arg_index, original_arg in enumerate(args):
                # 'arg' is rewritten below for display purposes, so keep the
                # untouched value and its position for comparisons against
                # the argument list.
                arg = original_arg
                # Try to investigate how fifos are integrated in data stream.
                # This is where we decide whether a file is used for input
                # or for output.
                io_type = None
                if name == 'cat':
                    is_output_file = False
                elif name == 'dd' and arg.startswith('of='):
                    is_output_file = True
                elif name == 'dd' and arg.startswith('if='):
                    is_output_file = False
                elif name in ['mkdir', 'mkfifo']:
                    is_output_file = True
                for known_path in log['step']['known_paths'].keys():
                    # Check if arg contains a known path ...
                    if known_path in arg:
                        # ... if so add this file to the graph
                        add_file_node(known_path)
                        # Is the process able to in-/output files?
                        if name in ['cat', 'dd', 'mkdir', 'mkfifo']:
                            if is_output_file:
                                io_type = 'output'
                            else:
                                io_type = 'input'
                        elif name == 'fix_cutadapt.py':
                            # The last two arguments are input and output;
                            # otherwise the preceding flag decides.
                            previous_arg = None
                            if arg_index > 0:
                                previous_arg = args[arg_index - 1]
                            if original_arg == args[-2]:
                                io_type = 'input'
                            elif original_arg == args[-1]:
                                io_type = 'output'
                            elif previous_arg == '--R2-in':
                                io_type = 'input'
                            elif previous_arg == '--R2-out':
                                io_type = 'output'
                        else:
                            # we can't know whether the fifo is for input or
                            # output, first look at the hints, then use the
                            # designation (if any was given)
                            if 'reads' in hints and arg in hints['reads']:
                                io_type = 'input'
                            if 'writes' in hints and arg in hints['writes']:
                                io_type = 'output'
                            if io_type is None:
                                path_info = \
                                    log['step']['known_paths'][known_path]
                                if 'designation' in path_info:
                                    io_type = path_info['designation']
                                if io_type is None:
                                    io_type = 'input'

                                # Diagnostic output for guessed directions.
                                print('io_type: %s\nknown_path: %s' %
                                      (io_type, known_path))

                        if io_type == 'input':
                            # add edge from file to proc
                            pipe_hash['edges']\
                                [(file_hash(known_path), pid_hash(pid))] = dict()
                        elif io_type == 'output':
                            # add edge from proc to file
                            pipe_hash['edges']\
                                [(pid_hash(pid), file_hash(known_path))] = dict()

                        # Show only the file name in the process label.
                        basename = os.path.basename(known_path)
                        arg = arg.replace(known_path, basename)
                    else:
                        # Shorten long all-uppercase arguments.
                        if (len(arg) > 16) and re.match('^[A-Z]+$', arg):
                            arg = "%s[...]" % arg[:16]
                stripped_args.append(
                    arg.replace('\t', '\\t').replace('\\', '\\\\'))

            tw = textwrap.TextWrapper(width=50,
                                      break_long_words=False,
                                      break_on_hyphens=False)
            label = "%s" % ("\\n".join(tw.wrap(' '.join(stripped_args))))

        # If any key wasn't around let's go on
        except KeyError:
            pass

        # add proc
        something_went_wrong = False
        if 'signal' in proc_info:
            something_went_wrong = True
        elif 'exit_code' in proc_info:
            if proc_info['exit_code'] != 0:
                something_went_wrong = True
        else:
            something_went_wrong = True
        color = "#fce94f"
        if something_went_wrong:
            if pid not in log['pipeline_log']['ok_to_fail']:
                color = "#d5291a"
            if 'signal' in proc_info:
                label = "%s\\n(received %s%s)" % (
                    label,
                    'friendly ' \
                    if pid in log['pipeline_log']['ok_to_fail'] else '',
                    proc_info['signal_name'] if 'signal_name' in \
                    proc_info else 'signal %d' % proc_info['signal'])
            elif 'exit_code' in proc_info:
                if proc_info['exit_code'] != 0:
                    label = "%s\\n(failed with exit code %d)" % (
                        label, proc_info['exit_code'])
            else:
                label = "%s\\n(no exit code)" % label

        if 'max' in log['pipeline_log']['process_watcher']:
            if pid in log['pipeline_log']['process_watcher']['max']:
                label += "\\n%1.1f%% CPU, %s RAM (%1.1f%%)" % (
                    log['pipeline_log']['process_watcher']['max'][pid]\
                    ['cpu_percent'],
                    misc.bytes_to_str(
                        log['pipeline_log']['process_watcher']['max'][pid]\
                        ['rss']),
                    log['pipeline_log']['process_watcher']['max'][pid]\
                    ['memory_percent'])

        pipe_hash['nodes'][pid_hash(pid)] = {
            'label': label,
            'fillcolor': color,
            'start_time': proc_info['start_time']
        }

        for which in ['stdout', 'stderr']:
            key = "%s_copy" % which
            if key in proc_info:
                if ('exit_code' in proc_info[key]) and \
                   (proc_info[key]['exit_code'] == 0) and \
                   ('length' in proc_info[key]) and \
                   (proc_info[key]['length'] == 0) and \
                   ('sink_full_path' not in proc_info[key]):
                    # skip this stdout/stderr box if it leads to nothing
                    continue
                size_label = '(empty)'
                if ('length' in proc_info[key]) and \
                   (proc_info[key]['length'] > 0):
                    # NOTE(review): this divides by the elapsed seconds; a
                    # zero-length interval would raise ZeroDivisionError.
                    speed = float(proc_info[key]['length']) / (
                        proc_info[key]['end_time'] -
                        proc_info[key]['start_time']).total_seconds()
                    speed_label = "%s/s" % misc.bytes_to_str(speed)
                    size_label = "%s / %s lines (%s)" % (misc.bytes_to_str(
                        proc_info[key]['length']), "{:,}".format(
                            proc_info[key]['lines']), speed_label)
                label = "%s\\n%s" % (which, size_label)

                something_went_wrong = False
                if 'signal' in proc_info[key]:
                    something_went_wrong = True
                elif 'exit_code' in proc_info[key]:
                    if proc_info[key]['exit_code'] != 0:
                        something_went_wrong = True
                else:
                    something_went_wrong = True
                color = "#fdf3a7"
                if something_went_wrong:
                    if pid not in log['pipeline_log']['ok_to_fail']:
                        color = "#d5291a"
                    if 'signal' in proc_info[key]:
                        label = "%s\\n(received %s%s)" % (
                            label,
                            "friendly " if pid in \
                            log['pipeline_log']['ok_to_fail'] else '',
                            proc_info[key]['signal_name'] if 'signal_name'\
                            in proc_info[key] else 'signal %d' %
                            proc_info[key]['signal'])
                    elif 'exit_code' in proc_info[key]:
                        if proc_info[key]['exit_code'] != 0:
                            label = "%s\\n(failed with exit code %d)" % (
                                label, proc_info[key]['exit_code'])
                    else:
                        label = "%s\\n(no exit code)" % label

                # add proc_which
                pipe_hash['nodes'][pid_hash(pid, which)] = {
                    'label': label,
                    'fillcolor': color
                }
                if 'sink_full_path' in proc_info[key]:
                    path = proc_info[key]['sink_full_path']
                    add_file_node(path)

    # Second pass: connect processes, their streams and the stream sinks.
    # The process entries are only read here, so no copy is needed.
    for proc_info in log['pipeline_log']['processes']:
        pid = proc_info['pid']
        if 'use_stdin_of' in proc_info:
            other_pid = proc_info['use_stdin_of']
            pipe_hash['edges'][(pid_hash(other_pid, 'stdout'), pid_hash(pid))] \
                = dict()
        for which in ['stdout', 'stderr']:
            key = "%s_copy" % which
            if key in proc_info:
                pipe_hash['edges'][(pid_hash(pid), pid_hash(pid,
                                                            which))] = dict()
                if 'sink_full_path' in proc_info[key]:
                    pipe_hash['edges'][(pid_hash(
                        pid, which), file_hash(
                            proc_info[key]['sink_full_path']))] = dict()

    # define nodes which go into subgraph
    step_file_nodes = dict()
    for path, path_info in log['step']['known_paths'].items():
        if path_info['type'] == 'step_file':
            step_file_nodes[file_hash(path)] = path_info['designation']

    task_name = "%s/%s" % (log['step']['name'], log['run']['run_id'])
    cluster_hash = misc.str_to_sha1(task_name)
    pipe_hash['clusters'][cluster_hash] = dict()
    pipe_hash['clusters'][cluster_hash]['task_name'] = task_name
    pipe_hash['clusters'][cluster_hash]['group'] = list()
    # Everything except the step files belongs to this task's cluster.
    for node in pipe_hash['nodes'].keys():
        if node not in step_file_nodes:
            pipe_hash['clusters'][cluster_hash]['group'].append(node)

    start_time = log['start_time']
    end_time = log['end_time']
    duration = end_time - start_time

    text = "Task: %s\\lHost: %s\\lDuration: %s\\l" % (
        task_name, socket.gethostname(),
        misc.duration_to_str(duration, long=True))
    pipe_hash['graph_labels'][task_name] = text
    if 'max' in log['pipeline_log']['process_watcher']:
        text = "CPU: %1.1f%%, %d CORES_Requested , RAM: %s (%1.1f%%)\\l" % (
            log['pipeline_log']['process_watcher']['max']['sum']['cpu_percent'],
            log['step']['cores'],
            misc.bytes_to_str(log['pipeline_log']['process_watcher']['max']\
                              ['sum']['rss']),
            log['pipeline_log']['process_watcher']['max']['sum']['memory_percent'])
        pipe_hash['graph_labels'][task_name] += text
    if 'signal' in log:
        pipe_hash['graph_labels'][task_name] += "Caught signal: %s\\l" % (
            process_pool.ProcessPool.SIGNAL_NAMES[log['signal']])
    pipe_hash['graph_labels'][task_name] += "\\l"
    return pipe_hash