import copy
import os
import re
import socket
import textwrap

# Project-local modules referenced below (SHA1/byte/duration helpers and the
# signal-name table); assumed to be importable as elsewhere in the code base.
import misc
import process_pool


def render_pipeline_hash(log):

    def pid_hash(pid, suffix=''):
        hashtag = "%s/%s/%d/%s" % (log['step']['name'], log['run']['run_id'],
                                   pid, suffix)
        return misc.str_to_sha1(hashtag)

    def file_hash(path):
        if path in log['step']['known_paths']:
            if 'real_path' in log['step']['known_paths'][path]:
                path = log['step']['known_paths'][path]['real_path']
        return misc.str_to_sha1(path)

    hash = dict()
    hash['nodes'] = dict()
    hash['edges'] = dict()
    hash['clusters'] = dict()
    hash['graph_labels'] = dict()

    def add_file_node(path):
        if path not in log['step']['known_paths']:
            return
        if 'real_path' in log['step']['known_paths'][path]:
            path = log['step']['known_paths'][path]['real_path']
        label = log['step']['known_paths'][path]['label']
        color = '#ffffff'
        if log['step']['known_paths'][path]['type'] == 'fifo':
            color = '#c4f099'
        elif log['step']['known_paths'][path]['type'] == 'file':
            color = '#8ae234'
        elif log['step']['known_paths'][path]['type'] == 'step_file':
            color = '#97b7c8'
        if path in log['step']['known_paths']:
            if 'size' in log['step']['known_paths'][path]:
                label += "\\n%s" % misc.bytes_to_str(
                    log['step']['known_paths'][path]['size'])
        hash['nodes'][misc.str_to_sha1(path)] = {
            'label': label,
            'fillcolor': color
        }

    for proc_info in copy.deepcopy(log['pipeline_log']['processes']):
        pid = proc_info['pid']
        label = "PID %d" % pid
        name = '(unknown)'
        if 'name' in proc_info:
            name = proc_info['name']
            label = "%s" % (proc_info['name'])

        if 'writes' in proc_info['hints']:
            for path in proc_info['hints']['writes']:
                add_file_node(path)

        if 'args' in proc_info:
            stripped_args = []
            for arg in copy.deepcopy(proc_info['args']):
                if arg in log['step']['known_paths']:
                    add_file_node(arg)
                    if log['step']['known_paths'][arg]['type'] != 'step_file':
                        arg = log['step']['known_paths'][arg]['label']
                    else:
                        arg = os.path.basename(arg)
                else:
                    if arg[0:4] != '/dev':
                        arg = os.path.basename(arg)
                if (len(arg) > 16) and re.match('^[A-Z]+$', arg):
                    arg = "%s[...]" % arg[:16]
                stripped_args.append(
                    arg.replace('\t', '\\t').replace('\\', '\\\\'))
            tw = textwrap.TextWrapper(
                width=50, break_long_words=False, break_on_hyphens=False)
            label = "%s" % ("\\n".join(tw.wrap(' '.join(stripped_args))))

        if 'args' in proc_info:
            output_fifo = False
            for arg in proc_info['args']:
                fifo_type = None
                if name == 'cat4m' and arg == '-o':
                    output_fifo = True
                elif name == 'dd' and arg.startswith('of='):
                    output_fifo = True
                for known_path in log['step']['known_paths'].keys():
                    if known_path in arg:
                        add_file_node(known_path)
                        if name in ['cat4m', 'dd']:
                            if output_fifo:
                                fifo_type = 'output'
                            else:
                                fifo_type = 'input'
                        else:
                            # we can't know whether the fifo is for input or
                            # output, first look at the hints, then use the
                            # designation (if any was given)
                            if 'reads' in proc_info['hints'] and \
                               arg in proc_info['hints']['reads']:
                                fifo_type = 'input'
                            if 'writes' in proc_info['hints'] and \
                               arg in proc_info['hints']['writes']:
                                fifo_type = 'output'
                            if fifo_type is None:
                                fifo_type = log['step']['known_paths'][arg]\
                                    ['designation']
                        if fifo_type == 'input':
                            # add edge from file to proc
                            hash['edges'][(file_hash(arg), pid_hash(pid))] \
                                = dict()
                        elif fifo_type == 'output':
                            # add edge from proc to file
                            hash['edges'][(pid_hash(pid), file_hash(arg))] \
                                = dict()

        if 'writes' in proc_info['hints']:
            for path in proc_info['hints']['writes']:
                hash['edges'][(pid_hash(pid), file_hash(path))] = dict()

        # add proc
        something_went_wrong = False
        if 'signal' in proc_info:
            something_went_wrong = True
        elif 'exit_code' in proc_info:
            if proc_info['exit_code'] != 0:
                something_went_wrong = True
        else:
            something_went_wrong = True
        color = "#fce94f"
        if something_went_wrong:
            if pid not in log['pipeline_log']['ok_to_fail']:
                color = "#d5291a"
            if 'signal' in proc_info:
                label = "%s\\n(received %s%s)" % (
                    label,
                    'friendly '
                    if pid in log['pipeline_log']['ok_to_fail'] else '',
                    proc_info['signal_name'] if 'signal_name' in proc_info
                    else 'signal %d' % proc_info['signal'])
            elif 'exit_code' in proc_info:
                if proc_info['exit_code'] != 0:
                    label = "%s\\n(failed with exit code %d)" % (
                        label, proc_info['exit_code'])
            else:
                label = "%s\\n(no exit code)" % label

        if 'max' in log['pipeline_log']['process_watcher']:
            if pid in log['pipeline_log']['process_watcher']['max']:
                label += "\\n%1.1f%% CPU, %s RAM (%1.1f%%)" % (
                    log['pipeline_log']['process_watcher']['max'][pid]
                    ['cpu_percent'],
                    misc.bytes_to_str(
                        log['pipeline_log']['process_watcher']['max'][pid]
                        ['rss']),
                    log['pipeline_log']['process_watcher']['max'][pid]
                    ['memory_percent'])

        hash['nodes'][pid_hash(pid)] = {
            'label': label,
            'fillcolor': color
        }

        for which in ['stdout', 'stderr']:
            key = "%s_copy" % which
            if key in proc_info:
                if ('exit_code' in proc_info[key]) and \
                   (proc_info[key]['exit_code'] == 0) and \
                   ('length' in proc_info[key]) and \
                   (proc_info[key]['length'] == 0) and \
                   ('sink_full_path' not in proc_info[key]):
                    # skip this stdout/stderr box if it leads to nothing
                    continue
                size_label = '(empty)'
                if ('length' in proc_info[key]) and \
                   (proc_info[key]['length'] > 0):
                    speed = float(proc_info[key]['length']) / (
                        proc_info[key]['end_time'] -
                        proc_info[key]['start_time']).total_seconds()
                    speed_label = "%s/s" % misc.bytes_to_str(speed)
                    size_label = "%s / %s lines (%s)" % (
                        misc.bytes_to_str(proc_info[key]['length']),
                        "{:,}".format(proc_info[key]['lines']),
                        speed_label)
                label = "%s\\n%s" % (which, size_label)

                something_went_wrong = False
                if 'signal' in proc_info[key]:
                    something_went_wrong = True
                elif 'exit_code' in proc_info[key]:
                    if proc_info[key]['exit_code'] != 0:
                        something_went_wrong = True
                else:
                    something_went_wrong = True
                color = "#fdf3a7"
                if something_went_wrong:
                    if pid not in log['pipeline_log']['ok_to_fail']:
                        color = "#d5291a"
                    if 'signal' in proc_info[key]:
                        label = "%s\\n(received %s%s)" % (
                            label,
                            "friendly " if pid in
                            log['pipeline_log']['ok_to_fail'] else '',
                            proc_info[key]['signal_name']
                            if 'signal_name' in proc_info[key]
                            else 'signal %d' % proc_info[key]['signal'])
                    elif 'exit_code' in proc_info[key]:
                        if proc_info[key]['exit_code'] != 0:
                            label = "%s\\n(failed with exit code %d)" % (
                                label, proc_info[key]['exit_code'])
                    else:
                        label = "%s\\n(no exit code)" % label

                # add proc_which
                hash['nodes'][pid_hash(pid, which)] = {
                    'label': label,
                    'fillcolor': color
                }
                if 'sink_full_path' in proc_info[key]:
                    path = proc_info[key]['sink_full_path']
                    add_file_node(path)

    for proc_info in copy.deepcopy(log['pipeline_log']['processes']):
        pid = proc_info['pid']
        if 'use_stdin_of' in proc_info:
            other_pid = proc_info['use_stdin_of']
            hash['edges'][(pid_hash(other_pid, 'stdout'), pid_hash(pid))] \
                = dict()
        for which in ['stdout', 'stderr']:
            key = "%s_copy" % which
            if key in proc_info:
                other_pid = proc_info[key]['pid']
                hash['edges'][(pid_hash(pid), pid_hash(pid, which))] = dict()
                if 'sink_full_path' in proc_info[key]:
                    hash['edges'][(
                        pid_hash(pid, which),
                        file_hash(proc_info[key]['sink_full_path']))] = dict()

    # define nodes which go into the subgraph
    step_file_nodes = dict()
    for path, path_info in log['step']['known_paths'].items():
        if path_info['type'] == 'step_file':
            step_file_nodes[file_hash(path)] = path_info['designation']

    task_name = "%s/%s" % (log['step']['name'], log['run']['run_id'])
    cluster_hash = misc.str_to_sha1(task_name)
    hash['clusters'][cluster_hash] = dict()
    hash['clusters'][cluster_hash]['task_name'] = task_name
    hash['clusters'][cluster_hash]['group'] = list()
    for node in hash['nodes'].keys():
        if node not in step_file_nodes:
            hash['clusters'][cluster_hash]['group'].append(node)

    start_time = log['start_time']
    end_time = log['end_time']
    duration = end_time - start_time

    text = "Task: %s\\lHost: %s\\lDuration: %s\\l" % (
        task_name, socket.gethostname(),
        misc.duration_to_str(duration, long=True))
    hash['graph_labels'][task_name] = text
    if 'max' in log['pipeline_log']['process_watcher']:
        text = "CPU: %1.1f%%, %d cores requested, RAM: %s (%1.1f%%)\\l" % (
            log['pipeline_log']['process_watcher']['max']['sum']
            ['cpu_percent'],
            log['step']['cores'],
            misc.bytes_to_str(
                log['pipeline_log']['process_watcher']['max']['sum']['rss']),
            log['pipeline_log']['process_watcher']['max']['sum']
            ['memory_percent'])
        hash['graph_labels'][task_name] += text
    if 'signal' in log:
        hash['graph_labels'][task_name] += "Caught signal: %s\\l" % (
            process_pool.ProcessPool.SIGNAL_NAMES[log['signal']])
    hash['graph_labels'][task_name] += "\\l"
    return hash
def create_hash_from_annotation(log):

    def pid_hash(pid, suffix=''):
        hashtag = "%s/%s/%d/%s" % (log['step']['name'], log['run']['run_id'],
                                   pid, suffix)
        return misc.str_to_sha1(hashtag)

    def file_hash(path):
        if path in log['step']['known_paths']:
            if 'real_path' in log['step']['known_paths'][path]:
                path = log['step']['known_paths'][path]['real_path']
        return misc.str_to_sha1(path)

    pipe_hash = dict()
    pipe_hash['nodes'] = dict()
    pipe_hash['edges'] = dict()
    pipe_hash['clusters'] = dict()
    pipe_hash['graph_labels'] = dict()

    def add_file_node(path):
        if path not in log['step']['known_paths']:
            return
        if 'real_path' in log['step']['known_paths'][path]:
            path = log['step']['known_paths'][path]['real_path']
        label = os.path.basename(path)
        color = '#ffffff'
        if log['step']['known_paths'][path]['type'] in ['fifo', 'directory']:
            color = '#c4f099'
        elif log['step']['known_paths'][path]['type'] == 'file':
            color = '#8ae234'
        elif log['step']['known_paths'][path]['type'] == 'step_file':
            color = '#97b7c8'
            label = log['step']['known_paths'][path]['label']
        if path in log['step']['known_paths']:
            if 'size' in log['step']['known_paths'][path]:
                label += "\\n%s" % misc.bytes_to_str(
                    log['step']['known_paths'][path]['size'])
        pipe_hash['nodes'][misc.str_to_sha1(path)] = {
            'label': label,
            'fillcolor': color
        }

    for proc_info in log['pipeline_log']['processes']:
        pid = proc_info['pid']
        # Set name and label variable
        try:
            name = proc_info['name']
            label = "%s" % (proc_info['name'])
        except KeyError:
            name = '(unknown)'
            label = "PID %d" % pid
        try:
            # Add file nodes for every file in hints
            for path in proc_info['hints']['writes']:
                add_file_node(path)
                pipe_hash['edges'][(pid_hash(pid), file_hash(path))] = dict()
        except KeyError:
            pass
        try:
            # Add all the info for each process to pipe_hash
            stripped_args = []
            is_output_file = False
            for arg in proc_info['args']:
                # Try to work out how fifos are integrated into the data
                # stream: this is where we decide whether a file is used
                # for input or for output.
                io_type = None
                if name == 'cat':
                    is_output_file = False
                elif name == 'dd' and arg.startswith('of='):
                    is_output_file = True
                elif name == 'dd' and arg.startswith('if='):
                    is_output_file = False
                elif name in ['mkdir', 'mkfifo']:
                    is_output_file = True
                for known_path in log['step']['known_paths'].keys():
                    # Check if arg contains a known path ...
                    if known_path in arg:
                        # ... if so add this file to the graph
                        add_file_node(known_path)
                        # Is the process able to in-/output files?
                        if name in ['cat', 'dd', 'mkdir', 'mkfifo']:
                            if is_output_file:
                                io_type = 'output'
                            else:
                                io_type = 'input'
                        elif name == 'fix_cutadapt.py':
                            if arg == proc_info['args'][-2]:
                                io_type = 'input'
                            elif arg == proc_info['args'][-1]:
                                io_type = 'output'
                            elif (proc_info['args']
                                  [proc_info['args'].index(arg) - 1]
                                  == '--R2-in'):
                                io_type = 'input'
                            elif (proc_info['args']
                                  [proc_info['args'].index(arg) - 1]
                                  == '--R2-out'):
                                io_type = 'output'
                        else:
                            # we can't know whether the fifo is for input or
                            # output, first look at the hints, then use the
                            # designation (if any was given)
                            if 'reads' in proc_info['hints'] and \
                               arg in proc_info['hints']['reads']:
                                io_type = 'input'
                            if 'writes' in proc_info['hints'] and \
                               arg in proc_info['hints']['writes']:
                                io_type = 'output'
                            if io_type is None:
                                io_type = log['step']['known_paths']\
                                    [known_path]['designation']
                            if io_type is None:
                                io_type = 'input'
                        print('io_type: %s\nknown_path: %s' %
                              (io_type, known_path))
                        if io_type == 'input':
                            # add edge from file to proc
                            pipe_hash['edges'][
                                (file_hash(known_path), pid_hash(pid))] \
                                = dict()
                        elif io_type == 'output':
                            # add edge from proc to file
                            pipe_hash['edges'][
                                (pid_hash(pid), file_hash(known_path))] \
                                = dict()
                        basename = os.path.basename(known_path)
                        # if log['step']['known_paths'][known_path]['type'] \
                        #         != 'step_file':
                        arg = arg.replace(known_path, basename)
                        #     break
                        # else:
                        #     arg = basename
                        #     break
                else:
                    # if arg[0:4] != '/dev':
                    #     arg = os.path.basename(arg)
                    if (len(arg) > 16) and re.match('^[A-Z]+$', arg):
                        arg = "%s[...]" % arg[:16]
                stripped_args.append(
                    arg.replace('\t', '\\t').replace('\\', '\\\\'))
            tw = textwrap.TextWrapper(
                width=50, break_long_words=False, break_on_hyphens=False)
            label = "%s" % ("\\n".join(tw.wrap(' '.join(stripped_args))))
        # If any key wasn't around let's go on
        except KeyError:
            pass

        # add proc
        something_went_wrong = False
        if 'signal' in proc_info:
            something_went_wrong = True
        elif 'exit_code' in proc_info:
            if proc_info['exit_code'] != 0:
                something_went_wrong = True
        else:
            something_went_wrong = True
        color = "#fce94f"
        if something_went_wrong:
            if pid not in log['pipeline_log']['ok_to_fail']:
                color = "#d5291a"
            if 'signal' in proc_info:
                label = "%s\\n(received %s%s)" % (
                    label,
                    'friendly '
                    if pid in log['pipeline_log']['ok_to_fail'] else '',
                    proc_info['signal_name'] if 'signal_name' in proc_info
                    else 'signal %d' % proc_info['signal'])
            elif 'exit_code' in proc_info:
                if proc_info['exit_code'] != 0:
                    label = "%s\\n(failed with exit code %d)" % (
                        label, proc_info['exit_code'])
            else:
                label = "%s\\n(no exit code)" % label

        if 'max' in log['pipeline_log']['process_watcher']:
            if pid in log['pipeline_log']['process_watcher']['max']:
                label += "\\n%1.1f%% CPU, %s RAM (%1.1f%%)" % (
                    log['pipeline_log']['process_watcher']['max'][pid]
                    ['cpu_percent'],
                    misc.bytes_to_str(
                        log['pipeline_log']['process_watcher']['max'][pid]
                        ['rss']),
                    log['pipeline_log']['process_watcher']['max'][pid]
                    ['memory_percent'])

        pipe_hash['nodes'][pid_hash(pid)] = {
            'label': label,
            'fillcolor': color,
            'start_time': proc_info['start_time']
        }

        for which in ['stdout', 'stderr']:
            key = "%s_copy" % which
            if key in proc_info:
                if ('exit_code' in proc_info[key]) and \
                   (proc_info[key]['exit_code'] == 0) and \
                   ('length' in proc_info[key]) and \
                   (proc_info[key]['length'] == 0) and \
                   ('sink_full_path' not in proc_info[key]):
                    # skip this stdout/stderr box if it leads to nothing
                    continue
                size_label = '(empty)'
                if ('length' in proc_info[key]) and \
                   (proc_info[key]['length'] > 0):
                    speed = float(proc_info[key]['length']) / (
                        proc_info[key]['end_time'] -
                        proc_info[key]['start_time']).total_seconds()
                    speed_label = "%s/s" % misc.bytes_to_str(speed)
                    size_label = "%s / %s lines (%s)" % (
                        misc.bytes_to_str(proc_info[key]['length']),
                        "{:,}".format(proc_info[key]['lines']),
                        speed_label)
                label = "%s\\n%s" % (which, size_label)

                something_went_wrong = False
                if 'signal' in proc_info[key]:
                    something_went_wrong = True
                elif 'exit_code' in proc_info[key]:
                    if proc_info[key]['exit_code'] != 0:
                        something_went_wrong = True
                else:
                    something_went_wrong = True
                color = "#fdf3a7"
                if something_went_wrong:
                    if pid not in log['pipeline_log']['ok_to_fail']:
                        color = "#d5291a"
                    if 'signal' in proc_info[key]:
                        label = "%s\\n(received %s%s)" % (
                            label,
                            "friendly " if pid in
                            log['pipeline_log']['ok_to_fail'] else '',
                            proc_info[key]['signal_name']
                            if 'signal_name' in proc_info[key]
                            else 'signal %d' % proc_info[key]['signal'])
                    elif 'exit_code' in proc_info[key]:
                        if proc_info[key]['exit_code'] != 0:
                            label = "%s\\n(failed with exit code %d)" % (
                                label, proc_info[key]['exit_code'])
                    else:
                        label = "%s\\n(no exit code)" % label

                # add proc_which
                pipe_hash['nodes'][pid_hash(pid, which)] = {
                    'label': label,
                    'fillcolor': color
                }
                if 'sink_full_path' in proc_info[key]:
                    path = proc_info[key]['sink_full_path']
                    add_file_node(path)

    for proc_info in copy.deepcopy(log['pipeline_log']['processes']):
        pid = proc_info['pid']
        if 'use_stdin_of' in proc_info:
            other_pid = proc_info['use_stdin_of']
            pipe_hash['edges'][(pid_hash(other_pid, 'stdout'),
                                pid_hash(pid))] = dict()
        for which in ['stdout', 'stderr']:
            key = "%s_copy" % which
            if key in proc_info:
                other_pid = proc_info[key]['pid']
                pipe_hash['edges'][(pid_hash(pid), pid_hash(pid, which))] \
                    = dict()
                if 'sink_full_path' in proc_info[key]:
                    pipe_hash['edges'][(
                        pid_hash(pid, which),
                        file_hash(proc_info[key]['sink_full_path']))] = dict()

    # define nodes which go into the subgraph
    step_file_nodes = dict()
    for path, path_info in log['step']['known_paths'].items():
        if path_info['type'] == 'step_file':
            step_file_nodes[file_hash(path)] = path_info['designation']

    task_name = "%s/%s" % (log['step']['name'], log['run']['run_id'])
    cluster_hash = misc.str_to_sha1(task_name)
    pipe_hash['clusters'][cluster_hash] = dict()
    pipe_hash['clusters'][cluster_hash]['task_name'] = task_name
    pipe_hash['clusters'][cluster_hash]['group'] = list()
    for node in pipe_hash['nodes'].keys():
        if node not in step_file_nodes:
            pipe_hash['clusters'][cluster_hash]['group'].append(node)

    start_time = log['start_time']
    end_time = log['end_time']
    duration = end_time - start_time

    text = "Task: %s\\lHost: %s\\lDuration: %s\\l" % (
        task_name, socket.gethostname(),
        misc.duration_to_str(duration, long=True))
    pipe_hash['graph_labels'][task_name] = text
    if 'max' in log['pipeline_log']['process_watcher']:
        text = "CPU: %1.1f%%, %d cores requested, RAM: %s (%1.1f%%)\\l" % (
            log['pipeline_log']['process_watcher']['max']['sum']
            ['cpu_percent'],
            log['step']['cores'],
            misc.bytes_to_str(
                log['pipeline_log']['process_watcher']['max']['sum']['rss']),
            log['pipeline_log']['process_watcher']['max']['sum']
            ['memory_percent'])
        pipe_hash['graph_labels'][task_name] += text
    if 'signal' in log:
        pipe_hash['graph_labels'][task_name] += "Caught signal: %s\\l" % (
            process_pool.ProcessPool.SIGNAL_NAMES[log['signal']])
    pipe_hash['graph_labels'][task_name] += "\\l"
    return pipe_hash
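
# A minimal sketch (not part of the original module) of how the graph
# structure returned above -- 'nodes', 'edges', 'clusters' and
# 'graph_labels' -- could be serialized to GraphViz dot source. The helper
# name graph_to_dot() and the styling defaults are assumptions made for
# illustration only.
def graph_to_dot(graph):
    lines = ['digraph G {',
             '    rankdir = TB;',
             '    node [style = filled, fontname = "Helvetica"];']
    # one subgraph per task cluster, labelled with the task's graph label
    for cluster_hash, cluster in graph['clusters'].items():
        label = graph['graph_labels'].get(cluster['task_name'],
                                          cluster['task_name'])
        lines.append('    subgraph cluster_%s {' % cluster_hash)
        lines.append('        label = "%s";' % label)
        for node_hash in cluster['group']:
            lines.append('        _%s;' % node_hash)
        lines.append('    }')
    # node definitions ('_' prefix keeps SHA1 digests valid dot identifiers)
    for node_hash, attrs in graph['nodes'].items():
        lines.append('    _%s [label = "%s", fillcolor = "%s"];' % (
            node_hash, attrs['label'], attrs['fillcolor']))
    # plain directed edges between file and process nodes
    for source, target in graph['edges'].keys():
        lines.append('    _%s -> _%s;' % (source, target))
    lines.append('}')
    return '\n'.join(lines)

# Example usage (with the placeholder annotation defined above):
#     dot_source = graph_to_dot(create_hash_from_annotation(EXAMPLE_ANNOTATION))
#     open('task_graph.dot', 'w').write(dot_source)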