def test_no_more_memory_than_specified(): LOG_FILENAME = 'callback.log' my_logger = logging.getLogger('callback') my_logger.setLevel(logging.DEBUG) # Add the log message handler to the logger handler = logging.FileHandler(LOG_FILENAME) my_logger.addHandler(handler) max_memory = 1 pipe = pe.Workflow(name='pipe') n1 = pe.Node(interface=SingleNodeTestInterface(), name='n1') n2 = pe.Node(interface=SingleNodeTestInterface(), name='n2') n3 = pe.Node(interface=SingleNodeTestInterface(), name='n3') n4 = pe.Node(interface=SingleNodeTestInterface(), name='n4') n1.interface.estimated_memory_gb = 1 n2.interface.estimated_memory_gb = 1 n3.interface.estimated_memory_gb = 1 n4.interface.estimated_memory_gb = 1 pipe.connect(n1, 'output1', n2, 'input1') pipe.connect(n1, 'output1', n3, 'input1') pipe.connect(n2, 'output1', n4, 'input1') pipe.connect(n3, 'output1', n4, 'input2') n1.inputs.input1 = 1 pipe.run(plugin='MultiProc', plugin_args={ 'memory_gb': max_memory, 'status_callback': log_nodes_cb }) nodes = draw_gantt_chart.log_to_dict(LOG_FILENAME) last_node = nodes[-1] #usage in every second memory, threads = find_metrics(nodes, last_node) result = True for m in memory: if m > max_memory: result = False break assert result max_threads = cpu_count() result = True for t in threads: if t > max_threads: result = False break assert result,\ "using more threads than system has (threads is not specified by user)" os.remove(LOG_FILENAME)
def test_no_more_memory_than_specified(): LOG_FILENAME = 'callback.log' my_logger = logging.getLogger('callback') my_logger.setLevel(logging.DEBUG) # Add the log message handler to the logger handler = logging.FileHandler(LOG_FILENAME) my_logger.addHandler(handler) max_memory = 1 pipe = pe.Workflow(name='pipe') n1 = pe.Node(interface=TestInterfaceSingleNode(), name='n1') n2 = pe.Node(interface=TestInterfaceSingleNode(), name='n2') n3 = pe.Node(interface=TestInterfaceSingleNode(), name='n3') n4 = pe.Node(interface=TestInterfaceSingleNode(), name='n4') n1.interface.estimated_memory_gb = 1 n2.interface.estimated_memory_gb = 1 n3.interface.estimated_memory_gb = 1 n4.interface.estimated_memory_gb = 1 pipe.connect(n1, 'output1', n2, 'input1') pipe.connect(n1, 'output1', n3, 'input1') pipe.connect(n2, 'output1', n4, 'input1') pipe.connect(n3, 'output1', n4, 'input2') n1.inputs.input1 = 1 pipe.run(plugin='MultiProc', plugin_args={'memory_gb': max_memory, 'status_callback': log_nodes_cb}) nodes = draw_gantt_chart.log_to_dict(LOG_FILENAME) last_node = nodes[-1] #usage in every second memory, threads = find_metrics(nodes, last_node) result = True for m in memory: if m > max_memory: result = False break yield assert_equal, result, True max_threads = cpu_count() result = True for t in threads: if t > max_threads: result = False break yield assert_equal, result, True,\ "using more threads than system has (threads is not specified by user)" os.remove(LOG_FILENAME)
def resource_overusage_report(cblog): '''Function to parse through a callback log for memory and/or thread usage above estimates / limits. Parameters ---------- cblog: str path to callback.log Returns ------- text_report: str excessive: dict ''' cb_dict_list = log_to_dict(cblog) excessive = { node['id']: [ node['runtime_memory_gb'] if node.get('runtime_memory_gb', 0) > node.get('estimated_memory_gb', 1) else None, node['estimated_memory_gb'] if node.get('runtime_memory_gb', 0) > node.get('estimated_memory_gb', 1) else None, node['runtime_threads'] - 1 if node.get('runtime_threads', 0) - 1 > node.get('num_threads', 1) else None, node['num_threads'] if node.get('runtime_threads', 0) - 1 > node.get('num_threads', 1) else None ] for node in [ node for node in cb_dict_list if (node.get('runtime_memory_gb', 0) > node.get('estimated_memory_gb', 1) # or node.get('runtime_threads', 0) - 1 > node.get('num_threads', 1) ) ] } text_report = '' if excessive: text_report += 'The following nodes used excessive resources:\n' dotted_line = '-' * (len(text_report) - 1) + '\n' text_report += dotted_line for node in excessive: node_id = '\n .'.join(node.split('.')) text_report += f'\n{node_id}\n' if excessive[node][0]: text_report += ' **memory_gb**\n' \ ' runtime > estimated\n' \ f' {excessive[node][0]} ' \ f'> {excessive[node][1]}\n' # JC: I'm not convinced 'runtime_threads' and 'threads' are # comparable in nipype ~1.5.1 # if excessive[node][2]: # text_report += ' **threads**\n runtime > limit\n' \ # f' {excessive[node][2]} ' \ # f'> {excessive[node][3]}\n' text_report += dotted_line return text_report, excessive
def test_do_not_use_more_threads_then_specified(): LOG_FILENAME = 'callback.log' my_logger = logging.getLogger('callback') my_logger.setLevel(logging.DEBUG) # Add the log message handler to the logger handler = logging.FileHandler(LOG_FILENAME) my_logger.addHandler(handler) max_threads = 4 pipe = pe.Workflow(name='pipe') n1 = pe.Node(interface=TestInterfaceSingleNode(), name='n1') n2 = pe.Node(interface=TestInterfaceSingleNode(), name='n2') n3 = pe.Node(interface=TestInterfaceSingleNode(), name='n3') n4 = pe.Node(interface=TestInterfaceSingleNode(), name='n4') n1.interface.num_threads = 1 n2.interface.num_threads = 1 n3.interface.num_threads = 4 n4.interface.num_threads = 1 pipe.connect(n1, 'output1', n2, 'input1') pipe.connect(n1, 'output1', n3, 'input1') pipe.connect(n2, 'output1', n4, 'input1') pipe.connect(n3, 'output1', n4, 'input2') n1.inputs.input1 = 4 pipe.config['execution']['poll_sleep_duration'] = 1 pipe.run(plugin='MultiProc', plugin_args={ 'n_procs': max_threads, 'status_callback': log_nodes_cb }) nodes = draw_gantt_chart.log_to_dict(LOG_FILENAME) last_node = nodes[-1] #usage in every second memory, threads = find_metrics(nodes, last_node) result = True for t in threads: if t > max_threads: result = False break yield assert_equal, result, True, "using more threads than specified" max_memory = get_system_total_memory_gb() result = True for m in memory: if m > max_memory: result = False break yield assert_equal, result, True,\ "using more memory than system has (memory is not specified by user)" os.remove(LOG_FILENAME)
def test_no_more_threads_than_specified(): LOG_FILENAME = 'callback.log' my_logger = logging.getLogger('callback') my_logger.setLevel(logging.DEBUG) # Add the log message handler to the logger handler = logging.FileHandler(LOG_FILENAME) my_logger.addHandler(handler) max_threads = 4 pipe = pe.Workflow(name='pipe') n1 = pe.Node(interface=SingleNodeTestInterface(), name='n1') n2 = pe.Node(interface=SingleNodeTestInterface(), name='n2') n3 = pe.Node(interface=SingleNodeTestInterface(), name='n3') n4 = pe.Node(interface=SingleNodeTestInterface(), name='n4') n1.interface.num_threads = 1 n2.interface.num_threads = 1 n3.interface.num_threads = 4 n4.interface.num_threads = 1 pipe.connect(n1, 'output1', n2, 'input1') pipe.connect(n1, 'output1', n3, 'input1') pipe.connect(n2, 'output1', n4, 'input1') pipe.connect(n3, 'output1', n4, 'input2') n1.inputs.input1 = 4 pipe.config['execution']['poll_sleep_duration'] = 1 pipe.run(plugin='MultiProc', plugin_args={'n_procs': max_threads, 'status_callback': log_nodes_cb}) nodes = draw_gantt_chart.log_to_dict(LOG_FILENAME) last_node = nodes[-1] #usage in every second memory, threads = find_metrics(nodes, last_node) result = True for t in threads: if t > max_threads: result = False break assert result, "using more threads than specified" max_memory = get_system_total_memory_gb() result = True for m in memory: if m > max_memory: result = False break assert result,\ "using more memory than system has (memory is not specified by user)" os.remove(LOG_FILENAME)
def generate_gantt_chart( logfile, cores, minute_scale=10, space_between_minutes=50, colors=["#7070FF", "#4E4EB2", "#2D2D66", "#9B9BFF"], ): """ Generates a gantt chart in html showing the workflow execution based on a callback log file. This script was intended to be used with the MultiprocPlugin. The following code shows how to set up the workflow in order to generate the log file: Parameters ---------- logfile : string filepath to the callback log file to plot the gantt chart of cores : integer the number of cores given to the workflow via the 'n_procs' plugin arg minute_scale : integer (optional); default=10 the scale, in minutes, at which to plot line markers for the gantt chart; for example, minute_scale=10 means there are lines drawn at every 10 minute interval from start to finish space_between_minutes : integer (optional); default=50 scale factor in pixel spacing between minute line markers colors : list (optional) a list of colors to choose from when coloring the nodes in the gantt chart Returns ------- None the function does not return any value but writes out an html file in the same directory as the callback log path passed in Usage ----- # import logging # import logging.handlers # from nipype.utils.profiler import log_nodes_cb # log_filename = 'callback.log' # logger = logging.getLogger('callback') # logger.setLevel(logging.DEBUG) # handler = logging.FileHandler(log_filename) # logger.addHandler(handler) # #create workflow # workflow = ... # workflow.run(plugin='MultiProc', # plugin_args={'n_procs':8, 'memory':12, 'status_callback': log_nodes_cb}) # generate_gantt_chart('callback.log', 8) """ # noqa: E501 # add the html header html_string = """<!DOCTYPE html> <head> <style> #content{ width:99%; height:100%; position:absolute; } .node{ background-color:#7070FF; border-radius: 5px; position:absolute; width:20px; white-space:pre-wrap; } .line{ position: absolute; color: #C2C2C2; opacity: 0.5; margin: 0px; } .time{ position: absolute; font-size: 16px; color: #666666; margin: 0px; } .bar{ position: absolute; height: 1px; opacity: 0.7; } .dot{ position: absolute; width: 1px; height: 1px; background-color: red; } .label { width:20px; height:20px; opacity: 0.7; display: inline-block; } </style> </head> <body> <div id="content"> <div style="display:inline-block;"> """ # noqa: E501 close_header = """ </div> <div style="display:inline-block;margin-left:60px;vertical-align: top;"> <p><span><div class="label" style="background-color:#90BBD7;"></div> Estimated Resource</span></p> <p><span><div class="label" style="background-color:#03969D;"></div> Actual Resource</span></p> <p><span><div class="label" style="background-color:#f00;"></div> Failed Node</span></p> </div> """ # noqa: E501 # Read in json-log to get list of node dicts nodes_list = log_to_dict(logfile) # Only include nodes with timing information, and covert timestamps # from strings to datetimes nodes_list = [{ k: datetime.datetime.strptime(i[k], "%Y-%m-%dT%H:%M:%S.%f") if k in {"start", "finish"} else i[k] for k in i } for i in nodes_list if "start" in i and "finish" in i] for node in nodes_list: if "duration" not in node: node["duration"] = (node["finish"] - node["start"]).total_seconds() # Create the header of the report with useful information start_node = nodes_list[0] last_node = nodes_list[-1] duration = (last_node["finish"] - start_node["start"]).total_seconds() # Get events based dictionary of node run stats events = create_event_dict(start_node["start"], nodes_list) # Summary strings of workflow at top html_string += ("<p>Start: " + start_node["start"].strftime("%Y-%m-%d %H:%M:%S") + "</p>") html_string += ("<p>Finish: " + last_node["finish"].strftime("%Y-%m-%d %H:%M:%S") + "</p>") html_string += "<p>Duration: " + "{0:.2f}".format(duration / 60) \ + " minutes</p>" html_string += "<p>Nodes: " + str(len(nodes_list)) + "</p>" html_string += "<p>Cores: " + str(cores) + "</p>" html_string += close_header # Draw nipype nodes Gantt chart and runtimes html_string += draw_lines(start_node["start"], duration, minute_scale, space_between_minutes) html_string += draw_nodes( start_node["start"], nodes_list, cores, minute_scale, space_between_minutes, colors, ) # Get memory timeseries estimated_mem_ts = calculate_resource_timeseries(events, "estimated_memory_gb") runtime_mem_ts = calculate_resource_timeseries(events, "runtime_memory_gb") # Plot gantt chart resource_offset = 120 + 30 * cores html_string += draw_resource_bar( start_node["start"], last_node["finish"], estimated_mem_ts, space_between_minutes, minute_scale, "#90BBD7", resource_offset * 2 + 120, "Memory", ) html_string += draw_resource_bar( start_node["start"], last_node["finish"], runtime_mem_ts, space_between_minutes, minute_scale, "#03969D", resource_offset * 2 + 120, "Memory", ) # Get threads timeseries estimated_threads_ts = calculate_resource_timeseries( events, "estimated_threads") runtime_threads_ts = calculate_resource_timeseries(events, "runtime_threads") # Plot gantt chart html_string += draw_resource_bar( start_node["start"], last_node["finish"], estimated_threads_ts, space_between_minutes, minute_scale, "#90BBD7", resource_offset, "Threads", ) html_string += draw_resource_bar( start_node["start"], last_node["finish"], runtime_threads_ts, space_between_minutes, minute_scale, "#03969D", resource_offset, "Threads", ) # finish html html_string += """ </div> </body>""" # save file with open(logfile + ".html", "w") as html_file: html_file.write(html_string)
def gen_runtime_plots(callback_log, img_desc): ''' Function to generate box plots of the runtime and memory usage for the CPAC vs AFNI implementations Parameters ---------- map_yaml : string filepath to the mapping dictoinary yaml file between afni and cpac centrality outputs img_desc : string a string describing the type of images being analyzed; this string will be used to title and name the plot png file Returns ------- out_png : string filepath to the produced output png file ''' # Import packages import matplotlib.pyplot as plt from nipype.utils.draw_gantt_chart import log_to_dict # Init variables nodes_list = log_to_dict(callback_log) # Isolate cpac and afni finish nodes cpac_nodes = [node for node in nodes_list if \ node['name'] == 'calculate_centrality' and node.has_key('finish')] afni_nodes = [node for node in nodes_list if \ node['name'] == 'afni_centrality' and node.has_key('finish')] # Gather cpac stats cpac_mems = [float(node['runtime_memory_gb']) for node in cpac_nodes] cpac_times = [node['duration'] for node in cpac_nodes] # Gather afni stats afni_mems = [float(node['runtime_memory_gb']) for node in afni_nodes] afni_times = [node['duration'] for node in afni_nodes] # Init pngs mem_png = callback_log.split('.log')[0] + '_memories.png' runtime_png = callback_log.split('.log')[0] + '_runtimes.png' # Set up plot - memory plt.boxplot([cpac_mems, afni_mems]) plt.xticks(range(1, 3), ['C-PAC', 'AFNI']) plt.title('CPAC-AFNI memory usage for: %s' % img_desc) plt.xlabel('Implementation') plt.ylabel('Memory used (GB)') plt.grid() # Save figure fig = plt.gcf() fig.set_size_inches(16, 12) fig.tight_layout() plt.savefig(mem_png, dpi=200) # Clear and close plt.clf() plt.close() # Set up plot - runtime plt.boxplot([cpac_times, afni_times]) plt.xticks(range(1, 3), ['C-PAC', 'AFNI']) plt.title('CPAC-AFNI runtime for: %s' % img_desc) plt.xlabel('Implementation') plt.ylabel('Runtime (seconds)') plt.grid() # Save figure fig = plt.gcf() fig.set_size_inches(16, 12) fig.tight_layout() plt.savefig(runtime_png, dpi=200) # Clear and close plt.clf() plt.close()