Example #1
def test_no_more_memory_than_specified():
    LOG_FILENAME = 'callback.log'
    my_logger = logging.getLogger('callback')
    my_logger.setLevel(logging.DEBUG)

    # Add the log message handler to the logger
    handler = logging.FileHandler(LOG_FILENAME)
    my_logger.addHandler(handler)

    max_memory = 1
    pipe = pe.Workflow(name='pipe')
    n1 = pe.Node(interface=SingleNodeTestInterface(), name='n1')
    n2 = pe.Node(interface=SingleNodeTestInterface(), name='n2')
    n3 = pe.Node(interface=SingleNodeTestInterface(), name='n3')
    n4 = pe.Node(interface=SingleNodeTestInterface(), name='n4')

    n1.interface.estimated_memory_gb = 1
    n2.interface.estimated_memory_gb = 1
    n3.interface.estimated_memory_gb = 1
    n4.interface.estimated_memory_gb = 1

    pipe.connect(n1, 'output1', n2, 'input1')
    pipe.connect(n1, 'output1', n3, 'input1')
    pipe.connect(n2, 'output1', n4, 'input1')
    pipe.connect(n3, 'output1', n4, 'input2')
    n1.inputs.input1 = 1

    pipe.run(plugin='MultiProc',
             plugin_args={
                 'memory_gb': max_memory,
                 'status_callback': log_nodes_cb
             })

    nodes = draw_gantt_chart.log_to_dict(LOG_FILENAME)
    last_node = nodes[-1]
    # per-second memory and thread usage parsed from the log
    memory, threads = find_metrics(nodes, last_node)

    result = True
    for m in memory:
        if m > max_memory:
            result = False
            break

    assert result, "using more memory than specified"

    max_threads = cpu_count()

    result = True
    for t in threads:
        if t > max_threads:
            result = False
            break

    assert result,\
        "using more threads than system has (threads is not specified by user)"

    os.remove(LOG_FILENAME)
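
The tests in these examples build nodes from a SingleNodeTestInterface (named TestInterfaceSingleNode in the older variants below), a fixture that is not reproduced here. The following is only a minimal sketch of what such an interface might look like using nipype's interface API, not the actual test helper; it simply copies input1 to output1.

# Hedged sketch of a pass-through test interface; the real fixture in
# nipype's test suite may define different traits or behavior.
from nipype.interfaces.base import (BaseInterface, BaseInterfaceInputSpec,
                                    TraitedSpec, traits)


class SingleNodeTestInputSpec(BaseInterfaceInputSpec):
    input1 = traits.Int(desc='dummy integer input')
    input2 = traits.Int(desc='dummy integer input')


class SingleNodeTestOutputSpec(TraitedSpec):
    output1 = traits.Int(desc='dummy integer output')


class SingleNodeTestInterface(BaseInterface):
    input_spec = SingleNodeTestInputSpec
    output_spec = SingleNodeTestOutputSpec

    def _run_interface(self, runtime):
        # No real work; the tests only care about scheduling behavior.
        return runtime

    def _list_outputs(self):
        outputs = self._outputs().get()
        outputs['output1'] = self.inputs.input1
        return outputs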
Example #2
def test_no_more_memory_than_specified():
    LOG_FILENAME = 'callback.log'
    my_logger = logging.getLogger('callback')
    my_logger.setLevel(logging.DEBUG)

    # Add the log message handler to the logger
    handler = logging.FileHandler(LOG_FILENAME)
    my_logger.addHandler(handler)

    max_memory = 1
    pipe = pe.Workflow(name='pipe')
    n1 = pe.Node(interface=TestInterfaceSingleNode(), name='n1')
    n2 = pe.Node(interface=TestInterfaceSingleNode(), name='n2')
    n3 = pe.Node(interface=TestInterfaceSingleNode(), name='n3')
    n4 = pe.Node(interface=TestInterfaceSingleNode(), name='n4')

    n1.interface.estimated_memory_gb = 1
    n2.interface.estimated_memory_gb = 1
    n3.interface.estimated_memory_gb = 1
    n4.interface.estimated_memory_gb = 1

    pipe.connect(n1, 'output1', n2, 'input1')
    pipe.connect(n1, 'output1', n3, 'input1')
    pipe.connect(n2, 'output1', n4, 'input1')
    pipe.connect(n3, 'output1', n4, 'input2')
    n1.inputs.input1 = 1

    pipe.run(plugin='MultiProc',
             plugin_args={'memory_gb': max_memory,
                          'status_callback': log_nodes_cb})


    nodes = draw_gantt_chart.log_to_dict(LOG_FILENAME)
    last_node = nodes[-1]
    # per-second memory and thread usage parsed from the log
    memory, threads = find_metrics(nodes, last_node)

    result = True
    for m in memory:
        if m > max_memory:
            result = False
            break

    yield assert_equal, result, True

    max_threads = cpu_count()

    result = True
    for t in threads:
        if t > max_threads:
            result = False
            break

    yield assert_equal, result, True,\
          "using more threads than system has (threads is not specified by user)"

    os.remove(LOG_FILENAME)
Example #3
def resource_overusage_report(cblog):
    '''Function to parse through a callback log for memory and/or
    thread usage above estimates / limits.

    Parameters
    ----------
    cblog: str
        path to callback.log

    Returns
    -------
    text_report: str
        human-readable summary of the nodes that exceeded their
        memory estimates (empty string if none did)

    excessive: dict
        maps each offending node ID to [runtime_memory_gb,
        estimated_memory_gb, runtime_threads - 1, num_threads], with
        None in the positions whose resource was not exceeded
    '''
    cb_dict_list = log_to_dict(cblog)
    excessive = {
        node['id']: [
            node['runtime_memory_gb'] if node.get('runtime_memory_gb', 0) >
            node.get('estimated_memory_gb', 1) else None,
            node['estimated_memory_gb'] if node.get('runtime_memory_gb', 0) >
            node.get('estimated_memory_gb', 1) else None,
            node['runtime_threads'] - 1 if node.get('runtime_threads', 0) -
            1 > node.get('num_threads', 1) else None,
            node['num_threads'] if node.get('runtime_threads', 0) -
            1 > node.get('num_threads', 1) else None
        ]
        for node in [
            node for node in cb_dict_list if
            (node.get('runtime_memory_gb', 0) >
             node.get('estimated_memory_gb', 1)
             # or node.get('runtime_threads', 0) - 1 > node.get('num_threads', 1)
             )
        ]
    }
    text_report = ''
    if excessive:
        text_report += 'The following nodes used excessive resources:\n'
        dotted_line = '-' * (len(text_report) - 1) + '\n'
        text_report += dotted_line
        for node in excessive:
            node_id = '\n  .'.join(node.split('.'))
            text_report += f'\n{node_id}\n'
            if excessive[node][0]:
                text_report += '      **memory_gb**\n' \
                               '        runtime > estimated\n' \
                               f'        {excessive[node][0]} ' \
                               f'> {excessive[node][1]}\n'
            # JC: I'm not convinced 'runtime_threads' and 'threads' are
            # comparable in nipype ~1.5.1
            # if excessive[node][2]:
            #     text_report += '      **threads**\n        runtime > limit\n' \
            #                    f'        {excessive[node][2]} ' \
            #                    f'> {excessive[node][3]}\n'
        text_report += dotted_line
    return text_report, excessive
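
A minimal usage sketch for resource_overusage_report, assuming the function is importable from wherever the code above lives; the log path is a placeholder for a log written by nipype's log_nodes_cb status callback.

# Placeholder path; point this at your own callback log.
report, excessive = resource_overusage_report('callback.log')
if excessive:
    print(report)  # summary of nodes whose runtime memory exceeded estimates
else:
    print('No nodes exceeded their memory estimates.')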
Example #4
def test_do_not_use_more_threads_then_specified():
    LOG_FILENAME = 'callback.log'
    my_logger = logging.getLogger('callback')
    my_logger.setLevel(logging.DEBUG)

    # Add the log message handler to the logger
    handler = logging.FileHandler(LOG_FILENAME)
    my_logger.addHandler(handler)

    max_threads = 4
    pipe = pe.Workflow(name='pipe')
    n1 = pe.Node(interface=TestInterfaceSingleNode(), name='n1')
    n2 = pe.Node(interface=TestInterfaceSingleNode(), name='n2')
    n3 = pe.Node(interface=TestInterfaceSingleNode(), name='n3')
    n4 = pe.Node(interface=TestInterfaceSingleNode(), name='n4')

    n1.interface.num_threads = 1
    n2.interface.num_threads = 1
    n3.interface.num_threads = 4
    n4.interface.num_threads = 1

    pipe.connect(n1, 'output1', n2, 'input1')
    pipe.connect(n1, 'output1', n3, 'input1')
    pipe.connect(n2, 'output1', n4, 'input1')
    pipe.connect(n3, 'output1', n4, 'input2')
    n1.inputs.input1 = 4
    pipe.config['execution']['poll_sleep_duration'] = 1
    pipe.run(plugin='MultiProc',
             plugin_args={
                 'n_procs': max_threads,
                 'status_callback': log_nodes_cb
             })

    nodes = draw_gantt_chart.log_to_dict(LOG_FILENAME)
    last_node = nodes[-1]
    # per-second memory and thread usage parsed from the log
    memory, threads = find_metrics(nodes, last_node)

    result = True
    for t in threads:
        if t > max_threads:
            result = False
            break

    yield assert_equal, result, True, "using more threads than specified"

    max_memory = get_system_total_memory_gb()
    result = True
    for m in memory:
        if m > max_memory:
            result = False
            break
    yield assert_equal, result, True,\
          "using more memory than system has (memory is not specified by user)"

    os.remove(LOG_FILENAME)
Example #5
def test_no_more_threads_than_specified():
    LOG_FILENAME = 'callback.log'
    my_logger = logging.getLogger('callback')
    my_logger.setLevel(logging.DEBUG)

    # Add the log message handler to the logger
    handler = logging.FileHandler(LOG_FILENAME)
    my_logger.addHandler(handler)

    max_threads = 4
    pipe = pe.Workflow(name='pipe')
    n1 = pe.Node(interface=SingleNodeTestInterface(), name='n1')
    n2 = pe.Node(interface=SingleNodeTestInterface(), name='n2')
    n3 = pe.Node(interface=SingleNodeTestInterface(), name='n3')
    n4 = pe.Node(interface=SingleNodeTestInterface(), name='n4')

    n1.interface.num_threads = 1
    n2.interface.num_threads = 1
    n3.interface.num_threads = 4
    n4.interface.num_threads = 1

    pipe.connect(n1, 'output1', n2, 'input1')
    pipe.connect(n1, 'output1', n3, 'input1')
    pipe.connect(n2, 'output1', n4, 'input1')
    pipe.connect(n3, 'output1', n4, 'input2')
    n1.inputs.input1 = 4
    pipe.config['execution']['poll_sleep_duration'] = 1
    pipe.run(plugin='MultiProc', plugin_args={'n_procs': max_threads,
                                              'status_callback': log_nodes_cb})

    nodes = draw_gantt_chart.log_to_dict(LOG_FILENAME)
    last_node = nodes[-1]
    # per-second memory and thread usage parsed from the log
    memory, threads = find_metrics(nodes, last_node)

    result = True
    for t in threads:
        if t > max_threads:
            result = False
            break

    assert result, "using more threads than specified"

    max_memory = get_system_total_memory_gb()
    result = True
    for m in memory:
        if m > max_memory:
            result = False
            break
    assert result,\
        "using more memory than system has (memory is not specified by user)"

    os.remove(LOG_FILENAME)
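
Each of the memory and thread tests above calls a find_metrics helper that is not reproduced in these examples. The sketch below shows one way such a helper could be written, under the assumptions spelled out in its docstring; it is not the actual nipype test helper.

import datetime


def find_metrics(nodes, last_node):
    """Hedged sketch: for every second between the first node's start and the
    last node's finish, sum the estimated memory (GB) and threads of all
    nodes running at that second. Assumes node['start'] and node['finish']
    are already datetime objects (parse them first if log_to_dict returns
    strings)."""
    start = nodes[0]['start']
    finish = last_node['finish']
    total_seconds = int((finish - start).total_seconds()) + 1

    memory = [0.0] * total_seconds
    threads = [0] * total_seconds
    for node in nodes:
        for sec in range(total_seconds):
            t = start + datetime.timedelta(seconds=sec)
            if node['start'] <= t <= node['finish']:
                memory[sec] += node.get('estimated_memory_gb', 0)
                threads[sec] += node.get('num_threads', 1)
    return memory, threads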
Example #6
def generate_gantt_chart(
    logfile,
    cores,
    minute_scale=10,
    space_between_minutes=50,
    colors=["#7070FF", "#4E4EB2", "#2D2D66", "#9B9BFF"],
):
    """
    Generates a Gantt chart in HTML showing the workflow execution based on a callback log file.
    This function is intended to be used with the MultiProc plugin.
    The Usage section below shows how to set up the workflow so that it generates the log file.

    Parameters
    ----------
    logfile : string
        filepath to the callback log file to plot the gantt chart of
    cores : integer
        the number of cores given to the workflow via the 'n_procs'
        plugin arg
    minute_scale : integer (optional); default=10
        the scale, in minutes, at which to plot line markers for the
        gantt chart; for example, minute_scale=10 means there are lines
        drawn at every 10 minute interval from start to finish
    space_between_minutes : integer (optional); default=50
        scale factor in pixel spacing between minute line markers
    colors : list (optional)
        a list of colors to choose from when coloring the nodes in the
        gantt chart

    Returns
    -------
    None
        the function does not return any value but writes out an html
        file in the same directory as the callback log path passed in

    Usage
    -----
    # import logging
    # import logging.handlers
    # from nipype.utils.profiler import log_nodes_cb

    # log_filename = 'callback.log'
    # logger = logging.getLogger('callback')
    # logger.setLevel(logging.DEBUG)
    # handler = logging.FileHandler(log_filename)
    # logger.addHandler(handler)

    # #create workflow
    # workflow = ...

    # workflow.run(plugin='MultiProc',
    #     plugin_args={'n_procs':8, 'memory':12, 'status_callback': log_nodes_cb})

    # generate_gantt_chart('callback.log', 8)
    """  # noqa: E501

    # add the html header
    html_string = """<!DOCTYPE html>
    <head>
        <style>
            #content{
                width:99%;
                height:100%;
                position:absolute;
            }

            .node{
                background-color:#7070FF;
                border-radius: 5px;
                position:absolute;
                width:20px;
                white-space:pre-wrap;
            }

            .line{
                position: absolute;
                color: #C2C2C2;
                opacity: 0.5;
                margin: 0px;
            }

            .time{
                position: absolute;
                font-size: 16px;
                color: #666666;
                margin: 0px;
            }

            .bar{
                position: absolute;
                height: 1px;
                opacity: 0.7;
            }

            .dot{
                position: absolute;
                width: 1px;
                height: 1px;
                background-color: red;
            }
            .label {
                width:20px;
                height:20px;
                opacity: 0.7;
                display: inline-block;
            }
        </style>
    </head>

    <body>
        <div id="content">
            <div style="display:inline-block;">
    """  # noqa: E501

    close_header = """
    </div>
    <div style="display:inline-block;margin-left:60px;vertical-align: top;">
        <p><span><div class="label" style="background-color:#90BBD7;"></div> Estimated Resource</span></p>
        <p><span><div class="label" style="background-color:#03969D;"></div> Actual Resource</span></p>
        <p><span><div class="label" style="background-color:#f00;"></div> Failed Node</span></p>
    </div>
    """  # noqa: E501

    # Read in json-log to get list of node dicts
    nodes_list = log_to_dict(logfile)

    # Only include nodes with timing information, and convert timestamps
    # from strings to datetimes
    nodes_list = [{
        k: datetime.datetime.strptime(i[k], "%Y-%m-%dT%H:%M:%S.%f")
        if k in {"start", "finish"} else i[k]
        for k in i
    } for i in nodes_list if "start" in i and "finish" in i]

    for node in nodes_list:
        if "duration" not in node:
            node["duration"] = (node["finish"] - node["start"]).total_seconds()

    # Create the header of the report with useful information
    start_node = nodes_list[0]
    last_node = nodes_list[-1]
    duration = (last_node["finish"] - start_node["start"]).total_seconds()

    # Get events based dictionary of node run stats
    events = create_event_dict(start_node["start"], nodes_list)

    # Summary strings of workflow at top
    html_string += ("<p>Start: " +
                    start_node["start"].strftime("%Y-%m-%d %H:%M:%S") + "</p>")
    html_string += ("<p>Finish: " +
                    last_node["finish"].strftime("%Y-%m-%d %H:%M:%S") + "</p>")
    html_string += "<p>Duration: " + "{0:.2f}".format(duration / 60) \
                   + " minutes</p>"
    html_string += "<p>Nodes: " + str(len(nodes_list)) + "</p>"
    html_string += "<p>Cores: " + str(cores) + "</p>"
    html_string += close_header
    # Draw nipype nodes Gantt chart and runtimes
    html_string += draw_lines(start_node["start"], duration, minute_scale,
                              space_between_minutes)
    html_string += draw_nodes(
        start_node["start"],
        nodes_list,
        cores,
        minute_scale,
        space_between_minutes,
        colors,
    )

    # Get memory timeseries
    estimated_mem_ts = calculate_resource_timeseries(events,
                                                     "estimated_memory_gb")
    runtime_mem_ts = calculate_resource_timeseries(events, "runtime_memory_gb")
    # Plot gantt chart
    resource_offset = 120 + 30 * cores
    html_string += draw_resource_bar(
        start_node["start"],
        last_node["finish"],
        estimated_mem_ts,
        space_between_minutes,
        minute_scale,
        "#90BBD7",
        resource_offset * 2 + 120,
        "Memory",
    )
    html_string += draw_resource_bar(
        start_node["start"],
        last_node["finish"],
        runtime_mem_ts,
        space_between_minutes,
        minute_scale,
        "#03969D",
        resource_offset * 2 + 120,
        "Memory",
    )

    # Get threads timeseries
    estimated_threads_ts = calculate_resource_timeseries(
        events, "estimated_threads")
    runtime_threads_ts = calculate_resource_timeseries(events,
                                                       "runtime_threads")
    # Plot gantt chart
    html_string += draw_resource_bar(
        start_node["start"],
        last_node["finish"],
        estimated_threads_ts,
        space_between_minutes,
        minute_scale,
        "#90BBD7",
        resource_offset,
        "Threads",
    )
    html_string += draw_resource_bar(
        start_node["start"],
        last_node["finish"],
        runtime_threads_ts,
        space_between_minutes,
        minute_scale,
        "#03969D",
        resource_offset,
        "Threads",
    )

    # finish html
    html_string += """
        </div>
    </body>"""

    # save file
    with open(logfile + ".html", "w") as html_file:
        html_file.write(html_string)
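
For context, log_nodes_cb writes one JSON object per line to the callback log, and log_to_dict turns those lines back into the node dictionaries consumed above. The sample below is illustrative only; the exact fields and their formatting vary across nipype versions.

import json

# Illustrative callback.log line (not real output); the keys mirror the
# fields the code above reads: 'id', 'start', 'finish',
# 'estimated_memory_gb', 'num_threads', etc.
sample_line = ('{"name": "n1", "id": "pipe.n1", '
               '"start": "2023-01-01T00:00:00.000000", '
               '"finish": "2023-01-01T00:00:03.000000", '
               '"estimated_memory_gb": 1, "num_threads": 1}')
node = json.loads(sample_line)
print(node['id'], node['estimated_memory_gb'])  # pipe.n1 1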
Example #7
def gen_runtime_plots(callback_log, img_desc):
    '''
    Function to generate box plots comparing the runtime and memory
    usage of the C-PAC and AFNI centrality implementations.

    Parameters
    ----------
    callback_log : string
        filepath to the callback.log file written by nipype's
        log_nodes_cb status callback
    img_desc : string
        a string describing the type of images being analyzed; this
        string is used to title and name the plot png files

    Returns
    -------
    None
        the function writes two png files (memory and runtime box
        plots) alongside the callback log
    '''

    # Import packages
    import matplotlib.pyplot as plt
    from nipype.utils.draw_gantt_chart import log_to_dict

    # Init variables
    nodes_list = log_to_dict(callback_log)

    # Isolate cpac and afni finish nodes
    cpac_nodes = [node for node in nodes_list
                  if node['name'] == 'calculate_centrality' and 'finish' in node]
    afni_nodes = [node for node in nodes_list
                  if node['name'] == 'afni_centrality' and 'finish' in node]

    # Gather cpac stats
    cpac_mems = [float(node['runtime_memory_gb']) for node in cpac_nodes]
    cpac_times = [node['duration'] for node in cpac_nodes]

    # Gather afni stats
    afni_mems = [float(node['runtime_memory_gb']) for node in afni_nodes]
    afni_times = [node['duration'] for node in afni_nodes]

    # Init pngs
    mem_png = callback_log.split('.log')[0] + '_memories.png'
    runtime_png = callback_log.split('.log')[0] + '_runtimes.png'

    # Set up plot - memory
    plt.boxplot([cpac_mems, afni_mems])
    plt.xticks(range(1, 3), ['C-PAC', 'AFNI'])
    plt.title('CPAC-AFNI memory usage for: %s' % img_desc)
    plt.xlabel('Implementation')
    plt.ylabel('Memory used (GB)')
    plt.grid()

    # Save figure
    fig = plt.gcf()
    fig.set_size_inches(16, 12)
    fig.tight_layout()

    plt.savefig(mem_png, dpi=200)

    # Clear and close
    plt.clf()
    plt.close()

    # Set up plot - runtime
    plt.boxplot([cpac_times, afni_times])
    plt.xticks(range(1, 3), ['C-PAC', 'AFNI'])
    plt.title('CPAC-AFNI runtime for: %s' % img_desc)
    plt.xlabel('Implementation')
    plt.ylabel('Runtime (seconds)')
    plt.grid()

    # Save figure
    fig = plt.gcf()
    fig.set_size_inches(16, 12)
    fig.tight_layout()

    plt.savefig(runtime_png, dpi=200)

    # Clear and close
    plt.clf()
    plt.close()
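
A hedged usage sketch for gen_runtime_plots; the path and description below are placeholders, and the callback log must contain finished 'calculate_centrality' and 'afni_centrality' nodes for the box plots to have data.

# Placeholder arguments for illustration only.
gen_runtime_plots('/path/to/callback.log', 'degree centrality')
# Writes /path/to/callback_memories.png and /path/to/callback_runtimes.png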