Code example #1
    def setUp(self):
        '''
        Method to instantiate input arguments for the
        cpac_pipeline.run() method via instance attributes
        
        Parameters
        ----------
        self : CPACPipelineRunTestCase
            a unittest.TestCase-inherited class

        Returns
        -------
        None
            this function does not return any values, but populates the
            instance attributes for:
            self.config_file : string
            self.sublist_file : string
            self.idx : integer
            self.config : CPAC.utils.configuration.Configuration object
            self.strategies : list [dict]
        '''

        # Import packages
        import os
        import yaml
        from CPAC.utils.configuration import Configuration

        # Init variables
        self.config_file = PIPELINE_CONFIG
        self.sublist_file = SUBJECT_LIST
        self.idx = 1
        # Init Configuration class from config_file
        self.config = Configuration(yaml.safe_load(open(self.config_file, 'r')))
        self.strategies = STRAT_FILE
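
The pattern in this setUp, reading a pipeline YAML file and wrapping it in a Configuration object, recurs throughout the examples below. A minimal standalone sketch of just that step (the config path is a placeholder, not taken from the source):

import yaml
from CPAC.utils.configuration import Configuration

# Placeholder path; point this at a real C-PAC pipeline config YAML
config_path = '/path/to/pipeline_config.yml'
with open(config_path, 'r') as f:
    config = Configuration(yaml.safe_load(f))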
Code example #2
def run(config, subject_infos):
    import subprocess
    subprocess.getoutput('source ~/.bashrc')
    import os
    import pickle
    import yaml
    import yamlordereddictloader
    from CPAC.utils.configuration import Configuration  # used below; assumed module-level import in the original file

    c = Configuration(yaml.safe_load(open(os.path.realpath(config), 'r')))

    prep_cwas_workflow(c, pickle.load(open(subject_infos, 'rb')))  # pickle requires binary mode
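
The run() wrapper above expects subject_infos to be the path to a pickled object. A hedged companion sketch of how such a file could be produced (the structure of the pickled data is illustrative only, not documented in this snippet):

import pickle

# Illustrative structure only; the real subject_infos layout is defined elsewhere in C-PAC
subject_infos = {'sub-0001': ['/path/to/func.nii.gz']}
with open('subject_infos.pkl', 'wb') as f:  # binary mode is required for pickle
    pickle.dump(subject_infos, f)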
Code example #3
def test_motion_estimates_and_correction(run_value):
    '''Test that any truthy forkable option for 'run' throws the custom
    human-readable exception for an invalid motion_estimate_filter.
    '''
    d = {
        'FROM': 'default',
        'functional_preproc': {
            'motion_estimates_and_correction': {
                'motion_estimate_filter': {
                    'run': run_value,
                    'filter_type': 'notch',
                    'filter_order': 0,
                    'breathing_rate_min': None,
                    'breathing_rate_max': 101.5
                }
            }
        }
    }
    if bool(run_value) and run_value not in [[False], []]:
        with pytest.raises(Invalid) as e:
            Configuration(d)
        assert "func#motion_estimate_filter_valid_options" in str(e.value)
    else:
        Configuration(d)
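
This test function takes run_value as an argument, so in the test suite it is presumably driven by pytest parametrization. A hedged sketch of that wiring (the parameter values and the import location of Invalid are assumptions, not taken from this snippet):

import pytest
from voluptuous import Invalid  # assumed source of the validation error class
from CPAC.utils.configuration import Configuration

# Hypothetical parameter set covering truthy and falsy forkable values
@pytest.mark.parametrize('run_value',
                         [True, False, [True], [False], [True, False], []])
def test_motion_estimates_and_correction(run_value):
    ...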
Code example #4
    def runAnalysis1(self, pipeline, sublist, p):

        import CPAC.pipeline.cpac_runner
        from CPAC.utils import Configuration
        import yamlordereddictloader
        import os    # os and yaml are used below; assumed module-level imports in the original file
        import yaml
        c = Configuration(
            yaml.load(open(os.path.realpath(pipeline), 'r'),
                      Loader=yamlordereddictloader.Loader))

        plugin_args = {
            'n_procs': c.maxCoresPerParticipant,
            'memory_gb': c.maximumMemoryPerParticipant
        }

        # TODO: make this work
        if self.pids:
            #print "THERE'S SOMETHING RUNNING!"
            pass

        CPAC.pipeline.cpac_runner.run(sublist,
                                      pipeline,
                                      p,
                                      plugin='MultiProc',
                                      plugin_args=plugin_args)
Code example #5
def test_trimmer():

    from CPAC.utils.trimmer import the_trimmer, is_datasink, expand_workflow, compute_datasink_dirs
    from CPAC.pipeline.cpac_pipeline import build_workflow
    from CPAC.utils.configuration import Configuration

    import os
    import tempfile
    import yaml
    from copy import copy  # used for c_anatomical / c_functional below
    import pkg_resources as p

    pipe_config = \
        p.resource_filename(
            "CPAC",
            os.path.join(
                "resources",
                "configs",
                "pipeline_config_template.yml"
            )
        )

    data_config = \
        p.resource_filename(
            "CPAC",
            os.path.join(
                "resources",
                "configs",
                "data_config_S3-BIDS-ABIDE.yml"
            )
        )

    data_config = yaml.safe_load(open(data_config, 'r'))
    sub_dict = data_config[0]

    c = Configuration(yaml.safe_load(open(pipe_config, 'r')))
    temp_dir = tempfile.mkdtemp()
    c.logDirectory = temp_dir
    c.workingDirectory = temp_dir
    c.outputDirectory = temp_dir
    c.crashLogDirectory = temp_dir

    # Disable functional preprocessing, leaving only the anatomical workflow
    c_anatomical = copy(c)
    c_anatomical.runFunctional = [0]

    wf, _, _ = build_workflow(sub_dict['subject_id'], sub_dict, c_anatomical)

    # Create fake files to trick THE TRIMMER
    exec_graph = expand_workflow(wf)
    datasinks = [n for n in exec_graph.nodes() if is_datasink(n)]
    anat_derivatives = {}
    for datasink in datasinks:
        paths = compute_datasink_dirs(exec_graph, datasink)
        anat_derivatives.update(paths)
        for (node, derivative), path in paths.items():
            os.makedirs(path)
            open(os.path.join(path, '%s.txt' % derivative), 'a').close()

    # Enable functional, so the workflow should only run this
    # and enable trimming
    c_functional = copy(c)
    c_functional.runFunctional = [1]

    wf, _, _ = build_workflow(sub_dict['subject_id'], sub_dict, c_functional)
    exec_wf, _ = the_trimmer(wf)
    exec_graph = exec_wf._graph

    datasinks = [n for n in exec_graph.nodes() if is_datasink(n)]
    func_derivatives = {}
    for datasink in datasinks:
        paths = compute_datasink_dirs(exec_graph, datasink)
        func_derivatives.update(paths)

    # Assert that the functional pipeline removes all the anatomical nodes,
    # as they were already computed
    assert set(func_derivatives.keys()).intersection(
        set(anat_derivatives.keys())) == set()
Code example #6
def run_cpac_on_cluster(config_file, subject_list_file,
                        cluster_files_dir):
    '''
    Function to build a SLURM batch job submission script and
    submit it to the scheduler via 'sbatch'
    '''

    # Import packages
    import subprocess
    import getpass
    import os    # os and yaml are used below; assumed module-level imports in the original file
    import re
    import yaml
    from time import strftime

    from CPAC.utils import Configuration
    from indi_schedulers import cluster_templates

    # Load in pipeline config
    try:
        pipeline_dict = yaml.safe_load(open(os.path.realpath(config_file), 'r'))
        pipeline_config = Configuration(pipeline_dict)
    except:
        raise Exception('Pipeline config is not in proper YAML format. '\
                        'Please check your file')
    # Load in the subject list
    try:
        sublist = yaml.safe_load(open(os.path.realpath(subject_list_file), 'r'))
    except:
        raise Exception('Subject list is not in proper YAML format. '\
                        'Please check your file')

    # Init variables
    timestamp = str(strftime("%Y_%m_%d_%H_%M_%S"))
    job_scheduler = pipeline_config.resourceManager.lower()

    # For SLURM time limit constraints only, hh:mm:ss
    hrs_limit = 8 * len(sublist)
    time_limit = '%d:00:00' % hrs_limit

    # Batch file variables
    shell = subprocess.getoutput('echo $SHELL')
    user_account = getpass.getuser()
    num_subs = len(sublist)

    # Run CPAC via python -c command
    python_cpac_str = 'python -c "from CPAC.pipeline.cpac_pipeline import run; '\
                      'run(\'%(config_file)s\', \'%(subject_list_file)s\', '\
                      '%(env_arr_idx)s, \'%(pipeline_name)s\', '\
                      'plugin=\'MultiProc\', plugin_args=%(plugin_args)s)"'

    # Init plugin arguments
    plugin_args = {'n_procs': pipeline_config.maxCoresPerParticipant,
                   'memory_gb': pipeline_config.maximumMemoryPerParticipant}

    # Set up run command dictionary
    run_cmd_dict = {'config_file' : config_file,
                    'subject_list_file' : subject_list_file,
                    'pipeline_name' : pipeline_config.pipelineName,
                    'plugin_args' : plugin_args}

    # Set up config dictionary
    config_dict = {'timestamp' : timestamp,
                   'shell' : shell,
                   'job_name' : 'CPAC_' + pipeline_config.pipelineName,
                   'num_tasks' : num_subs,
                   'queue' : pipeline_config.queue,
                   'par_env' : pipeline_config.parallelEnvironment,
                   'cores_per_task' : pipeline_config.maxCoresPerParticipant,
                   'user' : user_account,
                   'work_dir' : cluster_files_dir,
                   'time_limit' : time_limit}

    # Get string template for job scheduler
    if job_scheduler == 'pbs':
        env_arr_idx = '$PBS_ARRAYID'
        batch_file_contents = cluster_templates.pbs_template
        confirm_str = r'(?<=Your job-array )\d+'
        exec_cmd = 'qsub'
    elif job_scheduler == 'sge':
        env_arr_idx = '$SGE_TASK_ID'
        batch_file_contents = cluster_templates.sge_template
        confirm_str = r'(?<=Your job-array )\d+'
        exec_cmd = 'qsub'
    elif job_scheduler == 'slurm':
        env_arr_idx = '$SLURM_ARRAY_TASK_ID'
        batch_file_contents = cluster_templates.slurm_template
        confirm_str = r'(?<=Submitted batch job )\d+'
        exec_cmd = 'sbatch'

    # Populate rest of dictionary
    config_dict['env_arr_idx'] = env_arr_idx
    run_cmd_dict['env_arr_idx'] = env_arr_idx
    config_dict['run_cmd'] = python_cpac_str % run_cmd_dict

    # Populate string from config dict values
    batch_file_contents = batch_file_contents % config_dict
    # Write file
    batch_filepath = os.path.join(cluster_files_dir, 'cpac_submit_%s.%s' \
                                  % (timestamp, job_scheduler))
    with open(batch_filepath, 'w') as f:
        f.write(batch_file_contents)

    # Get output response from job submission
    out = subprocess.getoutput('%s %s' % (exec_cmd, batch_filepath))

    # Check for successful qsub submission
    if re.search(confirm_str, out) is None:
        err_msg = 'Error submitting C-PAC pipeline run to %s queue' \
                  % job_scheduler
        raise Exception(err_msg)

    # Get pid and send to pid file
    pid = re.search(confirm_str, out).group(0)
    pid_file = os.path.join(cluster_files_dir, 'pid.txt')
    with open(pid_file, 'w') as f:
        f.write(pid)
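
The batch script above is produced by old-style %-substitution of a scheduler template against config_dict. A minimal, self-contained illustration of that mechanism (the template text here is invented, not the real content of indi_schedulers.cluster_templates):

# Toy SLURM-style template; the real templates come from indi_schedulers.cluster_templates
batch_template = (
    '#!/bin/bash\n'
    '#SBATCH --job-name=%(job_name)s\n'
    '#SBATCH --array=1-%(num_tasks)d\n'
    '%(run_cmd)s\n'
)
print(batch_template % {'job_name': 'CPAC_demo',
                        'num_tasks': 3,
                        'run_cmd': 'echo "run C-PAC here"'})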
Code example #7
def run(subject_list_file, config_file=None, p_name=None, plugin=None,
        plugin_args=None, tracking=True, num_subs_at_once=None, debug=False, test_config=False):

    # Import packages
    import subprocess
    import os
    import pickle
    import time
    import warnings
    import yaml
    from multiprocessing import Process
    from time import strftime

    # Configuration and run_workflow come from the CPAC package; other helpers
    # (track_run, run_condor_jobs, run_cpac_on_cluster) are assumed to be
    # module-level imports in the original file
    from CPAC.utils.configuration import Configuration
    from CPAC.pipeline.cpac_pipeline import run_workflow

    print('Run called with config file {0}'.format(config_file))

    if not config_file:
        import pkg_resources as p
        config_file = \
            p.resource_filename("CPAC",
                                os.path.join("resources",
                                             "configs",
                                             "pipeline_config_template.yml"))

    # Init variables
    sublist = None
    if '.yaml' in subject_list_file or '.yml' in subject_list_file:
        subject_list_file = os.path.realpath(subject_list_file)
    else:
        from CPAC.utils.bids_utils import collect_bids_files_configs, \
            bids_gen_cpac_sublist
        (file_paths, config) = collect_bids_files_configs(subject_list_file,
                                                          None)
        sublist = bids_gen_cpac_sublist(subject_list_file, file_paths,
                                        config, None)
        if not sublist:
            import sys
            print("Did not find data in {0}".format(subject_list_file))
            sys.exit(1)

    # take date+time stamp for run identification purposes
    unique_pipeline_id = strftime("%Y%m%d%H%M%S")
    pipeline_start_stamp = strftime("%Y-%m-%d_%H:%M:%S")

    # Load in pipeline config file
    config_file = os.path.realpath(config_file)
    try:
        if not os.path.exists(config_file):
            raise IOError
        else:
            c = Configuration(yaml.safe_load(open(config_file, 'r')))
    except IOError:
        print("config file %s doesn't exist" % config_file)
        raise
    except yaml.parser.ParserError as e:
        error_detail = "\"%s\" at line %d" % (
            e.problem,
            e.problem_mark.line
        )
        raise Exception(
            "Error parsing config file: {0}\n\n"
            "Error details:\n"
            "    {1}"
            "\n\n".format(config_file, error_detail)
        )
    except Exception as e:
        raise Exception(
            "Error parsing config file: {0}\n\n"
            "Error details:\n"
            "    {1}"
            "\n\n".format(config_file, e)
        )

    c.logDirectory = os.path.abspath(c.logDirectory)
    c.workingDirectory = os.path.abspath(c.workingDirectory)
    if 's3://' not in c.outputDirectory:
        c.outputDirectory = os.path.abspath(c.outputDirectory)
    c.crashLogDirectory = os.path.abspath(c.crashLogDirectory)

    if debug:
        c.write_debugging_outputs = "[1]"

    if num_subs_at_once:
        if not str(num_subs_at_once).isdigit():
            raise Exception('[!] Value entered for --num_cores not a digit.')
        c.numParticipantsAtOnce = int(num_subs_at_once)

    # Do some validation
    if not c.workingDirectory:
        raise Exception('Working directory not specified')

    if len(c.workingDirectory) > 70:
        warnings.warn("We recommend that the working directory full path "
                      "should have less then 70 characters. "
                      "Long paths might not work in your operational system.")
        warnings.warn("Current working directory: %s" % c.workingDirectory)

    # Get the pipeline name
    p_name = p_name or c.pipelineName

    # Load in subject list
    try:
        if not sublist:
            sublist = yaml.safe_load(open(subject_list_file, 'r'))
    except:
        print("Subject list is not in proper YAML format. Please check " \
              "your file")
        raise Exception

    # Populate subject scan map
    sub_scan_map = {}
    try:
        for sub in sublist:
            if sub['unique_id']:
                s = sub['subject_id'] + "_" + sub["unique_id"]
            else:
                s = sub['subject_id']
            scan_ids = ['scan_anat']

            if 'func' in sub:
                for id in sub['func']:
                    scan_ids.append('scan_'+ str(id))

            if 'rest' in sub:
                for id in sub['rest']:
                    scan_ids.append('scan_'+ str(id))

            sub_scan_map[s] = scan_ids
    except:
        print("\n\n" + "ERROR: Subject list file not in proper format - " \
              "check if you loaded the correct file?" + "\n" + \
              "Error name: cpac_runner_0001" + "\n\n")
        raise Exception

    pipeline_timing_info = []
    pipeline_timing_info.append(unique_pipeline_id)
    pipeline_timing_info.append(pipeline_start_stamp)
    pipeline_timing_info.append(len(sublist))

    if tracking:
        try:
            track_run(level='participant', participants=len(sublist))
        except:
            pass

    # If we're running on cluster, execute job scheduler
    if c.runOnGrid:

        # Create cluster log dir
        cluster_files_dir = os.path.join(c.logDirectory, 'cluster_files')
        if not os.path.exists(cluster_files_dir):
            os.makedirs(cluster_files_dir)

        # Check if it's a condor job, and run that
        if 'condor' in c.resourceManager.lower():
            run_condor_jobs(c, config_file, subject_list_file, p_name)
        # All other schedulers are supported
        else:
            run_cpac_on_cluster(config_file, subject_list_file, cluster_files_dir)

    # Run on one computer
    else:

        if not os.path.exists(c.workingDirectory):
            try:
                os.makedirs(c.workingDirectory)
            except:
                err = "\n\n[!] CPAC says: Could not create the working " \
                      "directory: %s\n\nMake sure you have permissions " \
                      "to write to this directory.\n\n" % c.workingDirectory
                raise Exception(err)

        # If it only allows one, run it linearly
        if c.numParticipantsAtOnce == 1:
            for sub in sublist:
                run_workflow(sub, c, True, pipeline_timing_info,
                              p_name, plugin, plugin_args, test_config)
            return

        pid = open(os.path.join(c.workingDirectory, 'pid.txt'), 'w')

        # Init job queue
        job_queue = []

        # Allocate processes
        processes = [
            Process(target=run_workflow,
                    args=(sub, c, True, pipeline_timing_info,
                          p_name, plugin, plugin_args, test_config))
            for sub in sublist
        ]

        # If we're allocating more processes than are subjects, run them all
        if len(sublist) <= c.numParticipantsAtOnce:
            for p in processes:
                p.start()
                print(p.pid, file=pid)

        # Otherwise manage resources to run processes incrementally
        else:
            idx = 0
            while idx < len(sublist):
                # If the job queue is empty and we haven't started indexing
                if len(job_queue) == 0 and idx == 0:
                    # Init subject process index
                    idc = idx
                    # Launch processes (one for each subject)
                    for p in processes[idc: idc+c.numParticipantsAtOnce]:
                        p.start()
                        print(p.pid, file=pid)
                        job_queue.append(p)
                        idx += 1
                # Otherwise, jobs are running - check them
                else:
                    # Check every job in the queue's status
                    for job in job_queue:
                        # If the job is not alive
                        if not job.is_alive():
                            # Find job and delete it from queue
                            print('found dead job ', job)
                            loc = job_queue.index(job)
                            del job_queue[loc]
                            # ...and start the next available process
                            # (subject)
                            processes[idx].start()
                            # Append this to job queue and increment index
                            job_queue.append(processes[idx])
                            idx += 1
                    # Add sleep so while loop isn't consuming 100% of CPU
                    time.sleep(2)
        # Close PID txt file to indicate finish
        pid.close()
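
A hedged invocation sketch for the run() entry point above (all paths are placeholders; as the code shows, a BIDS directory can be passed in place of a YAML data config):

from CPAC.pipeline.cpac_runner import run  # module path as used in code example #4

run('/path/to/data_config.yml',              # placeholder subject list / data config
    config_file='/path/to/pipeline_config.yml',
    num_subs_at_once=2)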
Code example #8
        upgrade_pipeline_to_1_8(updated_config)
        c = load_yaml_config(updated_config, args.aws_input_creds)

    overrides = {}
    if hasattr(args, 'pipeline_override') and args.pipeline_override:
        overrides = {
            k: v
            for d in args.pipeline_override for k, v in d.items()
        }
        c = update_nested_dict(c, overrides)

    if args.anat_only:
        c = update_nested_dict(c, {'FROM': 'anat-only'})

    c = Configuration(c)

    # get the aws_input_credentials, if any are specified
    if args.aws_input_creds:
        c['awsCredentialsFile'] = resolve_aws_credential(args.aws_input_creds)

    if args.aws_output_creds:
        c['pipeline_setup']['Amazon-AWS'][
            'aws_output_bucket_credentials'] = resolve_aws_credential(
                args.aws_output_creds)

    c['pipeline_setup']['output_directory']['path'] = os.path.join(
        args.output_dir, "output")

    if "s3://" not in args.output_dir.lower():
        c['pipeline_setup']['log_directory']['path'] = os.path.join(
Code example #9
def run(subject_list_file,
        config_file=None,
        p_name=None,
        plugin=None,
        plugin_args=None,
        tracking=True,
        num_subs_at_once=None,
        debug=False,
        test_config=False):

    # Import packages
    import subprocess
    import os
    import glob
    import pickle
    import time
    import warnings
    import yaml
    from multiprocessing import Process
    from time import strftime

    # Configuration and run_workflow come from the CPAC package; other helpers
    # (track_run, run_condor_jobs, run_cpac_on_cluster, anat_longitudinal_wf)
    # are assumed to be module-level imports in the original file
    from CPAC.utils.configuration import Configuration
    from CPAC.pipeline.cpac_pipeline import run_workflow

    print('Run called with config file {0}'.format(config_file))

    if not config_file:
        import pkg_resources as p
        config_file = \
            p.resource_filename("CPAC",
                                os.path.join("resources",
                                             "configs",
                                             "pipeline_config_template.yml"))

    # Init variables
    sublist = None
    if '.yaml' in subject_list_file or '.yml' in subject_list_file:
        subject_list_file = os.path.realpath(subject_list_file)
    else:
        from CPAC.utils.bids_utils import collect_bids_files_configs, \
            bids_gen_cpac_sublist
        (file_paths,
         config) = collect_bids_files_configs(subject_list_file, None)
        sublist = bids_gen_cpac_sublist(subject_list_file, file_paths, config,
                                        None)
        if not sublist:
            import sys
            print("Did not find data in {0}".format(subject_list_file))
            sys.exit(1)

    # take date+time stamp for run identification purposes
    unique_pipeline_id = strftime("%Y%m%d%H%M%S")
    pipeline_start_stamp = strftime("%Y-%m-%d_%H:%M:%S")

    # Load in pipeline config file
    config_file = os.path.realpath(config_file)
    try:
        if not os.path.exists(config_file):
            raise IOError
        else:
            c = Configuration(yaml.safe_load(open(config_file, 'r')))
    except IOError:
        print("config file %s doesn't exist" % config_file)
        raise
    except yaml.parser.ParserError as e:
        error_detail = "\"%s\" at line %d" % (e.problem, e.problem_mark.line)
        raise Exception("Error parsing config file: {0}\n\n"
                        "Error details:\n"
                        "    {1}"
                        "\n\n".format(config_file, error_detail))
    except Exception as e:
        raise Exception("Error parsing config file: {0}\n\n"
                        "Error details:\n"
                        "    {1}"
                        "\n\n".format(config_file, e))

    c.logDirectory = os.path.abspath(c.logDirectory)
    c.workingDirectory = os.path.abspath(c.workingDirectory)
    if 's3://' not in c.outputDirectory:
        c.outputDirectory = os.path.abspath(c.outputDirectory)
    c.crashLogDirectory = os.path.abspath(c.crashLogDirectory)

    if debug:
        c.write_debugging_outputs = "[1]"

    if num_subs_at_once:
        if not str(num_subs_at_once).isdigit():
            raise Exception('[!] Value entered for --num_cores not a digit.')
        c.numParticipantsAtOnce = int(num_subs_at_once)

    # Do some validation
    if not c.workingDirectory:
        raise Exception('Working directory not specified')

    if len(c.workingDirectory) > 70:
        warnings.warn("We recommend that the working directory full path "
                      "should have less then 70 characters. "
                      "Long paths might not work in your operational system.")
        warnings.warn("Current working directory: %s" % c.workingDirectory)

    # Get the pipeline name
    p_name = p_name or c.pipelineName

    # Load in subject list
    try:
        if not sublist:
            sublist = yaml.safe_load(open(subject_list_file, 'r'))
    except:
        print("Subject list is not in proper YAML format. Please check " \
              "your file")
        raise Exception

    # Populate subject scan map
    sub_scan_map = {}
    try:
        for sub in sublist:
            if sub['unique_id']:
                s = sub['subject_id'] + "_" + sub["unique_id"]
            else:
                s = sub['subject_id']
            scan_ids = ['scan_anat']

            if 'func' in sub:
                for id in sub['func']:
                    scan_ids.append('scan_' + str(id))

            if 'rest' in sub:
                for id in sub['rest']:
                    scan_ids.append('scan_' + str(id))

            sub_scan_map[s] = scan_ids
    except:
        print("\n\n" + "ERROR: Subject list file not in proper format - " \
              "check if you loaded the correct file?" + "\n" + \
              "Error name: cpac_runner_0001" + "\n\n")
        raise Exception

    pipeline_timing_info = []
    pipeline_timing_info.append(unique_pipeline_id)
    pipeline_timing_info.append(pipeline_start_stamp)
    pipeline_timing_info.append(len(sublist))

    if tracking:
        try:
            track_run(level='participant' if not test_config else 'test',
                      participants=len(sublist))
        except:
            print("Usage tracking failed for this run.")

    # If we're running on cluster, execute job scheduler
    if c.runOnGrid:

        # Create cluster log dir
        cluster_files_dir = os.path.join(c.logDirectory, 'cluster_files')
        if not os.path.exists(cluster_files_dir):
            os.makedirs(cluster_files_dir)

        # Check if it's a condor job, and run that
        if 'condor' in c.resourceManager.lower():
            run_condor_jobs(c, config_file, subject_list_file, p_name)
        # All other schedulers are supported
        else:
            run_cpac_on_cluster(config_file, subject_list_file,
                                cluster_files_dir)

    # Run on one computer
    else:
        # Create working dir
        if not os.path.exists(c.workingDirectory):
            try:
                os.makedirs(c.workingDirectory)
            except:
                err = "\n\n[!] CPAC says: Could not create the working " \
                      "directory: %s\n\nMake sure you have permissions " \
                      "to write to this directory.\n\n" % c.workingDirectory
                raise Exception(err)
        '''
        if not os.path.exists(c.logDirectory):
            try:
                os.makedirs(c.logDirectory)
            except:
                err = "\n\n[!] CPAC says: Could not create the log " \
                      "directory: %s\n\nMake sure you have permissions " \
                      "to write to this directory.\n\n" % c.logDirectory
                raise Exception(err)
        '''

        # BEGIN LONGITUDINAL TEMPLATE PIPELINE
        if hasattr(c, 'run_longitudinal') and ('anat' in c.run_longitudinal or
                                               'func' in c.run_longitudinal):
            subject_id_dict = {}
            for sub in sublist:
                if sub['subject_id'] in subject_id_dict:
                    subject_id_dict[sub['subject_id']].append(sub)
                else:
                    subject_id_dict[sub['subject_id']] = [sub]

            # subject_id_dict has subject IDs as keys and a list of sessions
            # for each participant as values
            valid_longitudinal_data = False
            for subject_id, sub_list in subject_id_dict.items():
                if len(sub_list) > 1:
                    valid_longitudinal_data = True
                    if 'func' in c.run_longitudinal:
                        raise Exception(
                            "\n\n[!] Error: Functional longitudinal pipeline is still in development and will be available in next release. Please only run anatomical longitudinal pipeline for now.\n\n"
                        )
                    if 'anat' in c.run_longitudinal:
                        strat_list = anat_longitudinal_wf(
                            subject_id, sub_list, c)
                elif len(sub_list) == 1:
                    warnings.warn(
                        "\n\nThere is only one anatomical session for sub-%s. Longitudinal preprocessing will be skipped for this subject.\n\n"
                        % subject_id)
                # TODO
                # if 'func' in c.run_longitudinal:
                #     strat_list = func_preproc_longitudinal_wf(subject_id, sub_list, c)
                #     func_longitudinal_template_wf(subject_id, strat_list, c)

            if valid_longitudinal_data:
                rsc_file_list = []
                for dirpath, dirnames, filenames in os.walk(c.outputDirectory):
                    for f in filenames:
                        # TODO is there a better way to check output folder name?
                        if f != '.DS_Store' and 'pipeline_analysis_longitudinal' in dirpath:
                            rsc_file_list.append(os.path.join(dirpath, f))

                subject_specific_dict = {
                    subj: []
                    for subj in subject_id_dict.keys()
                }
                session_specific_dict = {
                    os.path.join(session['subject_id'], session['unique_id']):
                    []
                    for session in sublist
                }
                for rsc_path in rsc_file_list:
                    key = [
                        s for s in session_specific_dict.keys()
                        if s in rsc_path
                    ]
                    if key:
                        session_specific_dict[key[0]].append(rsc_path)
                    else:
                        subj = [
                            s for s in subject_specific_dict.keys()
                            if s in rsc_path
                        ]
                        if subj:
                            subject_specific_dict[subj[0]].append(rsc_path)

                # update individual-specific outputs:
                # anatomical_brain, anatomical_brain_mask and anatomical_reorient
                for key in session_specific_dict.keys():
                    for f in session_specific_dict[key]:
                        sub, ses = key.split('/')
                        ses_list = [
                            subj for subj in sublist
                            if sub in subj['subject_id']
                            and ses in subj['unique_id']
                        ]
                        if len(ses_list) > 1:
                            raise Exception(
                                "There are several files containing " + f)
                        if len(ses_list) == 1:
                            ses = ses_list[0]
                            subj_id = ses['subject_id']
                            tmp = f.split(c.outputDirectory)[-1]
                            keys = tmp.split(os.sep)
                            if keys[0] == '':
                                keys = keys[1:]
                            if len(keys) > 1:
                                if ses.get('resource_pool') is None:
                                    ses['resource_pool'] = {
                                        keys[0].split(c.pipelineName + '_')[-1]:
                                        {
                                            keys[-2]: f
                                        }
                                    }
                                else:
                                    strat_key = keys[0].split(c.pipelineName +
                                                              '_')[-1]
                                    if ses['resource_pool'].get(
                                            strat_key) is None:
                                        ses['resource_pool'].update(
                                            {strat_key: {
                                                keys[-2]: f
                                            }})
                                    else:
                                        ses['resource_pool'][strat_key].update(
                                            {keys[-2]: f})

                for key in subject_specific_dict:
                    for f in subject_specific_dict[key]:
                        ses_list = [
                            subj for subj in sublist if key in subj['anat']
                        ]
                        for ses in ses_list:
                            tmp = f.split(c.outputDirectory)[-1]
                            keys = tmp.split(os.sep)
                            if keys[0] == '':
                                keys = keys[1:]
                            if len(keys) > 1:
                                if ses.get('resource_pool') is None:
                                    ses['resource_pool'] = {
                                        keys[0].split(c.pipelineName + '_')[-1]:
                                        {
                                            keys[-2]: f
                                        }
                                    }
                                else:
                                    strat_key = keys[0].split(c.pipelineName +
                                                              '_')[-1]
                                    if ses['resource_pool'].get(
                                            strat_key) is None:
                                        ses['resource_pool'].update(
                                            {strat_key: {
                                                keys[-2]: f
                                            }})
                                    else:
                                        if keys[-2] == 'anatomical_brain' or keys[
                                                -2] == 'anatomical_brain_mask' or keys[
                                                    -2] == 'anatomical_skull_leaf':
                                            pass
                                        elif 'apply_warp_anat_longitudinal_to_standard' in keys[
                                                -2] or 'fsl_apply_xfm_longitudinal' in keys[
                                                    -2]:
                                            # TODO update!!!
                                            # it assumes session id == last key (ordered by session count instead of session id) + 1
                                            # might cause problems if session IDs are not continuous
                                            def replace_index(
                                                    target1, target2,
                                                    file_path):
                                                index1 = file_path.index(
                                                    target1) + len(target1)
                                                index2 = file_path.index(
                                                    target2) + len(target2)
                                                file_str_list = list(file_path)
                                                file_str_list[index1] = "*"
                                                file_str_list[index2] = "*"
                                                file_path_updated = "".join(
                                                    file_str_list)
                                                file_list = glob.glob(
                                                    file_path_updated)
                                                file_list.sort()
                                                return file_list

                                            if ses['unique_id'] == str(
                                                    int(keys[-2][-1]) + 1):
                                                if keys[-3] == 'seg_probability_maps':
                                                    f_list = replace_index(
                                                        'seg_probability_maps_',
                                                        'segment_prob_', f)
                                                    ses['resource_pool'][
                                                        strat_key].update(
                                                            {keys[-3]: f_list})
                                                elif keys[
                                                        -3] == 'seg_partial_volume_files':
                                                    f_list = replace_index(
                                                        'seg_partial_volume_files_',
                                                        'segment_pve_', f)
                                                    ses['resource_pool'][
                                                        strat_key].update(
                                                            {keys[-3]: f_list})
                                                else:
                                                    ses['resource_pool'][
                                                        strat_key].update({
                                                            keys[-3]:
                                                            f  # keys[-3]: 'anatomical_to_standard'
                                                        })
                                        elif keys[-2] != 'warp_list':
                                            ses['resource_pool'][
                                                strat_key].update(
                                                    {keys[-2]: f})
                                        elif keys[-2] == 'warp_list':
                                            if 'ses-' + ses['unique_id'] in tmp:
                                                ses['resource_pool'][
                                                    strat_key].update(
                                                        {keys[-2]: f})

                for key in subject_specific_dict:
                    ses_list = [
                        subj for subj in sublist if key in subj['anat']
                    ]
                    for ses in ses_list:
                        for reg_strat in strat_list:
                            try:
                                ss_strat_list = list(ses['resource_pool'])
                                for strat_key in ss_strat_list:
                                    try:
                                        ses['resource_pool'][strat_key].update(
                                            {
                                                'registration_method':
                                                reg_strat[
                                                    'registration_method']
                                            })
                                    except KeyError:
                                        pass
                            except KeyError:
                                pass

                yaml.dump(sublist,
                          open(
                              os.path.join(c.workingDirectory,
                                           'data_config_longitudinal.yml'),
                              'w'),
                          default_flow_style=False)

                print('\n\n' + 'Longitudinal pipeline completed.' + '\n\n')

                # skip main preprocessing
                if 1 not in c.runAnatomical and 1 not in c.runFunctional:
                    import sys
                    sys.exit()

        # END LONGITUDINAL TEMPLATE PIPELINE

        # If it only allows one, run it linearly
        if c.numParticipantsAtOnce == 1:
            for sub in sublist:
                run_workflow(sub, c, True, pipeline_timing_info, p_name,
                             plugin, plugin_args, test_config)
            return

        pid = open(os.path.join(c.workingDirectory, 'pid.txt'), 'w')

        # Init job queue
        job_queue = []

        # Allocate processes
        processes = [
            Process(target=run_workflow,
                    args=(sub, c, True, pipeline_timing_info, p_name, plugin,
                          plugin_args, test_config)) for sub in sublist
        ]

        # If we're allocating more processes than are subjects, run them all
        if len(sublist) <= c.numParticipantsAtOnce:
            for p in processes:
                p.start()
                print(p.pid, file=pid)

        # Otherwise manage resources to run processes incrementally
        else:
            idx = 0
            while idx < len(sublist):
                # If the job queue is empty and we haven't started indexing
                if len(job_queue) == 0 and idx == 0:
                    # Init subject process index
                    idc = idx
                    # Launch processes (one for each subject)
                    for p in processes[idc:idc + c.numParticipantsAtOnce]:
                        p.start()
                        print(p.pid, file=pid)
                        job_queue.append(p)
                        idx += 1
                # Otherwise, jobs are running - check them
                else:
                    # Check every job in the queue's status
                    for job in job_queue:
                        # If the job is not alive
                        if not job.is_alive():
                            # Find job and delete it from queue
                            print('found dead job ', job)
                            loc = job_queue.index(job)
                            del job_queue[loc]
                            # ...and start the next available process
                            # (subject)
                            processes[idx].start()
                            # Append this to job queue and increment index
                            job_queue.append(processes[idx])
                            idx += 1
                    # Add sleep so while loop isn't consuming 100% of CPU
                    time.sleep(2)
        # Close PID txt file to indicate finish
        pid.close()
Code example #10
File: eigen_run.py  Project: zwxbupt/abide
def main(sub_idx):

    # Init variables
    bucket_name = 'fcp-indi'
    bucket_prefix = 'data/Projects/ABIDE_Initiative/Outputs/cpac/raw_outputs_rerun'
    config_file = '/home/ubuntu/abide_run/settings/pipeline_config_abide_rerun.yml'
    creds_path = '/home/ubuntu/secure-creds/aws-keys/fcp-indi-keys2.csv'
    local_prefix = '/mnt/eigen_run'
    sublist_file = '/home/ubuntu/abide_run/eig-subs1.yml'

    # Pull in bucket, config, and subject
    sublist = yaml.safe_load(open(sublist_file, 'r'))
    subject = sublist[sub_idx]
    sub_id = subject.split('_')[-1]
    bucket = fetch_creds.return_bucket(creds_path, bucket_name)
    c = Configuration(yaml.safe_load(open(config_file, 'r')))

    # Test to see if they're already uploaded
    to_do = True

    if to_do:
        ## Collect functional_mni list from S3 bucket
        filt_global = 'pipeline_abide_rerun__freq-filter/%s_session_1/functional_mni/_scan_rest_1_rest/_csf_threshold_0.96/_gm_threshold_0.7/_wm_threshold_0.96/_compcor_ncomponents_5_selector_pc10.linear1.wm0.global1.motion1.quadratic1.gm0.compcor1.csf0/_bandpass_freqs_0.01.0.1/bandpassed_demeaned_filtered_antswarp.nii.gz' % sub_id
        filt_noglobal = filt_global.replace('global1', 'global0')
        nofilt_global = 'pipeline_abide_rerun/%s_session_1/functional_mni/_scan_rest_1_rest/_csf_threshold_0.96/_gm_threshold_0.7/_wm_threshold_0.96/_compcor_ncomponents_5_selector_pc10.linear1.wm0.global1.motion1.quadratic1.gm0.compcor1.csf0/residual_antswarp.nii.gz' % sub_id
        nofilt_noglobal = nofilt_global.replace('global1', 'global0')
        s3_functional_mni_list = [
            filt_global, filt_noglobal, nofilt_global, nofilt_noglobal
        ]
        s3_functional_mni_list = [
            os.path.join(bucket_prefix, s) for s in s3_functional_mni_list
        ]

        # Download contents to local inputs directory
        try:
            aws_utils.s3_download(bucket,
                                  s3_functional_mni_list,
                                  local_prefix=os.path.join(
                                      local_prefix, 'centrality_inputs'),
                                  bucket_prefix=bucket_prefix)
        except Exception as e:
            print('Unable to find eigenvector centrality inputs for subject %s, skipping...' % sub_id)
            print('Error: %s' % e)
            return

        # Build strat dict (dictionary of strategies and local input paths)
        strat_dict = {
            'filt_global':
            os.path.join(local_prefix, 'centrality_inputs', filt_global),
            'filt_noglobal':
            os.path.join(local_prefix, 'centrality_inputs', filt_noglobal),
            'nofilt_noglobal':
            os.path.join(local_prefix, 'centrality_inputs', nofilt_noglobal),
            'nofilt_global':
            os.path.join(local_prefix, 'centrality_inputs', nofilt_global)
        }

        # Create list of processes
        proc_list = [
            Process(target=make_workflow,
                    args=(in_name, strat, sub_id, c, local_prefix))
            for strat, in_name in strat_dict.items()
        ]

        # Iterate through processes and fire off
        for p in proc_list:
            p.start()

        for p in proc_list:
            if p.is_alive():
                p.join()

        # Gather outputs
        wfs = glob.glob(os.path.join(local_prefix, 'eigen_wf_%s_*' % sub_id))
        local_list = []
        for wf in wfs:
            for root, dirs, files in os.walk(wf):
                if files:
                    local_list.extend([os.path.join(root, f) for f in files])

        s3_list = [
            loc.replace(
                local_prefix,
                'data/Projects/ABIDE_Initiative/Outputs/cpac/raw_outputs_eigen'
            ) for loc in local_list
        ]

        aws_utils.s3_upload(bucket, local_list, s3_list)

        # And delete working directories
        try:
            for input_file in strat_dict.values():
                print('removing input file %s...' % input_file)
                os.remove(input_file)  # strat_dict paths are already fully formatted with sub_id
        except Exception as e:
            print('Unable to remove input files')
            print('Error: %s' % e)

        work_dirs = glob.glob(
            os.path.join(local_prefix, 'eigen_wf_%s_*' % sub_id))

        for work_dir in work_dirs:
            print('removing %s...' % work_dir)
            shutil.rmtree(work_dir)
    else:
        print('subject %s already processed and uploaded, skipping...' % sub_id)
Code example #11
    def AddConfig(self, event):
        '''
        Event handler that loads a pipeline configuration file chosen via a file dialog
        '''

        current_dir = os.getcwd()

        # Gets called when you click 'Load' for pipeline config in the GUI
        dlg = wx.FileDialog(self,
                            message="Choose the CPAC Configuration file",
                            defaultDir=current_dir,
                            defaultFile="",
                            wildcard="YAML files(*.yaml, *.yml)|*.yaml;*.yml",
                            style=wx.OPEN | wx.CHANGE_DIR)

        # User clicks "OK"
        if dlg.ShowModal() == wx.ID_OK:
            # Load config file into memory and verify it's not a subject list
            path = dlg.GetPath()

            os.chdir(current_dir)

            # Check for path existence
            if os.path.exists(path):
                path = os.path.realpath(path)
                try:
                    f_cfg = yaml.safe_load(open(path, 'r'))
                except Exception as e:
                    print('\n\nUnable to load the specified file: %s' % path)
                    print("The YAML file may not be formatted properly.")
                    print('Error:\n%s\n\n' % e)
                    raise Exception
                if type(f_cfg) == dict:
                    if 'pipelineName' not in f_cfg and 'pipeline_dir' not in f_cfg:
                        err_msg = 'File is not a pipeline configuration '\
                                  'file. It might be a data configuration file.'
                        raise Exception(err_msg)
                else:
                    err_msg = 'File is not a pipeline configuration '\
                              'file. It might be a subject list file.'
                    raise Exception(err_msg)
            # Otherwise, report error
            else:
                err_msg = 'File %s does not exist. Check and try again.' % path
                raise Exception(err_msg)

            # If config file is ok, proceed to load
            if self.check_config(path) > 0:
                while True:
                    try:
                        c = Configuration(f_cfg)
                    except Exception as e:
                        if "object has no attribute" in e:
                            err = "%s\n\nIs this attribute linked (using " \
                                  "'${}') in any of your configuration " \
                                  "parameters? (Standard template paths, " \
                                  "for example). If this is a pipeline " \
                                  "configuration file from an older version "\
                                  "of CPAC, this parameter may be obsolete. "\
                                  "Double-check your selections.\n\n" % e
                            print(err)
                        else:
                            print('\n\nERROR: Configuration file could not ' \
                                  'be loaded properly - the file might be '\
                                  'access-protected or you might have ' \
                                  'chosen the wrong file.\n')
                            print('Error name: main_window_0001\n')
                            print('Exception: %s\n\n' % e)

                    # Valid pipeline name
                    pipeline_name = None
                    try:
                        pipeline_name = c.pipelineName
                    except AttributeError:
                        pass
                    try:
                        pipeline_name = c.pipeline_dir
                        pipeline_name = pipeline_name.split('/')[-1].replace(
                            'pipeline_', '')
                        pipeline_name = 'group_config_{0}'.format(
                            pipeline_name)
                    except AttributeError:
                        pass

                    if pipeline_name is not None:
                        if self.pipeline_map.get(pipeline_name) is None:
                            # this runs if you click 'Load' on the main
                            # CPAC window, enter a path, and the pipeline
                            # name attribute of the pipeline config file
                            # you are loading does NOT already exist in
                            # the listbox, i.e., the proper condition
                            self.pipeline_map[str(pipeline_name)] = path
                            self.listbox.Append(str(pipeline_name))
                            dlg.Destroy()
                            break
                        else:
                            # this runs if you click 'Load' on the main
                            # CPAC window, enter a path, and the pipeline
                            # name attribute of the pipeline config file
                            # you are loading DOES already exist in
                            # the listbox, which is a conflict
                            dlg3 = wx.MessageDialog(self, 'The \'' \
                                    'Pipeline Name\' attribute of the ' \
                                    'configuration file you are loading' \
                                    ' already exists in one of the' \
                                    ' configuration files listed under' \
                                    ' \'Pipelines\'.\n\nPlease change' \
                                    ' the pipeline name attribute (not' \
                                    ' the filename) from within the' \
                                    ' pipeline editor (under the' \
                                    ' \'Output Settings\' tab in' \
                                    ' \'Environment Setup\'), or load a' \
                                    ' new configuration file.\n\n' \
                                    'Pipeline configuration with' \
                                    ' conflicting name:\n%s' \
                                     % pipeline_name,
                                           'Conflicting Pipeline Names',
                                       wx.OK | wx.ICON_ERROR)
                            dlg3.ShowModal()
                            dlg3.Destroy()
                            break
                    # Pipeline name is None
                    else:
                        dlg4 = wx.MessageDialog(self, 'Warning: Pipeline name is blank.\n\nPlease edit' \
                                                ' the pipeline_config.yml file in a text editor and' \
                                                ' restore the pipelineName field.',
                                        'Warning',
                                wx.OK | wx.ICON_ERROR)
                        dlg4.ShowModal()
                        dlg4.Destroy()
                        dlg.Destroy()
                        break
Code example #12
def create_yaml_from_template(
    d, template=DEFAULT_PIPELINE_FILE, include_all=False
):
    """Save dictionary to a YAML file, keeping the structure
    (such as first level comments and ordering) from the template

    It may not be fully robust to YAML structures, but it works
    for C-PAC config files!

    Parameters
    ----------
    d : dict

    template : str
        path to template

    include_all : bool
        include every key, even those that are unchanged

    Examples
    --------
    >>> import yaml
    >>> from CPAC.utils.configuration import Configuration
    >>> Configuration(yaml.safe_load(create_yaml_from_template({}))).dict(
    ...    ) == Configuration({}).dict()
    True
    """
    def _count_indent(line):
        '''Helper method to determine indentation level

        Parameters
        ----------
        line : str

        Returns
        -------
        number_of_indents : int

        Examples
        --------
        >>> _count_indent('No indent')
        0
        >>> _count_indent('    Four spaces')
        2
        '''
        return (len(line) - len(line.lstrip())) // 2

    def _create_import_dict(diff):
        '''Method to return a dict of only changes given a nested dict
        of (dict1_value, dict2_value) tuples

        Parameters
        ----------
        diff : dict
            output of `dct_diff`

        Returns
        -------
        dict
            dict of only changed values

        Examples
        --------
        >>> _create_import_dict({'anatomical_preproc': {
        ...     'brain_extraction': {'extraction': {
        ...         'run': ([True], False),
        ...         'using': (['3dSkullStrip'], ['niworkflows-ants'])}}}})
        {'anatomical_preproc': {'brain_extraction': {'extraction': {'run': False, 'using': ['niworkflows-ants']}}}}
        '''  # noqa
        if isinstance(diff, tuple) and len(diff) == 2:
            return diff[1]
        if isinstance(diff, dict):
            i = {}
            for k in diff:
                try:
                    j = _create_import_dict(diff[k])
                    if j != {}:
                        i[k] = j
                except KeyError:
                    continue
            return i
        return diff

    def _format_key(key, level):
        '''Helper method to format YAML keys

        Parameters
        ----------
        key : str
        level : int

        Returns
        -------
        yaml : str

        Examples
        --------
        >>> _format_key('base', 0)
        '\nbase: '
        >>> _format_key('indented', 2)
        '\n    indented:'
        '''
        return f'\n{" " * level * 2}{key}: '

    def _format_list_items(l, line_level):  # noqa E741
        '''Helper method to handle lists in the YAML

        Parameters
        ----------
        l : list
        
        line_level : int
        
        Returns
        -------
        yaml : str
        
        Examples
        --------
        >>> _format_list_items([1, 2, {'nested': 3}], 0)
        '  - 1\n  - 2\n  - nested: 3'
        >>> _format_list_items([1, 2, {'nested': [3, {'deep': [4]}]}], 1)
        '    - 1\n    - 2\n    - nested:\n      - 3\n      - deep:\n        - 4'
        '''  # noqa
        # keep short, simple lists in square brackets
        if all(isinstance(item, (str, bool, int, float)) for item in l):
            if len(str(l)) < 50:
                return str(l).replace("'", '').replace('"', '')
        # list long or complex lists on lines with indented '-' lead-ins
        indent = " " * (2 * line_level + 2)
        return '\n' + '\n'.join([
            f'{indent}{li}' for li in yaml.dump(
                yaml_bool(l)
            ).replace("'On'", 'On').replace("'Off'", 'Off').split('\n')
        ]).rstrip()

    # set starting values
    output = ''
    comment = ''
    space_match = r'^\s+.*'
    level = 0
    nest = []
    list_item = False
    list_level = 0
    line_level = 0
    template_name = template
    if isinstance(d, Configuration):
        d = d.dict()
    try:
        template = load_preconfig(template)
    except OptionError:
        if 'default' in template.lower():
            template = DEFAULT_PIPELINE_FILE
        assert os.path.exists(template) or os.path.islink(template), \
            f'{template_name} is not a defined preconfig or a valid path.'
    template_included = False

    # load default values
    d_default = Configuration(yaml.safe_load(open(template, 'r'))).dict()

    if (
        template == DEFAULT_PIPELINE_FILE or
        not dct_diff(
            yaml.safe_load(open(DEFAULT_PIPELINE_FILE, 'r')), d_default)
    ):
        template_name = 'default'

    # update values
    if include_all:
        d_default.update(d)
        d = _create_import_dict(dct_diff({}, d_default))
    else:
        d = _create_import_dict(dct_diff(d_default, d))

    # generate YAML from template with updated values
    template_dict = yaml.safe_load(open(template, 'r'))
    with open(template, 'r') as f:
        for line in f:

            # persist comments and frontmatter
            if line.startswith('%') or line.startswith('---') or re.match(
                r'^\s*#.*$', line
            ):
                list_item = False
                line = line.strip('\n')
                comment += f'\n{line}'
            elif len(line.strip()):
                if re.match(space_match, line):
                    line_level = _count_indent(line)
                else:
                    line_level = 0

                # handle lists as a unit
                if list_item:
                    if line_level < list_level - 1:
                        list_item = False
                        level = list_level
                        list_level = 0
                elif line.lstrip().startswith('-'):
                    list_item = True
                    list_level = line_level - 1

                else:
                    # extract dict key
                    key_group = re.match(
                        r'^\s*(([a-z0-9A-Z_]+://){0,1}'
                        r'[a-z0-9A-Z_/][\sa-z0-9A-Z_/\.-]+)\s*:', line)
                    if key_group:
                        if not template_included:
                            # prepend comment from template
                            if len(comment.strip()):
                                comment = re.sub(
                                    r'(?<=# based on )(.* pipeline)',
                                    f'{template_name} pipeline',
                                    comment
                                )
                                output += comment
                                output += f'\nFROM: {template_name}\n'
                                comment = ''
                            template_included = True
                        key = key_group.group(1).strip()

                        # calculate key depth
                        if line_level == level:
                            if level > 0:
                                nest = nest[:-1] + [key]
                            else:
                                nest = [key]
                        elif line_level == level + 1:
                            nest += [key]
                        elif line_level < level:
                            nest = nest[:line_level] + [key]

                        # only include updated and new values
                        try:
                            # get updated value for key
                            value = lookup_nested_value(d, nest)
                            orig_value = lookup_nested_value(d_default, nest)
                            # Use 'On' and 'Off' for bools
                            if (isinstance(orig_value, bool) or (
                                isinstance(orig_value, str) and
                                orig_value in {'On', 'Off'}
                            ) or (isinstance(orig_value, list) and all([(
                                isinstance(orig_item, bool) or (
                                    isinstance(orig_item, str) and
                                    orig_item in {'On', 'Off'}
                                )
                            ) for orig_item in orig_value])
                            )):
                                value = yaml_bool(value)
                            # prepend comment from template
                            if len(comment.strip()):
                                output += comment
                            else:
                                output += '\n'

                            # write YAML
                            output += _format_key(key, line_level)
                            if isinstance(value, list):
                                output += _format_list_items(
                                    value, line_level)
                            elif isinstance(value, dict):
                                for k in value.keys():
                                    try:
                                        lookup_nested_value(template_dict, nest + [k])
                                    # include keys not in template
                                    except KeyError:
                                        output += _format_key(
                                            k, line_level + 1)
                                        output += _format_list_items(
                                            value[k],
                                            line_level + 1
                                        ) if isinstance(
                                            value[k], list) else yaml_bool(
                                                value[k])
                            else:
                                output += str(value)
                        except KeyError:
                            # clear comment for excluded key
                            comment = '\n'

                        # reset variables for loop
                        comment = '\n'
                        level = line_level
            elif len(comment) > 1 and comment[-2] != '\n':
                comment += '\n'
    return output.lstrip('\n')
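
As the doctest above shows, create_yaml_from_template returns the generated YAML as a string rather than writing a file. A hedged usage sketch (the override key follows the schema seen in code example #8; the output path is a placeholder):

# Start from the default template and change only the output directory path
overrides = {'pipeline_setup': {'output_directory': {'path': '/outputs'}}}
yaml_text = create_yaml_from_template(overrides)

with open('/path/to/my_pipeline.yml', 'w') as f:  # placeholder output path
    f.write(yaml_text)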