Example #1
    def run(self, parallel=None, n_jobs=None):
        """
        Run the analysis: load data, run estimation, output results
        """
        if parallel is None:
            result = self.execute()
        elif parallel == 'local':
            cfg_parallel = pyhrf.cfg['parallel-local']
            try:
                from joblib import Parallel, delayed
            except ImportError:
                raise Exception('Cannot import joblib. It is required to '
                                'enable parallel processing on a local machine.')

            parallel_verb = pyhrf.verbose.verbosity
            if pyhrf.verbose.verbosity == 6:
                parallel_verb = 10

            if n_jobs is None:
                n_jobs = cfg_parallel['nb_procs']

            p = Parallel(n_jobs=n_jobs, verbose=parallel_verb)
            result = p(delayed(exec_t)(t) for t in self.split(output_dir=None))
            # join list of lists:
            result = list(itertools.chain.from_iterable(result))

        elif parallel == 'LAN':

            from pyhrf import grid
            cfg_parallel = pyhrf.cfg['parallel-LAN']
            remoteUser = cfg_parallel['user']

            #1. Some checks on input/output directory
            remoteDir = cfg_parallel['remote_path']
            # At the end, results will be retrieved directly from remoteDir,
            # which has to be readable
            if remoteDir is None or not op.exists(remoteDir):
                raise Exception('Remote directory is not readable (%s). '
                                'Consider mounting it with sshfs.'
                                % remoteDir)

            # Check whether remoteDir is writable, so that we don't need to
            # upload data via ssh
            remote_writeable = False
            if os.access(remoteDir, os.W_OK):
                remote_writeable = True
                tmpDir = remoteDir
            else:
                pyhrf.verbose(1, 'Remote dir is not writeable -> using tmp '
                                  'dir to store split data & then upload.')
                # Fall back to a local temporary directory for the split data:
                tmpDir = pyhrf.get_tmp_path()

            #2. split roi data
            pyhrf.verbose(1, 'Path to store sub treatments: %s' %tmpDir)
            treatments_dump_files = []
            self.split(dump_sub_results=True, output_dir=tmpDir,
                       make_sub_outputs=False,
                       output_file_list=treatments_dump_files)

            #3. copy data to remote directory
            if not remote_writeable:
                host = cfg_parallel['remote_host']
                pyhrf.verbose(1, 'Uploading data to %s ...' %(remoteDir))
                remote_input_files = remote_copy(treatments_dump_files,
                                                 host, remoteUser, remoteDir)

            #4. create job list
            tasks_list = []
            for f in treatments_dump_files:
                f = op.join(remoteDir,op.basename(f))
                nice = cfg_parallel['niceness']
                tasks_list.append('nice -n %d %s -v%d -t "%s"' \
                                      %(nice,'pyhrf_jde_estim',
                                        pyhrf.verbose.verbosity,f))
            mode = 'dispatch'
            tasks = grid.read_tasks(';'.join(tasks_list), mode)
            timeslot = grid.read_timeslot('allday')
            hosts = grid.read_hosts(cfg_parallel['hosts'])


            if self.output_dir is not None:
                brokenfile = op.join(self.output_dir, 'pyhrf-broken_cmd.batch')
                logfile = op.join(self.output_dir, 'pyhrf-parallel.log')
                pyhrf.verbose(1, 'Log file for process dispatching: %s' \
                              %logfile)
            else:
                brokenfile = None
                logfile = None

            #5. launch them
            pyhrf.verbose(1, 'Dispatching processes ...')
            try:
                grid.run_grid(mode, hosts, 'rsa', tasks, timeslot, brokenfile,
                              logfile, user=remoteUser)
                grid.kill_threads()
            except KeyboardInterrupt:
                grid.quit(None, None)

            if brokenfile is not None and len(open(brokenfile).readlines()) > 0:
                pyhrf.verbose(1, 'There are some broken commands, '\
                                  'trying again ...')
                try:
                    tasks = grid.read_tasks(brokenfile, mode)
                    grid.run_grid(mode, hosts, 'rsa', tasks, timeslot, brokenfile,
                                  logfile, user=remoteUser)
                    grid.kill_threads()
                except KeyboardInterrupt:
                    grid.quit(None, None)

            #6. grab everything back ??
            #try:
                # "scp %s@%s:%s %s" %(remoteUser,host,
                #                     op.join(remoteDir,'result*'),
                #                     op.abspath(op.dirname(options.cfgFile))))
            #TODO : test if everything went fine

            #7. merge all results and create outputs
            result = []
            #if op.exists(remoteDir): TODO :scp if remoteDir not readable
            nb_treatments = len(treatments_dump_files)
            remote_result_files = [op.join(remoteDir, 'result_%04d.pck' %i) \
                                    for i in range(nb_treatments)]
            pyhrf.verbose(1,'remote_result_files: %s' %str(remote_result_files))
            nres = len(filter(op.exists,remote_result_files))
            if nres == nb_treatments:
                pyhrf.verbose(1, 'Grabbing results ...')
                for fnresult in remote_result_files:
                    fresult = open(fnresult)
                    result.append(cPickle.load(fresult)[0])
                    fresult.close()
            else:
                print 'Found only %d result files (expected %d)' \
                    %(nres, nb_treatments)
                print 'Something went wrong, check the log files'
            if not remote_writeable:
                pyhrf.verbose(1, 'Cleaning tmp dir (%s)...' %tmpDir)
                shutil.rmtree(tmpDir)
                pyhrf.verbose(1, 'Cleaning up remote dir (%s) through ssh ...' \
                                %remoteDir)
                cmd = 'ssh %s@%s rm -f "%s" "%s" ' \
                    %(remoteUser, host, ' '.join(remote_result_files),
                      ' '.join(remote_input_files))
                pyhrf.verbose(2, cmd)
                os.system(cmd)
            else:
                if 0:
                    pyhrf.verbose(1, 'Cleaning up remote dir (%s)...' %remoteDir)
                    for f in os.listdir(remoteDir):
                        os.remove(op.join(remoteDir,f))

        elif parallel == 'cluster':

            from pyhrf.parallel import run_soma_workflow
            cfg = pyhrf.cfg['parallel-cluster']
            #create tmp remote path:
            date_now = time.strftime('%c').replace(' ','_').replace(':','_')
            remote_path = op.join(cfg['remote_path'], date_now)
            pyhrf.verbose(1,'Create tmp remote dir: %s' %remote_path)
            remote_mkdir(cfg['server'], cfg['user'], remote_path)
            #if self.result_dump_file
            t_name = 'default_treatment'
            tmp_dir = pyhrf.get_tmp_path()
            label_for_cluster = self.analyser.get_label()
            if self.output_dir is None:
                out_dir = pyhrf.get_tmp_path()
            else:
                out_dir = self.output_dir
            result = run_soma_workflow({t_name:self}, 'pyhrf_jde_estim',
                                       {t_name:tmp_dir}, cfg['server_id'],
                                       cfg['server'], cfg['user'],
                                       {t_name:remote_path},
                                       {t_name:op.abspath(out_dir)},
                                       label_for_cluster, wait_ending=True)

        else:
            raise Exception('Parallel mode "%s" not available' %parallel)

        pyhrf.verbose(1, 'Retrieved %d results' %len(result))
        return self.output(result, (self.result_dump_file is not None),
                           self.make_outputs)
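
The 'local' branch of run() is a plain joblib scatter/gather: split the work, run each piece in its own process, then flatten the per-piece result lists. A minimal standalone sketch of that pattern (the square_chunk helper and the toy chunks are illustrative stand-ins for exec_t and self.split(), not pyhrf code):

import itertools

from joblib import Parallel, delayed


def square_chunk(chunk):
    # Stand-in for exec_t(t): process one piece of work, return a list of results.
    return [x * x for x in chunk]


chunks = [[1, 2], [3, 4], [5, 6]]  # stand-in for self.split(output_dir=None)
partial_results = Parallel(n_jobs=2, verbose=0)(
    delayed(square_chunk)(c) for c in chunks)
# join list of lists, exactly as the 'local' branch does:
result = list(itertools.chain.from_iterable(partial_results))
print(result)  # [1, 4, 9, 16, 25, 36]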
Example #2
    def run(self, parallel=None, n_jobs=None):
        """
        Run the analysis: load data, run estimation, output results
        """
        if parallel is None:
            result = self.execute()
        elif parallel == 'local':
            cfg_parallel = pyhrf.cfg['parallel-local']
            try:
                from joblib import Parallel, delayed
            except ImportError:
                raise Exception(
                    'Cannot import joblib. It is required to '
                    'enable parallel processing on a local machine.')

            effective_level = logger.getEffectiveLevel()
            if effective_level == logging.DEBUG:
                parallel_verb = 11
            elif effective_level == logging.INFO:
                parallel_verb = 2
            else:
                parallel_verb = 0

            if n_jobs is None:
                if cfg_parallel["nb_procs"]:
                    n_jobs = cfg_parallel["nb_procs"]
                else:
                    n_jobs = available_cpu_count()

            p = Parallel(n_jobs=n_jobs, verbose=parallel_verb)
            result = p(delayed(exec_t)(t) for t in self.split(output_dir=None))
            # join list of lists:
            result = list(itertools.chain.from_iterable(result))

        elif parallel == 'LAN':

            from pyhrf import grid
            cfg_parallel = pyhrf.cfg['parallel-LAN']
            remoteUser = cfg_parallel['user']

            # 1. Some checks on input/output directory
            remoteDir = cfg_parallel['remote_path']
            # At the end, results will be retrieved directly from remoteDir,
            # which has to be readable
            if remoteDir is None or not op.exists(remoteDir):
                raise Exception('Remote directory is not readable (%s). '
                                'Consider mounting it with sshfs.' % remoteDir)

            # Check whether remoteDir is writable, so that we don't need to
            # upload data via ssh
            remote_writeable = False
            if os.access(remoteDir, os.W_OK):
                remote_writeable = True
                tmpDir = remoteDir
            else:
                logger.info('Remote dir is not writeable -> using tmp '
                            'dir to store split data & then upload.')
                # Fall back to a local temporary directory for the split data:
                tmpDir = pyhrf.get_tmp_path()

            # 2. split roi data
            logger.info('Path to store sub treatments: %s', tmpDir)
            treatments_dump_files = []
            self.split(dump_sub_results=True,
                       output_dir=tmpDir,
                       make_sub_outputs=False,
                       output_file_list=treatments_dump_files)

            # 3. copy data to remote directory
            if not remote_writeable:
                host = cfg_parallel['remote_host']
                logger.info('Uploading data to %s ...', remoteDir)
                remote_input_files = remote_copy(treatments_dump_files, host,
                                                 remoteUser, remoteDir)

            # 4. create job list
            tasks_list = []
            for f in treatments_dump_files:
                f = op.join(remoteDir, op.basename(f))
                nice = cfg_parallel['niceness']
                tasks_list.append(
                    'nice -n %d %s -v%d -t "%s"' %
                    (nice, 'pyhrf_jde_estim', logger.getEffectiveLevel(), f))
            mode = 'dispatch'
            tasks = grid.read_tasks(';'.join(tasks_list), mode)
            timeslot = grid.read_timeslot('allday')
            hosts = grid.read_hosts(cfg_parallel['hosts'])

            if self.output_dir is not None:
                brokenfile = op.join(self.output_dir, 'pyhrf-broken_cmd.batch')
                logfile = op.join(self.output_dir, 'pyhrf-parallel.log')
                logger.info('Log file for process dispatching: %s', logfile)
            else:
                brokenfile = None
                logfile = None

            # 5. launch them
            logger.info('Dispatching processes ...')
            try:
                grid.run_grid(mode,
                              hosts,
                              'rsa',
                              tasks,
                              timeslot,
                              brokenfile,
                              logfile,
                              user=remoteUser)
                grid.kill_threads()
            except KeyboardInterrupt:
                grid.quit(None, None)

            if brokenfile is not None and len(
                    open(brokenfile).readlines()) > 0:
                logger.info('There are some broken commands, trying again ...')
                try:
                    tasks = grid.read_tasks(brokenfile, mode)
                    grid.run_grid(mode,
                                  hosts,
                                  'rsa',
                                  tasks,
                                  timeslot,
                                  brokenfile,
                                  logfile,
                                  user=remoteUser)
                    grid.kill_threads()
                except KeyboardInterrupt:
                    grid.quit(None, None)

            # 6. grab everything back ??
            # try:
            # "scp %s@%s:%s %s" %(remoteUser,host,
            #                     op.join(remoteDir,'result*'),
            #                     op.abspath(op.dirname(options.cfgFile))))
            # TODO : test if everything went fine

            # 7. merge all results and create outputs
            result = []
            # if op.exists(remoteDir): TODO :scp if remoteDir not readable
            nb_treatments = len(treatments_dump_files)
            remote_result_files = [
                op.join(remoteDir, 'result_%04d.pck' % i)
                for i in range(nb_treatments)
            ]
            logger.info('remote_result_files: %s', str(remote_result_files))
            nres = len(filter(op.exists, remote_result_files))
            if nres == nb_treatments:
                logger.info('Grabbing results ...')
                for fnresult in remote_result_files:
                    fresult = open(fnresult)
                    result.append(cPickle.load(fresult)[0])
                    fresult.close()
            else:
                print 'Found only %d result files (expected %d)' \
                    % (nres, nb_treatments)
                print 'Something went wrong, check the log files'
            if not remote_writeable:
                logger.info('Cleaning tmp dir (%s)...', tmpDir)
                shutil.rmtree(tmpDir)
                logger.info('Cleaning up remote dir (%s) through ssh ...',
                            remoteDir)
                cmd = 'ssh %s@%s rm -f "%s" "%s" ' \
                    % (remoteUser, host, ' '.join(remote_result_files),
                       ' '.join(remote_input_files))
                logger.info(cmd)
                os.system(cmd)
            else:
                if 0:
                    logger.info('Cleaning up remote dir (%s)...', remoteDir)
                    for f in os.listdir(remoteDir):
                        os.remove(op.join(remoteDir, f))

        elif parallel == 'cluster':

            from pyhrf.parallel import run_soma_workflow
            cfg = pyhrf.cfg['parallel-cluster']
            # create tmp remote path:
            date_now = time.strftime('%c').replace(' ', '_').replace(':', '_')
            remote_path = op.join(cfg['remote_path'], date_now)
            logger.info('Create tmp remote dir: %s', remote_path)
            remote_mkdir(cfg['server'], cfg['user'], remote_path)
            t_name = 'default_treatment'
            tmp_dir = pyhrf.get_tmp_path()
            label_for_cluster = self.analyser.get_label()
            if self.output_dir is None:
                out_dir = pyhrf.get_tmp_path()
            else:
                out_dir = self.output_dir
            result = run_soma_workflow({t_name: self},
                                       'pyhrf_jde_estim', {t_name: tmp_dir},
                                       cfg['server_id'],
                                       cfg['server'],
                                       cfg['user'], {t_name: remote_path},
                                       {t_name: op.abspath(out_dir)},
                                       label_for_cluster,
                                       wait_ending=True)

        else:
            raise Exception('Parallel mode "%s" not available' % parallel)

        logger.info('Retrieved %d results', len(result))
        return self.output(result, (self.result_dump_file is not None),
                           self.make_outputs)
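
Example #2 replaces pyhrf's verbosity counter with the standard logging module and maps the logger's effective level onto joblib's verbose argument. That mapping can be factored into a small helper; a sketch under that assumption (the helper name is hypothetical):

import logging


def joblib_verbosity(logger):
    # Same thresholds as the 'local' branch above: DEBUG -> 11, INFO -> 2, else 0.
    level = logger.getEffectiveLevel()
    if level == logging.DEBUG:
        return 11
    elif level == logging.INFO:
        return 2
    return 0


log = logging.getLogger('pyhrf')
log.setLevel(logging.INFO)
print(joblib_verbosity(log))  # 2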
Example #3
def prepare_treatment_jobs(treatment, tmp_local_dir, local_result_path,
                           local_user, local_host, remote_host, remote_user,
                           remote_path, label_for_cluster):
    """Prepare soma-workflow jobs to perform one treatment (i.e., one subject).

    Parameters
    ----------
    treatment : FMRITreatment
        the treatment defining the analysis
    tmp_local_dir : str
        a path where to store the temporary config file before sending it to the remote host
    local_result_path : str
        path where to store the final result
    local_user : str
        the user on the local host that accepts SSH connections from the remote cluster
    local_host : str
        local host (used to send back the result)
    remote_host : str
        remote machine where the treatment will be run
    remote_user : str
        user login on the remote machine.
    remote_path : str
        path on the remote machine where to store ROI data and analysis results
    label_for_cluster : str
        label prefix to name job in soma-workflow

    Returns
    -------
    a tuple (job_split, jobs, dependencies, mainGroup) where:
    job_split : Job
        job handling the splitting of input data into ROI data
    jobs : list of Job
        all jobs except the splitting job: ROI analyses, result merge, scp of
        the result back to the local host, data cleaning
    dependencies : list of job pairs
        define the pipeline structure
    mainGroup : Group
        top-level object gathering all jobs for this treatment

    """

    # roiFiles contains the list of files that will be produced by job_split
    roiFiles, roiIds = treatment.dump_roi_datasets(dry=True)

    logger.info('Get list of split data files ... %d files', len(roiFiles))
    datafiles = treatment.get_data_files()

    # Make all paths relative in the treatment config file
    # so that data files can be found on the cluster file system
    treatment.replace_data_dir('./')
    remote_cfg_file = op.join(tmp_local_dir, './detectestim_remote.xml')
    treatment.set_init_param('make_outputs', False)
    logger.info('Save remote treatment to %s', remote_cfg_file)
    save_treatment(treatment, remote_cfg_file)

    logger.info('Upload input data')
    # All data which are the inputs of the workflow:
    data_to_upload = datafiles + [remote_cfg_file]
    remote_input_files = remote_copy(data_to_upload, remote_host,
                                     remote_user, remote_path)
    logger.info('Remove tmp remote cfg file')
    os.remove(remote_cfg_file)

    logger.info('Prepare jobs ...')
    logger.info('Job split ...')
    verbose_level = logger.getEffectiveLevel()
    cmd = ["pyhrf_split_roidata", "-c", basename(remote_cfg_file),
           "-v %d" % verbose_level, "-d", "./"]
    logger.info('-> %s', cmd)
    job_split = Job(cmd, working_directory=remote_path, name="roi_split")

    logger.info('Jobs JDE ...')
    jobs_jde = [Job(["pyhrf_jde_estim", "-c", basename(remote_cfg_file),
                     "-r", basename(roiFile), "-v %d" % verbose_level],
                    working_directory=remote_path,
                    name="jde_r%04d" % roiId)
                for roiFile, roiId in zip(roiFiles, roiIds)]
    logger.info('First jde job -> %s', jobs_jde[0].command)
    # Files produced by all JDE jobs, which will then be used as input to the
    # merge job:
    resultFiles = ["result_%04d.pck" % iroi for iroi in roiIds]

    logger.info('Job pack result ...')
    # Output of the merge job, which has to be transferred back to the local host:
    remote_resultFile = './result.pck'
    logger.info('Remote result file: %s', remote_resultFile)

    cmd = ["pyhrf_pack_results", '-v1', '-o', remote_resultFile] + resultFiles
    logger.info('cmd pack result: %s', cmd)
    job_merge = Job(cmd, working_directory=remote_path,
                    name="merge_results")

    # Retrieve result file:
    # local_host = "132.166.200.5" #HACK
    # cmd = ["pyhrf_shell_cmd", "scp","-C",remote_resultFile, "%s@%s:\"%s\"" \
    #%(local_user,local_host,local_result_path)]
    cmd = ["scp", "-C", remote_resultFile, "%s@%s:\"%s\""
           % (local_user, local_host, local_result_path)]

    logger.info('cmd scp result: %s', cmd)
    job_scp_result = Job(cmd, working_directory=remote_path, name="scp_result")

    # Clean everything:
    # -> all input files, split ROI data, per-ROI results, merged result:
    cmd = ["rm", "-f", remote_resultFile] + \
        map(basename, roiFiles) + resultFiles + remote_input_files
    logger.info('cmd clean: %s', cmd)
    job_clean = Job(cmd, working_directory=remote_path, name="clean_files")

    logger.info('Setting up the workflow ...')

    # Build the Job lists, dependencies and group
    clean = True
    if clean:
        nodes = [job_merge, job_scp_result, job_clean] + jobs_jde
    else:
        nodes = [job_merge, job_scp_result] + jobs_jde
    dependencies = []
    for jj in jobs_jde:
        dependencies.append((job_split, jj))
        dependencies.append((jj, job_merge))
    dependencies.append((job_merge, job_scp_result))
    if clean:
        dependencies.append((job_scp_result, job_clean))

    jjGroup = Group(elements=jobs_jde, name=label_for_cluster + '-roi_jobs')
    if clean:
        elements = [job_split, jjGroup, job_merge,
                    job_scp_result, job_clean]
    else:
        elements = [job_split, jjGroup, job_merge,
                    job_scp_result]
    mainGroup = Group(name=label_for_cluster,
                      elements=elements)

    return job_split, nodes, dependencies, mainGroup
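
prepare_treatment_jobs only builds the pieces; they still have to be assembled into a soma-workflow Workflow and submitted. A hedged sketch of that assembly, assuming soma-workflow's client API (Workflow, WorkflowController, Helper) and a caller that supplies the connection parameters (the submit_treatment name is illustrative):

from soma_workflow.client import Helper, Workflow, WorkflowController


def submit_treatment(job_split, jobs, dependencies, main_group,
                     resource_id, login, password=None):
    # Assemble the pieces returned by prepare_treatment_jobs into a workflow.
    workflow = Workflow(jobs=[job_split] + jobs,
                        dependencies=dependencies,
                        root_group=[main_group])
    controller = WorkflowController(resource_id, login, password)
    wf_id = controller.submit_workflow(workflow, name=main_group.name)
    # Block until the whole pipeline (split -> JDE -> merge -> scp -> clean) is done.
    Helper.wait_workflow(wf_id, controller)
    return wf_id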
Example #4
def prepare_treatment_jobs(treatment, tmp_local_dir, local_result_path,
                           local_user, local_host, remote_host, remote_user,
                           remote_path, label_for_cluster):
    """Prepare soma-workflow jobs to perform one treatment (i.e., one subject).

    Parameters
    ----------
    treatment : FMRITreatment
        the treatment defining the analysis
    tmp_local_dir : str
        a path where to store the temporary config file before sending it to the remote host
    local_result_path : str
        path where to store the final result
    local_user : str
        the user on the local host that accepts SSH connections from the remote cluster
    local_host : str
        local host (used to send back the result)
    remote_host : str
        remote machine where the treatment will be run
    remote_user : str
        user login on the remote machine.
    remote_path : str
        path on the remote machine where to store ROI data and analysis results
    label_for_cluster : str
        label prefix to name job in soma-workflow

    Returns
    -------
    a tuple (job_split, jobs, dependencies, mainGroup) where:
    job_split : Job
        job handling the splitting of input data into ROI data
    jobs : list of Job
        all jobs except the splitting job: ROI analyses, result merge, scp of
        the result back to the local host, data cleaning
    dependencies : list of job pairs
        define the pipeline structure
    mainGroup : Group
        top-level object gathering all jobs for this treatment

    """

    # roiFiles contains the list of files that will be produced by job_split
    roiFiles, roiIds = treatment.dump_roi_datasets(dry=True)

    logger.info('Get list of split data files ... %d files', len(roiFiles))
    datafiles = treatment.get_data_files()

    # Make all paths relative in the treatment config file
    # so that data files can be found on the cluster file system
    treatment.replace_data_dir('./')
    remote_cfg_file = op.join(tmp_local_dir, './detectestim_remote.xml')
    treatment.set_init_param('make_outputs', False)
    logger.info('Save remote treatment to %s', remote_cfg_file)
    save_treatment(treatment, remote_cfg_file)

    logger.info('Upload input data')
    # All data which are the inputs of the workflow:
    data_to_upload = datafiles + [remote_cfg_file]
    remote_input_files = remote_copy(data_to_upload, remote_host, remote_user,
                                     remote_path)
    logger.info('Remove tmp remote cfg file')
    os.remove(remote_cfg_file)

    logger.info('Prepare jobs ...')
    logger.info('Job split ...')
    verbose_level = logger.getEffectiveLevel()
    cmd = [
        "pyhrf_split_roidata", "-c",
        basename(remote_cfg_file),
        "-v %d" % verbose_level, "-d", "./"
    ]
    logger.info('-> %s', cmd)
    job_split = Job(cmd, working_directory=remote_path, name="roi_split")

    logger.info('Jobs JDE ...')
    jobs_jde = [
        Job([
            "pyhrf_jde_estim", "-c",
            basename(remote_cfg_file), "-r",
            basename(roiFile),
            "-v %d" % verbose_level
        ],
            working_directory=remote_path,
            name="jde_r%04d" % roiId)
        for roiFile, roiId in zip(roiFiles, roiIds)
    ]
    logger.info('First jde job -> %s', jobs_jde[0].command)
    # Files produced by all JDE jobs, which will then be used as input to the
    # merge job:
    resultFiles = ["result_%04d.pck" % iroi for iroi in roiIds]

    logger.info('Job pack result ...')
    # Output of the merge job, which has to be transferred back to the local host:
    remote_resultFile = './result.pck'
    logger.info('Remote result file: %s', remote_resultFile)

    cmd = ["pyhrf_pack_results", '-v1', '-o', remote_resultFile] + resultFiles
    logger.info('cmd pack result: %s', cmd)
    job_merge = Job(cmd, working_directory=remote_path, name="merge_results")

    # Retrieve result file:
    # local_host = "132.166.200.5" #HACK
    # cmd = ["pyhrf_shell_cmd", "scp","-C",remote_resultFile, "%s@%s:\"%s\"" \
    #%(local_user,local_host,local_result_path)]
    cmd = [
        "scp", "-C", remote_resultFile,
        "%s@%s:\"%s\"" % (local_user, local_host, local_result_path)
    ]

    logger.info('cmd scp result: %s', cmd)
    job_scp_result = Job(cmd, working_directory=remote_path, name="scp_result")

    # Clean everything:
    # -> all input files, split ROI data, per-ROI results, merged result:
    cmd = ["rm", "-f", remote_resultFile] + \
        map(basename, roiFiles) + resultFiles + remote_input_files
    logger.info('cmd clean: %s', cmd)
    job_clean = Job(cmd, working_directory=remote_path, name="clean_files")

    logger.info('Setting up the workflow ...')

    # Build the Job lists, dependencies and group
    clean = True
    if clean:
        nodes = [job_merge, job_scp_result, job_clean] + jobs_jde
    else:
        nodes = [job_merge, job_scp_result] + jobs_jde
    dependencies = []
    for jj in jobs_jde:
        dependencies.append((job_split, jj))
        dependencies.append((jj, job_merge))
    dependencies.append((job_merge, job_scp_result))
    if clean:
        dependencies.append((job_scp_result, job_clean))

    jjGroup = Group(elements=jobs_jde, name=label_for_cluster + '-roi_jobs')
    if clean:
        elements = [job_split, jjGroup, job_merge, job_scp_result, job_clean]
    else:
        elements = [job_split, jjGroup, job_merge, job_scp_result]
    mainGroup = Group(name=label_for_cluster, elements=elements)

    return job_split, nodes, dependencies, mainGroup
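
The merge step in both versions expects one 'result_%04d.pck' file per ROI, each holding a pickled list whose first element is the ROI result (see the cPickle.load(fresult)[0] calls in Examples #1 and #2). Below is a sketch of what that packing step is expected to do, not the actual pyhrf_pack_results implementation:

import pickle  # the examples above use cPickle under Python 2


def pack_results(result_files, output_file):
    # Gather per-ROI results (one pickled list per file) into a single pickled list.
    results = []
    for fname in result_files:
        with open(fname, 'rb') as f:
            results.append(pickle.load(f)[0])
    with open(output_file, 'wb') as f:
        pickle.dump(results, f)
    return results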