Example #1
def submit_calculation(task, input_, cluster, image, run_parameters,
                       root_folder, container_description, input_folder,
                       output_folder, scratch_folder):
    job = _create_job(task, cluster, image, run_parameters,
                      container_description, input_folder, output_folder,
                      scratch_folder)

    girder_token = task.taskflow.girder_token
    task.taskflow.set_metadata('cluster', cluster)

    # Now download and submit job to the cluster
    task.taskflow.logger.info('Downloading input files to cluster.')
    download_job_input_folders(cluster,
                               job,
                               girder_token=girder_token,
                               submit=False)
    task.taskflow.logger.info('Downloading complete.')

    task.taskflow.logger.info('Submitting job %s to cluster.' % job['_id'])

    submit_job(cluster, job, girder_token=girder_token, monitor=False)

    monitor_job.apply_async(
        (cluster, job), {
            'girder_token': girder_token,
            'monitor_interval': 10
        },
        link=postprocess_job.s(input_, cluster, image, run_parameters,
                               root_folder, container_description,
                               input_folder, output_folder, scratch_folder,
                               job))
Example #2
def submit(task, input_, cluster, run_folder, input_file, input_folder):
    job = _create_job(task, cluster, input_file, input_folder)

    girder_token = task.taskflow.girder_token
    task.taskflow.set_metadata('cluster', cluster)

    # Now download and submit job to the cluster
    task.taskflow.logger.info('Downloading input files to cluster.')
    download_job_input_folders(cluster,
                               job,
                               girder_token=girder_token,
                               submit=False)
    task.taskflow.logger.info('Downloading complete.')

    task.taskflow.logger.info('Submitting job %s to cluster.' % job['_id'])

    try:
        submit_job(cluster, job, girder_token=girder_token, monitor=False)
    except Exception:
        import traceback
        traceback.print_exc()

    # Monitor the job asynchronously; postprocess is chained via the Celery
    # link callback to run once monitoring completes.
    monitor_job.apply_async(
        (cluster, job), {
            'girder_token': girder_token,
            'monitor_interval': 10
        },
        link=postprocess.s(run_folder, input_, cluster, job))
Example #3
def submit_paraview_job(task, cluster, job, *args, **kwargs):
    task.taskflow.logger.info('Submitting job to cluster.')
    girder_token = task.taskflow.girder_token

    params = {}

    if 'dataDir' in kwargs:
        params['dataDir'] = kwargs['dataDir']

    if 'fileName' in kwargs:
        params['fileName'] = kwargs['fileName']

    if 'simulationJobId' in kwargs:
        params['simulationJobId'] = kwargs['simulationJobId']

    if 'sessionKey' in kwargs:
        params['sessionKey'] = kwargs['sessionKey']
        # Save the sessionKey so we can clean up the proxy entry
        task.taskflow.set_metadata('sessionKey', kwargs['sessionKey'])

    if 'numberOfSlots' in kwargs:
        params['numberOfSlots'] = kwargs['numberOfSlots']

    parallel_environment \
        = parse('config.parallelEnvironment').find(cluster)
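    # find() returns a list of matches; the first match's value is used below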

    if parallel_environment:
        parallel_environment = parallel_environment[0].value
        params['parallelEnvironment'] = parallel_environment

    job_output_dir = get_cluster_job_output_dir(cluster)
    if job_output_dir:
        params['jobOutputDir'] = job_output_dir

    paraview_install_dir \
        = parse('config.paraview.installDir').find(cluster)
    if paraview_install_dir:
        paraview_install_dir = paraview_install_dir[0].value
        params['paraviewInstallDir'] = paraview_install_dir

    # Does the cluster have GPUs?
    params['gpu'] = has_gpus(cluster) or kwargs.get('numberOfGpusPerNode',
                                                    0) > 0

    job['params'] = params

    # Create proxy entry
    if cluster['type'] == 'ec2':
        create_proxy_entry(task, cluster, job)

    # Before we submit the job upload any file we may have been given
    upload_input(task, cluster, job, *args, **kwargs)

    submit_job(cluster,
               job,
               log_write_url=None,
               girder_token=girder_token,
               monitor=False)

    monitor_paraview_job.delay(cluster, job, *args, **kwargs)
Example #4
def submit_pyfr_job(task, cluster, job, *args, **kwargs):
    task.logger.info('Submitting job %s to cluster.' % job['_id'])
    girder_token = task.taskflow.girder_token

    job['params'].update(kwargs)

    submit_job(cluster, job, log_write_url=None,
               girder_token=girder_token, monitor=False)

    monitor_pyfr_job.delay(cluster, job, *args, **kwargs)
Example #5
def submit_paraview_job(task, cluster, job, *args, **kwargs):
    task.taskflow.logger.info('Submitting job to cluster.')
    girder_token = task.taskflow.girder_token

    params = {}

    if 'dataDir' in kwargs:
        params['dataDir'] = kwargs['dataDir']

    if 'fileName' in kwargs:
        params['fileName'] = kwargs['fileName']

    if 'simulationJobId' in kwargs:
        params['simulationJobId'] = kwargs['simulationJobId']

    if 'sessionKey' in kwargs:
        params['sessionKey'] = kwargs['sessionKey']
        # Save the sessionKey so we can clean up the proxy entry
        task.taskflow.set_metadata('sessionKey', kwargs['sessionKey'])

    parallel_environment \
        = parse('config.parallelEnvironment').find(cluster)

    if parallel_environment:
        parallel_environment = parallel_environment[0].value
        params['parallelEnvironment'] = parallel_environment

    params['numberOfSlots'] = 1

    job_output_dir = get_cluster_job_output_dir(cluster)
    if job_output_dir:
        params['jobOutputDir'] = job_output_dir

    paraview_install_dir \
        = parse('config.paraview.installDir').find(cluster)
    if paraview_install_dir:
        paraview_install_dir = paraview_install_dir[0].value
        params['paraviewInstallDir'] = paraview_install_dir

    # Does the cluster have GPUs?
    params['gpu'] = has_gpus(cluster) or kwargs.get('numberOfGpusPerNode', 0) > 0

    job['params'] = params

    # Create proxy entry
    if cluster['type'] == 'ec2':
        create_proxy_entry(task, cluster, job)

    # Before we submit the job upload any file we may have been given
    upload_input(task, cluster, job, *args, **kwargs)

    submit_job(cluster, job, log_write_url=None,
               girder_token=girder_token, monitor=False)

    monitor_paraview_job.delay(cluster, job, *args, **kwargs)
Example #6
def start(task, input_, user, cluster, image, run_parameters):
    """
    The flow is the following:
    - Dry run the container with the -d flag to obtain a description of the input/output formats
    - Convert the cjson input geometry to conform to the container's expected format
    - Run the container
    - Convert the container output format into cjson
    - Ingest the output in the database
    """
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    if cluster.get('name') == 'cori':
        cluster = get_cori(client)

    if '_id' not in cluster:
        log_and_raise(task, 'Invalid cluster configuration: %s' % cluster)

    oc_folder = get_oc_folder(client)
    root_folder = client.createFolder(
        oc_folder['_id'],
        datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%f"))
    # temporary folder to save the container in/out description
    description_folder = client.createFolder(root_folder['_id'], 'description')

    job = _create_description_job(task, cluster, description_folder, image,
                                  run_parameters)

    task.taskflow.logger.info(
        'Preparing job to obtain the container description.')
    download_job_input_folders(cluster,
                               job,
                               girder_token=task.taskflow.girder_token,
                               submit=False)
    task.taskflow.logger.info(
        'Submitting job to obtain the container description.')

    submit_job(cluster,
               job,
               girder_token=task.taskflow.girder_token,
               monitor=False)

    monitor_job.apply_async(
        (cluster, job), {
            'girder_token': task.taskflow.girder_token,
            'monitor_interval': 10
        },
        countdown=countdown(cluster),
        link=postprocess_description.s(input_, user, cluster, image,
                                       run_parameters, root_folder, job,
                                       description_folder))
Example #7
def submit_nwchem_job(task, upstream_result):
    job = upstream_result['job']
    task.logger.info('Submitting job %s to cluster.' % job['_id'])
    girder_token = task.taskflow.girder_token
    cluster = upstream_result['cluster']

    job_params = upstream_result.copy()
    job_params.pop('cluster')
    job_params.pop('job')
    job['params'].update(job_params)

    submit_job(cluster, job, log_write_url=None,
               girder_token=girder_token, monitor=False)

    return upstream_result
Example #8
def start(task, user, cluster, image, container):
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    if cluster.get('name') == 'cori':
        cluster = get_cori(client)

    if '_id' not in cluster:
        log_and_raise(task, 'Invalid cluster configuration: %s' % cluster)

    oc_folder = get_oc_folder(client)
    root_folder = client.createFolder(
        oc_folder['_id'],
        datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%f"))
    # temporary folder to save the container in/out description
    folder = client.createFolder(root_folder['_id'], 'pull_folder')

    # save the pull.py script to the job directory
    with open(os.path.join(os.path.dirname(__file__), 'utils/pull.py'),
              'rb') as f:
        # Get the size of the file
        size = f.seek(0, 2)
        f.seek(0)
        name = 'pull.py'
        client.uploadFile(folder['_id'], f, name, size, parentType='folder')

    job = _create_job(task, cluster, folder, image, container)

    # Now download pull.py script to the cluster
    task.taskflow.logger.info('Preparing job to pull the container.')
    download_job_input_folders(cluster,
                               job,
                               girder_token=task.taskflow.girder_token,
                               submit=False)

    task.taskflow.logger.info('Submitting job to pull the container.')
    submit_job(cluster,
               job,
               girder_token=task.taskflow.girder_token,
               monitor=False)

    monitor_job.apply_async((cluster, job), {
        'girder_token': task.taskflow.girder_token,
        'monitor_interval': 10
    },
                            countdown=countdown(cluster),
                            link=postprocess_job.s(user, cluster, image, job,
                                                   folder, container))
Example #9
def submit_paraview_job(task, cluster, job, *args, **kwargs):
    task.taskflow.logger.info('Submitting job to cluster.')
    girder_token = task.taskflow.girder_token

    params = {}

    if 'dataDir' in kwargs:
        params['dataDir'] = kwargs['dataDir']

    if 'fileName' in kwargs:
        params['fileName'] = kwargs['fileName']

    if 'simulationJobId' in kwargs:
        params['simulationJobId'] = kwargs['simulationJobId']

    if 'sessionKey' in kwargs:
        params['sessionKey'] = kwargs['sessionKey']
        # Save the sessionKey so we can clean up the proxy entry
        task.taskflow.set_metadata('sessionKey', kwargs['sessionKey'])

    parallel_environment \
        = parse('config.parallelEnvironment').find(cluster)

    if parallel_environment:
        parallel_environment = parallel_environment[0].value
        params['parallelEnvironment'] = parallel_environment

    job_output_dir = get_cluster_job_output_dir(cluster)
    if job_output_dir:
        params['jobOutputDir'] = job_output_dir

    paraview_install_dir \
        = parse('config.paraview.installDir').find(cluster)
    if paraview_install_dir:
        paraview_install_dir = paraview_install_dir[0].value
        params['paraviewInstallDir'] = paraview_install_dir

    job['params'] = params

    # Before we submit the job upload any file we may have been given
    upload_input(task, cluster, job, *args, **kwargs)

    submit_job(cluster, job, log_write_url=None,
               girder_token=girder_token, monitor=False)

    monitor_paraview_job.delay(cluster, job, *args, **kwargs)
Example #10
def submit_open_foam_job(task, cluster, job, *args, **kwargs):
    # Now download job inputs
    task.logger.info('Uploading input files to cluster.')
    download_job_input_folders(cluster,
                               job,
                               log_write_url=None,
                               girder_token=task.taskflow.girder_token,
                               submit=False)
    task.logger.info('Uploading complete.')

    # Setup job parameters
    task.taskflow.logger.info('Submitting job to cluster.')
    job['params'] = {}

    ## parallel_environment
    parallel_environment = parse('config.parallelEnvironment').find(cluster)
    if parallel_environment:
        parallel_environment = parallel_environment[0].value
        job['params']['parallelEnvironment'] = parallel_environment

    ## slots
    job['params']['numberOfSlots'] = 1

    ## output dir
    job_output_dir = get_cluster_job_output_dir(cluster)
    if job_output_dir:
        job['params']['jobOutputDir'] = job_output_dir

    # Submit job to the queue
    submit_job(cluster,
               job,
               log_write_url=None,
               girder_token=task.taskflow.girder_token,
               monitor=False)

    # Move to the next task
    monitor_open_foam_job.delay(cluster, job, *args, **kwargs)
Example #11
def start(task, input_, cluster, image, run_parameters):
    """
    The flow is the following:
    - Dry run the container with the -d flag to obtain a description of the input/output formats
    - Convert the cjson input geometry to conform to the container's expected format
    - Run the container
    - Convert the container output format into cjson
    - Ingest the output in the database
    """
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    if cluster.get('name') == 'cori':
        cluster = _get_cori(client)

    if '_id' not in cluster:
        raise Exception('Invalid cluster configuration: %s' % cluster)

    oc_folder = _get_oc_folder(client)
    root_folder = client.createFolder(
        oc_folder['_id'],
        datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%f"))
    # temporary folder to save the container in/out description
    description_folder = client.createFolder(root_folder['_id'], 'description')

    # save the pull.py script to the job directory
    with open(os.path.join(os.path.dirname(__file__), 'utils/pull.py'),
              'rb') as f:
        # Get the size of the file
        size = f.seek(0, 2)
        f.seek(0)
        name = 'pull.py'
        input_parameters_file = client.uploadFile(description_folder['_id'],
                                                  f,
                                                  name,
                                                  size,
                                                  parentType='folder')

    job = _create_description_job(task, cluster, description_folder, image,
                                  run_parameters)

    # Now download pull.py script to the cluster
    task.taskflow.logger.info(
        'Downloading description input files to cluster.')
    download_job_input_folders(cluster,
                               job,
                               girder_token=task.taskflow.girder_token,
                               submit=False)
    task.taskflow.logger.info('Downloading complete.')

    submit_job(cluster,
               job,
               girder_token=task.taskflow.girder_token,
               monitor=False)

    monitor_job.apply_async(
        (cluster, job), {
            'girder_token': task.taskflow.girder_token,
            'monitor_interval': 10
        },
        link=postprocess_description.s(input_, cluster, image, run_parameters,
                                       root_folder, job, description_folder))
Example #12
def upload_output(task, _, cluster, job, *args, **kwargs):
    task.taskflow.logger.info('Uploading results from cluster')
    output_folder_id = kwargs['output']['folder']['id']

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])
    job['output'] = [{'folderId': output_folder_id, 'path': '.'}]

    upload_job_output_to_folder(cluster,
                                job,
                                log_write_url=None,
                                job_dir=None,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload job output complete.')

    mesh_file_id = kwargs.pop('meshFileId')

    solution_files = list(_list_solution_files(client, output_folder_id))

    if len(solution_files) == 0:
        raise Exception(
            'No solution files were produced, please check output files for errors.'
        )

    # Generate and save the first vtu file that should be loaded for this
    # run. This can then be used to know which file to open as part of any viz
    # step.
    file_names = [f['name'] for f in solution_files]
    file_names.sort()
    if len(file_names) > 1:
        vtu_file_first = '%s.vtu' % file_names[0].rsplit('.', 1)[0]
        # find where to put the ...*...
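        # (i.e. find the common prefix and suffix shared by all the .vtu
        # names so a single wildcard pattern can match every solution file)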
        head = tail = size = len(vtu_file_first)
        for fileName in file_names[1:]:
            vtu_name = '%s.vtu' % fileName.rsplit('.', 1)[0]
            for i in range(size):
                if vtu_file_first[i] != vtu_name[i] and head > i:
                    head = i  # not included in the cut
                if vtu_file_first[-i] != vtu_name[-i] and tail >= i:
                    tail = i - 1

        vtu_file = '%s*%s' % (vtu_file_first[:head], vtu_file_first[-tail:])
    else:
        vtu_file = '%s.vtu' % file_names[0].rsplit('.', 1)[0]
    task.taskflow.set_metadata('vtuFile', vtu_file)

    number_files = len(solution_files)

    # By default export solution files to VTK format using a set of batch jobs
    if 'exportInTaskFlow' not in kwargs or not kwargs['exportInTaskFlow']:

        number_of_jobs = kwargs['numberOfProcs']
        task.logger.info('Generating %d export jobs' % number_of_jobs)

        sim_job_dir = job['dir']
        jobs = []
        job_index = 1
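        # Distribute the solution files round-robin across the export jobs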
        for chunk in [
                solution_files[i::number_of_jobs]
                for i in xrange(number_of_jobs)
        ]:
            if chunk:
                name = 'pyfr_export_%d' % job_index
                mesh_filename = kwargs['meshFilename']
                export_job = create_export_job(task, name, chunk, sim_job_dir,
                                               mesh_filename)
                submit_job(cluster,
                           export_job,
                           log_write_url=None,
                           girder_token=task.taskflow.girder_token,
                           monitor=False)
                jobs.append(export_job)
                job_index += 1

        # Update the jobs list in the metadata
        task.taskflow.set_metadata('jobs', [j for j in jobs] + [job])
        # Also save just the export job ids
        task.taskflow.set_metadata('export_jobs', [j['_id'] for j in jobs])

        monitor_jobs.apply_async(
            (cluster, jobs), {'girder_token': task.taskflow.girder_token},
            link=upload_export_output.s(cluster, job, *args, **kwargs))
    else:
        # The number 100 is pretty arbitrary!
        if number_files < 100:
            export_output.delay(output_folder_id, mesh_file_id, solution_files)
        # Break into chunks and run in parallel
        else:
            for chunk in [
                    solution_files[i::NUMBER__OF_EXPORT_TASKS]
                    for i in xrange(NUMBER__OF_EXPORT_TASKS)
            ]:
                export_output.delay(output_folder_id, mesh_file_id, chunk)
Example #13
    def test_submit_job(self, get_connection, *args):

        cluster = {
            '_id': 'bob',
            'type': 'ec2',
            'name': 'dummy',
            'config': {
                '_id': 'dummy',
                'scheduler': {
                    'type': 'sge'
                }
            },
        }
        job_id = 'dummy'
        job_model = {
            '_id': job_id,
            'queueJobId': '1',
            'name': 'dummy',
            'commands': ['ls'],
            'output': [{'tail': True,  'path': 'dummy/file/path'}]
        }

        qconf_output = ['pe_name            orte',
                        'slots              10\n',
                        'user_lists         NONE',
                        'xuser_lists        NONE',
                        'start_proc_args    /bin/true',
                        'stop_proc_args     /bin/true',
                        'allocation_rule    $pe_slots',
                        'control_slaves     FALSE',
                        'job_is_first_task  TRUE',
                        'urgency_slots      min',
                        'accounting_summary FALSE']

        qsub_output = ['Your job 74 ("test.sh") has been submitted']

        conn = get_connection.return_value.__enter__.return_value
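        # The mocked connection returns, in order: the remote home directory,
        # the qconf parallel environment output and the qsub submission output.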
        conn.execute.side_effect = [['/home/test'], qconf_output, qsub_output]

        def _get_status(url, request):
            content = {
                'status': 'queued'
            }
            content = json.dumps(content).encode('utf8')
            headers = {
                'content-length': len(content),
                'content-type': 'application/json'
            }

            return httmock.response(200, content, headers, request=request)

        def _set_status(url, request):
            content = {
                'status': 'queued'
            }
            content = json.dumps(content).encode('utf8')
            headers = {
                'content-length': len(content),
                'content-type': 'application/json'
            }

            return httmock.response(200, content, headers, request=request)

        def _log(url, request):
            content = {
            }
            content = json.dumps(content).encode('utf8')
            headers = {
                'content-length': len(content),
                'content-type': 'application/json'
            }

            return httmock.response(200, content, headers, request=request)


        status_url = '/api/v1/jobs/%s/status' % job_id
        get_status = httmock.urlmatch(
            path=r'^%s$' % status_url, method='GET')(_get_status)

        status_update_url = '/api/v1/jobs/%s' % job_id
        set_status = httmock.urlmatch(
            path=r'^%s$' % status_update_url, method='PATCH')(_set_status)

        log_url = '/api/v1/jobs/%s/log' % job_id
        log = httmock.urlmatch(
            path=r'^%s$' % log_url, method='POST')(_log)

        with httmock.HTTMock(get_status, set_status, log):
            job.submit_job(cluster, job_model, log_write_url='log_write_url',
                           girder_token='girder_token')

        self.assertEqual(conn.execute.call_args_list[1],
                         mock.call('qconf -sp orte'), 'Unexpected qconf command: %s' %
                         str(conn.execute.call_args_list[0]))

        # Specifying a parallel environment
        job_model = {
            '_id': job_id,
            'queueJobId': '1',
            'name': 'dummy',
            'commands': ['ls'],
            'output': [{'tail': True,  'path': 'dummy/file/path'}],
            'params': {
                'parallelEnvironment': 'mype'
            }
        }

        qconf_output = ['pe_name            mype',
                        'slots              10\n',
                        'user_lists         NONE',
                        'xuser_lists        NONE',
                        'start_proc_args    /bin/true',
                        'stop_proc_args     /bin/true',
                        'allocation_rule    $pe_slots',
                        'control_slaves     FALSE',
                        'job_is_first_task  TRUE',
                        'urgency_slots      min',
                        'accounting_summary FALSE']

        conn.reset_mock()
        conn.execute.side_effect = [['/home/test'], qconf_output, qsub_output]

        with httmock.HTTMock(get_status, set_status, log):
            job.submit_job(cluster, job_model, log_write_url='log_write_url',
                           girder_token='girder_token')
        self.assertEqual(conn.execute.call_args_list[1],
                         mock.call('qconf -sp mype'), 'Unexpected qconf command: %s' %
                         str(conn.execute.call_args_list[0]))

        # For traditional clusters we shouldn't try to extract slots from orte
        cluster = {
            '_id': 'dummy',
            'type': 'trad',
            'name': 'dummy',
            'config': {
                'host': 'dummy',
                'ssh': {
                    'user': '******',
                    'passphrase': 'its a secret'
                },
                'scheduler': {
                    'type': 'sge'
                }
            }
        }
        job_id = 'dummy'
        job_model = {
            '_id': job_id,
            'queueJobId': '1',
            'name': 'dummy',
            'commands': ['ls'],
            'output': [{'tail': True,  'path': 'dummy/file/path'}]
        }

        conn.reset_mock()
        conn.execute.side_effect = [['/home/test'], ['Your job 74 ("test.sh") has been submitted']]

        with httmock.HTTMock(get_status, set_status, log):
            job.submit_job(cluster, job_model, log_write_url='log_write_url',
                           girder_token='girder_token')

        # Assert that we don't try and get the number of slots
        self.assertFalse('qconf' in str(conn.execute.call_args_list), 'qconf should not be called')

        # For traditional clusters define a parallel env
        cluster = {
            '_id': 'dummy',
            'type': 'trad',
            'name': 'dummy',
            'config': {
                'host': 'dummy',
                'ssh': {
                    'user': '******',
                    'passphrase': 'its a secret'
                },
                'scheduler': {
                    'type': 'sge'
                }
            }
        }
        job_id = 'dummy'
        job_model = {
            '_id': job_id,
            'queueJobId': '1',
            'name': 'dummy',
            'commands': ['ls'],
            'output': [{'tail': True,  'path': 'dummy/file/path'}],
            'params': {
                'parallelEnvironment': 'mype'
            }
        }

        conn.reset_mock()
        conn.execute.side_effect = [['/home/test'], qconf_output, ['Your job 74 ("test.sh") has been submitted']]

        with httmock.HTTMock(get_status, set_status, log):
            job.submit_job(cluster, job_model, log_write_url='log_write_url',
                           girder_token='girder_token')

        self.assertEqual(conn.execute.call_args_list[1], mock.call('qconf -sp mype'))
        self.assertEqual(job_model['params']['numberOfSlots'], 10)
Example #14
    def test_submit_job(self, get_connection, *args):

        cluster = {
            '_id': 'bob',
            'type': 'ec2',
            'name': 'dummy',
            'config': {
                '_id': 'dummy',
                'scheduler': {
                    'type': 'sge'
                }
            },
        }
        job_id = 'dummy'
        job_model = {
            '_id': job_id,
            'queueJobId': '1',
            'name': 'dummy',
            'commands': ['ls'],
            'output': [{
                'tail': True,
                'path': 'dummy/file/path'
            }]
        }

        qconf_output = [
            'pe_name            orte', 'slots              10\n',
            'user_lists         NONE', 'xuser_lists        NONE',
            'start_proc_args    /bin/true', 'stop_proc_args     /bin/true',
            'allocation_rule    $pe_slots', 'control_slaves     FALSE',
            'job_is_first_task  TRUE', 'urgency_slots      min',
            'accounting_summary FALSE'
        ]

        qsub_output = ['Your job 74 ("test.sh") has been submitted']

        conn = get_connection.return_value.__enter__.return_value
        conn.execute.side_effect = [['/home/test'], qconf_output, qsub_output]

        def _get_status(url, request):
            content = {'status': 'queued'}
            content = json.dumps(content).encode('utf8')
            headers = {
                'content-length': len(content),
                'content-type': 'application/json'
            }

            return httmock.response(200, content, headers, request=request)

        def _set_status(url, request):
            content = {'status': 'queued'}
            content = json.dumps(content).encode('utf8')
            headers = {
                'content-length': len(content),
                'content-type': 'application/json'
            }

            return httmock.response(200, content, headers, request=request)

        def _log(url, request):
            content = {}
            content = json.dumps(content).encode('utf8')
            headers = {
                'content-length': len(content),
                'content-type': 'application/json'
            }

            return httmock.response(200, content, headers, request=request)

        status_url = '/api/v1/jobs/%s/status' % job_id
        get_status = httmock.urlmatch(path=r'^%s$' % status_url,
                                      method='GET')(_get_status)

        status_update_url = '/api/v1/jobs/%s' % job_id
        set_status = httmock.urlmatch(path=r'^%s$' % status_update_url,
                                      method='PATCH')(_set_status)

        log_url = '/api/v1/jobs/%s/log' % job_id
        log = httmock.urlmatch(path=r'^%s$' % log_url, method='POST')(_log)

        with httmock.HTTMock(get_status, set_status, log):
            job.submit_job(cluster,
                           job_model,
                           log_write_url='log_write_url',
                           girder_token='girder_token')

        self.assertEqual(
            conn.execute.call_args_list[1], mock.call('qconf -sp orte'),
            'Unexpected qconf command: %s' %
            str(conn.execute.call_args_list[0]))

        # Specifying a parallel environment
        job_model = {
            '_id': job_id,
            'queueJobId': '1',
            'name': 'dummy',
            'commands': ['ls'],
            'output': [{
                'tail': True,
                'path': 'dummy/file/path'
            }],
            'params': {
                'parallelEnvironment': 'mype'
            }
        }

        qconf_output = [
            'pe_name            mype', 'slots              10\n',
            'user_lists         NONE', 'xuser_lists        NONE',
            'start_proc_args    /bin/true', 'stop_proc_args     /bin/true',
            'allocation_rule    $pe_slots', 'control_slaves     FALSE',
            'job_is_first_task  TRUE', 'urgency_slots      min',
            'accounting_summary FALSE'
        ]

        conn.reset_mock()
        conn.execute.side_effect = [['/home/test'], qconf_output, qsub_output]

        with httmock.HTTMock(get_status, set_status, log):
            job.submit_job(cluster,
                           job_model,
                           log_write_url='log_write_url',
                           girder_token='girder_token')
        self.assertEqual(
            conn.execute.call_args_list[1], mock.call('qconf -sp mype'),
            'Unexpected qconf command: %s' %
            str(conn.execute.call_args_list[0]))

        # For traditional clusters we shouldn't try to extract slots from orte
        cluster = {
            '_id': 'dummy',
            'type': 'trad',
            'name': 'dummy',
            'config': {
                'host': 'dummy',
                'ssh': {
                    'user': '******',
                    'passphrase': 'its a secret'
                },
                'scheduler': {
                    'type': 'sge'
                }
            }
        }
        job_id = 'dummy'
        job_model = {
            '_id': job_id,
            'queueJobId': '1',
            'name': 'dummy',
            'commands': ['ls'],
            'output': [{
                'tail': True,
                'path': 'dummy/file/path'
            }]
        }

        conn.reset_mock()
        conn.execute.side_effect = [[
            '/home/test'
        ], ['Your job 74 ("test.sh") has been submitted']]

        with httmock.HTTMock(get_status, set_status, log):
            job.submit_job(cluster,
                           job_model,
                           log_write_url='log_write_url',
                           girder_token='girder_token')

        # Assert that we don't try and get the number of slots
        self.assertFalse('qconf' in str(conn.execute.call_args_list),
                         'qconf should not be called')

        # For traditional clusters define a parallel env
        cluster = {
            '_id': 'dummy',
            'type': 'trad',
            'name': 'dummy',
            'config': {
                'host': 'dummy',
                'ssh': {
                    'user': '******',
                    'passphrase': 'its a secret'
                },
                'scheduler': {
                    'type': 'sge'
                }
            }
        }
        job_id = 'dummy'
        job_model = {
            '_id': job_id,
            'queueJobId': '1',
            'name': 'dummy',
            'commands': ['ls'],
            'output': [{
                'tail': True,
                'path': 'dummy/file/path'
            }],
            'params': {
                'parallelEnvironment': 'mype'
            }
        }

        conn.reset_mock()
        conn.execute.side_effect = [[
            '/home/test'
        ], qconf_output, ['Your job 74 ("test.sh") has been submitted']]

        with httmock.HTTMock(get_status, set_status, log):
            job.submit_job(cluster,
                           job_model,
                           log_write_url='log_write_url',
                           girder_token='girder_token')

        self.assertEqual(conn.execute.call_args_list[1],
                         mock.call('qconf -sp mype'))
        self.assertEqual(job_model['params']['numberOfSlots'], 10)
Example #15
def upload_output(task, _, cluster, job, *args, **kwargs):
    task.taskflow.logger.info('Uploading results from cluster')
    output_folder_id = kwargs['output']['folder']['id']

    client = _create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])
    job['output'] = [{
        'folderId': output_folder_id,
        'path': '.'
    }]

    upload_job_output_to_folder(cluster, job, log_write_url=None, job_dir=None,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload job output complete.')

    mesh_file_id = kwargs.pop('meshFileId')

    solution_files = list(_list_solution_files(client, output_folder_id))

    if len(solution_files) == 0:
        raise Exception('No solution files were produced, please check output files for errors.')

    # Generate and save the first vtu file that should be loaded for this
    # run. This can then be used to know which file to open as part of any viz
    # step.
    file_names = [f['name'] for f in solution_files]
    file_names.sort()
    vtu_file = '%s.vtu' % file_names[0].rsplit('.', 1)[0]
    task.taskflow.set_metadata('vtuFile', vtu_file)

    number_files = len(solution_files)

    # By default export solution files to VTK format using a set of batch jobs
    if 'exportInTaskFlow' not in kwargs or not kwargs['exportInTaskFlow']:

        number_of_jobs = kwargs['numberOfProcs']
        task.logger.info('Generating %d export jobs' % number_of_jobs)

        sim_job_dir = job['dir']
        jobs = []
        job_index = 1
        for chunk in [solution_files[i::number_of_jobs] for i in xrange(number_of_jobs)]:
            if chunk:
                name = 'pyfr_export_%d' % job_index
                mesh_filename = kwargs['meshFilename']
                export_job = create_export_job(
                    task, name, chunk, sim_job_dir, mesh_filename)
                submit_job(cluster, export_job, log_write_url=None,
                           girder_token=task.taskflow.girder_token,
                           monitor=False)
                jobs.append(export_job)
                job_index += 1

        # Update the jobs list in the metadata
        task.taskflow.set_metadata('jobs', [j for j in jobs] + [job])
        # Also save just the export job ids
        task.taskflow.set_metadata('export_jobs', [j['_id'] for j in jobs])

        monitor_jobs.apply_async(
            (cluster, jobs), {'girder_token': task.taskflow.girder_token},
            link=upload_export_output.s(cluster, job, *args, **kwargs))
    else:
        # The number 100 is pretty arbitrary!
        if number_files < 100:
            export_output.delay(
                output_folder_id, mesh_file_id, solution_files)
        # Break into chunks and run in parallel
        else:
            for chunk in [solution_files[i::NUMBER__OF_EXPORT_TASKS] for i in xrange(NUMBER__OF_EXPORT_TASKS)]:
                export_output.delay(output_folder_id, mesh_file_id, chunk)