def _create_job(task, cluster, folder, image, container):
    params = _get_job_parameters(task, cluster, image, container)
    setup_commands = params['setupCommands']
    repository = params['repository']
    tag = params['tag']
    job_parameters = params['jobParameters']

    run_command = 'python pull.py -r %s -t %s -c %s' % (repository, tag,
                                                        container)
    commands = setup_commands + [run_command, 'rm pull.py']

    body = {
        # ensure there are no special characters in the submission script name
        'name': 'desc_%s' % re.sub('[^a-zA-Z0-9]', '_', run_command),
        'commands': commands,
        'input': [{
            'folderId': folder['_id'],
            'path': '.'
        }],
        'output': [{
            'folderId': folder['_id'],
            'path': '.'
        }],
        'uploadOutput': False,
        'params': job_parameters
    }

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    job = client.post('jobs', data=json.dumps(body))

    return job
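A minimal standalone sketch of the job-name sanitization used above; the command string is a made-up example.

import re

run_command = 'python pull.py -r openchemistry/nwchem -t latest -c docker'
# every character that is not a letter or digit becomes an underscore
name = 'desc_%s' % re.sub('[^a-zA-Z0-9]', '_', run_command)
print(name)  # desc_python_pull_py__r_openchemistry_nwchem__t_latest__c_docker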
Example #2
def create_export_job(task, job_name, files, job_dir, mesh_filename):

    commands = []
    mesh_file_path = os.path.join(job_dir, 'input', mesh_filename)
    for file in files:
        name = file['name']
        vtk_filename = '%s.vtu' % name.rsplit('.', 1)[0]
        output_path = os.path.join(job_dir, vtk_filename)
        solution_file_path = os.path.join(job_dir, name)

        cmd = 'pyfr export %s %s %s' % (mesh_file_path, solution_file_path,
                                        output_path)
        commands.append(cmd)

    body = {
        'name': job_name,
        'commands': commands,
        'input': [],
        'output': [],
        'params': {
            'numberOfSlots': 1
        }
    }

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    job = client.post('jobs', data=json.dumps(body))

    task.logger.info('Created export job %s' % job['_id'])

    return job
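A worked sketch of the command strings the loop above builds, with hypothetical job directory and file names.

import os

job_dir = '/scratch/job-123'
mesh_file_path = os.path.join(job_dir, 'input', 'mesh.pyfrm')
for name in ['sol-1.00.pyfrs', 'sol-2.00.pyfrs']:
    vtk_filename = '%s.vtu' % name.rsplit('.', 1)[0]
    # pyfr export <mesh> <solution> <output>
    print('pyfr export %s %s %s' % (mesh_file_path,
                                    os.path.join(job_dir, name),
                                    os.path.join(job_dir, vtk_filename)))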
Example #3
def _create_job_nersc(task, cluster, input_file, input_folder):
    task.taskflow.logger.info('Create NWChem job.')

    body = {
        'name': 'nwchem_run',
        'commands': [
            '/usr/bin/srun -N 1 -n 32 %s %s' %
            (os.environ.get('OC_NWCHEM_PATH', 'nwchem'), input_file['name'])
        ],
        'input': [{
            'folderId': input_folder['_id'],
            'path': '.'
        }],
        'output': [],
        'params': {
            'taskFlowId': task.taskflow.id,
            'numberOfNodes': 1,
            'queue': 'debug',
            'constraint': 'haswell',
            'account': os.environ.get('OC_ACCOUNT')
        }
    }

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    job = client.post('jobs', data=json.dumps(body))
    task.taskflow.set_metadata('jobs', [job])

    return job
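The srun command above falls back to a bare `nwchem` on the PATH when OC_NWCHEM_PATH is unset; a standalone sketch of that pattern with a hypothetical input file name.

import os

input_name = 'water.nw'  # hypothetical
cmd = '/usr/bin/srun -N 1 -n 32 %s %s' % (
    os.environ.get('OC_NWCHEM_PATH', 'nwchem'), input_name)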
Example #4
def create_export_job(task, job_name, files, job_dir, mesh_filename):

    commands = []
    mesh_file_path = os.path.join(job_dir, 'input', mesh_filename)
    for file in files:
        name = file['name']
        vtk_filename = '%s.vtu' % name.rsplit('.', 1)[0]
        output_path = os.path.join(job_dir, vtk_filename)
        solution_file_path = os.path.join(job_dir, name)

        cmd = 'pyfr export %s %s %s' % (mesh_file_path,
                                       solution_file_path, output_path)
        commands.append(cmd)

    body = {
        'name': job_name,
        'commands': commands,
        'input': [],
        'output': [],
        'params': {
            'numberOfSlots': 1
        }
    }

    client = create_girder_client(
                task.taskflow.girder_api_url, task.taskflow.girder_token)

    job = client.post('jobs', data=json.dumps(body))

    task.logger.info('Created export job %s' % job['_id'])

    return job
Example #5
def upload_input(task, cluster, job, *args, **kwargs):
    file_id = parse('input.file.id').find(kwargs)
    if file_id:
        file_id = file_id[0].value
        task.logger.info('Visualizing file ID: %s' % file_id)
        job['params']['dataDir'] = '$PWD'

        # Fetch the file
        girder_client = create_girder_client(task.taskflow.girder_api_url,
                                             task.taskflow.girder_token)
        file = girder_client.getResource('file', file_id)

        # Set the file to load
        filename = file['name']
        job['params']['fileName'] = filename
        task.logger.info('Filename is: %s' % filename)

        # Set main script if not visualizer
        if MAIN_SCRIPT:
            job['params']['scriptToRun'] = MAIN_SCRIPT
            task.logger.info('Swap run script to: %s' % MAIN_SCRIPT)

        task.logger.info('Uploading file to cluster.')
        job_dir = job_directory(cluster, job)
        upload_file(cluster, task.taskflow.girder_token, file, job_dir)
        task.logger.info('Upload complete.')
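The `parse` helper used in these examples appears to be jsonpath-rw; assuming that, a self-contained sketch of the find-then-unwrap pattern.

from jsonpath_rw import parse  # assumption: parse comes from jsonpath-rw

kwargs = {'input': {'file': {'id': 'abc123'}}}  # made-up payload
match = parse('input.file.id').find(kwargs)
if match:
    file_id = match[0].value  # 'abc123'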
Example #6
def create_job(task, *args, **kwargs):
    task.logger.info('Taskflow %s' % task.taskflow.id)
    task.taskflow.logger.info('Create PyFr job.')
    input_folder_id = kwargs['input']['folder']['id']

    backend = kwargs['backend']['type']

    body = {
        'name': 'pyfr_run',
        'commands': [
            "mpiexec -n %s pyfr run -b %s input/%s input/%s" %
            (kwargs['numberOfProcs'], backend, kwargs['meshFilename'],
             kwargs['iniFilename'])
        ],
        'input': [{
            'folderId': input_folder_id,
            'path': 'input'
        }],
        'output': [],
        'params': {
            'numberOfSlots': kwargs['numberOfProcs']
        }
    }

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    job = client.post('jobs', data=json.dumps(body))

    task.taskflow.set_metadata('jobs', [job])

    submit.delay(job, *args, **kwargs)
Example #7
def export_output(task, folder_id, imported_mesh_file_id, files):
    """
    Export a batch of PyFR solution files into VTK format.

    :param folder_id: The target folder id to upload the VTK files to.
    :param imported_mesh_file_id: The mesh in PyFR format.
    :param files: The files to export (Girder JSON objects).
    """
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    output_dir = tempfile.mkdtemp()

    try:
        mesh_path = os.path.join(output_dir, 'mesh.pyfrm')

        task.logger.info('Downloading mesh.')
        client.downloadFile(imported_mesh_file_id, mesh_path)
        task.logger.info('Downloading complete.')

        for file in files:
            _export_file(task, client, folder_id, mesh_path, file, output_dir)

    finally:
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)
Example #8
def export_output(task, folder_id, imported_mesh_file_id, files):
    """
    Export a batch of PyFR solution files into VTK format.

    :param folder_id: The target folder id to upload the VTK files to.
    :param imported_mesh_file_id: The mesh in PyFR format.
    :param files: The files to export (Girder JSON objects).
    """
    client = create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)

    output_dir = tempfile.mkdtemp()

    try:
        mesh_path = os.path.join(output_dir, 'mesh.pyfrm')

        task.logger.info('Downloading mesh.')
        client.downloadFile(imported_mesh_file_id, mesh_path)
        task.logger.info('Downloading complete.')

        for file in files:
            _export_file(task, client, folder_id, mesh_path, file, output_dir)

    finally:
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)
Example #9
def create_job(task, upstream_result):
    task.logger.info('Taskflow %s' % task.taskflow.id)
    task.taskflow.logger.info('Create NWChem job.')
    input_folder_id = upstream_result['input']['folder']['id']

    # TODO: setup command to run with mpi
    body = {
        'name': 'nwchem_run',
        'commands': [
            "mpiexec -n %s nwchem input/%s" % (
                upstream_result['numberOfProcs'],
                upstream_result['nwFilename'])
        ],
        'input': [
            {
              'folderId': input_folder_id,
              'path': 'input'
            }
        ],
        'output': [],
        'params': {
            'numberOfSlots': upstream_result['numberOfProcs']
        }
    }

    client = create_girder_client(
                task.taskflow.girder_api_url, task.taskflow.girder_token)

    job = client.post('jobs', data=json.dumps(body))
    upstream_result['job'] = job

    task.taskflow.set_metadata('jobs', [job])

    return upstream_result
Example #10
def _create_job_ec2(task, cluster, input_file, input_folder):
    task.taskflow.logger.info('Create NWChem job.')
    input_name = input_file['name']

    body = {
        'name': 'nwchem_run',
        'commands': [
            'docker pull openchemistry/nwchem-json:latest',
            'docker run -v $(pwd):/data openchemistry/nwchem-json:latest %s' %
            (input_name)
        ],
        'input': [{
            'folderId': input_folder['_id'],
            'path': '.'
        }],
        'output': [],
        'params': {
            'taskFlowId': task.taskflow.id
        }
    }

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    job = client.post('jobs', data=json.dumps(body))
    task.taskflow.set_metadata('jobs', [job])

    return job
Example #11
def upload_output(task, cluster, job, *args, **kwargs):
    # Girder client
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])

    # Register generated file into girder
    task.taskflow.logger.info('Uploading results from cluster')
    output_folder_id = parse('output.folder.id').find(kwargs)
    if output_folder_id:
        task.taskflow.logger.info('Exporting to folder %s' % output_folder_id)
        output_folder_id = output_folder_id[0].value
        job['output'] = [{'folderId': output_folder_id, 'path': '.'}]
    else:
        task.taskflow.logger.info('No output.folder.id')

    # Upload files metadata
    upload_job_output_to_folder(cluster,
                                job,
                                log_write_url=None,
                                job_dir=None,
                                girder_token=task.taskflow.girder_token)

    # Done...
    task.taskflow.logger.info('Upload complete.')
Example #12
def create_paraview_job(task, *args, **kwargs):
    _update_cluster_config(task, kwargs['cluster'])
    task.logger.info('Validating args passed to flow.')
    validate_args(kwargs)
    cluster = kwargs.pop('cluster')

    # Save the cluster in the taskflow for termination
    task.taskflow.set_metadata('cluster', cluster)

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    task.taskflow.logger.info('Creating ParaView job.')
    task.logger.info('Load ParaView submission script.')

    base_path = os.path.dirname(__file__)
    script_path = os.path.join(base_path, 'pvw.sh')

    if not os.path.exists(script_path):
        msg = 'Script path %s does not exist.' % script_path
        task.logger.info(msg)
        raise Exception(msg)

    with open(script_path, 'r') as fp:
        commands = fp.read().splitlines()

    body = {
        'name': 'paraview',
        'commands': commands,
        'input': [],
        'output': []
    }

    job = client.post('jobs', data=json.dumps(body))
    task.logger.info('ParaView job created: %s' % job['_id'])
    task.taskflow.logger.info('ParaView job created.')

    task.taskflow.set_metadata('jobs', [job])

    # Upload the visualizer code
    task.logger.info('Uploading visualizer')
    viz_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '../../../../../',
                     'node_modules/pvw-visualizer/server/pvw-visualizer.py'))

    if not os.path.exists(viz_path):
        task.logger.error(
            'Unable to locate pvw-visualizer.py for upload. (%s)' % viz_path)
        return

    target_dir = job_directory(cluster, job)
    target_path = os.path.join(target_dir, 'pvw-visualizer.py')

    with get_connection(task.taskflow.girder_token, cluster) as conn:
        conn.makedirs(target_dir)
        with open(viz_path, 'r') as fp:
            conn.put(fp, target_path)

    submit_paraview_job.delay(cluster, job, *args, **kwargs)
Example #13
def cleanup_proxy_entries(task):
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    session_key = parse('meta.sessionKey').find(task.taskflow)
    if session_key:
        session_key = session_key[0].value
        client.delete('proxy/%s' % session_key)
Example #14
def cleanup_proxy_entries(task):
    client = create_girder_client(
                task.taskflow.girder_api_url, task.taskflow.girder_token)

    session_key = parse('meta.sessionKey').find(task.taskflow)
    if session_key:
        session_key = session_key[0].value
        client.delete('proxy/%s' % session_key)
Example #15
def create_paraview_job(task, *args, **kwargs):
    _update_cluster_config(task, kwargs['cluster'])
    task.logger.info('Validating args passed to flow.')
    validate_args(kwargs)
    cluster = kwargs.pop('cluster')

    # Save the cluster in the taskflow for termination
    task.taskflow.set_metadata('cluster', cluster)

    client = create_girder_client(
                task.taskflow.girder_api_url, task.taskflow.girder_token)

    task.taskflow.logger.info('Creating ParaView job.')
    task.logger.info('Load ParaView submission script.')

    base_path = os.path.dirname(__file__)
    script_path = os.path.join(base_path, 'pvw.sh')

    if not os.path.exists(script_path):
        msg = 'Script path %s does not exist.' % script_path
        task.logger.info(msg)
        raise Exception(msg)

    with open(script_path, 'r') as fp:
        commands = fp.read().splitlines()

    body = {
        'name': 'paraview',
        'commands': commands,
        'input': [],
        'output': []
    }

    job = client.post('jobs', data=json.dumps(body))
    task.logger.info('ParaView job created: %s' % job['_id'])
    task.taskflow.logger.info('ParaView job created.')

    task.taskflow.set_metadata('jobs', [job])

    # Upload the visualizer code
    task.logger.info('Uploading visualizer')
    viz_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '../../../../../',
                     'node_modules/pvw-visualizer/server/pvw-visualizer.py'))

    if not os.path.exists(viz_path):
        task.logger.error('Unable to locate pvw-visualizer.py for upload.')
        return

    target_dir = job_directory(cluster, job)
    target_path = os.path.join(target_dir, 'pvw-visualizer.py')

    with get_connection(task.taskflow.girder_token, cluster) as conn:
        conn.makedirs(target_dir)
        with open(viz_path, 'r') as fp:
            conn.put(fp, target_path)

    submit_paraview_job.delay(cluster, job, *args, **kwargs)
Example #16
def create_openfoam_job(task, *args, **kwargs):
    # Girder client
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Save the cluster in the taskflow for termination
    cluster = kwargs.pop('cluster')
    task.taskflow.set_metadata('cluster', cluster)

    # Create job definition
    task.taskflow.logger.info('Creating OpenFoam job.')
    body = {
        'name': 'openfoam_run',
        'commands': [
            # TODO: Setting command for Visualizer's access
            'chmod +x $PWD/run.sh',
            # NOTE: unclear what the commented-out commands below were for
            #'xhost +local:of_v1612_plus',
            #'docker start of_v1612_plus',
            '$PWD/run.sh $PWD'
        ],
        'input': [{
            'folderId': kwargs['input']['folder']['id'],
            'path': '.'
        }],
        'output': [{
            'path': 'simulation/log.blockMesh'
        }, {
            'path': 'simulation/log.patchSummary'
        }, {
            'path': 'simulation/log.potentialFoam'
        }, {
            'path': 'simulation/log.reconstructParMesh'
        }, {
            'path': 'simulation/log.surfaceFeatureExtract'
        }, {
            'path': 'simulation/log.snappyHexMesh'
        }, {
            'path': 'simulation/log.simpleFoam'
        }]
    }

    # Register job in girder + attach to taskflow
    job = client.post('jobs', data=json.dumps(body))
    task.logger.info('OpenFOAM job created: %s' % job['_id'])
    task.taskflow.logger.info('OpenFOAM job created.')
    task.taskflow.set_metadata('jobs', [job])

    # Capture job working directory
    target_dir = job_directory(cluster, job)
    task.taskflow.set_metadata('dataDir', target_dir)

    # Move to the next task
    submit_open_foam_job.delay(cluster, job, *args, **kwargs)
Example #17
def create_openfoam_job(task, *args, **kwargs):
    # Girder client
    client = create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)

    # Save the cluster in the taskflow for termination
    cluster = kwargs.pop('cluster')
    task.taskflow.set_metadata('cluster', cluster)

    # Create job definition
    task.taskflow.logger.info('Creating OpenFoam job.')
    body = {
        'name': 'openfoam_run',
        'commands': [
            'python $PWD/simput-unpack.py $PWD/input-deck.json $PWD',
            'docker start of_v1612_plus',
            'docker exec -t of_v1612_plus $PWD/DockerRun $PWD'
        ],
        'input': [
            {
              'folderId': kwargs['input']['folder']['id'],
              'path': '.'
            },
            {
              'folderId': kwargs['input']['project']['folder']['id'],
              'path': '.'
            }
        ],
        'output': [
        ]
    }

    # Register job in girder + attach to taskflow
    job = client.post('jobs', data=json.dumps(body))
    task.logger.info('OpenFOAM job created: %s' % job['_id'])
    task.taskflow.logger.info('OpenFOAM job created.')
    task.taskflow.set_metadata('jobs', [job])

    # Capture job working directory
    target_dir = job_directory(cluster, job)
    task.taskflow.set_metadata('dataDir', target_dir)

    source_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '../../../../../',
                     'node_modules/simput/bin/unpack/simput-unpack.py'))
    target_path = os.path.join(target_dir, 'simput-unpack.py')

    # Upload unpack script
    with get_connection(task.taskflow.girder_token, cluster) as conn:
        conn.makedirs(target_dir)
        with open(source_path, 'r') as fp:
            conn.put(fp, target_path)

    # Move to the next task
    submit_open_foam_job.delay(cluster, job, *args, **kwargs)
Example #18
def _get_digest(task, repository, tag):
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)
    params = {'repository': repository, 'tag': tag}
    r = client.get('images', params)
    images = r['results']
    if len(images) < 1:
        log_and_raise(task, 'Image not found on the server.')

    # The digest should definitely be here
    return images[0]['digest']
Example #19
def create_proxy_entry(task, cluster, job):
    session_key = job['params']['sessionKey']
    host = cluster['config']['host']
    body = {
        'host': host,
        'port': cluster['config']['paraview']['websocketPort'],
        'key': session_key
    }
    client = create_girder_client(
                task.taskflow.girder_api_url, task.taskflow.girder_token)
    client.post('proxy', data=json.dumps(body))
Example #20
def create_proxy_entry(task, cluster, job):
    session_key = job['params']['sessionKey']
    host = cluster['config']['host']
    body = {
        'host': host,
        'port': cluster['config']['paraview']['websocketPort'],
        'key': session_key
    }
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)
    client.post('proxy', data=json.dumps(body))
Example #21
def _ensure_image_on_server(task, repository, tag, digest, container='docker'):
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)
    params = {'repository': repository, 'tag': tag, 'digest': digest}
    r = client.get('images', params)
    images = r['results']
    if len(images) < 1:
        log_and_raise(task, 'Image not found on the server.')

    if container not in images[0]:
        msg = 'Image does not have container type: ' + container
        log_and_raise(task, msg)
Example #22
def postprocess_job(task, _, input_, cluster, image, run_parameters,
                    root_folder, container_description, input_folder,
                    output_folder, scratch_folder, job):
    task.taskflow.logger.info('Processing the results of the job.')
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])

    upload_job_output_to_folder(cluster,
                                job,
                                girder_token=task.taskflow.girder_token)

    # remove the temporary input folder; this data is attached to the
    # calculation model
    client.delete('folder/%s' % input_folder['_id'])

    # clean up the scratch folder
    keep_scratch = run_parameters.get('keepScratch', False)
    if keep_scratch:
        scratch_folder_id = scratch_folder['_id']
    else:
        client.delete('folder/%s' % scratch_folder['_id'])
        scratch_folder_id = None

    # ingest the output of the calculation
    output_format = container_description['output']['format']
    output_file = None
    output_items = list(client.listItem(output_folder['_id']))
    for item in output_items:
        if item['name'] == 'output.%s' % output_format:
            files = list(client.listFile(item['_id']))
            if len(files) != 1:
                raise Exception(
                    'Expecting a single file under item, found: %s' %
                    len(files))
            output_file = files[0]
            break

    if output_file is None:
        raise Exception('The calculation did not produce any output file.')

    # Now call endpoint to ingest result
    body = {
        'fileId': output_file['_id'],
        'format': output_format,
        'public': True,
        # image now also has a digest field; add it to the calculation
        'image': image,
        'scratchFolderId': scratch_folder_id
    }
    client.put('calculations/%s' % input_['calculation']['_id'], json=body)
Example #23
def paraview_terminate(task):
    cluster = parse('meta.cluster').find(task.taskflow)
    if cluster:
        cluster = cluster[0].value
    else:
        task.logger.warning('Unable to extract cluster from taskflow. '
                            'Unable to terminate ParaView job.')
        return

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    jobs = task.taskflow.get('meta', {}).get('jobs', [])
    terminate_jobs(task, client, cluster, jobs)
Example #24
def paraview_terminate(task):
    cluster = parse('meta.cluster').find(task.taskflow)
    if cluster:
        cluster = cluster[0].value
    else:
        task.logger.warning('Unable to extract cluster from taskflow. '
                            'Unable to terminate ParaView job.')
        return

    client = create_girder_client(
            task.taskflow.girder_api_url, task.taskflow.girder_token)

    jobs = task.taskflow.get('meta', {}).get('jobs', [])
    terminate_jobs(task, client, cluster, jobs)
Example #25
def _create_description_job(task, cluster, description_folder, image,
                            run_parameters):
    params = _get_job_parameters(task, cluster, image, run_parameters)
    container = params['container']
    setup_commands = params['setupCommands']
    repository = params['repository']
    tag = params['tag']
    digest = params['digest']
    job_parameters = params['jobParameters']

    # Make sure the image exists on the server before we go further
    _ensure_image_on_server(task, repository, tag, digest, container)

    output_file = 'description.json'

    run_command = '%s run $IMAGE_NAME' % container
    # Shifter has a pretty different syntax, so special case it.
    if container == 'shifter':
        run_command = 'shifter --image=$IMAGE_NAME --entrypoint --'

    image_name = '%s:%s' % (repository, tag)
    if container == 'singularity':
        image_name = digest_to_sif(digest)

    commands = setup_commands + [
        'IMAGE_NAME=%s' % image_name,
        '%s -d > %s' % (run_command, output_file)
    ]

    body = {
        # ensure there are no special characters in the submission script name
        'name': 'desc_%s' % re.sub('[^a-zA-Z0-9]', '_', repository),
        'commands': commands,
        'input': [{
            'folderId': description_folder['_id'],
            'path': '.'
        }],
        'output': [{
            'folderId': description_folder['_id'],
            'path': '.'
        }],
        'uploadOutput': False,
        'params': job_parameters
    }

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    job = client.post('jobs', data=json.dumps(body))

    return job
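An illustration, with made-up repository and tag values, of the run commands assembled above; only shifter swaps in the alternate syntax.

repository, tag = 'openchemistry/psi4', 'latest'  # hypothetical
for container in ('docker', 'singularity', 'shifter'):
    if container == 'shifter':
        run_command = 'shifter --image=$IMAGE_NAME --entrypoint --'
    else:
        run_command = '%s run $IMAGE_NAME' % container
    print(container, '->', run_command)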
Example #26
def postprocess_job(task, _, user, cluster, image, job, folder, container):
    task.taskflow.logger.info('Finished pulling the container')

    task.taskflow.logger.info('Processing the results of the pull.')
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])

    upload_job_output_to_folder(cluster,
                                job,
                                girder_token=task.taskflow.girder_token)

    output_items = list(client.listItem(folder['_id']))
    output_file = None
    for item in output_items:
        if item['name'] == OUTPUT_FILE:
            files = list(client.listFile(item['_id']))
            if len(files) != 1:
                log_and_raise(
                    task, 'Expecting a single file under item, found: %s' %
                    len(files))
            output_file = files[0]
            break

    if output_file is None:
        log_and_raise(task, 'Could not locate output file: %s' % OUTPUT_FILE)

    # Download the output file
    output_io = io.BytesIO()
    client.downloadFile(output_file['_id'], output_io)

    # Remove the folder
    client.delete('folder/%s' % folder['_id'])

    pull_json = json.loads(output_io.getvalue().decode('utf-8'))
    image_uri = pull_json.get('imageUri')
    # Convert size to GB
    size = round(pull_json.get('size', 0) / 1.e9, 2)

    _ensure_image_uri_is_valid(task, container, image_uri)

    repository = image.get('repository')
    tag = image.get('tag')
    digest = _extract_digest(container, image_uri)

    post_image_to_database(client, container, repository, tag, digest, cluster,
                           size)

    task.taskflow.logger.info('Success!')
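The size conversion above in isolation: bytes to decimal gigabytes, rounded to two places.

size_bytes = 3178000000  # hypothetical value from pull.json
print(round(size_bytes / 1.e9, 2))  # 3.18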
Example #27
def start(task, input_, user, cluster, image, run_parameters):
    """
    The flow is the following:
    - Dry run the container with the -d flag to obtain a description of the input/output formats
    - Convert the cjson input geometry to conform to the container's expected format
    - Run the container
    - Convert the container output format into cjson
    - Ingest the output in the database
    """
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    if cluster.get('name') == 'cori':
        cluster = get_cori(client)

    if '_id' not in cluster:
        log_and_raise(task, 'Invalid cluster configuration: %s' % cluster)

    oc_folder = get_oc_folder(client)
    root_folder = client.createFolder(
        oc_folder['_id'],
        datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%f"))
    # temporary folder to save the container in/out description
    description_folder = client.createFolder(root_folder['_id'], 'description')

    job = _create_description_job(task, cluster, description_folder, image,
                                  run_parameters)

    task.taskflow.logger.info(
        'Preparing job to obtain the container description.')
    download_job_input_folders(cluster,
                               job,
                               girder_token=task.taskflow.girder_token,
                               submit=False)
    task.taskflow.logger.info(
        'Submitting job to obtain the container description.')

    submit_job(cluster,
               job,
               girder_token=task.taskflow.girder_token,
               monitor=False)

    monitor_job.apply_async(
        (cluster, job), {
            'girder_token': task.taskflow.girder_token,
            'monitor_interval': 10
        },
        countdown=countdown(cluster),
        link=postprocess_description.s(input_, user, cluster, image,
                                       run_parameters, root_folder, job,
                                       description_folder))
Example #28
def _update_cluster_config(task, cluster):
    if cluster['type'] == 'ec2':
        paraview_config = cluster['config'].setdefault('paraview', {})
        paraview_config['installDir'] = '/opt/paraview'
        paraview_config['websocketPort'] = 9000

        # Update ParaView config on cluster
        client = create_girder_client(task.taskflow.girder_api_url,
                                      task.taskflow.girder_token)
        client.patch('clusters/%s' % cluster['_id'],
                     data=json.dumps({
                         'config': {
                             'paraview': cluster['config']['paraview']
                         }
                     }))
Example #29
def _update_cluster_config(task, cluster):
    if cluster['type'] == 'ec2':
        paraview_config = cluster['config'].setdefault('paraview', {})
        paraview_config['installDir'] = '/opt/paraview'
        paraview_config['websocketPort'] = 9000

        # Update ParaView config on cluster
        client = create_girder_client(
            task.taskflow.girder_api_url, task.taskflow.girder_token)
        client.patch('clusters/%s' % cluster['_id'],
                     data=json.dumps({
                         'config': {
                             'paraview': cluster['config']['paraview']
                         }
                     }))
Example #30
def start(task, user, cluster, image, container):
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    if cluster.get('name') == 'cori':
        cluster = get_cori(client)

    if '_id' not in cluster:
        log_and_raise(task, 'Invalid cluster configuration: %s' % cluster)

    oc_folder = get_oc_folder(client)
    root_folder = client.createFolder(
        oc_folder['_id'],
        datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%f"))
    # temporary folder to save the output of the pull job
    folder = client.createFolder(root_folder['_id'], 'pull_folder')

    # save the pull.py script to the job directory
    with open(os.path.join(os.path.dirname(__file__), 'utils/pull.py'),
              'rb') as f:
        # Get the size of the file
        size = f.seek(0, 2)
        f.seek(0)
        name = 'pull.py'
        client.uploadFile(folder['_id'], f, name, size, parentType='folder')

    job = _create_job(task, cluster, folder, image, container)

    # Now download pull.py script to the cluster
    task.taskflow.logger.info('Preparing job to pull the container.')
    download_job_input_folders(cluster,
                               job,
                               girder_token=task.taskflow.girder_token,
                               submit=False)

    task.taskflow.logger.info('Submitting job to pull the container.')
    submit_job(cluster,
               job,
               girder_token=task.taskflow.girder_token,
               monitor=False)

    monitor_job.apply_async((cluster, job), {
        'girder_token': task.taskflow.girder_token,
        'monitor_interval': 10
    },
                            countdown=countdown(cluster),
                            link=postprocess_job.s(user, cluster, image, job,
                                                   folder, container))
Example #31
def upload_output(task, cluster, job, *args, **kwargs):
    task.taskflow.logger.info('Uploading results from cluster')

    # Refresh state of job
    client = create_girder_client(
            task.taskflow.girder_api_url, task.taskflow.girder_token)
    job = client.get('jobs/%s' % job['_id'])

    output_folder_id = parse('output.folder.id').find(kwargs)
    if output_folder_id:
        output_folder_id = output_folder_id[0].value
        job['output'] = [{
            'folderId': output_folder_id,
            'path': '.'
        }]

    upload_job_output_to_folder(cluster, job, log_write_url=None, job_dir=None,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload complete.')
Example #32
def upload_output(task, cluster, job, *args, **kwargs):
    task.taskflow.logger.info('Uploading results from cluster')

    # Refresh state of job
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)
    job = client.get('jobs/%s' % job['_id'])

    output_folder_id = parse('output.folder.id').find(kwargs)
    if output_folder_id:
        output_folder_id = output_folder_id[0].value
        job['output'] = [{'folderId': output_folder_id, 'path': '.'}]

    upload_job_output_to_folder(cluster,
                                job,
                                log_write_url=None,
                                job_dir=None,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload complete.')
Example #33
def upload_input(task, cluster, job, *args, **kwargs):
    file_id = parse('input.file.id').find(kwargs)
    if file_id:
        file_id = file_id[0].value
        task.logger.info('Visualizing file ID: %s' % file_id)
        job['params']['dataDir'] = '.'

        # Fetch the file
        girder_client = create_girder_client(
            task.taskflow.girder_api_url, task.taskflow.girder_token)
        file = girder_client.getResource('file', file_id)

        # Set the file to load
        filename = file['name']
        job['params']['fileName'] = filename
        task.logger.info('Filename is: %s' % filename)

        task.logger.info('Uploading file to cluster.')
        job_dir = job_directory(cluster, job)
        upload_file(cluster, task.taskflow.girder_token, file, job_dir)
        task.logger.info('Upload complete.')
Example #34
def upload_export_output(task, _, cluster, job, *args, **kwargs):
    output_folder_id = kwargs['output']['folder']['id']

    client = create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)

    for job_id in task.taskflow.get_metadata('export_jobs')['export_jobs']:
        # Get job
        export_job = client.get('jobs/%s' % job_id)
        export_job['output'] = [{
            'folderId': output_folder_id,
            'path': '.'
        }]

        upload_job_output_to_folder(cluster, export_job, log_write_url=None,
            job_dir=None, girder_token=task.taskflow.girder_token)

    # Upload the vtu files
    girder_token = task.taskflow.girder_token
    download_path_from_cluster(cluster, girder_token, output_folder_id, job['dir'],
                               include=['^.*\\.vtu$'])
Example #35
def upload_export_output(task, _, cluster, job, *args, **kwargs):
    output_folder_id = kwargs['output']['folder']['id']

    client = create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)

    for job_id in task.taskflow.get_metadata('export_jobs')['export_jobs']:
        # Get job
        export_job = client.get('jobs/%s' % job_id)
        export_job['output'] = [{
            'folderId': output_folder_id,
            'path': '.'
        }]

        upload_job_output_to_folder(cluster, export_job, log_write_url=None,
            job_dir=None, girder_token=task.taskflow.girder_token)

    # Upload the vtu files
    girder_token = task.taskflow.girder_token
    download_path_from_cluster(cluster, girder_token, output_folder_id, job['dir'],
                               include=['^.*\\.vtu$'])
Example #36
def _create_description_job(task, cluster, description_folder, image,
                            run_parameters):
    params = _get_job_parameters(cluster, image, run_parameters)
    container = params['container']
    setup_commands = params['setupCommands']
    repository = params['repository']
    tag = params['tag']

    job_params = {'taskFlowId': task.taskflow.id}

    output_file = 'description.json'

    commands = setup_commands + [
        'IMAGE_NAME=$(python pull.py -r %s -t %s -c %s | tail -1)' %
        (repository, tag, container),
        '%s run $IMAGE_NAME -d > %s' % (container, output_file), 'rm pull.py'
    ]

    body = {
        # ensure there are no special characters in the submission script name
        'name': 'desc_%s' % re.sub('[^a-zA-Z0-9]', '_', repository),
        'commands': commands,
        'input': [{
            'folderId': description_folder['_id'],
            'path': '.'
        }],
        'output': [{
            'folderId': description_folder['_id'],
            'path': '.'
        }],
        'uploadOutput': False,
        'params': job_params
    }

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    job = client.post('jobs', data=json.dumps(body))

    return job
Example #37
def upload_output(task, upstream_result):
    task.taskflow.logger.info('Uploading results from cluster')
    output_folder_id = upstream_result['output']['folder']['id']
    cluster = upstream_result['cluster']
    job = upstream_result['job']

    client = create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])
    job['output'] = [{
        'folderId': output_folder_id,
        'path': '.'
    }]

    upload_job_output_to_folder(cluster, job, log_write_url=None, job_dir=None,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload job output complete.')

    return upstream_result
Example #38
def setup_input(task, *args, **kwargs):
    input_folder_id = kwargs['input']['folder']['id']

    number_of_procs = kwargs.get('numberOfSlots')
    if not number_of_procs:
        number_of_procs = kwargs.get('numberOfNodes')

    if not number_of_procs:
        size = parse('cluster.config.launch.params.node_instance_count').find(kwargs)
        if size:
            number_of_procs = size[0].value + 1
        else:
            raise Exception('Unable to extract number of nodes in cluster')

    if not number_of_procs:
        raise Exception('Unable to determine number of mpi processes to run.')

    number_of_procs = int(number_of_procs)
    kwargs['numberOfProcs'] = number_of_procs

    client = create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)

    geometry_file_id = parse('input.geometryFile.id').find(kwargs)
    if geometry_file_id:
        geometry_file_id = geometry_file_id[0].value
        kwargs['geometryFileId'] = geometry_file_id

        # Get the geometry file metadata to see if we need to import
        geometry_file = client.getResource('file/%s' % geometry_file_id)
        kwargs['geometryFilename'] = geometry_file['name']

    ini_file_id = kwargs['input']['nwFile']['id']
    ini_file = client.getResource('file/%s' % ini_file_id)
    kwargs['nwFilename'] = ini_file['name']

    return kwargs
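A standalone sketch of the process-count fallback chain used above, with made-up kwargs; assumes `parse` is jsonpath-rw.

from jsonpath_rw import parse

kwargs = {'cluster': {'config': {'launch': {'params': {'node_instance_count': 3}}}}}
number_of_procs = kwargs.get('numberOfSlots') or kwargs.get('numberOfNodes')
if not number_of_procs:
    size = parse('cluster.config.launch.params.node_instance_count').find(kwargs)
    if size:
        number_of_procs = size[0].value + 1  # + 1 as above, presumably for the head node
print(number_of_procs)  # 4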
Example #39
def postprocess(task, _, run_folder, input_, cluster, job):
    task.taskflow.logger.info('Uploading results from cluster')

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    output_folder = client.createFolder(run_folder['_id'], 'output')
    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])
    job['output'] = [{'folderId': output_folder['_id'], 'path': '.'}]

    upload_job_output_to_folder(cluster,
                                job,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload job output complete.')

    input_file_name = task.taskflow.get_metadata('inputFileName')
    # Call to ingest the files
    for item in client.listItem(output_folder['_id']):
        if item['name'].endswith('.json'):
            files = list(client.listFile(item['_id']))
            if len(files) != 1:
                raise Exception(
                    'Expecting a single file under item, found: %s' %
                    len(files))

            json_output_file_id = files[0]['_id']
            # Now call endpoint to ingest result
            body = {
                'calculationId': input_['calculation']['_id'],
                'fileId': json_output_file_id,
                'public': True
            }

            client.post('molecules', json=body)
Example #40
def create_job(task, *args, **kwargs):
    task.logger.info('Taskflow %s' % task.taskflow.id)
    task.taskflow.logger.info('Create PyFr job.')
    input_folder_id = kwargs['input']['folder']['id']

    backend = kwargs['backend']['type']

    body = {
        'name': 'pyfr_run',
        'commands': [
            "mpiexec -n %s pyfr run -b %s input/%s input/%s" % (
                kwargs['numberOfProcs'],
                backend,
                kwargs['meshFilename'],
                kwargs['iniFilename'])
        ],
        'input': [
            {
              'folderId': input_folder_id,
              'path': 'input'
            }
        ],
        'output': [],
        'params': {
            'numberOfSlots': kwargs['numberOfProcs']
        }
    }

    client = create_girder_client(
                task.taskflow.girder_api_url, task.taskflow.girder_token)

    job = client.post('jobs', data=json.dumps(body))

    task.taskflow.set_metadata('jobs', [job])

    submit.delay(job, *args, **kwargs)
Example #41
def pyfr_terminate(task):
    cluster = task.taskflow['meta']['cluster']
    client = create_girder_client(
                task.taskflow.girder_api_url, task.taskflow.girder_token)
    terminate_jobs(
        task, client, cluster, task.taskflow.get('meta', {}).get('jobs', []))
Example #42
def upload_output(task, _, cluster, job, *args, **kwargs):
    task.taskflow.logger.info('Uploading results from cluster')
    output_folder_id = kwargs['output']['folder']['id']

    client = create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])
    job['output'] = [{
        'folderId': output_folder_id,
        'path': '.'
    }]

    upload_job_output_to_folder(cluster, job, log_write_url=None, job_dir=None,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload job output complete.')

    mesh_file_id = kwargs.pop('meshFileId')

    solution_files = list(_list_solution_files(client, output_folder_id))

    if len(solution_files) == 0:
        raise Exception('No solution files were produced, '
                        'please check output files for errors.')

    # Record the name of the first vtu file that should be loaded for this
    # run. This can then be used to know which file to open as part of any viz
    # step.
    file_names = [f['name'] for f in solution_files]
    file_names.sort()
    vtu_file = '%s.vtu' % file_names[0].rsplit('.', 1)[0]
    task.taskflow.set_metadata('vtuFile', vtu_file)

    number_files = len(solution_files)

    # By default export solution files to VTK format using a set of batch jobs
    if not kwargs.get('exportInTaskFlow'):

        number_of_jobs = kwargs['numberOfProcs']
        task.logger.info('Generating %d export jobs' % number_of_jobs)

        sim_job_dir = job['dir']
        jobs = []
        job_index = 1
        for chunk in [solution_files[i::number_of_jobs] for i in xrange(number_of_jobs)]:
            if chunk:
                name = 'pyfr_export_%d' % job_index
                mesh_filename = kwargs['meshFilename']
                export_job = create_export_job(
                    task, name, chunk, sim_job_dir, mesh_filename)
                submit_job(cluster, export_job, log_write_url=None,
                           girder_token=task.taskflow.girder_token,
                           monitor=False)
                jobs.append(export_job)
                job_index += 1

        # Update the jobs list in the metadata
        task.taskflow.set_metadata('jobs', jobs + [job])
        # Also save just the export job ids
        task.taskflow.set_metadata('export_jobs', [j['_id'] for j in jobs])

        monitor_jobs.apply_async(
            (cluster, jobs), {'girder_token': task.taskflow.girder_token},
            link=upload_export_output.s(cluster, job, *args, **kwargs))
    else:
        # The number 100 is pretty arbitrary!
        if number_files < 100:
            export_output.delay(
                output_folder_id, mesh_file_id, solution_files)
        # Break into chunks and run in parallel
        else:
            for chunk in [solution_files[i::NUMBER__OF_EXPORT_TASKS] for i in xrange(NUMBER__OF_EXPORT_TASKS)]:
                export_output.delay(output_folder_id, mesh_file_id, chunk)
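The stride slicing used above to spread solution files across export jobs and export tasks, as a worked example.

solution_files = ['s0', 's1', 's2', 's3', 's4']
number_of_jobs = 2
chunks = [solution_files[i::number_of_jobs] for i in range(number_of_jobs)]
print(chunks)  # [['s0', 's2', 's4'], ['s1', 's3']]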
Example #43
def setup_input(task, *args, **kwargs):
    input_folder_id = kwargs['input']['folder']['id']
    mesh_file_id = kwargs['input']['meshFile']['id']
    kwargs['meshFileId'] = mesh_file_id

    number_of_procs = kwargs.get('numberOfSlots')
    if not number_of_procs:
        number_of_procs = kwargs.get('numberOfNodes')

    if not number_of_procs:
        size = parse('cluster.config.launch.params.node_instance_count').find(kwargs)
        if size:
            number_of_procs = size[0].value + 1
        else:
            raise Exception('Unable to extract number of nodes in cluster')

    if not number_of_procs:
        raise Exception('Unable to determine number of mpi processes to run.')

    number_of_procs = int(number_of_procs)
    kwargs['numberOfProcs'] = number_of_procs

    client = create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)

    # Get the mesh file metadata to see if we need to import
    mesh_file = client.getResource('file/%s' % mesh_file_id)

    import_mesh = True
    if PYFR_MESH_EXT in mesh_file['exts']:
        task.logger.info('Mesh is already in pyfrm format.')
        kwargs['meshFilename'] = mesh_file['name']
        import_mesh = False

    if import_mesh or number_of_procs > 1:
        task.logger.info('Downloading input mesh.')

        try:
            _, input_path = tempfile.mkstemp()
            output_dir = tempfile.mkdtemp()
            mesh_filename = '%s.pyfrm' % (mesh_file['name'].rsplit('.', 1)[0])
            output_path = os.path.join(output_dir, mesh_filename)
            kwargs['meshFilename'] = mesh_filename

            client.downloadFile(mesh_file_id, input_path)
            task.logger.info('Downloading complete.')

            if import_mesh:
                task.taskflow.logger.info('Importing mesh into PyFr format.')

                extn = mesh_file['exts'][0]
                task.logger.info('Converting mesh to pyfrm format.')
                _import_mesh(task.taskflow.logger, input_path, output_path, extn)
                task.logger.info('Conversion complete.')

            task.logger.info('Partitioning the mesh.')

            if number_of_procs > 1:
                _partition_mesh(
                    task.logger, output_path, output_dir, number_of_procs)
            else:
                task.logger.info('Skipping partitioning; we are running on '
                                 'a single node.')

            task.logger.info('Partitioning complete.')

            task.logger.info('Uploading converted mesh.')
            size = os.path.getsize(output_path)
            with open(output_path, 'rb') as fp:
                girder_file = client.uploadFile(
                    input_folder_id, fp, mesh_filename, size=size,
                    parentType='folder')
                kwargs['meshFileId'] = girder_file['_id']

            task.logger.info('Upload input complete.')

            task.logger.info('Updating backend configuration.')

        finally:
            if os.path.exists(input_path):
                os.remove(input_path)
            if os.path.exists(output_dir):
                shutil.rmtree(output_dir)

    # If we are running in the cloud determine backend to use
    if kwargs['cluster']['type'] == 'ec2':
        pyfr_config = {}
        # If we have GPUs use cuda
        if has_gpus(kwargs['cluster']):
            backend = {
                'type': 'cuda',
                'device-id': 'round-robin'
            }

            # Update cluster configuration
            pyfr_config['cuda'] = True

        # Use OpenMP
        else:
            backend = {
                'type': 'openmp',
                'cblas': '/usr/lib/openblas-base/libblas.so'
            }

            # Update cluster configuration
            pyfr_config['openmp'] = [{
                'name' : 'ec2profile',
                'cblas' : "/usr/lib/libblas/libblas.so",
            }]

        kwargs['backend'] = backend

        # Patch cluster in Girder
        client.patch('clusters/%s' % kwargs['cluster']['_id'],
                     data=json.dumps({
                         'config': {
                             'pyfr': pyfr_config
                         }
                     }))

    update_config_file(task, client, *args, **kwargs)

    ini_file_id = kwargs['input']['iniFile']['id']
    ini_file = client.getResource('file/%s' % ini_file_id)
    kwargs['iniFilename'] = ini_file['name']

    create_job.delay(*args, **kwargs)
Example #44
def start(task, input_, cluster, image, run_parameters):
    """
    The flow is the following:
    - Dry run the container with the -d flag to obtain a description of the input/output formats
    - Convert the cjson input geometry to conform to the container's expected format
    - Run the container
    - Convert the container output format into cjson
    - Ingest the output in the database
    """
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    if cluster.get('name') == 'cori':
        cluster = _get_cori(client)

    if '_id' not in cluster:
        raise Exception('Invalid cluster configuration: %s' % cluster)

    oc_folder = _get_oc_folder(client)
    root_folder = client.createFolder(
        oc_folder['_id'],
        datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%f"))
    # temporary folder to save the container in/out description
    description_folder = client.createFolder(root_folder['_id'], 'description')

    # save the pull.py script to the job directory
    with open(os.path.join(os.path.dirname(__file__), 'utils/pull.py'),
              'rb') as f:
        # Get the size of the file
        size = f.seek(0, 2)
        f.seek(0)
        name = 'pull.py'
        input_parameters_file = client.uploadFile(description_folder['_id'],
                                                  f,
                                                  name,
                                                  size,
                                                  parentType='folder')

    job = _create_description_job(task, cluster, description_folder, image,
                                  run_parameters)

    # Now download pull.py script to the cluster
    task.taskflow.logger.info(
        'Downloading description input files to cluster.')
    download_job_input_folders(cluster,
                               job,
                               girder_token=task.taskflow.girder_token,
                               submit=False)
    task.taskflow.logger.info('Downloading complete.')

    submit_job(cluster,
               job,
               girder_token=task.taskflow.girder_token,
               monitor=False)

    monitor_job.apply_async(
        (cluster, job), {
            'girder_token': task.taskflow.girder_token,
            'monitor_interval': 10
        },
        link=postprocess_description.s(input_, cluster, image, run_parameters,
                                       root_folder, job, description_folder))
Example #45
def postprocess_description(task, _, input_, cluster, image, run_parameters,
                            root_folder, description_job, description_folder):
    task.taskflow.logger.info('Processing description job output.')

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    description_job = client.get('jobs/%s' % description_job['_id'])

    upload_job_output_to_folder(cluster,
                                description_job,
                                girder_token=task.taskflow.girder_token)

    description_items = list(client.listItem(description_folder['_id']))

    description_file = None
    pull_file = None
    for item in description_items:
        if item['name'] == 'description.json':
            files = list(client.listFile(item['_id']))
            if len(files) != 1:
                raise Exception(
                    'Expecting a single file under item, found: %s' %
                    len(files))
            description_file = files[0]

        elif item['name'] == 'pull.json':
            files = list(client.listFile(item['_id']))
            if len(files) != 1:
                raise Exception(
                    'Expecting a single file under item, found: %s' %
                    len(files))
            pull_file = files[0]

    if pull_file is None:
        raise Exception(
            'There was an error trying to pull the requested container image')

    if description_file is None:
        raise Exception(
            'The container does not correctly implement the --description flag'
        )

    with tempfile.TemporaryFile() as tf:
        client.downloadFile(pull_file['_id'], tf)
        tf.seek(0)
        container_pull = json.loads(tf.read().decode())

    image = container_pull

    with tempfile.TemporaryFile() as tf:
        client.downloadFile(description_file['_id'], tf)
        tf.seek(0)
        container_description = json.loads(tf.read().decode())

    # remove temporary description folder
    client.delete('folder/%s' % description_folder['_id'])

    setup_input.delay(input_, cluster, image, run_parameters, root_folder,
                      container_description)
Example #46
def setup_input(task, input_, cluster, image, run_parameters, root_folder,
                container_description):
    task.taskflow.logger.info('Setting up calculation input.')

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    if cluster.get('name') == 'cori':
        cluster = _get_cori(client)

    if '_id' not in cluster:
        raise Exception('Invalid cluster configuration: %s' % cluster)

    calculation_id = parse('calculation._id').find(input_)
    if not calculation_id:
        raise Exception('Unable to extract calculation id.')

    calculation_id = calculation_id[0].value
    calculation = client.get('calculations/%s' % calculation_id)
    molecule_id = calculation['moleculeId']

    input_parameters = calculation.get('input', {}).get('parameters', {})

    # Fetch the starting geometry
    input_geometry = calculation.get('input', {}).get('geometry', None)
    if input_geometry is None:
        r = client.get('molecules/%s/cjson' % molecule_id, jsonResp=False)
        cjson = r.json()
    else:
        # TODO: implement the path where a specific input geometry exists
        raise NotImplementedError(
            'Running a calculation with a specific geometry is not implemented yet.'
        )

    input_format = container_description['input']['format']
    output_format = container_description['output']['format']

    # The folder where the input geometry and input parameters are
    input_folder = client.createFolder(root_folder['_id'], 'input')
    # The folder where the converted output will be at the end of the job
    output_folder = client.createFolder(root_folder['_id'], 'output')
    # The folder where the raw input/output files of the specific code are stored
    scratch_folder = client.createFolder(root_folder['_id'], 'scratch')

    # Save the input parameters to file
    with tempfile.TemporaryFile() as fp:
        fp.write(json.dumps(input_parameters).encode())
        # Get the size of the file
        size = fp.seek(0, 2)
        fp.seek(0)
        name = 'input_parameters.json'
        input_parameters_file = client.uploadFile(input_folder['_id'],
                                                  fp,
                                                  name,
                                                  size,
                                                  parentType='folder')

    # Save the input geometry to file
    with tempfile.TemporaryFile() as fp:
        content = _convert_geometry(cjson, input_format)
        fp.write(content.encode())
        # Get the size of the file
        size = fp.seek(0, 2)
        fp.seek(0)
        name = 'geometry.%s' % input_format
        input_geometry_file = client.uploadFile(input_folder['_id'],
                                                fp,
                                                name,
                                                size,
                                                parentType='folder')

    submit_calculation.delay(input_, cluster, image, run_parameters,
                             root_folder, container_description, input_folder,
                             output_folder, scratch_folder)
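The upload pattern used twice above, in isolation: write to a temporary file, measure its size by seeking to the end, rewind, then hand the open handle to uploadFile. The Girder call is left commented out since it needs a live client.

import json
import tempfile

with tempfile.TemporaryFile() as fp:
    fp.write(json.dumps({'theory': 'dft'}).encode())  # hypothetical payload
    size = fp.seek(0, 2)  # seek returns the new offset, i.e. the file size
    fp.seek(0)
    # client.uploadFile(input_folder['_id'], fp, 'input_parameters.json',
    #                   size, parentType='folder')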
Example #47
def _create_job(task, cluster, image, run_parameters, container_description,
                input_folder, output_folder, scratch_folder):
    params = _get_job_parameters(cluster, image, run_parameters)
    container = params['container']
    image_uri = params['imageUri']
    repository = params['repository']
    digest = params['digest']
    host_dir = params['hostDir']
    guest_dir = params['guestDir']
    job_dir = params['jobDir']

    task.taskflow.logger.info('Create %s job' % repository)

    input_format = container_description['input']['format']
    output_format = container_description['output']['format']

    input_dir = os.path.join(guest_dir, job_dir, 'input')
    output_dir = os.path.join(guest_dir, job_dir, 'output')
    scratch_dir = os.path.join(guest_dir, job_dir, 'scratch')

    geometry_filename = os.path.join(input_dir, 'geometry.%s' % input_format)
    parameters_filename = os.path.join(input_dir, 'input_parameters.json')
    output_filename = os.path.join(output_dir, 'output.%s' % output_format)

    output = [{'folderId': output_folder['_id'], 'path': './output'}]

    keep_scratch = run_parameters.get('keepScratch', False)
    if keep_scratch:
        output.append({'folderId': scratch_folder['_id'], 'path': './scratch'})

    if container == 'docker':
        mount_option = '-v %s:%s' % (host_dir, guest_dir)
    else:
        mount_option = ''

    body = {
        # ensure there are no special characters in the submission script name
        'name': 'run_%s' % re.sub('[^a-zA-Z0-9]', '_', repository),
        'commands': [
            'mkdir output', 'mkdir scratch',
            '%s run %s %s -g %s -p %s -o %s -s %s' %
            (container, mount_option, image_uri, geometry_filename,
             parameters_filename, output_filename, scratch_dir)
        ],
        'input': [{
            'folderId': input_folder['_id'],
            'path': './input'
        }],
        'output': output,
        'uploadOutput': False,
        'params': {
            'taskFlowId': task.taskflow.id
        }
    }

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    job = client.post('jobs', data=json.dumps(body))
    task.taskflow.set_metadata('jobs', [job])

    return job
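The mount flag assembled above, reduced to its essentials; the paths are hypothetical and only the docker runtime gets a -v bind mount.

host_dir, guest_dir = '/home/user/job', '/data'  # hypothetical
container = 'docker'
mount_option = '-v %s:%s' % (host_dir, guest_dir) if container == 'docker' else ''
print(mount_option)  # -v /home/user/job:/data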