예제 #1
0
파일: job.py 프로젝트: jor-/simulation
    def __init__(self, output_dir, parameters, cf_kind, eval_f=True, eval_df=True, nodes_setup=None, **cf_kargs):
        """Prepare a batch job that evaluates a cost function in a generated Python script.

        The constructor stores the cost function options, initialises the job
        file, writes ``run.py`` into ``output_dir`` and finally writes the job
        file that runs this script.

        Parameters:
            output_dir: Directory passed to the parent constructor; ``run.py``
                is written into it.
            parameters: Model parameters. Stored in the options and passed,
                formatted with ``DATABASE_PARAMETERS_FORMAT_STRING``, to
                ``cf.f`` / ``cf.df`` in the generated script.
            cf_kind: Name of the class in ``simulation.optimization.cost_function``
                that the generated script instantiates.
            eval_f: If true, the generated script calls ``cf.f``.
            eval_df: If true, the generated script calls ``cf.df``.
            nodes_setup: Node setup for the job. If ``None``, a copy of
                ``COST_FUNCTION_NODES_SETUP_JOB`` is used. NOTE: the ``'memory'``
                entry is always overwritten, also on a caller supplied object.
            **cf_kargs: Keyword arguments for the cost function. ``'data_kind'``
                is required. ``'job_setup'`` is optional and handled separately.
                ``'correlation_min_values'`` and ``'correlation_max_year_diff'``
                are read for ``cf_kind == 'GLS'``.

        Raises:
            KeyError: If ``'data_kind'`` (or a needed GLS option) is missing.
            ValueError: If no memory requirement is known for ``data_kind``.
        """
        from simulation.optimization.constants import COST_FUNCTION_NODES_SETUP_JOB

        logger.debug('Initiating cost function job with cf_kind {}, eval_f {} and eval_df {}.'.format(cf_kind, eval_f, eval_df))

        super().__init__(output_dir)

        ## prepare job name
        data_kind = cf_kargs['data_kind']
        try:
            job_name = cf_kargs['job_setup']['name']
        except (KeyError, TypeError):
            # no job setup passed, job setup is None or it has no name
            job_name = '{}_{}'.format(data_kind, cf_kind)
            if cf_kind == 'GLS':
                job_name = job_name + '_{}_{}'.format(cf_kargs['correlation_min_values'], cf_kargs['correlation_max_year_diff'])

        ## prepare job_setup (it is not a plain cost function option)
        job_setup = cf_kargs.pop('job_setup', None)

        ## save CF options
        self.options['/cf/kind'] = cf_kind
        self.options['/cf/parameters'] = parameters
        for key, value in cf_kargs.items():
            if value is not None:
                self.options['/cf/{}'.format(key)] = value

        python_script_file = os.path.join(output_dir, 'run.py')
        self.options['/cf/run_file'] = python_script_file

        ## prepare job options and init job file
        # For GLS the memory depends on correlation_min_values. Each entry is a
        # sequence of (lower bound, memory in GB) pairs which are checked in
        # order. A lower bound of None marks the fallback value.
        gls_memory_gb = {
            'OLDWOD': ((35, 30), (30, 35), (None, 45)),
            'WOD': ((45, 30), (40, 35), (35, 40), (None, 45)),
            'WOD.1': ((45, 25), (40, 30), (35, 30), (None, 35)),
            'WOD.0': ((45, 25), (40, 25), (35, 25), (None, 25)),
        }

        if data_kind == 'WOA':
            memory_gb = 2
        elif 'WOD' in data_kind:
            memory_gb = 24
            if cf_kind == 'GLS' and data_kind in gls_memory_gb:
                correlation_min_values = cf_kargs['correlation_min_values']
                for lower_bound, needed_memory_gb in gls_memory_gb[data_kind]:
                    if lower_bound is None or correlation_min_values >= lower_bound:
                        memory_gb = needed_memory_gb
                        break
        else:
            # previously memory_gb stayed unassigned here and the assignment below failed with an UnboundLocalError
            raise ValueError('No memory requirement is known for data kind {}.'.format(data_kind))

        if nodes_setup is None:
            nodes_setup = COST_FUNCTION_NODES_SETUP_JOB.copy()
        nodes_setup['memory'] = memory_gb
        super().init_job_file(job_name, nodes_setup, queue=None)

        ## convert inf to negative for script
        if 'correlation_max_year_diff' in cf_kargs and cf_kargs['correlation_max_year_diff'] == float('inf'):
            cf_kargs['correlation_max_year_diff'] = -1

        ## write python script
        commands = [
            'import util.logging',
            'import numpy as np',
            'with util.logging.Logger():',
            '    import simulation.optimization.cost_function',
            '    cf_kargs = {}'.format(cf_kargs),
        ]
        if job_setup is not None:
            commands.append('    import util.batch.universal.system')
            commands.append('    job_setup = {}')
            for setup_name in ('spinup', 'derivative', 'trajectory'):
                if setup_name in job_setup:
                    # the string representation of the node setup is used as constructor call in the script
                    setup_nodes_setup = job_setup[setup_name]['nodes_setup']
                    nodes_setup_str = 'util.batch.universal.system.{}'.format(setup_nodes_setup)
                    job_setup_str = "{'" + setup_name + "':{'nodes_setup':" + nodes_setup_str + "}}"
                    commands.append("    job_setup.update({})".format(job_setup_str))
            commands.append("    cf_kargs.update({'job_setup':job_setup})")
        commands.append('    cf = simulation.optimization.cost_function.{}(**cf_kargs)'.format(cf_kind))

        from simulation.model.constants import DATABASE_PARAMETERS_FORMAT_STRING
        parameters_str = str(tuple(DATABASE_PARAMETERS_FORMAT_STRING.format(parameter) for parameter in parameters))
        # remove the quotes so that the script contains numbers and not strings
        parameters_str = parameters_str.replace("'", '')
        if eval_f:
            commands.append('    cf.f({})'.format(parameters_str))
        if eval_df:
            commands.append('    cf.df({})'.format(parameters_str))

        script_str = os.linesep.join(commands)
        # NOTE(review): replaces every occurrence of 'array' in the script,
        # presumably so that numpy arrays printed within cf_kargs can be evaluated - confirm
        script_str = script_str.replace('array', 'np.array')

        f = open(python_script_file, mode='w')
        try:
            f.write(script_str)
        finally:
            # close the file even if writing failed
            util.io.fs.flush_and_close(f)

        ## prepare run command and write job file
        def export_env_command(env_name):
            # return the export command for the environment variable or an empty string if it is not set
            try:
                env_value = util.io.env.load(env_name)
            except util.io.env.EnvironmentLookupError:
                return ''
            else:
                return 'export {env_name}={env_value}'.format(env_name=env_name, env_value=env_value)

        env_names = [
            simulation.constants.BASE_DIR_ENV_NAME,
            simulation.constants.SIMULATION_OUTPUT_DIR_ENV_NAME,
            simulation.constants.METOS3D_DIR_ENV_NAME,
            measurements.constants.BASE_DIR_ENV_NAME,
            util.batch.universal.system.BATCH_SYSTEM_ENV_NAME,
            util.io.env.PYTHONPATH_ENV_NAME,
        ]
        env_commands = [export_env_command(env_name) for env_name in env_names]
        pre_run_command = os.linesep.join(env_command for env_command in env_commands if env_command)

        python_command = util.batch.universal.system.BATCH_SYSTEM.commands['python']
        run_command = '{python_command} {python_script_file}'.format(python_command=python_command, python_script_file=python_script_file)

        super().write_job_file(run_command, pre_run_command=pre_run_command, modules=['intel'])
예제 #2
0
파일: job.py 프로젝트: jor-/simulation
    def write_job_file(self, model_name, model_parameters, years, tolerance=None, time_step=1, total_concentration_factor=1, write_trajectory=False, tracer_input_dir=None, job_setup=None):
        """Set the job options, write the Metos3D option file and write the job file.

        Parameters:
            model_name: Name of the model. Used as key for ``MODEL_TRACER`` and
                ``MODEL_DEFAULT_INITIAL_CONCENTRATION`` and as part of the job name.
            model_parameters: The model parameters. They are converted to a
                float64 array which must have 7 entries.
            years: Number of years written as spinup count. Must be >= 0.
            tolerance: Optional spinup tolerance. Must be >= 0 if passed. If
                ``None``, no tolerance is written to the option file.
            time_step: Time step multiplier. Must be in ``METOS_TIME_STEPS``.
            total_concentration_factor: Factor by which the initial
                concentration or the tracer input is multiplied. Must be >= 0.
            write_trajectory: If true, the tracer output is written to the
                ``trajectory/`` subdirectory and the spinup monitor file
                options are added to the option file.
            tracer_input_dir: Optional directory with tracer input. If passed,
                it is stored in the options and input files are linked (factor
                equals 1) or scaled copies are written into the output
                directory. The files are located via ``self.tracer_input_dir``.
            job_setup: Optional mapping with the optional keys ``'name'`` and
                ``'nodes_setup'``. NOTE: unset values of a passed node setup
                (memory, walltime, ...) are filled in on the passed object.

        Raises:
            ValueError: If ``time_step`` is not supported or if ``years``,
                ``tolerance`` or ``total_concentration_factor`` is negative.
        """
        from simulation.model.constants import JOB_MEMORY_GB, DATABASE_PARAMETERS_FORMAT_STRING, METOS_T_DIM, METOS_TIME_STEPS, METOS_DATA_DIR, METOS_SIM_FILE, MODEL_DEFAULT_INITIAL_CONCENTRATION, MODEL_TRACER

        logger.debug('Initialising job with years {}, tolerance {}, time step {}, total concentration factor {}, tracer_input_dir {} and job_setup {}.'.format(years, tolerance, time_step, total_concentration_factor, tracer_input_dir, job_setup))

        ## check input
        if time_step not in METOS_TIME_STEPS:
            raise ValueError('Wrong time_step in model options. Time step has to be in {} but it is {} .'.format(METOS_TIME_STEPS, time_step))
        assert METOS_T_DIM % time_step == 0

        if years < 0:
            raise ValueError('Years must be greater or equal 0, but it is {} .'.format(years))
        # tolerance is optional, None must not be compared with a number
        if tolerance is not None and tolerance < 0:
            raise ValueError('Tolerance must be greater or equal 0, but it is {} .'.format(tolerance))
        if total_concentration_factor < 0:
            raise ValueError('Total_concentration_factor must be greater or equal 0, but it is {} .'.format(total_concentration_factor))

        ## unpack job setup
        if job_setup is not None:
            try:
                job_name = job_setup['name']
            except KeyError:
                job_name = 'Metos3D'
            try:
                nodes_setup = job_setup['nodes_setup']
            except KeyError:
                nodes_setup = None
        else:
            job_name = ''
            nodes_setup = None

        ## prepare job name
        if len(job_name) > 0:
            job_name += '_'
        job_name += '{}_{}_{}'.format(model_name, years, time_step)

        ## use best node setup if no node setup passed
        if nodes_setup is None:
            nodes_setup = util.batch.universal.system.NodeSetup()

        ## check/set memory
        if nodes_setup.memory is None:
            nodes_setup.memory = JOB_MEMORY_GB
        elif nodes_setup.memory < JOB_MEMORY_GB:
            logger.warn('The chosen memory {} is below the needed memory {}. Changing to needed memory.'.format(nodes_setup.memory, JOB_MEMORY_GB))
            nodes_setup.memory = JOB_MEMORY_GB

        ## check/set walltime
        # estimate of the seconds needed per model year, depending on the total number of cpus and the time step
        sec_per_year = np.exp(- (nodes_setup.nodes * nodes_setup.cpus) / (6*16)) * 10 + 2.5
        sec_per_year /= time_step**(1/2)
        estimated_walltime_hours = np.ceil(years * sec_per_year / 60**2)
        logger.debug('The estimated walltime for {} nodes with {} cpus, {} years and time step {} is {} hours.'.format(nodes_setup.nodes, nodes_setup.cpus, years, time_step, estimated_walltime_hours))
        if nodes_setup.walltime is None:
            nodes_setup.walltime = estimated_walltime_hours
        elif nodes_setup.walltime < estimated_walltime_hours:
            # a chosen walltime is only reported, not changed
            logger.debug('The chosen walltime {} for the job with {} years, {} nodes and {} cpus is below the estimated walltime {}.'.format(nodes_setup.walltime, years, nodes_setup.nodes, nodes_setup.cpus, estimated_walltime_hours))

        ## check/set min cpus
        if nodes_setup.total_cpus_min is None:
            nodes_setup.total_cpus_min = min(int(np.ceil(years/20)), 32)

        ## check/set max nodes
        if nodes_setup.nodes_max is None and years <= 1:
            nodes_setup.nodes_max = 1

        ## init job
        super().init_job_file(job_name, nodes_setup)

        ## get output dir
        output_dir = self.output_dir
        output_dir_not_expanded = os.path.join(self.output_dir_not_expanded, '') # ending with separator

        ## set model options
        opt = self.options

        model_parameters = np.asarray(model_parameters, dtype=np.float64)
        assert len(model_parameters) == 7

        opt['/model/tracer'] = MODEL_TRACER[model_name]

        opt['/model/total_concentration_factor'] = total_concentration_factor
        opt['/model/parameters'] = model_parameters

        time_steps_per_year = int(METOS_T_DIM / time_step)
        opt['/model/time_step_multiplier'] = time_step
        opt['/model/time_steps_per_year'] = time_steps_per_year
        opt['/model/time_step'] = 1 / time_steps_per_year

        ## set metos3d options
        opt['/metos3d/data_dir'] = METOS_DATA_DIR
        opt['/metos3d/sim_file'] = METOS_SIM_FILE
        opt['/metos3d/years'] = years
        opt['/metos3d/write_trajectory'] = write_trajectory
        if tolerance is not None:
            opt['/metos3d/tolerance'] = tolerance

        if not write_trajectory:
            opt['/metos3d/tracer_output_dir'] = output_dir_not_expanded
        else:
            tracer_output_dir = os.path.join(output_dir, 'trajectory/')
            os.makedirs(tracer_output_dir, exist_ok=True)
            tracer_output_dir_not_expanded = os.path.join(output_dir_not_expanded, 'trajectory/')
            opt['/metos3d/tracer_output_dir'] = tracer_output_dir_not_expanded

        opt['/metos3d/output_dir'] = output_dir_not_expanded
        opt['/metos3d/option_file'] = os.path.join(output_dir_not_expanded, 'metos3d_options.txt')
        opt['/metos3d/debuglevel'] = 1
        opt['/metos3d/output_filenames'] = ['{}_output.petsc'.format(tracer) for tracer in opt['/model/tracer']]

        if tracer_input_dir is not None:
            opt['/model/tracer_input_dir'] = tracer_input_dir
            opt['/metos3d/tracer_input_dir'] = output_dir_not_expanded

            opt['/metos3d/input_filenames'] = ['{}_input.petsc'.format(tracer) for tracer in opt['/model/tracer']]

            # The output files of the input job are the input files of this job.
            # The key is spelled with the leading slash like all other option keys.
            tracer_filenames = zip(opt['/metos3d/output_filenames'], opt['/metos3d/input_filenames'])

            if total_concentration_factor == 1:
                # unchanged tracer input: relative symbolic links are sufficient
                tracer_input_dir = os.path.relpath(self.tracer_input_dir, start=output_dir)
                for base_filename, result_filename in tracer_filenames:
                    tracer_input_base_file = os.path.join(tracer_input_dir, base_filename)
                    tracer_input_result_file = os.path.join(output_dir, result_filename)
                    os.symlink(tracer_input_base_file, tracer_input_result_file)
            else:
                # scaled tracer input: write scaled copies
                for base_filename, result_filename in tracer_filenames:
                    tracer_input_base_file = os.path.join(self.tracer_input_dir, base_filename)
                    tracer_input_result_file = os.path.join(output_dir, result_filename)
                    tracer_input = util.petsc.universal.load_petsc_vec_to_numpy_array(tracer_input_base_file)
                    tracer_input = tracer_input * total_concentration_factor
                    util.petsc.universal.save_numpy_array_to_petsc_vec(tracer_input_result_file, tracer_input)
        else:
            initial_concentration = MODEL_DEFAULT_INITIAL_CONCENTRATION[model_name] * total_concentration_factor
            opt['/model/initial_concentrations'] = initial_concentration

        model_parameters_string = ','.join(DATABASE_PARAMETERS_FORMAT_STRING.format(parameter) for parameter in model_parameters)
        opt['/metos3d/parameters_string'] = model_parameters_string

        ## write metos3d option file
        f = open(opt['/metos3d/option_file'], mode='w')
        try:
            f.write('# debug \n')
            f.write('-Metos3DDebugLevel                      {:d} \n\n'.format(opt['/metos3d/debuglevel']))

            f.write('# geometry \n')
            f.write('-Metos3DGeometryType                    Profile \n')
            f.write('-Metos3DProfileInputDirectory           {}/Geometry/ \n'.format(opt['/metos3d/data_dir']))
            f.write('-Metos3DProfileIndexStartFile           gStartIndices.bin \n')
            f.write('-Metos3DProfileIndexEndFile             gEndIndices.bin \n\n')

            f.write('# bgc tracer \n')
            f.write('-Metos3DTracerCount                     2 \n')

            # the tracer input options are only set if a tracer input dir was passed, otherwise initial values are used
            try:
                f.write('-Metos3DTracerInputDirectory            {} \n'.format(opt['/metos3d/tracer_input_dir']))
                f.write('-Metos3DTracerInitFile                  {} \n'.format(','.join(map(str, opt['/metos3d/input_filenames']))))
            except KeyError:
                f.write('-Metos3DTracerInitValue                 {},{} \n'.format(*opt['/model/initial_concentrations']))

            f.write('-Metos3DTracerOutputDirectory           {} \n'.format(opt['/metos3d/tracer_output_dir']))
            f.write('-Metos3DTracerOutputFile                {} \n\n'.format(','.join(map(str, opt['/metos3d/output_filenames']))))

            f.write('# bgc parameter \n')
            f.write('-Metos3DParameterCount                  {:d} \n'.format(len(opt['/model/parameters'])))
            f.write('-Metos3DParameterValue                  {} \n\n'.format(opt['/metos3d/parameters_string']))

            f.write('# bgc boundary conditions \n')
            f.write('-Metos3DBoundaryConditionCount          2 \n')
            f.write('-Metos3DBoundaryConditionInputDirectory {}/Forcing/BoundaryCondition/ \n'.format(opt['/metos3d/data_dir']))
            f.write('-Metos3DBoundaryConditionName           Latitude,IceCover \n')
            f.write('-Metos3DLatitudeCount                   1 \n')
            f.write('-Metos3DLatitudeFileFormat              latitude.petsc \n')
            f.write('-Metos3DIceCoverCount                   12 \n')
            f.write('-Metos3DIceCoverFileFormat              fice_$02d.petsc \n\n')

            f.write('# bgc domain conditions \n')
            f.write('-Metos3DDomainConditionCount            2 \n')
            f.write('-Metos3DDomainConditionInputDirectory   {}/Forcing/DomainCondition/ \n'.format(opt['/metos3d/data_dir']))
            f.write('-Metos3DDomainConditionName             LayerDepth,LayerHeight \n')
            f.write('-Metos3DLayerDepthCount                 1 \n')
            f.write('-Metos3DLayerDepthFileFormat            z.petsc \n\n')
            f.write('-Metos3DLayerHeightCount                1 \n')
            f.write('-Metos3DLayerHeightFileFormat           dz.petsc \n')

            f.write('# transport \n')
            f.write('-Metos3DTransportType                   Matrix \n')
            f.write('-Metos3DMatrixInputDirectory            {}/Transport/Matrix5_4/{:d}dt/ \n'.format(opt['/metos3d/data_dir'], opt['/model/time_step_multiplier']))
            f.write('-Metos3DMatrixCount                     12 \n')
            f.write('-Metos3DMatrixExplicitFileFormat        Ae_$02d.petsc \n')
            f.write('-Metos3DMatrixImplicitFileFormat        Ai_$02d.petsc \n\n')

            f.write('# time stepping \n')
            f.write('-Metos3DTimeStepStart                   0.0 \n')
            f.write('-Metos3DTimeStepCount                   {:d} \n'.format(opt['/model/time_steps_per_year']))
            f.write('-Metos3DTimeStep                        {:.18f} \n\n'.format(opt['/model/time_step']))

            f.write('# solver \n')
            f.write('-Metos3DSolverType                      Spinup \n')
            f.write('-Metos3DSpinupMonitor \n')
            # the tolerance option is only set if a tolerance was passed
            try:
                f.write('-Metos3DSpinupTolerance                 {:f} \n'.format(opt['/metos3d/tolerance']))
            except KeyError:
                pass
            f.write('-Metos3DSpinupCount                     {:d} \n'.format(opt['/metos3d/years']))

            if opt['/metos3d/write_trajectory']:
                f.write('-Metos3DSpinupMonitorFileFormatPrefix   sp$0004d-,ts$0004d- \n')
                f.write('-Metos3DSpinupMonitorModuloStep         1,1 \n')
        finally:
            # close the option file even if formatting or writing an option failed
            util.io.fs.flush_and_close(f)

        ## write job file
        run_command = '{} {} \n'.format(opt['/metos3d/sim_file'], opt['/metos3d/option_file'])
        super().write_job_file(run_command, modules=['intel', 'intelmpi', 'petsc'])

        logger.debug('Job initialised.')