Example 1
def create_pickled_cluster_input_file(storage_path, mapper=None, aggregator=None, reducer=None, model_class=None):
    """ Serialize the model class and the mapper/aggregator/reducer callables
    to storage_path with cloudpickle, for shipping to cluster nodes. """
    from molnsutil import molns_cloudpickle

    # Nothing to serialize.
    if model_class is None and mapper is None and aggregator is None and reducer is None:
        return None

    # The model class is pickled eagerly; the callables are pickled together
    # with the enclosing dict below.
    cluster_input = dict(model_class=molns_cloudpickle.dumps(model_class), mapper=mapper, aggregator=aggregator,
                         reducer=reducer)

    with open(storage_path, "wb") as input_file:
        molns_cloudpickle.dump(cluster_input, input_file)
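A minimal usage sketch, assuming molnsutil is importable; MyModel, mean_mapper, and list_aggregator are hypothetical stand-ins for user code:

# Hypothetical stand-ins for a user's model class and post-processing callables.
class MyModel(object):
    def run(self):
        return [1.0, 2.0, 3.0]

def mean_mapper(result):
    # Summarize one realization as a single number.
    return sum(result) / len(result)

def list_aggregator(mapped_values):
    # Collect the mapped values from all realizations.
    return list(mapped_values)

create_pickled_cluster_input_file("cluster_input.pkl",
                                  mapper=mean_mapper,
                                  aggregator=list_aggregator,
                                  model_class=MyModel)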
Example 2
    def run_async(self, mapper=None, aggregator=None, reducer=None, number_of_trajectories=None, result_list=None,
                  store_realizations=True, add_realizations=False, realizations_storage_directory=None):
        """ Create a new remote job and deploy it on the cluster. Returns the deployed RemoteJob. """

        # Verify that the given callables are not defined in other modules, since cloudpickle
        # would then serialize them by reference rather than by value.
        # calling_module = inspect.getmodule(inspect.stack()[1][0])
        # logging.info("Caller module: {0}".format(calling_module))
        # calling_module_name = calling_module.__name__ if calling_module is not None else None
        # ClusterParameterSweep.check_ingredients_to_be_pickled(self.model_cls, mapper, aggregator, reducer,
        #                                                       module_name=calling_module_name)

        # Create new remote job.
        job_id = create_new_id()

        input_file_dir = os.path.join(constants.ClusterJobsScratchDir, constants.ClusterJobFilePrefix + job_id)
        if not os.path.exists(input_file_dir):
            os.makedirs(input_file_dir)

        # Set input data according to the operation being performed.
        input_data = {'number_of_trajectories': number_of_trajectories, 'params': self.parameters,
                      'store_realizations': store_realizations, 'num_engines': self.num_engines,
                      'is_parameter_sweep': self.is_parameter_sweep}
        if add_realizations:
            input_data['add_realizations'] = True
        if realizations_storage_directory is not None:
            input_data['result_list'] = result_list
            input_data['realizations_storage_directory'] = realizations_storage_directory

        # Write job input file.
        input_file_path = os.path.join(input_file_dir, constants.ClusterExecInputFile)
        with open(input_file_path, "wb") as input_file:
            cloudpickle.dump(input_data, input_file)

        # Create pickled_cluster_input_file.
        pickled_cluster_input_file = os.path.join(input_file_dir, constants.PickledClusterInputFile)
        create_pickled_cluster_input_file(storage_path=pickled_cluster_input_file, mapper=mapper,
                                          aggregator=aggregator, reducer=reducer, model_class=self.model_cls)

        remote_job = RemoteJob(input_files=[input_file_path, pickled_cluster_input_file],
                               is_parameter_sweep=self.is_parameter_sweep,
                               date=str(datetime.datetime.now()), remote_host=self.remote_host, remote_job_id=job_id,
                               local_scratch_dir=input_file_dir, num_engines=self.num_engines)

        # Deploy remote job.
        self.cluster_deploy.deploy_job_to_cluster(remote_job)

        logging.info("Deployed\n{0}".format(remote_job))

        return remote_job
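For debugging, both staged files can be loaded back with plain pickle. A sketch, assuming job_dir points at the scratch directory of a job created by run_async (the path is illustrative):

import os
import pickle

import molnsutil.constants as constants

job_dir = "/tmp/molns_job_example"  # illustrative scratch directory

# Per-job parameters (trajectory counts, parameter sets, flags).
with open(os.path.join(job_dir, constants.ClusterExecInputFile), "rb") as f:
    print(pickle.load(f))

# Pickled model class plus mapper/aggregator/reducer callables.
with open(os.path.join(job_dir, constants.PickledClusterInputFile), "rb") as f:
    print(list(pickle.load(f).keys()))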
Example 3
import pickle

if __name__ == "__main__":
    try:
        import molnsutil.constants as constants
        import molnsutil.molns_cloudpickle as cloudpickle

        # Load the per-job parameters written by the dispatcher.
        with open(constants.job_input_file_name, "rb") as inp:
            unpickled_list = pickle.load(inp)

        num_of_trajectories = unpickled_list['chunk_size']
        seed = unpickled_list['seed']
        params = unpickled_list['pset']
        param_set_id_ = unpickled_list['pndx']

        # The model class and the mapper/aggregator callables either travel inline
        # in the job input file or in the shared pickled_cluster_input_file.
        if not unpickled_list.get('model_class', False):
            with open(constants.pickled_cluster_input_file, "rb") as inp:
                unpickled_cluster_input = pickle.load(inp)
                model_cls = unpickled_cluster_input['model_class']
                mapper_fn = unpickled_cluster_input['mapper']
                aggregator_fn = unpickled_cluster_input['aggregator']
        else:
            model_cls = unpickled_list['model_class']
            mapper_fn = unpickled_list['mapper']
            aggregator_fn = unpickled_list['aggregator']

        # run_ensemble_map_and_aggregate is provided by the enclosing molnsutil module.
        result = run_ensemble_map_and_aggregate(
            model_class=model_cls,
            parameters=params,
            param_set_id=param_set_id_,
            seed_base=seed,
            number_of_trajectories=num_of_trajectories,
            mapper=mapper_fn,
            aggregator=aggregator_fn,
            cluster_import=True)
        with open(constants.job_output_file_name, "wb") as output:
            cloudpickle.dump(result, output)
    except Exception as errors:
        # Write the error as text; "wb" would require bytes under Python 3.
        with open(constants.job_error_file_name, "w") as error:
            error.write(str(errors))
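The dispatcher side is not shown in these snippets; a minimal sketch of a job input file this script would accept, with key names taken from the reads above and all values illustrative:

import molnsutil.constants as constants
import molnsutil.molns_cloudpickle as cloudpickle

job_input = {
    'chunk_size': 10,       # trajectories to simulate in this chunk
    'seed': 12345,          # base seed for the ensemble
    'pset': {'k1': 0.5},    # parameter set (illustrative)
    'pndx': 0,              # index of the parameter set
    # 'model_class', 'mapper' and 'aggregator' may be inlined here; otherwise
    # the script falls back to the shared pickled_cluster_input_file.
}
with open(constants.job_input_file_name, "wb") as f:
    cloudpickle.dump(job_input, f)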
Example 4
import os
import pickle

if __name__ == "__main__":
    try:
        import molnsutil.constants as constants
        import molnsutil.molns_cloudpickle as molns_cloudpickle

        # Load the per-job parameters written by the dispatcher.
        with open(constants.job_input_file_name, "rb") as inp:
            unpickled_list = pickle.load(inp)

        num_of_trajectories = unpickled_list['pchunk']
        seed = unpickled_list['seed']
        params = unpickled_list['pset']
        param_set_id_ = unpickled_list['pndx']
        storage_mode = unpickled_list['storage_mode']

        # The model class either travels inline in the job input file or in the
        # shared pickled_cluster_input_file.
        if not unpickled_list.get('model_class', False):
            with open(constants.pickled_cluster_input_file, "rb") as inp:
                unpickled_cluster_input = pickle.load(inp)
                model_cls = unpickled_cluster_input['model_class']
        else:
            model_cls = unpickled_list['model_class']

        # run_ensemble is provided by the enclosing molnsutil module.
        result = run_ensemble(model_class=model_cls,
                              parameters=params,
                              param_set_id=param_set_id_,
                              seed_base=seed,
                              number_of_trajectories=num_of_trajectories,
                              storage_mode=storage_mode,
                              local_storage_path=os.path.dirname(
                                  os.path.abspath(__file__)),
                              cluster_import=True)
        with open(constants.job_output_file_name, "wb") as output:
            molns_cloudpickle.dump(result, output)
    except Exception as errors:
        # Write the error as text; "wb" would require bytes under Python 3.
        with open(constants.job_run_ensemble_error_file_name, "w") as error:
            error.write(str(errors))
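On completion exactly one of the two files should exist: the pickled result or the error text. A small check along these lines distinguishes them (file names come from the constants module used above):

import os
import pickle

import molnsutil.constants as constants

if os.path.exists(constants.job_output_file_name):
    with open(constants.job_output_file_name, "rb") as f:
        print("job succeeded:", pickle.load(f))
elif os.path.exists(constants.job_run_ensemble_error_file_name):
    with open(constants.job_run_ensemble_error_file_name) as f:
        print("job failed:", f.read())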
Example 5
import os
import pickle

if __name__ == "__main__":
    try:
        import molnsutil.constants as constants
        import molnsutil.molns_cloudpickle as molns_cloudpickle

        # Load the per-job parameters written by the dispatcher.
        with open(constants.job_input_file_name, "rb") as inp:
            unpickled_list = pickle.load(inp)

        num_of_trajectories = unpickled_list['pchunk']
        seed = unpickled_list['seed']
        params = unpickled_list['pset']
        param_set_id_ = unpickled_list['pndx']
        storage_mode = unpickled_list['storage_mode']

        # The model class either travels inline in the job input file or in the
        # shared pickled_cluster_input_file.
        if not unpickled_list.get('model_class', False):
            with open(constants.pickled_cluster_input_file, "rb") as inp:
                unpickled_cluster_input = pickle.load(inp)
                model_cls = unpickled_cluster_input['model_class']
        else:
            model_cls = unpickled_list['model_class']

        # run_ensemble is provided by the enclosing molnsutil module.
        result = run_ensemble(model_class=model_cls, parameters=params, param_set_id=param_set_id_, seed_base=seed,
                              number_of_trajectories=num_of_trajectories, storage_mode=storage_mode,
                              local_storage_path=os.path.dirname(os.path.abspath(__file__)), cluster_import=True)
        with open(constants.job_output_file_name, "wb") as output:
            molns_cloudpickle.dump(result, output)
    except Exception as errors:
        # Write the error as text; "wb" would require bytes under Python 3.
        with open(constants.job_run_ensemble_error_file_name, "w") as error:
            error.write(str(errors))
Example 6

import pickle

if __name__ == "__main__":
    try:
        import molnsutil.constants as constants
        import molnsutil.molns_cloudpickle as cloudpickle

        with open(constants.job_input_file_name, "rb") as inp:
            unpickled_list = pickle.load(inp)

        num_of_trajectories = unpickled_list['chunk_size']
        seed = unpickled_list['seed']
        params = unpickled_list['pset']
        param_set_id_ = unpickled_list['pndx']

        # The model class and callables either travel inline in the job input
        # file or in the shared pickled_cluster_input_file.
        if not unpickled_list.get('model_class', False):
            with open(constants.pickled_cluster_input_file, "rb") as inp:
                unpickled_cluster_input = pickle.load(inp)
                model_cls = unpickled_cluster_input['model_class']
                mapper_fn = unpickled_cluster_input['mapper']
                aggregator_fn = unpickled_cluster_input['aggregator']
        else:
            model_cls = unpickled_list['model_class']
            mapper_fn = unpickled_list['mapper']
            aggregator_fn = unpickled_list['aggregator']

        # run_ensemble_map_and_aggregate is provided by the enclosing molnsutil module.
        result = run_ensemble_map_and_aggregate(model_class=model_cls, parameters=params, param_set_id=param_set_id_,
                                                seed_base=seed, number_of_trajectories=num_of_trajectories,
                                                mapper=mapper_fn, aggregator=aggregator_fn, cluster_import=True)
        with open(constants.job_output_file_name, "wb") as output:
            cloudpickle.dump(result, output)
    except Exception as errors:
        # Write the error as text; "wb" would require bytes under Python 3.
        with open(constants.job_error_file_name, "w") as error:
            error.write(str(errors))
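The call convention for the mapper and aggregator is not spelled out in these snippets; molnsutil conventionally applies the mapper to each realization and the aggregator to the sequence of mapped values. A sketch under that assumption:

def trajectory_mapper(result):
    # Assumption: called once per realization with that realization's result.
    return max(result)

def mean_aggregator(mapped_values):
    # Assumption: called with the mapped values of all realizations in a chunk.
    values = list(mapped_values)
    return sum(values) / len(values)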