def parse_fl_plan(plan_path, logger=None):
    """Load a federation (FL) plan YAML file and resolve its defaults.

    Every top-level section is guaranteed an 'init_kwargs' dict, any
    'defaults_file' a section references is merged underneath the section's
    own values, and aggregator/federation UUIDs derived from a hash of all
    files read are injected into the plan.

    Args:
        plan_path: Path to the FL plan YAML file.
        logger: Optional logger; the module logger is used when None.

    Returns:
        The fully-resolved plan dict, including a 'hash' entry.
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    flplan = load_yaml(plan_path)

    # ensure 'init_kwargs' appears in each top-level block
    for section in flplan.values():
        section.setdefault('init_kwargs', {})

    # collect all the plan filepaths used
    plan_files = [plan_path]

    # walk the top level keys for defaults_file in sorted order
    for section_name in sorted(flplan):
        rel_defaults = flplan[section_name].get('defaults_file')
        if rel_defaults is None:
            continue

        defaults_path = os.path.join(os.path.dirname(plan_path), rel_defaults)
        logger.info(
            "Using FLPlan defaults for section '{}' from file '{}'".format(
                section_name, defaults_path))
        defaults = load_yaml(defaults_path)

        # the section's own init_kwargs win over the defaults-file ones
        if 'init_kwargs' in defaults:
            defaults['init_kwargs'].update(flplan[section_name]['init_kwargs'])
            flplan[section_name]['init_kwargs'] = defaults['init_kwargs']

        # likewise, the section's remaining keys override the defaults
        defaults.update(flplan[section_name])
        flplan[section_name] = defaults

        plan_files.append(defaults_path)

    # create the hash of these files
    plan_name = os.path.splitext(os.path.basename(plan_path))[0]
    plan_hash = hash_files(plan_files, logger=logger)

    federation_uuid = '{}_{}'.format(plan_name, plan_hash[:8])
    aggregator_uuid = 'aggregator_{}'.format(federation_uuid)

    # stamp both parties with the shared identifiers
    for section_name in ('aggregator_object_init', 'collaborator_object_init'):
        flplan[section_name]['init_kwargs']['aggregator_uuid'] = aggregator_uuid
        flplan[section_name]['init_kwargs']['federation_uuid'] = federation_uuid

    flplan['hash'] = plan_hash

    logger.info("Parsed plan:\n{}".format(yaml.dump(flplan)))

    return flplan
def main(plan, collaborators_file, data_config_fname, logging_config_path,
         logging_default_level, logging_directory, model_device, **kwargs):
    """Run the federation simulation from the federation (FL) plan.

    Runs a federated training from the federation (FL) plan, but the
    aggregator and every collaborator live in this single process so a
    developer can exercise the model and data loaders before deploying
    to the remote collaborator nodes.

    Args:
        plan: The Federation (FL) plan (YAML file)
        collaborators_file: The file listing the collaborators
        data_config_fname: The file describing where the dataset is located
            on the collaborators
        logging_config_path: The log file
        logging_default_level: The log level
        **kwargs: Variable parameters to pass to the function
    """
    # FIXME: consistent filesystem (#15)
    # establish location for fl plan as well as
    # where to get and write model protobufs
    script_dir = os.path.dirname(os.path.realpath(__file__))
    base_dir = os.path.join(script_dir, 'federations')

    # fixed directory layout under federations/
    plan_dir, weights_dir, metadata_dir, collaborators_dir = (
        os.path.join(base_dir, sub)
        for sub in ('plans', 'weights', 'metadata', 'collaborator_lists'))

    setup_logging(
        path=os.path.join(script_dir, logging_config_path),
        default_level=logging_default_level,
        logging_directory=os.path.join(script_dir, logging_directory))

    # load the flplan, local_config and collaborators file
    flplan = parse_fl_plan(os.path.join(plan_dir, plan))
    local_config = load_yaml(os.path.join(base_dir, data_config_fname))
    collaborator_common_names = load_yaml(
        os.path.join(collaborators_dir, collaborators_file)
    )['collaborator_common_names']

    # TODO: Run a loop here over various parameter values and iterations
    # TODO: implement more than just saving init, best, and latest model
    federate(flplan, local_config, collaborator_common_names, base_dir,
             weights_dir, metadata_dir, model_device)
def main(plan, native_model_weights_filepath, collaborators_file,
         feature_shape, n_classes, data_config_fname, logging_config_path,
         logging_default_level, model_device):
    """Creates a protobuf file of the initial weights for the model

    Uses the federation (FL) plan to create an initial weights file
    for the federation.

    Args:
        plan: The federation (FL) plan filename
        native_model_weights_filepath: A framework-specific filepath. Path
            will be relative to the working directory.
        collaborators_file: The file listing the collaborators; used to pick
            a collaborator whose data determines the input layer shape when
            feature_shape is None
        feature_shape: The input shape to the model
        n_classes: Number of classes forwarded to the data object
        data_config_fname: The data configuration file (defines where the
            datasets are located)
        logging_config_path: The log path
        logging_default_level (int): The default log level
        model_device: Device passed to the model object (e.g. 'cpu')
    """
    setup_logging(path=logging_config_path,
                  default_level=logging_default_level)

    logger = logging.getLogger(__name__)

    # FIXME: consistent filesystem (#15)
    script_dir = os.path.dirname(os.path.realpath(__file__))
    base_dir = os.path.join(script_dir, 'federations')
    plan_dir = os.path.join(base_dir, 'plans')
    weights_dir = os.path.join(base_dir, 'weights')

    # ensure the weights dir exists
    if not os.path.exists(weights_dir):
        print('creating folder:', weights_dir)
        # exist_ok avoids a race if another process creates it between the
        # exists() check and here
        os.makedirs(weights_dir, exist_ok=True)

    # parse the plan and local config
    flplan = parse_fl_plan(os.path.join(plan_dir, plan))
    local_config = load_yaml(os.path.join(base_dir, data_config_fname))

    # get the output filename
    fpath = os.path.join(
        weights_dir,
        flplan['aggregator_object_init']['init_kwargs']['init_model_fname'])

    # create the data object for models whose architecture depends on the feature shape
    if feature_shape is None:
        if collaborators_file is None:
            sys.exit("You must specify either a feature shape or a collaborator list in order for the script to determine the input layer shape")
        # FIXME: this will ultimately run in a governor environment and should not require any data to work
        # pick the first collaborator to create the data and model (could be any)
        collaborator_common_name = load_yaml(
            os.path.join(base_dir, 'collaborator_lists', collaborators_file)
        )['collaborator_common_names'][0]
        data = create_data_object(flplan, collaborator_common_name,
                                  local_config, n_classes=n_classes)
    else:
        data = get_object('openfl.data.dummy.randomdata', 'RandomData',
                          feature_shape=feature_shape)
        logger.info('Using data object of type {} and feature shape {}'.format(
            type(data), feature_shape))

    # create the model object and compression pipeline
    wrapped_model = create_model_object(flplan, data,
                                        model_device=model_device)
    compression_pipeline = create_compression_pipeline(flplan)

    # determine if we need to store the optimizer variables
    # FIXME: what if this key is missing?
    try:
        opt_treatment = OptTreatment[
            flplan['collaborator_object_init']['init_kwargs']['opt_treatment']]
    except KeyError:
        # FIXME: this error message should use the exception to determine the missing key and the Enum to display the options dynamically
        sys.exit("FL plan must specify ['collaborator_object_init']['init_kwargs']['opt_treatment'] as [RESET|CONTINUE_LOCAL|CONTINUE_GLOBAL]")

    # FIXME: this should be an "opt_treatment requires parameters type check rather than a magic string"
    with_opt_vars = opt_treatment == OptTreatment['CONTINUE_GLOBAL']

    if native_model_weights_filepath is not None:
        wrapped_model.load_native(native_model_weights_filepath)

    tensor_dict_split_fn_kwargs = \
        wrapped_model.tensor_dict_split_fn_kwargs or {}
    tensor_dict, holdout_params = split_tensor_dict_for_holdouts(
        logger,
        wrapped_model.get_tensor_dict(with_opt_vars=with_opt_vars),
        **tensor_dict_split_fn_kwargs)

    # logger.warn is a deprecated alias for logger.warning; message typo
    # ('paramters') also fixed
    logger.warning('Following parameters omitted from global initial model, '
                   'local initialization will determine values: {}'.format(
                       list(holdout_params.keys())))

    model_proto = construct_proto(tensor_dict=tensor_dict,
                                  model_id=wrapped_model.__class__.__name__,
                                  model_version=0,
                                  is_delta=False,
                                  delta_from_version=-1,
                                  compression_pipeline=compression_pipeline)

    dump_proto(model_proto=model_proto, fpath=fpath)

    logger.info("Created initial weights file: {}".format(fpath))
def main(plan, resume, collaborators_file, data_config_fname,
         validate_without_patches_flag, data_in_memory_flag,
         data_queue_max_length, data_queue_num_workers, torch_threads,
         kmp_affinity_flag, logging_config_path, logging_default_level,
         logging_directory, model_device, **kwargs):
    """Run the federation simulation from the federation (FL) plan.

    Runs a federated training from the federation (FL) plan but creates the
    aggregator and collaborators on the same compute node. This allows the
    developer to test the model and data loaders before running on the
    remote collaborator nodes.

    Args:
        plan                          : The Federation (FL) plan (YAML file)
        resume                        : Whether or not the aggregator is told
                                        to resume from previous best
        collaborators_file            : The file listing the collaborators
        data_config_fname             : The file describing where the dataset
                                        is located on the collaborators
        validate_without_patches_flag : controls a model init kwarg
        data_in_memory_flag           : controls a data init kwarg
        data_queue_max_length         : controls a data init kwarg
        data_queue_num_workers        : controls a data init kwarg
        torch_threads                 : number of threads to set in torch
        kmp_affinity_flag             : controls a model init kwarg
        logging_config_path           : The log file
        logging_default_level         : The log level
        **kwargs                      : Variable parameters to pass to the
                                        function
    """
    # FIXME: consistent filesystem (#15)
    # establish location for fl plan as well as
    # where to get and write model protobufs
    script_dir = os.path.dirname(os.path.realpath(__file__))
    base_dir = os.path.join(script_dir, 'federations')
    plan_dir = os.path.join(base_dir, 'plans')
    weights_dir = os.path.join(base_dir, 'weights')
    metadata_dir = os.path.join(base_dir, 'metadata')
    collaborators_dir = os.path.join(base_dir, 'collaborator_lists')
    logging_config_path = os.path.join(script_dir, logging_config_path)
    logging_directory = os.path.join(script_dir, logging_directory)

    setup_logging(path=logging_config_path,
                  default_level=logging_default_level,
                  logging_directory=logging_directory)

    # load the flplan, local_config and collaborators file
    flplan = parse_fl_plan(os.path.join(plan_dir, plan))

    # FIXME: Find a better solution for passing model and data init kwargs
    model_init_overrides = {
        'validate_without_patches': validate_without_patches_flag,
        'torch_threads': torch_threads,
        'kmp_affinity': kmp_affinity_flag,
    }
    data_init_overrides = {
        'in_memory': data_in_memory_flag,
        'q_max_length': data_queue_max_length,
        'q_num_workers': data_queue_num_workers,
    }
    for key, value in model_init_overrides.items():
        # skip unset values; NOTE(review): 'not in (None, False)' also skips
        # 0 (0 == False) — confirm that is intended for numeric overrides
        if value not in (None, False):
            flplan['model_object_init']['init_kwargs'][key] = value
    for key, value in data_init_overrides.items():
        if value not in (None, False):
            flplan['data_object_init']['init_kwargs'][key] = value

    local_config = load_yaml(os.path.join(base_dir, data_config_fname))
    collaborator_common_names = load_yaml(
        os.path.join(collaborators_dir,
                     collaborators_file))['collaborator_common_names']

    # TODO: Run a loop here over various parameter values and iterations
    # TODO: implement more than just saving init, best, and latest model
    federate(flplan=flplan,
             resume=resume,
             local_config=local_config,
             collaborator_common_names=collaborator_common_names,
             base_dir=base_dir,
             weights_dir=weights_dir,
             metadata_dir=metadata_dir,
             model_device=model_device)