def test_dmlrunner_communicate_job(config_manager, train_dmlresult_obj, ipfs_client):
    runner = DMLRunner(config_manager)
    runner.configure(ipfs_client)
    comm_job = train_dmlresult_obj.job.copy_constructor()
    comm_job.job_type = JobTypes.JOB_COMM.name
    comm_job.key = "test"
    result = runner.run_job(comm_job)
    assert result.results["receipt"]
def split_dmlresult_obj(config_manager, mnist_filepath):
    model_json = make_model_json()
    runner = DMLRunner(config_manager)
    split_job = make_split_job(
        model_json,
        mnist_filepath,
    )
    split_job.hyperparams['split'] = 0.75
    job_results = runner.run_job(split_job)
    return job_results
def train_dmlresult_obj(config_manager, split_dmlresult_obj, init_dmlresult_obj):
    runner = DMLRunner(config_manager)
    initial_weights = init_dmlresult_obj.results['weights']
    session_filepath = split_dmlresult_obj.results['session_filepath']
    datapoint_count = split_dmlresult_obj.results['datapoint_count']
    train_job = make_train_job(make_model_json(), initial_weights,
                               make_hyperparams(split=1),
                               session_filepath, datapoint_count)
    result = runner.run_job(train_job)
    return result
def split_dmlresult_obj(config_manager, mnist_uuid, dataset_manager):
    model_json = make_model_json()
    runner = DMLRunner(config_manager)
    mnist_filepath = dataset_manager.get_mappings()[mnist_uuid]
    split_job = make_split_job(
        model_json,
        mnist_filepath,
    )
    split_job.hyperparams['split'] = 0.75
    job_results = runner.run_job(split_job)
    return job_results
def test_dmlrunner_averaging_weights(config_manager, train_dmlresult_obj):
    runner = DMLRunner(config_manager)
    avg_job = train_dmlresult_obj.job.copy_constructor()
    initial_weights = train_dmlresult_obj.results['weights']
    assert initial_weights
    avg_job.weights = initial_weights
    avg_job.new_weights = initial_weights
    avg_job.omega = train_dmlresult_obj.results['omega']
    avg_job.sigma_omega = avg_job.omega
    averaged_weights = runner._average(avg_job).results['weights']
    # Averaging a set of weights with itself must leave them unchanged.
    assert all(
        np.allclose(arr1, arr2)
        for arr1, arr2 in zip(averaged_weights, initial_weights))
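# For context, a minimal sketch of the omega-weighted running average this
# test exercises. This is an assumption about the rule, not a reproduction of
# DMLRunner._average: `running_weighted_average` is a hypothetical helper, and
# any convex combination of identical weight lists satisfies the assertion.
import numpy as np

def running_weighted_average(weights, new_weights, sigma_omega, omega):
    # Fold `new_weights` (weight `omega`) into the running average
    # `weights` (accumulated weight `sigma_omega`).
    total = sigma_omega + omega
    return [
        (sigma_omega * w + omega * nw) / total
        for w, nw in zip(weights, new_weights)
    ]

# Averaging identical weights returns them unchanged, which is what
# test_dmlrunner_averaging_weights asserts above.
w = [np.ones((2, 2))]
assert np.allclose(running_weighted_average(w, w, 1.0, 1.0)[0], w[0])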
def test_dmlrunner_transform_and_split(config_manager, small_filepath):
    model_json = make_model_json()
    runner = DMLRunner(config_manager)
    split_job = make_split_job(model_json, small_filepath)
    split_job.hyperparams['split'] = 0.75
    job_results = runner.run_job(split_job)
    session_filepath = job_results.results['session_filepath']
    assert os.path.isdir(session_filepath), \
        "Session folder does not exist!"
    train_filepath = os.path.join(session_filepath, 'train.csv')
    test_filepath = os.path.join(session_filepath, 'test.csv')
    assert os.path.isfile(train_filepath) and os.path.isfile(test_filepath), \
        "Training and test set not created!"
    train = pd.read_csv(train_filepath)
    test = pd.read_csv(test_filepath)
    assert len(train) == 6 and len(test) == 2, \
        "Train test split was not performed correctly."
def test_dmlrunner_validate_job_returns_stats(config_manager, mnist_filepath,
                                              train_dmlresult_obj):
    model_json = make_model_json()
    hyperparams = make_hyperparams()
    runner = DMLRunner(config_manager)
    result = train_dmlresult_obj
    session_filepath = result.job.session_filepath
    datapoint_count = result.job.datapoint_count
    assert result.status == 'successful'
    new_weights = result.results['weights']
    # Validate on the held-out complement of the training split.
    hyperparams['split'] = 1 - hyperparams['split']
    validate_job = make_validate_job(model_json, new_weights, hyperparams,
                                     session_filepath, datapoint_count)
    result = runner.run_job(validate_job)
    assert result.status == 'successful'
    val_stats = result.results['val_stats']
    assert result.job.job_type == JobTypes.JOB_VAL.name
    assert isinstance(val_stats, dict)
def test_dmlrunner_same_train_job_with_split_1(config_manager, mnist_filepath):
    model_json = make_model_json()
    hyperparams = make_hyperparams(split=1)
    runner = DMLRunner(config_manager)
    initialize_job = make_initialize_job(model_json)
    initial_weights = runner.run_job(initialize_job).results['weights']
    split_job = make_split_job(model_json, mnist_filepath)
    job_results = runner.run_job(split_job)
    session_filepath = job_results.results['session_filepath']
    datapoint_count = job_results.results['datapoint_count']
    train_job = make_train_job(model_json, initial_weights, hyperparams,
                               session_filepath, datapoint_count)
    result = runner.run_job(train_job)
    assert result.status == 'successful'
    results = result.results
    new_weights = results['weights']
    omega = results['omega']
    train_stats = results['train_stats']
    assert result.job.job_type == JobTypes.JOB_TRAIN.name
    assert isinstance(new_weights, list)
    assert isinstance(new_weights[0], np.ndarray)
    assert isinstance(omega, (int, float))
    assert isinstance(train_stats, dict)
def bootstrap(repo_id="testRepo", api_key="demo-api-key", test=False):
    """
    Bootstraps the data provider unix service.

    Sets up the Configuration Manager, the DML runner with its federated
    averaging optimizer, and the WebSocket client, then blocks on the
    client's event loop.
    """
    # Set up the Configuration Manager.
    config_manager = ConfigurationManager()
    config_manager.bootstrap()

    # Set up the runner and its optimizer.
    runner = DMLRunner(config_manager)
    optimizer = FederatedAveragingOptimizer(runner, repo_id)

    # Set up the WebSocket client and run it until it completes.
    # TODO: Need to make it work as a daemon.
    loop = asyncio.get_event_loop()
    websocket_client = WebSocketClient(optimizer, config_manager, repo_id,
                                       api_key, test)
    loop.run_until_complete(websocket_client.prepare_dml())
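# A minimal invocation sketch for the function above. The repo ID and API key
# here are placeholder values, not real credentials.
if __name__ == "__main__":
    bootstrap(repo_id="my-repo", api_key="my-api-key", test=True)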
def __init__(self, config_manager):
    """
    Initializes the instance.
    """
    logging.info("Setting up scheduler...")
    self.event = Event()
    self.processed = []
    self.history = []
    config = config_manager.get_config()
    self.frequency_in_mins = config.getint("SCHEDULER", "frequency_in_mins")
    self.num_runners = config.getint("SCHEDULER", "num_runners")
    self.max_tries = config.getint("SCHEDULER", "max_tries")
    # Bound the job queue at num_runners squared.
    self.queue = Queue(self.num_runners * self.num_runners)
    # Force the 'spawn' start method so worker processes get a clean state.
    multiprocessing.set_start_method('spawn', force=True)
    self.pool = Pool(processes=self.num_runners)
    self.runners = [
        DMLRunner(config_manager) for _ in range(self.num_runners)
    ]
    self.current_jobs = [None for _ in range(self.num_runners)]
    self.current_results = [None for _ in range(self.num_runners)]
    logging.info("Scheduler is set up!")
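# For reference, a sketch of the [SCHEDULER] section this constructor reads,
# built with configparser. The values are illustrative only; the project's
# real defaults may differ.
import configparser

config = configparser.ConfigParser()
config["SCHEDULER"] = {
    "frequency_in_mins": "1",   # how often the scheduler wakes up
    "num_runners": "4",         # size of the runner pool
    "max_tries": "3",           # retries before a job is dropped
}
assert config.getint("SCHEDULER", "num_runners") == 4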
def init_dmlresult_obj(config_manager, small_filepath):
    runner = DMLRunner(config_manager)
    initialize_job = make_initialize_job(make_model_json(), small_filepath)
    result = runner.run_job(initialize_job)
    return result
def runner(config_manager):
    runner = DMLRunner(config_manager)
    return runner
def runner(config_manager, ipfs_client):
    runner = DMLRunner(config_manager)
    runner.configure(ipfs_client=ipfs_client)
    return runner
def init_dmlresult_obj(config_manager, small_uuid, dataset_manager):
    runner = DMLRunner(config_manager)
    small_filepath = dataset_manager.get_mappings()[small_uuid]
    initialize_job = make_initialize_job(make_model_json(), small_filepath)
    result = runner.run_job(initialize_job)
    return result
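# The helpers above read as pytest fixtures: each function's parameters are
# resolved by name, so init_dmlresult_obj and split_dmlresult_obj feed
# train_dmlresult_obj, which feeds the tests. A minimal sketch of that
# chaining pattern, assuming standard pytest fixture semantics (the names
# below are illustrative, not from this repo):
import pytest

@pytest.fixture
def base_value():
    return 2

@pytest.fixture
def derived_value(base_value):
    # pytest injects `base_value` by name, the same way train_dmlresult_obj
    # consumes init_dmlresult_obj and split_dmlresult_obj above.
    return base_value * 2

def test_chained_fixtures(derived_value):
    assert derived_value == 4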