def test_dmlrunner_communicate_job(config_manager, train_dmlresult_obj,
                                   ipfs_client):
    runner = DMLRunner(config_manager)
    runner.configure(ipfs_client)
    comm_job = train_dmlresult_obj.job.copy_constructor()
    comm_job.job_type = JobTypes.JOB_COMM.name
    comm_job.key = "test"
    result = runner.run_job(comm_job)
    assert result.results["receipt"]
Example #2
@pytest.fixture
def split_dmlresult_obj(config_manager, mnist_filepath):
    model_json = make_model_json()
    runner = DMLRunner(config_manager)
    split_job = make_split_job(
        model_json,
        mnist_filepath,
    )
    split_job.hyperparams['split'] = 0.75
    job_results = runner.run_job(split_job)
    return job_results
Example #3
@pytest.fixture
def train_dmlresult_obj(config_manager, split_dmlresult_obj,
                        init_dmlresult_obj):
    runner = DMLRunner(config_manager)
    initial_weights = init_dmlresult_obj.results['weights']
    session_filepath = split_dmlresult_obj.results['session_filepath']
    datapoint_count = split_dmlresult_obj.results['datapoint_count']
    train_job = make_train_job(make_model_json(), initial_weights,
                               make_hyperparams(split=1), session_filepath,
                               datapoint_count)
    result = runner.run_job(train_job)
    return result
Example #4
@pytest.fixture
def split_dmlresult_obj(config_manager, mnist_uuid, dataset_manager):
    model_json = make_model_json()
    runner = DMLRunner(config_manager)
    mnist_filepath = dataset_manager.get_mappings()[mnist_uuid]
    split_job = make_split_job(
        model_json,
        mnist_filepath,
    )
    split_job.hyperparams['split'] = 0.75
    job_results = runner.run_job(split_job)
    print(job_results)
    return job_results
def test_dmlrunner_averaging_weights(config_manager, train_dmlresult_obj):
    runner = DMLRunner(config_manager)
    avg_job = train_dmlresult_obj.job.copy_constructor()
    initial_weights = train_dmlresult_obj.results['weights']
    assert initial_weights
    avg_job.weights = initial_weights
    avg_job.new_weights = initial_weights
    avg_job.omega = train_dmlresult_obj.results['omega']
    avg_job.sigma_omega = avg_job.omega
    averaged_weights = runner._average(avg_job).results['weights']
    assert all(
        np.allclose(arr1, arr2)
        for arr1, arr2 in zip(averaged_weights, initial_weights))
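The assertion above holds if _average computes an incremental weighted mean of the running average and the new weights. A minimal sketch of that arithmetic, assuming (this is an inference, not DMLRunner's actual source) that omega weights the new contribution and sigma_omega the running total:

import numpy as np

def incremental_average(avg, new, omega, sigma_omega):
    # Assumed form: weighted mean of the running average and the new weights.
    return [(sigma_omega * a + omega * n) / (sigma_omega + omega)
            for a, n in zip(avg, new)]

# With identical weight lists and sigma_omega == omega, the result is
# unchanged, which is exactly what the test asserts.
w = [np.ones((2, 2))]
assert np.allclose(incremental_average(w, w, 1.0, 1.0)[0], w[0])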
def test_dmlrunner_transform_and_split(
        config_manager, small_filepath):
    model_json = make_model_json()
    runner = DMLRunner(config_manager)
    split_job = make_split_job(model_json, small_filepath)
    split_job.hyperparams['split'] = 0.75
    job_results = runner.run_job(split_job)
    session_filepath = job_results.results['session_filepath']
    assert os.path.isdir(session_filepath), \
        "Session folder does not exist!"
    train_filepath = os.path.join(session_filepath, 'train.csv')
    test_filepath = os.path.join(session_filepath, 'test.csv')
    assert os.path.isfile(train_filepath) and os.path.isfile(test_filepath), \
        "Training and test set not created!"
    train = pd.read_csv(train_filepath)
    test = pd.read_csv(test_filepath)
    # small_filepath evidently holds 8 rows here: split=0.75 leaves 6 train, 2 test.
    assert len(train) == 6 and len(test) == 2, \
        "Train test split was not performed correctly."
def test_dmlrunner_validate_job_returns_stats(
        config_manager, mnist_filepath, train_dmlresult_obj):
    model_json = make_model_json()
    hyperparams = make_hyperparams()
    runner = DMLRunner(config_manager)
    result = train_dmlresult_obj
    session_filepath = result.job.session_filepath
    datapoint_count = result.job.datapoint_count
    assert result.status == 'successful'
    results = result.results
    new_weights = results['weights']
    omega = results['omega']
    train_stats = results['train_stats']
    # Validate on the held-out complement of the training split.
    hyperparams['split'] = 1 - hyperparams['split']
    validate_job = make_validate_job(model_json, new_weights, hyperparams,
                                     session_filepath, datapoint_count)
    result = runner.run_job(validate_job)
    assert result.status == 'successful'
    results = result.results
    val_stats = results['val_stats']
    assert result.job.job_type == JobTypes.JOB_VAL.name
    assert isinstance(val_stats, dict)
def test_dmlrunner_same_train_job_with_split_1(
        config_manager, mnist_filepath):
    model_json = make_model_json()
    hyperparams = make_hyperparams(split=1)
    runner = DMLRunner(config_manager)
    initialize_job = make_initialize_job(model_json)
    initial_weights = runner.run_job(initialize_job).results['weights']
    split_job = make_split_job(model_json, mnist_filepath)
    job_results = runner.run_job(split_job)
    session_filepath = job_results.results['session_filepath']
    datapoint_count = job_results.results['datapoint_count']
    train_job = make_train_job(model_json, initial_weights, hyperparams,
                               session_filepath, datapoint_count)
    result = runner.run_job(train_job)
    assert result.status == 'successful'
    results = result.results
    new_weights = results['weights']
    omega = results['omega']
    train_stats = results['train_stats']
    assert result.job.job_type == JobTypes.JOB_TRAIN.name
    assert isinstance(new_weights, list)
    assert isinstance(new_weights[0], np.ndarray)
    assert isinstance(omega, (int, float))
    assert isinstance(train_stats, dict)
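These tests compare job_type against JobTypes.<MEMBER>.name strings. A plausible sketch of that enum, inferred purely from the member names appearing in this listing (the real enum likely defines more members, e.g. for initialize and split jobs):

from enum import Enum, auto

class JobTypes(Enum):
    # Only the members referenced in this listing; the tests use .name,
    # so the values themselves do not matter here.
    JOB_COMM = auto()
    JOB_TRAIN = auto()
    JOB_VAL = auto()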
Example #9
def bootstrap(repo_id="testRepo", api_key="demo-api-key", test=False):
    """
    Bootstraps the data provider unix service.

    It instantiates the Configuration Manager, Dataset Manager, Communication
    Manager and the Execution Pipeline.
    """
    # 1. Set up Configuration Manager.
    config_manager = ConfigurationManager()
    config_manager.bootstrap()

    runner = DMLRunner(config_manager)

    optimizer = FederatedAveragingOptimizer(runner, repo_id)

    loop = asyncio.get_event_loop()

    websocket_client = WebSocketClient(optimizer, config_manager, repo_id,
                                       api_key, test)
    # mappings = dataset_manager.get_mappings()

    # 7. Wait for the threads to end.
    # TODO: Need to make it work as a daemon.
    loop.run_until_complete(websocket_client.prepare_dml())
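A minimal, hypothetical entry point for the service above; the call blocks inside bootstrap() on the websocket loop, and the argument values are the signature's defaults:

if __name__ == "__main__":
    # Illustrative invocation only; real deployments would pass a real API key.
    bootstrap(repo_id="testRepo", api_key="demo-api-key")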
    # Constructor of the job scheduler; the enclosing class is not shown.
    def __init__(self, config_manager):
        """
        Initializes the instance.
        """
        logging.info("Setting up scheduler...")
        self.event = Event()
        self.processed = []
        self.history = []

        config = config_manager.get_config()

        self.frequency_in_mins = config.getint("SCHEDULER",
                                               "frequency_in_mins")
        self.num_runners = config.getint("SCHEDULER", "num_runners")
        self.max_tries = config.getint("SCHEDULER", "max_tries")
        self.queue = Queue(self.num_runners * self.num_runners)
        multiprocessing.set_start_method('spawn', force=True)
        self.pool = Pool(processes=self.num_runners)
        self.runners = [
            DMLRunner(config_manager) for _ in range(self.num_runners)
        ]
        self.current_jobs = [None for _ in range(self.num_runners)]
        self.current_results = [None for _ in range(self.num_runners)]
        logging.info("Scheduler is set up!")
Example #11
@pytest.fixture
def init_dmlresult_obj(config_manager, small_filepath):
    runner = DMLRunner(config_manager)
    initialize_job = make_initialize_job(make_model_json(), small_filepath)
    result = runner.run_job(initialize_job)
    return result
Example #12
@pytest.fixture
def runner(config_manager):
    runner = DMLRunner(config_manager)
    return runner
Example #13
@pytest.fixture
def runner(config_manager, ipfs_client):
    runner = DMLRunner(config_manager)
    runner.configure(ipfs_client=ipfs_client)
    return runner
Example #14
@pytest.fixture
def init_dmlresult_obj(config_manager, small_uuid, dataset_manager):
    runner = DMLRunner(config_manager)
    small_filepath = dataset_manager.get_mappings()[small_uuid]
    initialize_job = make_initialize_job(make_model_json(), small_filepath)
    result = runner.run_job(initialize_job)
    return result
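Taken together, these fixtures form a dependency chain (init and split feed train) that pytest resolves automatically when they are registered in a conftest.py. A minimal sketch of a test consuming the chain (the test name is hypothetical; the status value and results key are the ones used above):

def test_train_produces_weights(train_dmlresult_obj):
    # Requesting train_dmlresult_obj pulls in the init and split fixtures transitively.
    assert train_dmlresult_obj.status == 'successful'
    assert train_dmlresult_obj.results['weights']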