def test_blockchain_utils_setter_multiple_values(config, ipfs_client):
    """Setting the same key twice should append the value to chain state twice."""
    port = config.getint('BLOCKCHAIN', 'http_port')
    timeout = config.getint('BLOCKCHAIN', 'timeout')
    before = getter(client=ipfs_client, key=TEST_MULTIPLE_KEY, local_state=[],
                    port=port, timeout=timeout)
    # Post the same value twice under the same key.
    for _ in range(2):
        receipt = setter(
            client=ipfs_client,
            key=TEST_MULTIPLE_KEY,
            port=port,
            value=TEST_VALUE,
        )
        assert receipt, "Setting failed"
    after = getter(client=ipfs_client, key=TEST_MULTIPLE_KEY, local_state=[],
                   port=port, timeout=timeout)
    # Both writes should have been appended, in order.
    assert after == before + [TEST_VALUE, TEST_VALUE], \
        "Multi-setter failed!"
def test_federated_learning_two_clients_automated(new_session_event, new_session_key, new_session_key_two, config_manager_two, config_manager, ipfs_client):
    """
    Tests fully automated federated learning.
    """
    # Spin up both clients.
    communication_manager, blockchain_gateway, scheduler = setup_client(
        config_manager, ipfs_client)
    communication_manager_2, blockchain_gateway_2, scheduler_2 = setup_client(
        config_manager_two, ipfs_client)
    # (0) Someone sends decentralized learning event to the chain, once
    # under each session key.
    receipts = [
        setter(client=blockchain_gateway._client,
               key=session_key,
               port=blockchain_gateway._port,
               value=new_session_event,
               flag=True,
               round_num=0)
        for session_key in (new_session_key, new_session_key_two)
    ]
    assert receipts[0]
    assert receipts[1]
    # Everything else is driven by the cron loops: schedulers first,
    # then gateways, mirroring the manual test's ordering.
    for service in (scheduler, scheduler_2,
                    blockchain_gateway, blockchain_gateway_2):
        service.start_cron(period_in_mins=0.01)
    deadline = 50 + time.time()
    while time.time() < deadline and (len(scheduler.processed) != 8
                                      or len(scheduler_2.processed) != 8):
        time.sleep(1)
    for service in (scheduler, scheduler_2,
                    blockchain_gateway, blockchain_gateway_2):
        service.stop_cron()
    assert len(scheduler.processed) == 8, \
        "Jobs {} failed/not completed in time!".format([
            result.job.job_type for result in scheduler.processed])
    assert len(scheduler_2.processed) == 8, \
        "Jobs {} failed/not completed in time!".format([
            result.job.job_type for result in scheduler_2.processed])
    # Optimizers terminate once all rounds complete.
    assert communication_manager.optimizer is None
    assert communication_manager_2.optimizer is None
def post_directories_and_category_labels(self, key):
    """
    Post the ED Directory on blockchain with the given key.
    The ED Directory is a JSON dictionary whose keys represent the
    dataset folders in the directory and whose values represent the
    corresponding datasets. Assume only one dataset file per folder.
    See _generate_ed_directory docstring for an example of what an
    ED Directory looks like.

    Returns the transaction receipt from the setter call.
    """
    # Guard clauses: key length limit, then required configuration.
    assert len(key) <= 30, \
        "Keys for datasets can only be at most 30 characters long."
    assert self._db_client, \
        "DB client has not been set. Dataset Manager needs to be configured!"
    assert self._ipfs_client, \
        "IPFS client has not been set. Dataset Manager needs to be configured!"
    directory = self._generate_ed_directory()
    # Record this key's classification before posting the directory.
    self._db_client.add_classifications([key], [self.classification])
    return setter(client=self._ipfs_client, key=key, value=directory,
                  port=self._port)
def post_dataset(self, name):
    """
    Post samples of datasets on blockchain with automatically generated
    metadata under provided name as the key.

    For each dataset folder, posts a 10% sample of the dataset ('ds')
    and pandas describe() summary statistics ('md').

    Returns the transaction receipt from the setter call.

    IMPORTANT: NOT FINISHED DEBUGGING, DO NOT USE
    """
    filepath = self._raw_filepath
    self.check_key_length(name)
    value = {}
    # Only immediate subdirectories of the raw filepath hold datasets.
    folders = [
        entry for entry in os.listdir(filepath)
        if os.path.isdir(os.path.join(os.path.abspath(filepath), entry))
    ]
    for folder in folders:
        folder_dict = {}
        folder_path = os.path.join(os.path.abspath(filepath), folder)
        # Assume exactly one dataset file per folder (first entry).
        file = list(os.listdir(folder_path))[0]
        file_path = os.path.join(folder_path, file)
        dataset = pd.read_csv(file_path)
        md = pd.DataFrame(dataset.describe())
        sample = dataset.sample(frac=0.1)
        folder_dict['ds'] = sample.to_json()
        folder_dict['md'] = md.to_json()
        value[folder] = folder_dict
    receipt = setter(client=self._client, key=name, value=value,
                     port=self._port)
    # BUG FIX: the receipt was previously computed but never returned,
    # unlike the other post_* methods, so callers could not check it.
    return receipt
def post_dataset_with_md(self, name):
    """
    Post samples of datasets on blockchain along with provided metadata
    under the provided name as the key.

    Metadata files are recognized by an 'md' filename prefix; every other
    file in a folder is treated as the dataset, of which a 10% sample is
    posted. Raises NoMetadataFoundError if a folder has no metadata file.

    Returns the transaction receipt from the setter call.

    IMPORTANT: NOT FINISHED DEBUGGING, DO NOT USE
    """
    filepath = self._raw_filepath
    self.check_key_length(name)
    value = {}
    # Only immediate subdirectories of the raw filepath hold datasets.
    folders = [
        entry for entry in os.listdir(filepath)
        if os.path.isdir(os.path.join(os.path.abspath(filepath), entry))
    ]
    for folder in folders:
        folder_dict = {}
        folder_path = os.path.join(os.path.abspath(filepath), folder)
        files = os.listdir(folder_path)
        for file in files:
            file_path = os.path.join(folder_path, file)
            if file[:2] == 'md':
                # Provided metadata file: post it verbatim.
                metadata = pd.read_csv(file_path)
                folder_dict['md'] = metadata.to_json()
            else:
                # Dataset file: post a 10% sample.
                # NOTE(review): if a folder contains several non-md files,
                # each overwrites 'ds' — presumably one dataset per folder;
                # confirm upstream.
                dataset = pd.read_csv(file_path)
                sample = dataset.sample(frac=0.1)
                folder_dict['ds'] = sample.to_json()
        if 'md' not in folder_dict:
            raise NoMetadataFoundError(folder)
        value[folder] = folder_dict
    receipt = setter(client=self._client, key=name, value=value,
                     port=self._port)
    # BUG FIX: the receipt was previously computed but never returned,
    # unlike the other post_* methods, so callers could not check it.
    return receipt
def test_blockchain_gateway_filters_sessions(blockchain_gateway, communication_manager):
    """ Ensures that the gateway won't intercept messages not intended for it """
    serialized_job = make_serialized_job()
    session_event = {
        "optimizer_params": "",
        "serialized_job": serialized_job,
    }
    # Key whose uuid does not belong to this gateway.
    foreign_key = {
        "dataset_uuid": 5678,
        "label_column_name": "label",
    }
    receipt = setter(blockchain_gateway._client, foreign_key,
                     blockchain_gateway._port, 0, session_event, flag=True)
    assert receipt
    blockchain_gateway._listen(blockchain_gateway._handle_new_session_creation,
                               blockchain_gateway._filter_new_session)
    # at this point we should listen for decentralized learning
    # not hear it (filter_new_session() == False)
    # and therefore not update our communication manager
    assert communication_manager.dummy_msg_type == "None", \
        "Shouldn't have heard anything but heard a message with uuid {}".format(
            communication_manager.job_data["dataset_uuid"])
    assert communication_manager.data_provider_info == "None", \
        "Shouldn't have heard anything!"
    assert communication_manager.job_info == "None", \
        "Shouldn't have heard anything!"
def test_blockchain_gateway_can_listen_decentralized_learning(
        blockchain_gateway, communication_manager):
    """
    Uses Mock Communication Manager to ensure that the Gateway
    can listen for decentralized learning.
    """
    session_event = {
        "optimizer_params": "this cannot be empty",
        "serialized_job": make_serialized_job(),
    }
    # Key whose uuid the gateway should accept.
    matching_key = {
        "dataset_uuid": 1357,
        "label_column_name": "label",
    }
    receipt = setter(blockchain_gateway._client, matching_key,
                     blockchain_gateway._port, 0, session_event, True)
    assert receipt
    blockchain_gateway._listen(blockchain_gateway._handle_new_session_creation,
                               blockchain_gateway._filter_new_session)
    # at this point we should listen for decentralized learning
    # hear it (filter_new_session() == True)
    # and update our communication manager
    assert communication_manager.dummy_msg_type == \
        RawEventTypes.NEW_MESSAGE.name, "Wrong msg_type"
    assert communication_manager.data_provider_info == {
        "dataset_uuid": 1357,
        "label_column_name": "label",
    }
    communication_manager.reset()
def test_blockchain_gateway_can_listen_decentralized_learning(
        blockchain_gateway, communication_manager):
    """
    Uses Mock Communication Manager to ensure that the Gateway
    can listen for decentralized learning.
    This test has some problems since the loop of events is incomplete.
    # NOTE: Should be updated after Averaging/Communication PRs are merged
    """
    model_payload = {"model": "hello world"}
    receipt = setter(blockchain_gateway._client, None,
                     blockchain_gateway._port, model_payload, True)
    assert receipt
    blockchain_gateway._listen(blockchain_gateway._handle_new_session_creation,
                               blockchain_gateway._filter_new_session)
    # at this point we should listen for decentralized learning, hear it,
    # and update our communication manager
    assert communication_manager.dummy1 == \
        MessageEventTypes.NEW_SESSION.name, "Wrong dummy1"
    assert communication_manager.dummy2 == model_payload, "Wrong dummy2"
def _communicate(self, job):
    """
    Communicates a message to the blockchain using the Runner's
    IPFS client, puts the tx_receipt in DMLResult.
    """
    receipt = setter(
        client=self._client,
        key=job.key,
        port=self._port,
        value=serialize_job(job),
    )
    # Wrap the receipt in a successful DMLResult for the caller.
    return DMLResult(
        status='successful',
        job=job,
        results={'receipt': receipt},
        error_message="",
    )
def _communicate(self, job, state):
    """
    Communicates a message to the blockchain using the Runner's
    IPFS client, puts the tx_receipt in DMLResult.
    """
    assert job.round_num, "Nonzero round number is needed for this message!"
    # NOTE(review): the key is the IPFS hash of serialize_weights(job.key);
    # presumably job.key holds weights here — confirm against callers.
    ipfs_key = content_to_ipfs(self._client, serialize_weights(job.key))
    receipt = setter(client=self._client,
                     key=ipfs_key,
                     port=self._port,
                     value=job.serialize_job(),
                     round_num=job.round_num,
                     state_append=state)
    # Wrap the receipt in a successful DMLResult for the caller.
    return DMLResult(
        status='successful',
        job=job,
        results={'receipt': receipt},
        error_message="",
    )
def test_blockchain_utils_setter_simple(config, ipfs_client):
    """A single set should append exactly one value to the chain state."""
    port = config.getint('BLOCKCHAIN', 'http_port')
    timeout = config.getint('BLOCKCHAIN', 'timeout')
    before = getter(client=ipfs_client, key=TEST_SINGLE_KEY, local_state=[],
                    port=port, timeout=timeout)
    receipt = setter(
        client=ipfs_client,
        key=TEST_SINGLE_KEY,
        port=port,
        round_num=0,
        value=TEST_VALUE,
    )
    assert receipt, "Setting failed"
    after = getter(client=ipfs_client, key=TEST_SINGLE_KEY, local_state=[],
                   port=port, timeout=timeout)
    assert after == before + [TEST_VALUE], "Setter failed!"
def test_federated_learning_two_clients_manual(new_session_event, new_session_key, new_session_key_two, config_manager_two, config_manager, ipfs_client): """ Integration test that checks that one round of federated learning can be COMPLETED with max_rounds = 2, num_averages_per_round = 2 This is everything that happens in this test: """ # Set up first client communication_manager, blockchain_gateway, scheduler = setup_client( config_manager, ipfs_client) # Set up second client communication_manager_2, blockchain_gateway_2, scheduler_2 = setup_client( config_manager_two, ipfs_client) # (0) Someone sends decentralized learning event to the chain tx_receipt = setter(client=blockchain_gateway._client, key=new_session_key, port=blockchain_gateway._port, value=new_session_event, flag=True, round_num=0) tx_receipt_two = setter(client=blockchain_gateway._client, key=new_session_key_two, port=blockchain_gateway._port, value=new_session_event, flag=True, round_num=0) assert tx_receipt assert tx_receipt_two # (1) Gateway_1 listens for the event blockchain_gateway._listen(blockchain_gateway._handle_new_session_creation, blockchain_gateway._filter_new_session) # (2) Gateway_2 listens for the event blockchain_gateway_2._listen( blockchain_gateway_2._handle_new_session_creation, blockchain_gateway_2._filter_new_session) # (3) Scheduler_1 and Scheduler_2 runs the following jobs: # (3a.1) JOB_INIT # (3a.2) JOB_SPLIT # (3b) JOB_TRAIN # (3c) JOB_COMM scheduler.start_cron(period_in_mins=0.01) scheduler_2.start_cron(period_in_mins=0.01) timeout = time.time() + 25 while time.time() < timeout and (len(scheduler.processed) != 4\ or len(scheduler_2.processed) != 4): time.sleep(5) assert len(scheduler.processed) == 4, "Jobs failed/not completed in time!" assert len( scheduler_2.processed) == 4, "Jobs failed/not completed in time!" 
# (4) Gateway_1 listens for the new weights and hears only Gateway_2's weights blockchain_gateway._listen(blockchain_gateway._handle_new_session_info, blockchain_gateway._filter_new_session_info) # (6) Gateway_2 listens for the new weights and hears only Gateway_1's weights blockchain_gateway_2._listen(blockchain_gateway_2._handle_new_session_info, blockchain_gateway_2._filter_new_session_info) # (8) Scheduler_1 and Scheduler_2 runs the following jobs: # (6a) JOB_AVG # (6a) JOB_TRAIN # (6a) JOB_COMM timeout = time.time() + 20 while time.time() < timeout and (len(scheduler.processed) != 7\ or len(scheduler_2.processed) != 7): time.sleep(4) assert len(scheduler.processed) == 7, \ "Jobs {} failed/not completed in time!".format([ result.job.job_type for result in scheduler.processed]) assert len( scheduler_2.processed) == 7, "Jobs failed/not completed in time!" # (4) Gateway_1 listens for the new weights and hears only Gateway_2's weights blockchain_gateway._listen(blockchain_gateway._handle_new_session_info, blockchain_gateway._filter_new_session_info) # (6) Gateway_2 listens for the new weights and hears only Gateway_1's weights blockchain_gateway_2._listen(blockchain_gateway_2._handle_new_session_info, blockchain_gateway_2._filter_new_session_info) # (13) Optimizer tells Communication Manager to schedule JOB_AVG # (14) Scheduler_1 and Scheduler_2 runs the following jobs: # (9a) JOB_AVG timeout = time.time() + 10 while time.time() < timeout and (len(scheduler.processed) != 8\ or len(scheduler_2.processed) != 8): time.sleep(2) scheduler.stop_cron() scheduler_2.stop_cron() blockchain_gateway.reset() blockchain_gateway_2.reset() assert len(scheduler.processed) == 8, "Jobs failed/not completed in time!" assert len( scheduler_2.processed) == 8, "Jobs failed/not completed in time!" # (10) Optimizer terminates assert communication_manager.optimizer is None, "Should have terminated!" assert communication_manager_2.optimizer is None, "Should have terminated!"