Esempio n. 1
0
def test_blockchain_utils_setter_multiple_values(config, ipfs_client):
    """
    Write TEST_VALUE twice under TEST_MULTIPLE_KEY and check that getter
    returns the previous contents followed by both new values.

    The pre-write read serves as the baseline, so the test does not assume
    the key starts out empty.
    """
    def read_values():
        # Every read passes a new empty list as local_state.
        return getter(client=ipfs_client,
                      key=TEST_MULTIPLE_KEY,
                      local_state=[],
                      port=config.getint('BLOCKCHAIN', 'http_port'),
                      timeout=config.getint('BLOCKCHAIN', 'timeout'))

    baseline = read_values()

    # Two identical writes; each must yield a truthy receipt.
    for _ in range(2):
        receipt = setter(
            client=ipfs_client,
            key=TEST_MULTIPLE_KEY,
            port=config.getint('BLOCKCHAIN', 'http_port'),
            value=TEST_VALUE,
        )
        assert receipt, "Setting failed"

    observed = read_values()
    assert observed == baseline + [TEST_VALUE, TEST_VALUE], \
        "Multi-setter failed!"
def test_federated_learning_two_clients_automated(new_session_event,
                                                  new_session_key,
                                                  new_session_key_two,
                                                  config_manager_two,
                                                  config_manager, ipfs_client):
    """
    Tests fully automated federated learning with two clients.

    Both schedulers and both gateways run on their cron loops. The test
    passes when each scheduler has processed exactly 8 jobs within the
    50-second window and each communication manager's optimizer is None.
    """
    expected_jobs = 8

    # Two clients built from separate configs on the same IPFS client.
    comm_mgr_a, gateway_a, scheduler_a = setup_client(
        config_manager, ipfs_client)
    comm_mgr_b, gateway_b, scheduler_b = setup_client(
        config_manager_two, ipfs_client)

    # (0) Someone sends decentralized learning events to the chain: one per
    # session key, both through the first gateway's client and port.
    receipts = [
        setter(client=gateway_a._client,
               key=session_key,
               port=gateway_a._port,
               value=new_session_event,
               flag=True,
               round_num=0)
        for session_key in (new_session_key, new_session_key_two)
    ]
    for receipt in receipts:
        assert receipt

    # Schedulers first, then gateways, for both start and stop.
    cron_workers = (scheduler_a, scheduler_b, gateway_a, gateway_b)
    for worker in cron_workers:
        worker.start_cron(period_in_mins=0.01)

    def both_finished():
        return (len(scheduler_a.processed) == expected_jobs
                and len(scheduler_b.processed) == expected_jobs)

    deadline = 50 + time.time()
    while time.time() < deadline and not both_finished():
        time.sleep(1)

    for worker in cron_workers:
        worker.stop_cron()

    for sched in (scheduler_a, scheduler_b):
        assert len(sched.processed) == expected_jobs, \
            "Jobs {} failed/not completed in time!".format(
                [result.job.job_type for result in sched.processed])

    assert comm_mgr_a.optimizer is None
    assert comm_mgr_b.optimizer is None
    def post_directories_and_category_labels(self, key):
        """
        Post the ED Directory on blockchain under the given key and register
        `self.classification` for that key through the DB client.

        The ED Directory is a JSON dictionary whose keys represent the dataset
        folders in the directory and whose values represent the corresponding
        datasets. Assume only one dataset file per folder.

        See _generate_ed_directory docstring for an example of what an ED
        Directory looks like.

        Returns the receipt produced by the setter call. Fails an assertion
        if the key is longer than 30 characters or if either the DB client or
        the IPFS client is unset.
        """
        assert len(key) <= 30, (
            "Keys for datasets can only be at most 30 characters long.")
        assert self._db_client, (
            "DB client has not been set. Dataset Manager needs to be configured!")
        assert self._ipfs_client, (
            "IPFS client has not been set. Dataset Manager needs to be configured!")

        directory = self._generate_ed_directory()
        self._db_client.add_classifications([key], [self.classification])

        return setter(client=self._ipfs_client,
                      key=key,
                      value=directory,
                      port=self._port)
    def post_dataset(self, name):
        """
        Post samples of datasets on blockchain with automatically generated
        metadata under provided name as the key.

        For each sub-folder of `self._raw_filepath`, the first file listed in
        that folder is read as CSV. A random 10% sample of it is stored under
        'ds' and the output of `DataFrame.describe()` under 'md', both as
        JSON strings. The resulting {folder: {'ds': ..., 'md': ...}} dict is
        the value passed to setter.

        Returns the receipt from the setter call.

        IMPORTANT: NOT FINISHED DEBUGGING, DO NOT USE
        """
        filepath = self._raw_filepath
        self.check_key_length(name)
        root = os.path.abspath(filepath)
        folders = [entry for entry in os.listdir(root)
                   if os.path.isdir(os.path.join(root, entry))]
        value = {}
        for folder in folders:
            folder_path = os.path.join(root, folder)
            # os.listdir order is arbitrary; presumably each folder holds a
            # single dataset file, so "first" is unambiguous (TODO confirm).
            first_file = os.listdir(folder_path)[0]
            dataset = pd.read_csv(os.path.join(folder_path, first_file))
            md = pd.DataFrame(dataset.describe())
            sample = dataset.sample(frac=0.1)
            value[folder] = {
                'ds': sample.to_json(),
                'md': md.to_json(),
            }
        # Hand the receipt back to the caller instead of discarding it.
        return setter(client=self._client, key=name, value=value,
                      port=self._port)
    def post_dataset_with_md(self, name):
        """
        Post samples of datasets on blockchain along with provided metadata
        under the provided name as the key.

        For each sub-folder of `self._raw_filepath`, every file is read as
        CSV. A file whose name starts with 'md' is stored whole under 'md';
        any other file contributes a random 10% sample under 'ds'. Both are
        stored as JSON strings, and a later file of the same kind overwrites
        an earlier one.

        Returns the receipt from the setter call.

        Raises NoMetadataFoundError (with the folder name) when a folder has
        no file whose name starts with 'md'.

        IMPORTANT: NOT FINISHED DEBUGGING, DO NOT USE
        """
        filepath = self._raw_filepath
        self.check_key_length(name)
        root = os.path.abspath(filepath)
        folders = [entry for entry in os.listdir(root)
                   if os.path.isdir(os.path.join(root, entry))]
        value = {}
        for folder in folders:
            folder_dict = {}
            folder_path = os.path.join(root, folder)
            for entry in os.listdir(folder_path):
                frame = pd.read_csv(os.path.join(folder_path, entry))
                if entry.startswith('md'):
                    folder_dict['md'] = frame.to_json()
                else:
                    folder_dict['ds'] = frame.sample(frac=0.1).to_json()
            if 'md' not in folder_dict:
                raise NoMetadataFoundError(folder)
            value[folder] = folder_dict
        # Hand the receipt back to the caller instead of discarding it.
        return setter(client=self._client, key=name, value=value,
                      port=self._port)
def test_blockchain_gateway_filters_sessions(blockchain_gateway,
                                             communication_manager):
    """
    Ensures that the gateway won't intercept messages not intended for it.

    A new-session event with empty optimizer params is posted, the gateway
    listens once, and the mock communication manager must still report
    "None" for the message type, data provider info and job info.
    """
    new_session_event = {
        "optimizer_params": "",
        "serialized_job": make_serialized_job(),
    }
    provider_info = {
        "dataset_uuid": 5678,
        "label_column_name": "label",
    }
    tx_receipt = setter(blockchain_gateway._client,
                        provider_info,
                        blockchain_gateway._port,
                        0,
                        new_session_event,
                        flag=True)
    assert tx_receipt

    # Listen for decentralized learning; the filter is expected to reject
    # this event (filter_new_session() == False), so the communication
    # manager must stay untouched.
    blockchain_gateway._listen(
        blockchain_gateway._handle_new_session_creation,
        blockchain_gateway._filter_new_session,
    )

    assert communication_manager.dummy_msg_type == "None", \
        "Shouldn't have heard anything but heard a message with uuid {}".format(
            communication_manager.job_data["dataset_uuid"])
    assert communication_manager.data_provider_info == "None", \
        "Shouldn't have heard anything!"
    assert communication_manager.job_info == "None", \
        "Shouldn't have heard anything!"
def test_blockchain_gateway_can_listen_decentralized_learning(
        blockchain_gateway, communication_manager):
    """
    Uses Mock Communication Manager to ensure that the Gateway
    can listen for decentralized learning.

    A new-session event with non-empty optimizer params is posted, the
    gateway listens once, and the mock must report a NEW_MESSAGE event with
    the same data provider info that was posted.
    """
    provider_info = {
        "dataset_uuid": 1357,
        "label_column_name": "label",
    }
    new_session_event = {
        "optimizer_params": "this cannot be empty",
        "serialized_job": make_serialized_job(),
    }
    tx_receipt = setter(blockchain_gateway._client, provider_info,
                        blockchain_gateway._port, 0, new_session_event, True)
    assert tx_receipt

    # Listen for decentralized learning; the filter is expected to accept
    # this event (filter_new_session() == True) and the communication
    # manager should be updated.
    blockchain_gateway._listen(
        blockchain_gateway._handle_new_session_creation,
        blockchain_gateway._filter_new_session,
    )

    assert communication_manager.dummy_msg_type == RawEventTypes.NEW_MESSAGE.name, \
        "Wrong msg_type"
    # Compared against a fresh literal rather than the object handed to setter.
    assert communication_manager.data_provider_info == {
        "dataset_uuid": 1357,
        "label_column_name": "label",
    }
    communication_manager.reset()
Esempio n. 8
0
def test_blockchain_gateway_can_listen_decentralized_learning(
        blockchain_gateway, communication_manager):
    """
    Uses Mock Communication Manager to ensure that the Gateway
    can listen for decentralized learning.

    This test has some problems since the loop of events is incomplete.
    # NOTE: Should be updated after Averaging/Communication PRs are merged
    """
    gateway_client = blockchain_gateway._client
    gateway_port = blockchain_gateway._port
    tx_receipt = setter(gateway_client, None, gateway_port,
                        {"model": "hello world"}, True)
    assert tx_receipt

    blockchain_gateway._listen(
        blockchain_gateway._handle_new_session_creation,
        blockchain_gateway._filter_new_session,
    )

    # At this point we should have listened for decentralized learning,
    # heard it, and updated our communication manager.
    expected_payload = {"model": "hello world"}
    assert communication_manager.dummy1 == MessageEventTypes.NEW_SESSION.name, "Wrong dummy1"
    assert communication_manager.dummy2 == expected_payload, "Wrong dummy2"
Esempio n. 9
0
 def _communicate(self, job):
     """
     Communicates a message to the blockchain using the Runner's
     IPFS client and wraps the outcome in a DMLResult.

     The serialized job is posted under `job.key`. The returned DMLResult
     has status 'successful', an empty error message, and the transaction
     receipt under results['receipt'].
     """
     receipt = setter(client=self._client,
                      key=job.key,
                      port=self._port,
                      value=serialize_job(job))
     return DMLResult(status='successful',
                      job=job,
                      results={'receipt': receipt},
                      error_message="")
Esempio n. 10
0
 def _communicate(self, job, state):
     """
     Communicates a message to the blockchain using the Runner's
     IPFS client and wraps the outcome in a DMLResult.

     The key passed to setter is the result of content_to_ipfs applied to
     serialize_weights(job.key). `state` is forwarded to setter as
     state_append. Fails an assertion when job.round_num is falsy.

     The returned DMLResult has status 'successful', an empty error
     message, and the transaction receipt under results['receipt'].
     """
     assert job.round_num, "Nonzero round number is needed for this message!"
     ipfs_key = content_to_ipfs(self._client, serialize_weights(job.key))
     receipt = setter(client=self._client,
                      key=ipfs_key,
                      port=self._port,
                      value=job.serialize_job(),
                      round_num=job.round_num,
                      state_append=state)
     return DMLResult(status='successful',
                      job=job,
                      results={'receipt': receipt},
                      error_message="")
def test_blockchain_utils_setter_simple(config, ipfs_client):
    """
    Write TEST_VALUE once under TEST_SINGLE_KEY (round 0) and check that
    getter returns the previous contents followed by the new value.
    """
    def read_values():
        # Every read passes a new empty list as local_state.
        return getter(client=ipfs_client,
                      key=TEST_SINGLE_KEY,
                      local_state=[],
                      port=config.getint('BLOCKCHAIN', 'http_port'),
                      timeout=config.getint('BLOCKCHAIN', 'timeout'))

    baseline = read_values()

    receipt = setter(client=ipfs_client,
                     key=TEST_SINGLE_KEY,
                     port=config.getint('BLOCKCHAIN', 'http_port'),
                     round_num=0,
                     value=TEST_VALUE)
    assert receipt, "Setting failed"

    observed = read_values()
    assert observed == baseline + [TEST_VALUE], "Setter failed!"
def test_federated_learning_two_clients_manual(new_session_event,
                                               new_session_key,
                                               new_session_key_two,
                                               config_manager_two,
                                               config_manager, ipfs_client):
    """
    Integration test that checks that one round of federated learning can be
    COMPLETED with max_rounds = 2, num_averages_per_round = 2

    The gateways are driven by hand through `_listen`, while the schedulers
    run on their cron loops. The numbered comments in the body describe each
    step. The scheduler crons are always stopped and the gateways reset,
    even when an intermediate assertion fails.
    """
    def wait_for_jobs(count, timeout_secs, poll_secs):
        # Poll until both schedulers report exactly `count` processed jobs
        # or `timeout_secs` have elapsed, sleeping `poll_secs` between polls.
        deadline = time.time() + timeout_secs
        while time.time() < deadline and (
                len(scheduler.processed) != count
                or len(scheduler_2.processed) != count):
            time.sleep(poll_secs)

    # Set up first client
    communication_manager, blockchain_gateway, scheduler = setup_client(
        config_manager, ipfs_client)
    # Set up second client
    communication_manager_2, blockchain_gateway_2, scheduler_2 = setup_client(
        config_manager_two, ipfs_client)
    # (0) Someone sends decentralized learning event to the chain
    tx_receipt = setter(client=blockchain_gateway._client,
                        key=new_session_key,
                        port=blockchain_gateway._port,
                        value=new_session_event,
                        flag=True,
                        round_num=0)
    tx_receipt_two = setter(client=blockchain_gateway._client,
                            key=new_session_key_two,
                            port=blockchain_gateway._port,
                            value=new_session_event,
                            flag=True,
                            round_num=0)
    assert tx_receipt
    assert tx_receipt_two
    # (1) Gateway_1 listens for the event
    blockchain_gateway._listen(blockchain_gateway._handle_new_session_creation,
                               blockchain_gateway._filter_new_session)
    # (2) Gateway_2 listens for the event
    blockchain_gateway_2._listen(
        blockchain_gateway_2._handle_new_session_creation,
        blockchain_gateway_2._filter_new_session)
    scheduler.start_cron(period_in_mins=0.01)
    scheduler_2.start_cron(period_in_mins=0.01)
    try:
        # (3) Scheduler_1 and Scheduler_2 run the following jobs:
        #     JOB_INIT, JOB_SPLIT, JOB_TRAIN, JOB_COMM
        wait_for_jobs(4, 25, 5)
        assert len(scheduler.processed) == 4, \
            "Jobs failed/not completed in time!"
        assert len(scheduler_2.processed) == 4, \
            "Jobs failed/not completed in time!"
        # (4) Gateway_1 listens for the new weights and hears only
        #     Gateway_2's weights
        blockchain_gateway._listen(
            blockchain_gateway._handle_new_session_info,
            blockchain_gateway._filter_new_session_info)
        # (5) Gateway_2 listens for the new weights and hears only
        #     Gateway_1's weights
        blockchain_gateway_2._listen(
            blockchain_gateway_2._handle_new_session_info,
            blockchain_gateway_2._filter_new_session_info)
        # (6) Scheduler_1 and Scheduler_2 run the following jobs:
        #     JOB_AVG, JOB_TRAIN, JOB_COMM
        wait_for_jobs(7, 20, 4)
        assert len(scheduler.processed) == 7, \
            "Jobs {} failed/not completed in time!".format([
                result.job.job_type for result in scheduler.processed])
        assert len(scheduler_2.processed) == 7, \
            "Jobs failed/not completed in time!"
        # (7) Gateway_1 listens for the new weights and hears only
        #     Gateway_2's weights
        blockchain_gateway._listen(
            blockchain_gateway._handle_new_session_info,
            blockchain_gateway._filter_new_session_info)
        # (8) Gateway_2 listens for the new weights and hears only
        #     Gateway_1's weights
        blockchain_gateway_2._listen(
            blockchain_gateway_2._handle_new_session_info,
            blockchain_gateway_2._filter_new_session_info)
        # (9) Optimizer tells Communication Manager to schedule JOB_AVG;
        #     Scheduler_1 and Scheduler_2 run JOB_AVG
        wait_for_jobs(8, 10, 2)
    finally:
        # Always shut the crons down so a failed assertion above does not
        # leave them running after this test.
        scheduler.stop_cron()
        scheduler_2.stop_cron()
        blockchain_gateway.reset()
        blockchain_gateway_2.reset()
    assert len(scheduler.processed) == 8, "Jobs failed/not completed in time!"
    assert len(scheduler_2.processed) == 8, \
        "Jobs failed/not completed in time!"
    # (10) Optimizer terminates
    assert communication_manager.optimizer is None, "Should have terminated!"
    assert communication_manager_2.optimizer is None, "Should have terminated!"