Example #1
 def _average(self, job):
     """
     Average the weights in the job weighted by their omegas.
     """
     assert list(job.new_weights), "No new_weights supplied to average!"
     job.new_weights = deserialize_weights(job.new_weights)
     assert isinstance(
         job.new_weights[0],
         np.ndarray), "should have been ndarray but was {}".format(
             type(job.new_weights[0]))
     assert job.omega, "No omega supplied to average!"
     assert isinstance(job.omega, float), job.omega
     assert job.sigma_omega, "No sigma_omega supplied to average!"
     assert isinstance(job.sigma_omega, float), job.sigma_omega
     assert isinstance(
         job.weights[0],
         np.ndarray), "should have been ndarray but was {}".format(
             type(job.weights[0]))
     averaged_weights = self._weighted_running_avg(job.weights,
                                                   job.new_weights,
                                                   job.sigma_omega,
                                                   job.omega)
     result = DMLResult(
         status='successful',
         job=job,
         results={
             'weights': averaged_weights,
         },
         error_message="",
     )
     return result
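`DMLResult` itself never appears in these examples. A minimal sketch consistent with how it is constructed throughout (the class body and defaults below are assumptions, not the project's actual definition):

class DMLResult(object):
    """Hypothetical sketch of the result container used throughout."""
    def __init__(self, status, job, results=None, error_message=""):
        self.status = status                # 'successful' or 'failed'
        self.job = job                      # the originating DMLJob
        self.results = results or {}        # payload, e.g. {'weights': ...}
        self.error_message = error_message  # populated on failure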
Example #2
    def run_job(self, job, state=None):
        """
        Identifies a DMLJob type and executes it.

        If the runner is already executing a job, it silently does nothing.
        """
        # Silently do nothing if a job is already running (per the docstring).
        if self.current_job is not None:
            return None
        assert job.job_type.upper() in self.JOB_CALLBACKS, \
            'DMLJob type ({0}) is not valid'.format(job.job_type)
        self.current_job = job
        logging.info("Running job (type: {0})...".format(job.job_type))
        callback = callback_handler_no_default(
            job.job_type,
            self.JOB_CALLBACKS,
        )
        try:
            if job.job_type in self.JOBS_NEEDING_STATE:
                results = callback(job, state)
            else:
                results = callback(job)
        except Exception as e:
            logging.error("RunnerError: " + str(e))
            results = DMLResult(
                status='failed',
                job=job,
                error_message=str(e),
                results={},
            )
        self.current_job = None
        logging.info("Finished running job! (type: {0})".format(job.job_type))
        return results
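`callback_handler_no_default` is a small dispatch helper that is not shown here; a plausible sketch, mirroring the uppercased lookup used in the assert above:

def callback_handler_no_default(job_type, callbacks):
    """Hypothetical sketch: look up the callback registered for a job
    type, raising (rather than falling back to a default) if missing."""
    key = job_type.upper()
    if key not in callbacks:
        raise KeyError("No callback for DMLJob type: {0}".format(job_type))
    return callbacks[key]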
Example #3
 def _initialize(self, job):
     """
     Initializes and returns a DMLResult with the model
     weights as specified in the model.
     """
     assert job.framework_type in ['keras'], \
         "Model type '{0}' is not supported.".format(job.framework_type)
     self.logger.info("Initializing model...")
     if job.framework_type == 'keras':
         if job.h5_model_folder:
             h5_model_folder = job.h5_model_folder
             h5_model_filepath = os.path.join(h5_model_folder, 'model.h5')
             model = load_model(h5_model_filepath)
             gradients = job.gradients
             # Apply the aggregated gradients layer by layer (assumes they
             # were already scaled by the learning rate upstream).
             new_weights = [w - g for w, g
                            in zip(model.get_weights(), gradients)]
             model.set_weights(new_weights)
             model.save(h5_model_filepath)
         else:
             h5_model_folder = os.path.join('sessions', job.session_id)
             h5_model_filepath = os.path.join(h5_model_folder, 'model.h5')
             model = load_model(h5_model_filepath)
             print("Loaded model!")
     results = DMLResult(
         status='successful',
         job=job,
         results={
             'model': model,
             'h5_model_folder': h5_model_folder,
         },
         error_message="",
     )
     return results
Example #4
    def _split_data(self, job):
        """
        Takes in a job, which should have the raw filepath assigned.

        1. In each folder, aggregate all data.
        2. Create session folder in transformed folder, along with data folders
           in session folder.
        3. Shuffle each transformed data and perform train-test split on each. 
        4. Put each training and test set in corresponding data folders in 
           session folder.
        5. Update session filepath in job.
        """

        # 1. Extract all of the raw data from the raw data filepath.
        assert job.raw_filepath, \
            "Raw data filepath has not been set!"
        files = os.listdir(job.raw_filepath)
        files = list(filter(lambda x: x.endswith('.csv'), files))
        assert len(files) == 1, \
            "Only supporting one file per dataset folder!"
        data_filepath = os.path.join(job.raw_filepath, files[0])
        raw_data = pd.read_csv(data_filepath)

        # 2. Create transformed folder, if it doesn't exist.
        transformed_filepath = os.path.join(job.raw_filepath, "transformed")
        if not os.path.isdir(transformed_filepath):
            os.makedirs(transformed_filepath)

        # 3. Create session folder with a unique random name (UUID).
        new_name = str(uuid.uuid4())
        session_filepath = os.path.join(transformed_filepath, new_name)
        os.makedirs(session_filepath)

        # 4. Retrieve train-test split from job.
        split = job.hyperparams['split']

        # 5. Shuffle raw data, then split into train and test set.
        raw_data = raw_data.sample(frac=1)
        split_index = int(len(raw_data) * split)
        train = raw_data.iloc[:split_index]
        test = raw_data.iloc[split_index:]

        # 6. Create train.csv and test.csv in data folder.
        train.to_csv(os.path.join(session_filepath, 'train.csv'), index=False)
        test.to_csv(os.path.join(session_filepath, 'test.csv'), index=False)

        # 7. Get datapoint count to be used in future jobs
        datapoint_count = count_datapoints(session_filepath)

        # 8. Return job with assigned session folder filepath and
        #    datapoint count.
        results = DMLResult(status='successful',
                            job=job,
                            results={
                                'session_filepath': session_filepath,
                                'datapoint_count': datapoint_count
                            },
                            error_message="")
        return results
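`count_datapoints` is not shown in these examples. A plausible sketch, returning a per-file row count keyed by filename, which matches how Example #13 indexes its result (data_count_mappings['train.csv']):

import os
import pandas as pd

def count_datapoints(folder_path):
    """Hypothetical sketch: map each CSV in the session folder to its
    row count, e.g. {'train.csv': 800, 'test.csv': 200}."""
    counts = {}
    for filename in os.listdir(folder_path):
        if filename.endswith('.csv'):
            counts[filename] = len(pd.read_csv(os.path.join(folder_path,
                                                            filename)))
    return counts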
Example #5
    def _communicate(self, job):
        """
        Communicates a message to the blockchain using the Runner's
        IPFS client, puts the tx_receipt in DMLResult.
        """
        assert job.round_num, "Nonzero round number is needed for this message!"

        train_results = {"weights": job.weights, "omega": job.omega}
        job.websocket_client.send_new_weights(train_results, job.session_id)

        results = DMLResult(
            status='successful',
            job=job,
            results={},
            error_message="",
        )
        return results
Example #6
    def _validate(self, job, custom_iterator=None):
        """
        Validates on all the local data the specified machine learning model at
        the state specified.

        Returns a DMLResult with the metrics returned by the model.

        NOTE: Assumes 'weights' are the actual weights and not a path.
        """
        logging.info("Validating model...")

        train_dataset_path, test_dataset_path = self._set_up(job)
        data_count = job.datapoint_count
        # Choose the dataset to validate on.
        batch_size = job.hyperparams['batch_size']
        split = job.hyperparams['split']
        if custom_iterator is None:
            dataset_iterator = create_random_test_dataset_iterator(
                test_dataset_path,
                batch_size=batch_size,
                labeler=job.label_column_name,
            )
        else:
            dataset_iterator = custom_iterator

        # Validate the model the right way based on the model type.
        assert job.framework_type in ['keras'], \
            "Model type '{0}' is not supported.".format(job.framework_type)
        if job.framework_type == 'keras':
            val_stats = validate_keras_model(job.serialized_model, job.weights,
                                             dataset_iterator,
                                             data_count * (1 - split))

        # Return the validation stats.
        results = DMLResult(
            status='successful',
            job=job,
            results={
                'val_stats': val_stats,
            },
            error_message="",
        )
        return results
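The dataset iterators are referenced but never defined in these examples. A minimal sketch of `create_random_test_dataset_iterator`, assuming `labeler` names the label column in the CSV (the real implementation may stream batches, and Example #13 shows extra keyword arguments such as infinite and num_epochs on the train-side counterpart):

import pandas as pd

def create_random_test_dataset_iterator(dataset_path, batch_size, labeler):
    """Hypothetical sketch: yield shuffled (features, labels) batches
    from a test CSV; 'labeler' names the label column."""
    data = pd.read_csv(dataset_path).sample(frac=1)  # shuffle rows
    labels = data.pop(labeler).values
    features = data.values
    for start in range(0, len(features), batch_size):
        yield (features[start:start + batch_size],
               labels[start:start + batch_size])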
Example #7
 def _average(self, job):
     """
     Average the weights in the job weighted by their omegas.
     """
     assert list(job.new_weights), "No new_weights supplied to average!"
     assert job.omega, "No omega supplied to average!"
     assert job.sigma_omega, "No sigma_omega supplied to average!"
     averaged_weights = self._weighted_running_avg(job.weights,
                                                   job.new_weights,
                                                   job.sigma_omega,
                                                   job.omega)
     result = DMLResult(
         status='successful',
         job=job,
         results={
             'weights': averaged_weights,
         },
         error_message="",
     )
     return result
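`_weighted_running_avg` is called by both `_average` variants but never shown. A minimal sketch of the per-layer weighted running average implied by the call signature, where sigma_omega is the total weight accumulated so far and omega weights the new contribution (written here as a free function for readability):

def weighted_running_avg(weights, new_weights, sigma_omega, omega):
    """Hypothetical sketch of the update _weighted_running_avg implies:

        avg <- (sigma_omega * avg + omega * new) / (sigma_omega + omega)

    applied layer by layer across the weight lists."""
    total = sigma_omega + omega
    return [(sigma_omega * w + omega * new_w) / total
            for w, new_w in zip(weights, new_weights)]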
Example #8
 def _communicate(self, job):
     """
     Communicates a message to the blockchain using the Runner's
     IPFS client, puts the tx_receipt in DMLResult.
     """
     tx_receipt = setter(
         client=self._client,
         key=job.key,
         port=self._port,
         value=serialize_job(job),
     )
     results = DMLResult(
         status='successful',
         job=job,
         results={
             'receipt': tx_receipt,
         },
         error_message="",
     )
     return results
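`serialize_job` is also not shown. A rough sketch of one plausible implementation, assuming the job's attributes are JSON-serializable (ndarray weights would first need to go through serialize_weights):

import json

def serialize_job(job):
    """Hypothetical sketch: JSON-encode a job's attributes for storage
    on the blockchain; assumes they are JSON-serializable."""
    return json.dumps(vars(job))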
Example #9
 def _post_statistics(self, job):
     """
     Posts statistics about a training job to a cloud server
     """
     assert job.statistics, "No statistics to be served"
     server_url = "{0}/{1}/{2}".format(self._server_url, job.job_uuid,
                                       job.dataset_uuid)
     response = requests.post(url=server_url,
                              headers={'content-type': 'application/json'},
                              json={
                                  'round_num': job.round_num,
                                  'dataset_stats': job.statistics
                              })
     response_dict = response.json()
     results = DMLResult(
         status='successful',
         job=job,
         results={'receipt': response_dict['status']},
         error_message="",
     )
     return results
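The statistics server itself is not part of these examples; the client above only assumes that POSTing JSON to {server_url}/{job_uuid}/{dataset_uuid} returns a JSON body containing a 'status' field. A minimal hypothetical receiver (the framework and names are assumptions):

from flask import Flask, jsonify, request

app = Flask(__name__)

# Hypothetical endpoint matching the URL the client constructs:
#     {server_url}/{job_uuid}/{dataset_uuid}
@app.route('/<job_uuid>/<dataset_uuid>', methods=['POST'])
def receive_statistics(job_uuid, dataset_uuid):
    payload = request.get_json()  # {'round_num': ..., 'dataset_stats': ...}
    # ... persist payload keyed by (job_uuid, dataset_uuid) ...
    return jsonify({'status': 'received'})  # client reads ['status']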
Example #10
 def _initialize(self, job):
     """
     Initializes and returns a DMLResult with the model
     weights as specified in the model.
     """
     assert job.framework_type in ['keras'], \
         "Model type '{0}' is not supported.".format(job.framework_type)
     logging.info("Initializing model...")
     if job.framework_type == 'keras':
         model = model_from_serialized(job.serialized_model)
         #model.summary()
         initial_weights = model.get_weights()
     results = DMLResult(
         status='successful',
         job=job,
         results={
             'weights': initial_weights,
         },
         error_message="",
     )
     return results
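`model_from_serialized` is not shown; a plausible sketch built on Keras's model_from_json (the compile settings below are placeholders, not the project's):

from keras.models import model_from_json

def model_from_serialized(serialized_model):
    """Hypothetical sketch: rebuild a Keras model from its JSON
    architecture string. The compile settings are placeholders; the real
    helper may restore the original optimizer/loss as well."""
    model = model_from_json(serialized_model)
    model.compile(optimizer='sgd',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model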
Example #11
 def _communicate(self, job, state):
     """
     Communicates a message to the blockchain using the Runner's
     IPFS client, puts the tx_receipt in DMLResult.
     """
     assert job.round_num, "Nonzero round number is needed for this message!"
     tx_receipt = setter(client=self._client,
                         key=content_to_ipfs(self._client,
                                             serialize_weights(job.key)),
                         port=self._port,
                         value=job.serialize_job(),
                         round_num=job.round_num,
                         state_append=state)
     results = DMLResult(
         status='successful',
         job=job,
         results={
             'receipt': tx_receipt,
         },
         error_message="",
     )
     return results
Example #12
    def _train(self, job):
        """
        Trains the specified machine learning model on all the local data,
        starting from the initial model state specified, until a stopping
        condition is met, and using the hyper-parameters specified.

        Returns a DMLResult with the updated model weights, the weighting factor
        omega, and stats about the training job.

        NOTE: Uses the same hyperparameters and labeler for training and
        validating during 'avg_type' of type 'val_acc'.

        NOTE2: Assumes 'job.weights' are the actual weights and not a path.
        """

        train_dataset_path, test_dataset_path = self._set_up(job)
        data_count = job.datapoint_count

        # Get the right dataset iterator based on the averaging type.
        avg_type = job.hyperparams['averaging_type']
        batch_size = job.hyperparams['batch_size']
        split = job.hyperparams['split']
        assert avg_type in ['data_size', 'val_acc'], \
            "Averaging type '{0}' is not supported.".format(avg_type)
        logging.info("Training model...")
        dataset_iterator = create_random_train_dataset_iterator(
            train_dataset_path,
            batch_size=batch_size,
            labeler=job.label_column_name,
        )
        if avg_type == 'val_acc':
            test_dataset_iterator = create_random_test_dataset_iterator(
                test_dataset_path,
                batch_size=batch_size,
                labeler=job.label_column_name,
            )

        # Train the model the right way based on the model type.
        assert job.framework_type in ['keras'], \
            "Model type '{0}' is not supported.".format(job.framework_type)

        if job.framework_type == 'keras':
            new_weights_path, train_stats = train_keras_model(
                job.serialized_model, job.weights, dataset_iterator,
                data_count * split, job.hyperparams, self.config)

        # Get the right omega based on the averaging type.
        if avg_type == 'data_size':
            omega = float(data_count * split)
        elif avg_type == 'val_acc':
            val_stats = self._validate(
                job,
                custom_iterator=test_dataset_iterator).results['val_stats']
            omega = val_stats['val_metric']['acc']
            train_stats.update(val_stats)

        # Return the results.
        results = DMLResult(
            status='successful',
            job=job,
            results={
                'weights': new_weights_path,
                'omega': omega,
                'train_stats': train_stats,
            },
            error_message="",
        )
        return results
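For concreteness: with data_count = 1000 and split = 0.8, the 'data_size' branch yields omega = 800.0, while the 'val_acc' branch uses the validation accuracy (say 0.92) as omega, so nodes whose updates validate better contribute more to the weighted average in _average.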
Example #13
    def _train(self, job):
        """
        Trains the specified machine learning model on all the local data,
        starting from the initial model state specified, until a stopping
        condition is met, and using the hyper-parameters specified.

        Returns a DMLResult with the updated model weights, the weighting factor
        omega, and stats about the training job.

        NOTE: Uses the same hyperparameters and labeler for training and
        validating during 'avg_type' of type 'val_acc'.

        NOTE2: Assumes 'job.weights' are the actual weights and not a path.
        """

        train_dataset_path, test_dataset_path = self._set_up()
        data_count_mappings = count_datapoints(self._dataset_path)

        # Get the right dataset iterator based on the averaging type.
        avg_type = job.hyperparams.get('averaging_type', 'data_size')
        batch_size = job.hyperparams['batch_size']
        assert avg_type in ['data_size', 'val_acc'], \
            "Averaging type '{0}' is not supported.".format(avg_type)
        self.logger.info("Training model...")
        if avg_type == 'data_size':
            dataset_iterator = create_random_train_dataset_iterator(
                train_dataset_path,
                batch_size=batch_size,
                infinite=False,
                num_epochs=job.hyperparams.get('epochs'))
        elif avg_type == 'val_acc':
            dataset_iterator = create_random_train_dataset_iterator(
                train_dataset_path,
                batch_size=batch_size,
            )
            test_dataset_iterator = create_random_test_dataset_iterator(
                test_dataset_path,
                batch_size=batch_size,
            )

        # Train the model the right way based on the model type.
        assert job.framework_type in ['keras'], \
            "Model type '{0}' is not supported.".format(job.framework_type)

        if job.framework_type == 'keras':
            trained_model, result_val = train_keras_model(
                job.model,
                dataset_iterator,
                data_count_mappings['train.csv'],
                job.hyperparams,
                self.config,
            )

        # Get the right omega based on the averaging type.
        if avg_type == 'data_size':
            omega = data_count_mappings['train.csv']
        elif avg_type == 'val_acc':
            val_stats = self._validate(
                job,
                custom_iterator=test_dataset_iterator).results['val_stats']
            omega = val_stats['val_metric']['acc']
        # Serialize the trained model to h5 and base64-encode it for
        # transport.
        trained_model.save("sessions/my_model.h5")
        with open("sessions/my_model.h5", mode='rb') as file:
            file_content = file.read()
            encoded_content = base64.b64encode(file_content)
            h5_model = encoded_content.decode('ascii')

        train_results = {
            'omega': omega,
            'gradients': result_val,
            # NOTE (assumption): the base64-encoded model computed above
            # was otherwise unused; it appears intended to ship with the
            # results.
            'h5_model': h5_model,
        }

        # Return the results.
        results = DMLResult(
            status='successful',
            job=job,
            results=train_results,
            error_message="",
        )
        return results
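A receiver would reverse the encoding above to recover the h5 file; a minimal sketch (the path and helper name are illustrative):

import base64

def decode_h5_model(h5_model, out_path='sessions/received_model.h5'):
    """Hypothetical sketch: write the ascii payload back out as an h5
    file (the inverse of the encoding in _train above)."""
    with open(out_path, mode='wb') as file:
        file.write(base64.b64decode(h5_model.encode('ascii')))
    return out_path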