Example #1
0
def forward_datasets(stub, client, secret, last_fetch):
    """Fetch available dataset metadata from the data wrapper and forward it to the server.

    Refreshes at most once every two hours. Numerical features are reduced to
    the fields the server needs ('feature', 'type', 'categories') plus outlier
    warnings; categorical features are passed through untouched. Failures are
    reported to the server through the 'send_datasets' gRPC call.

    Returns:
        tuple: (possibly refreshed stub, timestamp of the last fetch attempt).
    """
    # Bugfix: timedelta.seconds wraps around at one day, so the old `.seconds`
    # check could wrongly skip a refresh after >24h; total_seconds() is correct.
    if not last_fetch or (datetime.datetime.now() - last_fetch).total_seconds() > 60 * 60 * 2:
        last_fetch = datetime.datetime.now()
        # Removed a leftover debug probe to https://google.ch whose result was
        # immediately overwritten; initialize instead so the except-branch can
        # safely inspect `response` even if the GET below raises early.
        response = None
        try:
            response = requests.get(
                f"{os.getenv('DATA_WRAPPER_URL', 'http://data_wrapper/')}get_available_datasets")
            datasets = response.json()
        except Exception as error:
            # Report the failure (and whatever partial response we got) to the server.
            diagnostics = [str(error)]
            if response is not None:
                diagnostics.append(str(response.content))
                diagnostics.append(str(response.raw.data))
            for message in diagnostics:
                _, stub, _ = get_grpc_connection(stub=stub,
                                                 grpc_function='send_datasets',
                                                 request=globalserver_pb2.DefaultRequest(
                                                     client=client,
                                                     secret=secret,
                                                     protocol=message))
            datasets = []
        for i_dataset, dataset in enumerate(datasets):
            for i_feature, feature in enumerate(dataset['features']):

                if feature['type'] == 'categorical':
                    continue
                # Keep only the fields the server consumes.
                feature_reduced = {key: value for key, value in feature.items() if
                                   key in ['feature', 'type', 'categories']}

                feature_reduced['warning'] = []
                if feature['iqr_outliers'] > 0:
                    feature_reduced['warning'].append("This feature has outliers (iqr=1.5)")
                if feature['3std-percentage'] > 0.03:
                    feature_reduced['warning'].append("This feature many outliers (3std>0.97)")
                datasets[i_dataset]['features'][i_feature] = feature_reduced

        _, stub, _ = get_grpc_connection(stub=stub,
                                         grpc_function='send_datasets',
                                         request=globalserver_pb2.DefaultRequest(
                                             client=client,
                                             secret=secret,
                                             protocol=json.dumps(datasets)))

    return stub, last_fetch
Example #2
0
    def P2P_train_model(self, experiment_id, task_id):
        """Run one local XGBoost boosting round and acknowledge the server.

        Continues boosting from ``self.model`` (if any) for the configured
        number of rounds, then sends a 'train_model_response' over gRPC.
        Always returns True.
        """
        logging.info("Training...%s", self.experiment_id)

        batch_stream = self.data_generator(
            "train",
            preprocessing=self.preprocessing_function,
            config=self.config,
            client=self.client)
        features, labels = next(batch_stream)
        dtrain = xgb.DMatrix(features, label=labels)

        # Copy so the shared config dict is never mutated.
        params = dict(self.config['compile']['model_params'])
        training_cfg = self.config['training']
        params['nthread'] = training_cfg.get('nthread', -1)
        params['verbosity'] = training_cfg.get('verbosity', 0)

        self.model = xgb.train(
            params,
            dtrain,
            num_boost_round=training_cfg['client_steps_per_round'],
            xgb_model=self.model)

        _, self.stub, _ = grpc_util.get_grpc_connection(
            grpc_function='train_model_response',
            request=globalserver_pb2.DefaultRequest(
                client=self.client,
                secret=self.secret,
                task_id=task_id,
                experiment_id=experiment_id))
        logging.info("Training finished. %s", self.experiment_id)
        gc.collect()
        return True
Example #3
0
    def RF_train_model(self, experiment_id, task_id):
        """Compute local histogram data for the current random-forest round.

        Assumes RF_fetch_model was called previously and that the server
        process has set the following fields in the model-configuration-file:
        - current_condition_list
        - current_feature_list
        - random_state
        The result is written onto the local model as ``model_update``, then a
        'train_model_response' is sent over gRPC. Always returns True.

        NOTE: assumes positive-label=1 / negative-label=0; how to pass this
        information to the worker still needs to be worked out.
        """
        logging.info("Training...%s", self.experiment_id)

        local_histograms = utils.RF_create_histograms(self.batch, self.config, self.model)
        self.model.model_update = local_histograms  # stored as string

        _, self.stub, _ = grpc_util.get_grpc_connection(
            grpc_function='train_model_response',
            request=globalserver_pb2.DefaultRequest(
                client=self.client,
                secret=self.secret,
                task_id=task_id,
                experiment_id=experiment_id))
        logging.info("Training finished. %s", self.experiment_id)
        gc.collect()
        return True
Example #4
0
    def fetch_model(self, experiment_id, task_id, protocol):
        """Request the current model from the server and load it locally.

        Dispatches every row of the pseudo-streamed gRPC response to the
        protocol-specific loader (e.g. ``NN_load_model``), then prepares the
        dataset, custom training config and preprocessing. Returns True.
        """
        # Dropped the stray f-prefixes: these messages use lazy %-style
        # logging arguments, and the f-strings had no placeholders.
        logging.info("Parsing Model...%s", experiment_id)

        _, self.stub, responses = grpc_util.get_grpc_connection(
            grpc_function='fetch_model_request',
            request=globalserver_pb2.DefaultRequest(
                client=self.client,
                task_id=task_id,
                secret=self.secret,
                experiment_id=experiment_id))
        for row in responses:  # pseudo stream
            # todo: split into "load config" and "compile model"; set the
            # custom config while loading it.
            load_model = getattr(self, protocol + "_load_model")
            load_model(model=row)

        logging.info("Model parsed...%s", experiment_id)

        self._set_dataset()
        self._set_custom_training_config()  # todo: allow to change everything
        self._set_preprocessing()
        tf.keras.backend.clear_session()
        gc.collect()
        return True
Example #5
0
def stop_workers(worker_instances, stub, client, secret):
    """Ask the server which experiments must stop and cancel their workers.

    No-op when there are no worker instances. Returns the (possibly updated)
    worker_instances mapping and stub.
    """
    if not worker_instances:
        return worker_instances, stub

    _, stub, stop_response = get_grpc_connection(
        stub=stub,
        grpc_function='stop_experiment',
        request=globalserver_pb2.DefaultRequest(client=client, secret=secret))

    for experiment_id in json.loads(stop_response.experiment_id):
        worker_instances, stub = cancel_worker(
            worker_instances=worker_instances, experiment_id=experiment_id,
            stub=stub, client=client, secret=secret,
            grpc_function='stopped_experiment_response')
    return worker_instances, stub
Example #6
0
def cancel_worker(worker_instances, experiment_id, stub, client, secret, grpc_function, error_msg=''):
    """Cancel the worker for ``experiment_id`` and confirm via gRPC.

    If the worker is still running one second after the cancel request, no
    confirmation is sent yet and the caller is expected to retry later.
    Returns the worker_instances mapping and the (possibly refreshed) stub.
    """
    if experiment_id in worker_instances:
        logging.info(f"Try to cancel {experiment_id} Worker. {grpc_function}")
        instance = worker_instances[experiment_id]
        instance.cancel()
        time.sleep(1)
        if not instance.done():
            # Still shutting down -- confirm on a later call.
            return worker_instances, stub

    _, stub, _ = get_grpc_connection(
        stub=stub,
        grpc_function=grpc_function,
        request=globalserver_pb2.DefaultRequest(
            experiment_id=experiment_id,
            protocol=error_msg,
            client=client,
            secret=secret))
    return worker_instances, stub
Example #7
0
    def __init__(self, client, secret, experiment_id):
        """Initialize worker state and verify the gRPC connection to the server."""
        logging.info("Initialize worker...")
        self.client = client
        self.secret = secret
        self.experiment_id = experiment_id

        # Model / training state, filled in later by fetch/train calls.
        self.model = None
        self.global_weights = None
        self.config = {}
        self.dataset_metadata = {}
        self.data_generator = None
        self.preprocessing_function = None
        self.batch = None  # todo ugly

        # Only needed to read memory usage until the memory leak is fixed.
        self.process = psutil.Process(os.getpid())

        _, self.stub, _ = grpc_util.get_grpc_connection(
            grpc_function='test_connection',
            request=globalserver_pb2.DefaultRequest(
                client=client, secret=secret, experiment_id=experiment_id))
        logging.info("GRPC Connection established...")
Example #8
0
def start_workers(worker, worker_instances, error_queue, stub, client, secret):
    """Spawn workers for newly started experiments and reap stale ones.

    Queries the server for the active experiment list, creates a worker for
    every experiment not yet running locally, and removes or cancels workers
    whose experiment is no longer active. Returns (worker_instances, stub).
    """
    _, stub, start_response = get_grpc_connection(
        stub=stub,
        grpc_function='start_experiment',
        request=globalserver_pb2.DefaultRequest(client=client, secret=secret))
    active_experiments = json.loads(start_response.experiment_id)

    # Launch a worker for every experiment we are not yet running.
    for experiment_id in active_experiments:
        if experiment_id not in worker_instances:
            logging.info(f"starting {experiment_id} Worker")
            worker_instances[experiment_id] = worker(
                client=client, error_queue=error_queue, secret=secret,
                experiment_id=experiment_id)

    # Kill running instances that have no running experiment anymore.
    for experiment_id in list(worker_instances):
        if experiment_id in active_experiments:
            continue
        if worker_instances[experiment_id].done():
            worker_instances.pop(experiment_id)
        else:
            worker_instances, stub = cancel_worker(
                worker_instances=worker_instances, experiment_id=experiment_id,
                stub=stub, client=client, secret=secret,
                grpc_function='stopped_experiment_response')
    return worker_instances, stub
Example #9
0
    def NN_train_model(self, experiment_id, task_id):
        """Fit the local Keras model on the training generator and ack the server.

        All fit parameters come from ``config['training']`` with sensible
        defaults; class weights (if configured) are converted to int keys.
        Sends a 'train_model_response' when done and returns True.
        """
        logging.info("Training...%s", self.experiment_id)

        train_stream = self.data_generator(
            "train",
            preprocessing=self.preprocessing_function,
            config=self.config,
            client=self.client)

        training_cfg = self.config['training']
        # Keras expects integer class labels as keys.
        class_weight = None
        if training_cfg.get("class_weight", None):
            class_weight = {
                int(label): weight
                for label, weight in training_cfg.get("class_weight").items()
            }

        self.model.fit(
            train_stream,
            epochs=training_cfg.get("epochs", 1),
            verbose=training_cfg.get("verbose", 0),
            callbacks=training_cfg.get("callback", []),
            shuffle=training_cfg.get("shuffle", True),
            class_weight=class_weight,
            initial_epoch=training_cfg.get("initial_epoch", 0),
            steps_per_epoch=training_cfg.get("steps_per_epoch", 12),
            max_queue_size=training_cfg.get("max_queue_size", 10),
            workers=1,  # training_cfg.get("workers", 1),
            use_multiprocessing=training_cfg.get("use_multiprocessing", False),
        )

        _, self.stub, _ = grpc_util.get_grpc_connection(
            grpc_function='train_model_response',
            request=globalserver_pb2.DefaultRequest(
                client=self.client,
                secret=self.secret,
                task_id=task_id,
                experiment_id=experiment_id))
        logging.info("Training finished. %s", self.experiment_id)
        tf.keras.backend.clear_session()
        gc.collect()
        return True