Example #1
 def _parallel_train(self, data, model):
     px.train(data=data,
              iters=100,
              shared_states=False,
              in_model=model,
              mode=CONFIG.MODELTYPE,
              opt_regularization_hook=CONFIG.REGULARIZATION)
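
The snippet passes a previously trained model via in_model so that optimization continues from its weights. A minimal, self-contained sketch of that warm-start pattern follows; the toy data, graph type, and iteration counts are illustrative assumptions standing in for the values taken from CONFIG.

import numpy as np
import pxpy as px

# Toy discrete data: 100 samples over 5 variables, stored as uint16 as pxpy expects.
data = np.ascontiguousarray(np.random.randint(0, 3, size=(100, 5)), dtype=np.uint16)

# First call: learn a tree structure and an initial set of weights.
base_model = px.train(data=data, graph=px.GraphType.auto_tree, iters=50)

# Second call: continue optimizing from the existing model, as _parallel_train does above.
refined_model = px.train(data=data, iters=100, shared_states=False, in_model=base_model)
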
Example #2
    def _aggregate(self, opt, **kwargs):
        naivekl = np.zeros(self.model[0].weights.shape[0])
        K = self.K
        X = self.X
        if opt:
            logger.debug("===KL CREATE DATA===")
            data = np.ascontiguousarray(X, dtype=np.uint16)
            data = np.ascontiguousarray(
                np.vstack((data, self.states - 1)).astype(np.uint16))
            logger.debug("===KL CREATE DUMMY MODEL===")
            model = px.train(data=data, graph=self.graph, iters=0)
            s = np.ctypeslib.as_array(model.empirical_stats,
                                      shape=(model.dimension, ))
            s -= model.phi((self.states - 1).ravel())
            model.num_instances -= 1
            logger.debug("===KL TRAIN BOOTSTRAP===")
            res = px.train(in_model=model,
                           opt_regularization_hook=CONFIG.REGULARIZATION)
            logger.debug("===KL MERGE WEIGHTS===")
            weights = np.ascontiguousarray(np.copy(res.weights))
            states = np.ascontiguousarray(self.states)
            logger.debug("===KL CREATE RESULT MODEL===")
            kl_model = px.Model(weights=weights.astype(np.float64),
                                graph=self.graph,
                                states=states)
        """
        else:
            average_statistics = []
            for i, samples in enumerate(X):
                avg = np.mean([self.phi[i](x) for x in samples], axis=0)
                average_statistics.append(avg)
            self.average_suff_stats = average_statistics
            x0 = np.zeros(self.model[0].weights.shape[0])
            obj = partial(self.naive_kl, average_statistics=average_statistics,
                          graph=self.model[0].graph,
                          states=np.copy(self.model[0].states))
            res = minimize(obj, x0, callback=self.callback, tol=self.eps, options={"maxiter": 50, "gtol": 1e-3})
            kl_model = px.Model(weights=res.x, graph=self.model[0].graph, states=self.model[0].states)
        """
        naivekl += np.copy(kl_model.weights)
        # self.test(kl_model)
        """
        try:
            fisher_matrix = []
            inverse_fisher = []
            for i in range(K):
                fisher_matrix.append(self.fisher_information(i, kl_m[:kl_model.weights.shape[0]], kl_model.weights))
                inverse_fisher.append(np.linalg.inv(fisher_matrix[i]))
        except np.linalg.LinAlgError as e:
            pass
        """

        return kl_model.weights
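
The branch above relies on px.train with iters=0 to obtain a model whose empirical statistics can be corrected before the actual optimization starts. Below is a hedged, self-contained sketch of that correction step; the toy data and the appended row holding the maximal state of every variable mirror the code above, while the exact storage format of the empirical statistics may depend on the pxpy version.

import numpy as np
import pxpy as px

n_vars, n_states = 4, 3
data = np.ascontiguousarray(np.random.randint(0, n_states, size=(200, n_vars)), dtype=np.uint16)

# Append one row with the maximal state of each variable so every state is observed.
full_row = np.full((1, n_vars), n_states - 1, dtype=np.uint16)
data = np.ascontiguousarray(np.vstack((data, full_row)))

# iters=0 builds the model and its empirical statistics without optimizing.
model = px.train(data=data, graph=px.GraphType.auto_tree, iters=0)

# Remove the dummy row's contribution from the sufficient statistics and the
# instance count, then continue training from the corrected model.
stats = np.ctypeslib.as_array(model.empirical_stats, shape=(model.dimension,))
stats -= model.phi(full_row.ravel())
model.num_instances -= 1
trained = px.train(in_model=model, iters=100)
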
Example #3
 def _create_graph(self):
     """
         Creates an independence structure (graph) from the data, using the
         independence-structure mode that was specified when this object was created.
     """
     holdout = np.ascontiguousarray(self.data_set.holdout.to_numpy().astype(
         np.uint16))
     self.edgelist = np.copy(
         px.train(
             data=holdout,
             graph=CONFIG.GRAPHTYPE,
             iters=1,
             mode=CONFIG.MODELTYPE,
             opt_regularization_hook=CONFIG.REGULARIZATION).graph.edgelist)
     self.graph = self._px_create_graph()
     self.weights = self.init_weights()
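
_create_graph keeps only the edgelist of a throwaway one-iteration model and rebuilds a graph object from it later. A minimal sketch of that pattern, with random placeholder data instead of the holdout set and px.GraphType.auto_tree standing in for CONFIG.GRAPHTYPE:

import numpy as np
import pxpy as px

holdout = np.ascontiguousarray(np.random.randint(0, 3, size=(150, 6)), dtype=np.uint16)

# A single iteration lets pxpy derive the structure; only the edgelist is kept.
structure_model = px.train(data=holdout, graph=px.GraphType.auto_tree, iters=1)
edgelist = np.copy(structure_model.graph.edgelist)

# The copied edgelist can be turned back into a graph object for later training runs.
graph = px.create_graph(edgelist)
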
Example #4
 def run_training(self, data, obs, split, progr_train):
     import pxpy as px
     data = self.setup_for_cv(split, data, obs)
     missing = data == HIDE_VAL
     overall_loss = []
     for emiter in range(self.emiters):
         self.obj = sys.maxsize
         loss = []
         new_modelname = self.model_filename + '_{}'.format(emiter)
         if not os.path.isfile(new_modelname):
             if emiter != 0:  # load the previous model and fill data gaps with gibbs
                 data[missing] = HIDE_VAL
                 with warnings.catch_warnings(record=True):
                     warnings.simplefilter('ignore')
                     prev_model = px.load_model(self.model_filename +
                                                '_{}'.format(emiter - 1))
                     self.predict_gibbs(prev_model, data)
             else:
                 prev_model = None
             with warnings.catch_warnings(record=True):
                 warnings.simplefilter('ignore')
                 model = px.train(
                     data=data,
                     iters=sys.maxsize,
                     graph=px.create_graph(self.edges),
                     mode=getattr(px.ModelType, self.mode),
                     shared_states=bool(self.shared),
                     in_model=prev_model,
                     opt_regularization_hook=self.regularize,
                     opt_progress_hook=(
                         lambda x, em=emiter, loss=loss:
                         self.check_progress(x, progr_train, em, loss)))
                 model.save(new_modelname)
                 model.graph.delete()
                 model.delete()
             overall_loss.append(('EM Iter ' + str(emiter), loss))
         progr_train[self.split] = (100.0 / self.emiters) * (emiter + 1)
         self.cons.progress(progr_train, self.split)
     self.plot_convergence(overall_loss)
     super().run_training(data, obs, split,
                          progr_train)  # for final console output
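
run_training checkpoints a model per EM iteration and warm-starts the next one from the previous checkpoint. The sketch below isolates that save/load/warm-start cycle; the file names, iteration counts, and toy data are illustrative assumptions, and the Gibbs imputation step (predict_gibbs) is omitted.

import numpy as np
import pxpy as px

data = np.ascontiguousarray(np.random.randint(0, 3, size=(300, 5)), dtype=np.uint16)

# First round: train from scratch and checkpoint the model to disk.
model = px.train(data=data, iters=100, graph=px.GraphType.auto_tree, mode=px.ModelType.mrf)
model.save("em_model_0")
model.graph.delete()
model.delete()

# Next round: reload the checkpoint and continue optimizing from its weights.
prev_model = px.load_model("em_model_0")
model = px.train(data=data, iters=100, in_model=prev_model)
model.save("em_model_1")
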
Example #5
    def __init__(self, states, edgelist=None, seed=None):
        super(Synthetic, self).__init__()
        n_vars = 15
        n_samples = 1000
        n_states = 10
        self.random_state = np.random.RandomState(seed=seed)
        # Generate random cov
        cov = self.random_state.randn(n_vars, n_vars)
        cov = np.dot(cov, cov.T) / n_vars

        # Generate data from normal
        self.data = pd.DataFrame(
            scipy.stats.multivariate_normal(mean=np.zeros(n_vars),
                                            cov=np.dot(cov, cov.T) /
                                            n_vars).rvs(n_samples))

        data_disc, disc_ttt = px.discretize(data=self.data,
                                            num_states=n_states)

        # Add sample to ensure same state space for each variable
        data_disc = np.concatenate([
            data_disc,
            np.full(shape=(1, n_vars),
                    fill_value=n_states - 1,
                    dtype=np.uint16)
        ])

        # Generate model
        self.global_model = px.train(data_disc,
                                     graph=px.GraphType.auto_tree,
                                     mode=px.ModelType.mrf,
                                     iters=0)
        self.global_weights = np.copy(self.global_model.weights)
        # TODO: Remove the statistics for full point.

        edgelist = self.global_model.graph.edgelist
        stats = self.global_model.statistics
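
The constructor discretizes samples from a multivariate normal and appends one artificial sample so that every variable spans the same state space. A condensed sketch of that preparation step, using smaller dimensions and plain np.random.randn data as assumptions:

import numpy as np
import pandas as pd
import pxpy as px

n_vars, n_states = 5, 10
continuous = pd.DataFrame(np.random.randn(500, n_vars))

# Discretize into n_states bins per variable; the second return value describes the binning.
data_disc, disc_info = px.discretize(data=continuous, num_states=n_states)

# One extra sample holding the maximal state of every variable keeps the state
# space identical across all variables.
data_disc = np.concatenate([
    data_disc,
    np.full(shape=(1, n_vars), fill_value=n_states - 1, dtype=np.uint16),
])

model = px.train(data_disc, graph=px.GraphType.auto_tree, mode=px.ModelType.mrf, iters=0)
weights = np.copy(model.weights)
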
Example #6
        cov[a[x]:a[x + 1], a[x]:a[x + 1]] = - rhs[a[x]:a[x + 1], a[x]:a[x + 1]]
    cov -= np.diag(np.diag(cov))
    cov += diag + np.diag(np.full(model.weights.shape[0], eps))

    return cov

if __name__ == '__main__':
    data = main()

    res = None
    for arr in data:
        res = arr if res is None else np.vstack((res, arr))

    res = np.ascontiguousarray(res, dtype=np.float64)
    disc, M = px.discretize(res, 10)
    model = px.train(disc, graph=px.GraphType.auto_tree, iters=10000)
    gen_semi_random_cov(model, 1e-1)
    mu, A = model.infer()
    vars = model.weights.shape[0]
    mu = mu[:vars]
    fi = np.outer(mu - model.statistics, mu - model.statistics)
    phis = []
    for d in disc:
        phis.append(model.phi(d))
    cov_XY = np.cov(np.array(phis).T)
    EX_EY = np.outer(mu, mu)
    E_XY = cov_XY + EX_EY
    new_data = os.path.join(CONFIG.ROOT_DIR, "data")
    os.chdir(new_data)
    os.mkdir("SYNTH")
    df = pd.DataFrame(disc)
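
The __main__ block combines infer(), which returns the marginals and the log-partition value, with per-sample sufficient statistics phi(x) to estimate moments. A reduced sketch of that pipeline on random data follows; treating the truncated marginals as the expected sufficient statistics follows the code above rather than being verified here.

import numpy as np
import pxpy as px

raw = np.ascontiguousarray(np.random.randn(400, 4), dtype=np.float64)
disc, disc_info = px.discretize(raw, 10)

model = px.train(disc, graph=px.GraphType.auto_tree, iters=100)

# infer() returns the marginals and the (log-)partition function value.
marginals, log_partition = model.infer()
mu = marginals[:model.weights.shape[0]]

# Empirical covariance and second moment of the sufficient statistics phi(x),
# analogous to cov_XY and E_XY above.
phis = np.array([model.phi(x) for x in disc])
cov_xy = np.cov(phis.T)
second_moment = cov_xy + np.outer(mu, mu)
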
Example #7
    def predict(self, weights=None, n_test=None):
        logger.debug("===PREDICT PREPARE DATA===")
        test = np.ascontiguousarray(self.data_set.test.to_numpy().astype(
            np.uint16))

        tmp = np.ascontiguousarray(
            np.full(shape=(1, self.state_space.shape[0]),
                    fill_value=self.state_space,
                    dtype=np.uint16))
        tmp_test = np.vstack((test, tmp))

        test_model = px.train(data=tmp_test,
                              graph=self._px_create_graph(),
                              mode=px.ModelType.mrf,
                              opt_regularization_hook=CONFIG.REGULARIZATION,
                              iters=0,
                              k=4)

        test_model = self.scale_phi_emp(test_model)
        statistics = np.copy(test_model.statistics)

        if weights is not None:
            np.copyto(test_model.weights, weights)
            _, a = test_model.infer()
            test_ll = [a - np.inner(weights, statistics)]
        else:
            partitions = []
            if CONFIG.MODELTYPE == px.ModelType.integer:
                for mod in self.px_model_scaled:
                    if test_model.weights.shape[0] != mod.weights.shape[0]:
                        print("error")
                    np.copyto(test_model.weights, mod.weights)
                    _, a = test_model.infer()
                    partitions.append(a)
            else:
                for mod in self.px_model:
                    if test_model.weights.shape[0] != mod.weights.shape[0]:
                        print("error")
                    np.copyto(test_model.weights, mod.weights)
                    _, a = test_model.infer()
                    partitions.append(a)
        test_model.delete()
        if isinstance(self.data_set.label_column, str):
            label_column_idx = self.data_set.test.columns.get_loc(
                self.data_set.label_column)
            # -1 wraps to the maximal uint16 value, hiding the label column for prediction.
            test[:, label_column_idx] = -1
        else:
            test[:, self.data_set.label_column] = -1
        if n_test is None:
            n_test = test.shape[0] - 1
        else:
            n_test = np.min([n_test, test.shape[0] - 1])
        test = np.ascontiguousarray(test[:n_test])
        logger.debug("===PREDICT START PREDICTIONS===")
        if weights is None:
            logger.debug("===PREDICT ALL LOCAL MODELS===")
            if self.trained:
                if CONFIG.MODELTYPE == px.ModelType.integer:
                    predictions = [
                        px_model.predict(
                            np.ascontiguousarray(np.copy(test[:n_test])))
                        for px_model in self.px_model_scaled
                    ]
                    test_ll = [
                        partitions[i] -
                        np.inner(self.px_model_scaled[i].weights, statistics)
                        for i in range(len(self.px_model_scaled))
                    ]
                    return predictions, test_ll
                else:
                    test_ll = [
                        partitions[i] -
                        np.inner(self.px_model[i].weights, statistics)
                        for i in range(len(self.px_model))
                    ]
                    return [
                        px_model.predict(
                            np.ascontiguousarray(np.copy(test[:n_test])))
                        for px_model in self.px_model
                    ], test_ll
        else:
            logger.debug("===PREDICT INPUT MODEL===")
            px_model = px.Model(weights=weights,
                                graph=px.create_graph(self.edgelist),
                                states=self.state_space + 1)
            return px_model.predict(test[:n_test]), test_ll
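
predict() scores weight vectors on held-out data by combining the log-partition value from infer() with the empirical statistics of a test model built with iters=0 (the quantity it calls test_ll). The sketch below reproduces that scoring step on toy data; the shared edgelist and the appended full-state rows are assumptions made so that the weight dimensions and state spaces of both models match.

import numpy as np
import pxpy as px

rng = np.random.RandomState(0)
n_vars, n_states = 5, 3
data = np.ascontiguousarray(rng.randint(0, n_states, size=(300, n_vars)), dtype=np.uint16)
full_row = np.full((1, n_vars), n_states - 1, dtype=np.uint16)
train = np.ascontiguousarray(np.vstack((data[:200], full_row)))
test = np.ascontiguousarray(np.vstack((data[200:], full_row)))

# Learn one structure and reuse it so both models share the same weight dimension.
structure = px.train(data=train, graph=px.GraphType.auto_tree, iters=1)
edgelist = np.copy(structure.graph.edgelist)

model = px.train(data=train, graph=px.create_graph(edgelist), mode=px.ModelType.mrf, iters=100)

# iters=0 on the test split only collects its empirical statistics.
test_model = px.train(data=test, graph=px.create_graph(edgelist), mode=px.ModelType.mrf, iters=0)
statistics = np.copy(test_model.statistics)

# Plug the trained weights into the test model and evaluate A(w) - <w, statistics>.
np.copyto(test_model.weights, model.weights)
_, log_partition = test_model.infer()
test_ll = log_partition - np.inner(model.weights, statistics)
test_model.delete()
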
Example #8
 def scale_model(self, model, data):
     weights = np.log(2) * np.ascontiguousarray(np.copy(model.weights))
     res = px.train(data=data, graph=self.graph, iters=0)
     res = self.scale_phi_emp(res)
     np.copyto(res.weights, weights)
     return res
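
scale_model copies the weights of an integer-valued model, multiplied by log(2), into a fresh model container created with iters=0 (presumably because the integer model operates in base 2). A standalone sketch of that rescaling, with toy data and the graph recreated from the integer model's edgelist as assumptions:

import numpy as np
import pxpy as px

data = np.ascontiguousarray(np.random.randint(0, 3, size=(200, 4)), dtype=np.uint16)

# Train with integer-valued parameters.
int_model = px.train(data=data, graph=px.GraphType.auto_tree, mode=px.ModelType.integer, iters=100)

# Rescale the integer weights by log(2), as scale_model does above.
scaled_weights = np.log(2) * np.ascontiguousarray(np.copy(int_model.weights))

# A fresh model on the same data and structure (iters=0) serves as the container
# whose weights are overwritten with the rescaled values.
container = px.train(data=data, graph=px.create_graph(np.copy(int_model.graph.edgelist)), iters=0)
np.copyto(container.weights, scaled_weights)
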
Example #9
    def train(self,
              epochs=1,
              iters=100,
              split=None,
              n_models=None,
              mode=px.ModelType.mrf):
        """
        TODO

        Parameters
        ----------
        epochs : int
            Number of outer iterations
        iters: int
            Number of inner iteration (per call of px.train)
        split: Split
            class:src.preprocessing.split.Split Contains the number of splits and thus the number of models to be trained.
            Each split will be distributed to a single device/model.

        Raises
        ------
        RuntimeError

        Returns
        -------
            None
        """
        self.maxiter = iters
        self.epoch += 1
        models = []
        scaled_models = []
        train = np.ascontiguousarray(self.data_set.train.to_numpy().astype(
            np.uint16))

        # Timing
        start = time.time()
        update = None
        iter_time = None

        # Initialization for best Params
        if split is None:
            total_models = 1
            split = [np.arange(train.shape[0])]
        else:
            total_models = len(split) if n_models is None else np.min(
                [len(split), n_models])

        self.best_weights[self.epoch] = [0] * total_models
        self.best_objs[self.epoch] = [np.infty] * total_models
        self.px_batch_local[self.epoch] = [0] * total_models
        # Distributed Training
        for i, idx in enumerate(split):
            self.curr_iter = 0
            if len(split) > 1:
                self.n_local_data = np.int64(
                    np.min([
                        np.ceil(self.data_delta *
                                self.sample_func(self.epoch)), idx.shape[0]
                    ]))
            else:
                self.n_local_data = idx.shape[0]
            self.curr_model = i
            if n_models is not None:
                if i >= n_models:
                    break
            update, _ = log_progress(start, update, iter_time, total_models, i)
            logger.info("===TRAINING=== CREATE DATA===")
            tmp = np.ascontiguousarray(
                np.full(shape=(1, self.state_space.shape[0]),
                        fill_value=self.state_space,
                        dtype=np.uint16))
            data = np.ascontiguousarray(
                np.copy(train[idx[:self.n_local_data].flatten()]))
            data = np.vstack((data, tmp))

            model = px.train(data=data,
                             graph=self.graph,
                             mode=CONFIG.MODELTYPE,
                             opt_regularization_hook=CONFIG.REGULARIZATION,
                             iters=0,
                             k=4)

            model = self.scale_phi_emp(model)
            if self.epoch > 1 and CONFIG.FEEDBACK:
                np.copyto(model.weights,
                          np.ascontiguousarray(self.best_aggregate))
                _, A = model.infer()
                logger.info("LOCAL LL WITH AGGREGATE WEIGHTS :" +
                            str(A - np.inner(model.weights, model.statistics)))
            logger.info("===TRAINING=== START TRAINING===")
            model = px.train(iters=iters,
                             shared_states=False,
                             opt_progress_hook=self.opt_progress_hook,
                             mode=CONFIG.MODELTYPE,
                             in_model=model,
                             opt_regularization_hook=CONFIG.REGULARIZATION,
                             k=4)
            logger.info("===TRAINING=== FINISHED TRAINING===")
            self.reset_train()

            if len(split) > 1:
                self.px_batch_local[self.epoch][i] = model
                #model = self.merge_weights(model)
            if self.epoch == 1:
                models.append(model)
                if CONFIG.MODELTYPE == px.ModelType.integer:
                    scaled_models.append(self.scale_model(model, data))
            else:
                self.px_model[i] = model
                if CONFIG.MODELTYPE == px.ModelType.integer:
                    scaled_model = self.scale_model(model, data)
                    self.px_model_scaled[i] = scaled_model
            iter_time = time.time()

        if not self.px_model:
            self.px_model = models
            if CONFIG.MODELTYPE == px.ModelType.integer:
                self.px_model_scaled = scaled_models
        self.px_batch[self.epoch] = self.px_model
        end = time.time()
        logger.info("Finished Training Models: " +
                    "{:.2f} s".format(end - start))

        if not self.trained:
            self.trained = True
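
train() fits one local model per split, appending a row with the maximal state of each variable so that all local models share the same state space, before optionally warm-starting from aggregate weights. The sketch below shows the per-split loop in isolation; the shared edgelist, three-way split, and toy data are illustrative assumptions, and the two-stage iters=0/warm-start call is collapsed into a single px.train call.

import numpy as np
import pxpy as px

rng = np.random.RandomState(0)
n_vars, n_states = 6, 4
train = np.ascontiguousarray(rng.randint(0, n_states, size=(600, n_vars)), dtype=np.uint16)
state_space = np.full(n_vars, n_states - 1, dtype=np.uint16)  # maximal state per variable

# Learn one structure on the full data and reuse it for every local model.
structure = px.train(data=train, graph=px.GraphType.auto_tree, iters=1)
edgelist = np.copy(structure.graph.edgelist)

local_models = []
for idx in np.array_split(np.arange(train.shape[0]), 3):
    # The appended row keeps the state space identical across splits.
    tmp = np.ascontiguousarray(state_space.reshape(1, -1))
    data = np.ascontiguousarray(np.vstack((np.copy(train[idx]), tmp)))
    model = px.train(data=data,
                     graph=px.create_graph(edgelist),
                     mode=px.ModelType.mrf,
                     iters=100)
    local_models.append(model)
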