# Example #1
def test_combine_module():
    """Train the combination model of a cluster on the held-out test split.

    Relies on module-level ``load_data``, ``split_test_data``,
    ``static_data``, ``cluster_dir`` and ``model`` being defined elsewhere.
    """
    from Fuzzy_clustering.ver_tf2.Combine_module_train import combine_model

    X, y, act, X_cnn, X_lstm, test_indices = load_data()
    split = split_test_data(
        X, y, act, X_cnn=X_cnn, X_lstm=X_lstm, test_indices=test_indices)
    (X, y, act, X_cnn, X_lstm,
     X_test, y_test, act_test, X_cnn_test, X_lstm_test) = split
    comb_model = combine_model(static_data, cluster_dir, model.sc)
    # Force a retrain even if a trained model was loaded from disk.
    comb_model.istrained = False
    comb_model.train(X_test, y_test, act_test, X_cnn_test, X_lstm_test)
# Example #2
    def fit(self, rule_model=None):
        """Train the global models of this cluster using the feature set of
        a main rule model, then train and evaluate the combination model.

        Parameters
        ----------
        rule_model : dict
            Main-rule description; must contain a ``'features'`` entry
            (column indices used to slice the CV folds) and a ``'models'``
            mapping that is forwarded to ``fit_model``.

        Returns
        -------
        dict
            ``self.to_dict()`` after training and saving.

        Raises
        ------
        ValueError
            If ``rule_model`` is missing or lacks the ``'features'`` entry.
        """
        X, y, act, X_cnn, X_lstm, test_indices = self.load_data()
        self.variables = X.columns
        indices = X.index
        X, y, act, X_cnn, X_lstm, X_test, y_test, act_test, X_cnn_test, X_lstm_test = self.split_test_data(X, y,
                                                                                                           act,
                                                                                                           X_cnn=X_cnn,
                                                                                                           X_lstm=X_lstm,
                                                                                                           test_indices=test_indices)
        X = X.values
        y = y.values / 20  # NOTE(review): hard-coded rated value of 20 — confirm against project config
        act = act.values

        # Targets and activations are kept 2-D (n_samples, 1) downstream.
        if y.ndim == 1:
            y = y[:, np.newaxis]
        if act.ndim == 1:
            act = act[:, np.newaxis]

        # Guard early: a missing/incomplete rule model would otherwise fail
        # later with an opaque AttributeError or KeyError.
        if rule_model is None or 'features' not in rule_model:
            raise ValueError('the main rule model has no "features" attribute for the global models')
        self.features = rule_model['features']
        cvs, mask_test1, X, y, act, X_cnn, X_lstm = self.split_dataset(X, y, act, X_cnn, X_lstm)
        self.indices = indices[:X.shape[0]]
        # Restrict each CV fold's inputs (train/val/test at slots 0, 2, 4)
        # to the selected feature columns.
        for i in range(3):
            cvs[i][0] = cvs[i][0][:, self.features]
            cvs[i][2] = cvs[i][2][:, self.features]
            cvs[i][4] = cvs[i][4][:, self.features]

        self.models = dict()
        for method in self.static_data['project_methods'].keys():
            if self.static_data['project_methods'][method]['Global'] == True:
                # NOTE(review): the sibling fit() looks up 'sklearn_method'
                # here and forwards it; this variant forwards the rule's
                # trained models instead, so the optimizer lookup was dead
                # code and has been removed — confirm against fit_model.
                self.fit_model(cvs, method, self.static_data, self.cluster_dir, rule_model['models'], self.gpu, X_cnn=X_cnn, X_lstm=X_lstm, y=y, rated=1)

        comb_model = combine_model(self.static_data, self.cluster_dir, x_scaler=self.x_scaler, is_global=True)
        if not comb_model.istrained and X_test.shape[0] > 0:
            comb_model.train(X_test, y_test, act_test, X_cnn_test, X_lstm_test)

            # Evaluate the freshly trained combination model on the held-out
            # test split and persist the metrics.
            predict_module = global_predict(self.static_data)
            predictions = predict_module.predict(X_test.values, X_cnn=X_cnn_test, X_lstm=X_lstm_test)
            result = predict_module.evaluate(predictions, y_test.values)
            result.to_csv(os.path.join(self.data_dir, 'result_test.csv'))

        self.istrained = True
        self.save(self.cluster_dir)

        return self.to_dict()
    def fit(self):
        """Train every configured method for this cluster, then the
        combination model, and persist the trained state.

        Skips all work when the cluster is already trained
        (``self.istrained``).

        Returns
        -------
        dict
            ``self.to_dict()`` in both the trained and already-loaded cases.
        """
        if not self.istrained:
            X, y, act, X_cnn, X_lstm, test_indices = self.load_data()
            self.logger.info('Start training cluster %s', self.cluster_name)
            self.logger.info('\n')  # bug fix: '/n' was a typo for a newline separator
            self.variables = X.columns
            indices = X.index
            X, y, act, X_cnn, X_lstm, X_test, y_test, act_test, X_cnn_test, X_lstm_test = self.split_test_data(
                X,
                y,
                act,
                X_cnn=X_cnn,
                X_lstm=X_lstm,
                test_indices=test_indices)
            if X_test.shape[0] > 0:
                # Baseline: a linear model on the linear variables gives a
                # reference error level for the test split.
                lin_models = LinearRegression().fit(X[self.var_lin].values,
                                                    y.values.ravel())
                preds = lin_models.predict(X_test[self.var_lin].values).ravel()

                err = (preds - y_test.values.ravel()) / 20

                # NOTE(review): despite the name, this is the sum of squared
                # errors, not a root-mean-square — confirm before renaming.
                rms = np.sum(np.square(err))
                mae = np.mean(np.abs(err))
                # bug fix: print() does not %-format its arguments the way
                # logger.info does; the originals printed the raw tuple args.
                print('rms = %s' % rms)
                print('mae = %s' % mae)
                self.logger.info("Objective from linear models: %s", mae)
            X = X.values
            y = y.values / 20  # NOTE(review): hard-coded rated value of 20 — confirm
            act = act.values

            # Targets and activations are kept 2-D (n_samples, 1) downstream.
            if y.ndim == 1:
                y = y[:, np.newaxis]
            if act.ndim == 1:
                act = act[:, np.newaxis]

            # Best-effort reload of previously fitted state; absence is fine.
            try:
                self.load(self.cluster_dir)
            except Exception:
                pass

            # Re-run feature selection unless features were already selected
            # and online retraining is disabled.
            if not (hasattr(self, 'features')
                    and not self.static_data['train_online']):
                if self.static_data['sklearn']['fs_status'] != 'ok':
                    X_train, X_test1, y_train, y_test1 = split_continuous(
                        X, y, test_size=0.15, random_state=42)

                    # Three random train/val splits over the same train pool.
                    cvs = []
                    for _ in range(3):
                        X_train1 = np.copy(X_train)
                        y_train1 = np.copy(y_train)
                        X_train1, X_val, y_train1, y_val = train_test_split(
                            X_train1, y_train1, test_size=0.15)
                        cvs.append([
                            X_train1, y_train1, X_val, y_val, X_test1, y_test1
                        ])
                    self.find_features(
                        cvs, self.static_data['sklearn']['fs_method'],
                        self.static_data['sklearn']['njobs'])

            cvs, mask_test1, X, y, act, X_cnn, X_lstm = self.split_dataset(
                X, y, act, X_cnn, X_lstm)
            self.indices = indices[:X.shape[0]]
            # Restrict each CV fold's inputs (train/val/test at slots
            # 0, 2, 4) to the selected feature columns.
            for i in range(3):
                cvs[i][0] = cvs[i][0][:, self.features]
                cvs[i][2] = cvs[i][2][:, self.features]
                cvs[i][4] = cvs[i][4][:, self.features]

            self.logger.info('Data info for cluster %s', self.cluster_name)
            self.logger.info('Number of variables %s', str(self.D))
            self.logger.info('Number of total samples %s', str(self.N_tot))
            self.logger.info('Number of training samples %s',
                             str(self.N_train))
            self.logger.info('Number of validation samples %s',
                             str(self.N_val))
            self.logger.info('Number of testing samples %s', str(self.N_test))
            self.logger.info('\n')

            self.models = dict()
            for method in self.static_data['project_methods'].keys():
                if self.static_data['project_methods'][method][
                        'status'] == 'train':
                    self.logger.info('Training start of method %s', method)
                    self.logger.info('\n')
                    # Optional sklearn optimizer; empty list means "none".
                    optimize_method = self.static_data['project_methods'][
                        method].get('sklearn_method', [])
                    self.fit_model(cvs,
                                   method,
                                   self.static_data,
                                   self.cluster_dir,
                                   optimize_method,
                                   X_cnn=X_cnn,
                                   X_lstm=X_lstm,
                                   y=y,
                                   rated=1)
                    self.logger.info('Training end of method %s', method)

            self.logger.info('Training end for cluster %s', self.cluster_name)
            self.logger.info('\n')
            self.logger.info('Start of training of Combination models')
            comb_model = combine_model(self.static_data,
                                       self.cluster_dir,
                                       x_scaler=self.x_scaler)
            if not comb_model.istrained and X_test.shape[0] > 0:
                comb_model.train(X_test, y_test, act_test, X_cnn_test,
                                 X_lstm_test)

                # Evaluate the combination model on the held-out test split
                # and persist the metrics.
                predict_module = cluster_predict(self.static_data,
                                                 self.cluster_name)
                predictions = predict_module.predict(X_test.values,
                                                     X_cnn=X_cnn_test,
                                                     X_lstm=X_lstm_test)
                result = predict_module.evaluate(predictions, y_test.values)
                result.to_csv(os.path.join(self.data_dir, 'result_test.csv'))

            self.istrained = True
            self.save(self.cluster_dir)
        else:
            self.logger.info('Cluster of %s loaded successfully',
                             self.cluster_name)

        return self.to_dict()