    def fit_TL(self):
        if self.optimize_method == 'deap':
            from Fuzzy_clustering.version2.sklearn_models.sklearn_models_deap import sklearn_model
        elif self.optimize_method == 'optuna':
            from Fuzzy_clustering.version2.sklearn_models.sklearn_models_optuna import sklearn_model
        elif self.optimize_method == 'skopt':
            from Fuzzy_clustering.version2.sklearn_models.sklearn_models_skopt import sklearn_model
        else:
            from Fuzzy_clustering.version2.sklearn_models.sklearn_models_grid import sklearn_model
        static_data_tl = self.static_data['tl_project']['static_data']
        cluster_dir_tl = os.path.join(static_data_tl['path_model'],
                                      'Regressor_layer/' + self.cluster_name)
        model_sklearn_TL = sklearn_model(static_data_tl, cluster_dir_tl,
                                         static_data_tl['rated'], self.method,
                                         self.njobs)
        if self.istrained == False:
            cvs = self.load_data()
            model_sklearn = sklearn_model(self.static_data, self.sk_models_dir,
                                          self.rated, self.method, self.njobs)
            if model_sklearn.istrained == False:
                self.models[self.method] = model_sklearn.train_TL(
                    cvs, model_sklearn_TL.best_params)
            else:
                self.models[self.method] = model_sklearn.to_dict()
            self.istrained = True
            self.save()
        return 'Done'
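For context, fit_TL reuses the hyperparameters already tuned for the source project's regressor (model_sklearn_TL.best_params) and only refits on the target cluster's data via train_TL. A minimal sketch of that idea, assuming a RandomForestRegressor and placeholder data rather than the project's actual estimator:

import numpy as np
from sklearn.ensemble import RandomForestRegressor

rng = np.random.default_rng(0)
X_target = rng.random((200, 8))   # placeholder for the target cluster's features
y_target = rng.random(200)        # placeholder for the target cluster's measurements

# Stand-in for model_sklearn_TL.best_params tuned on the source project.
source_best_params = {'n_estimators': 300, 'max_depth': 12}

target_model = RandomForestRegressor(**source_best_params, random_state=0)
target_model.fit(X_target, y_target)   # refit on the target data only, no re-tuning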
    def fit(self):
        if self.optimize_method == 'deap':
            from Fuzzy_clustering.version2.sklearn_models.sklearn_models_deap import sklearn_model
        elif self.optimize_method == 'optuna':
            from Fuzzy_clustering.version2.sklearn_models.sklearn_models_optuna import sklearn_model
        elif self.optimize_method == 'skopt':
            from Fuzzy_clustering.version2.sklearn_models.sklearn_models_skopt import sklearn_model
        else:
            from Fuzzy_clustering.version2.sklearn_models.sklearn_models_grid import sklearn_model

        if self.istrained == False:
            cvs = self.load_data()
            model_sklearn = sklearn_model(self.static_data,
                                          self.sk_models_dir,
                                          self.rated,
                                          self.method,
                                          self.njobs,
                                          path_group=self.path_group)
            if model_sklearn.istrained == False:
                print('Train ', self.method, ' ', self.cluster_name)
                self.models[self.method] = model_sklearn.train(cvs)
            else:
                self.models[self.method] = model_sklearn.to_dict()
            self.istrained = True
            self.save()
        return 'Done'
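The if/elif ladder at the top of fit (and of fit_TL above) only decides which sklearn_model implementation to import: DEAP, Optuna, skopt, or the grid-search fallback. As an illustration only (this helper is not part of the project), the same dispatch can be written once with importlib, reusing the module paths from the imports above:

import importlib

# Map each optimize_method value to the module providing its sklearn_model class.
_SKLEARN_BACKENDS = {
    'deap': 'Fuzzy_clustering.version2.sklearn_models.sklearn_models_deap',
    'optuna': 'Fuzzy_clustering.version2.sklearn_models.sklearn_models_optuna',
    'skopt': 'Fuzzy_clustering.version2.sklearn_models.sklearn_models_skopt',
}
_DEFAULT_BACKEND = 'Fuzzy_clustering.version2.sklearn_models.sklearn_models_grid'

def load_sklearn_model(optimize_method):
    # Unknown methods fall back to the grid-search implementation, mirroring the else branch.
    module_name = _SKLEARN_BACKENDS.get(optimize_method, _DEFAULT_BACKEND)
    return importlib.import_module(module_name).sklearn_model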
Example 3
    def train(self):
        if len(self.combine_methods) > 1:
            pred_cluster, predictions, y_pred = self.project.predict_clusters()

            self.combine_methods = [
                method for method in self.combine_methods
                if method in predictions.keys()
            ]
            self.models = dict()
            for method in self.combine_methods:
                pred = predictions[method].values.astype('float')
                pred[np.where(np.isnan(pred))] = 0

                cvs = []
                for _ in range(3):
                    X_train, X_test1, y_train, y_test1 = train_test_split(
                        pred, y_pred.values, test_size=0.15)
                    X_train, X_val, y_train, y_val = train_test_split(
                        X_train, y_train, test_size=0.15)
                    cvs.append(
                        [X_train, y_train, X_val, y_val, X_test1, y_test1])
                mlp_model = sklearn_model(
                    self.static_data,
                    self.model_dir + '/' + method,
                    self.rated,
                    'mlp',
                    self.n_jobs,
                    path_group=self.static_data['path_group'])
                if mlp_model.istrained == False:
                    self.models['mlp_' + method] = mlp_model.train(cvs)
                else:
                    self.models['mlp_' + method] = mlp_model.to_dict()
            combine_method = 'bcp'
            for method in self.combine_methods:
                self.models['bcp_' + method] = self.bcp_fit(
                    predictions[method].values.astype('float'), y_pred.values)

        else:
            self.combine_methods = ['average']

        self.istrained = True
        self.save(self.model_dir)
        return 'Done'
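Each per-method combiner above is trained on three independent random splits built by nested train_test_split calls (roughly 72% train, 13% validation, 15% test). A self-contained sketch of that split construction, with placeholder arrays standing in for predictions[method].values and y_pred.values:

import numpy as np
from sklearn.model_selection import train_test_split

rng = np.random.default_rng(0)
pred = rng.random((500, 4))   # placeholder for one method's cluster predictions
y = rng.random(500)           # placeholder for the measured target

cvs = []
for _ in range(3):
    # Hold out 15% as a test fold, then 15% of the remainder as a validation fold.
    X_train, X_test, y_train, y_test = train_test_split(pred, y, test_size=0.15)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15)
    cvs.append([X_train, y_train, X_val, y_val, X_test, y_test])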
Example 4
    def train(self):
        X_test, y_test, act_test, X_cnn_test, X_lstm_test = self.load_data()
        if X_test.shape[0] > 0 and len(
                self.methods) > 1 and self.istrained == False:
            # The same resampling logic applies to every supported project type.
            if self.model_type in {'pv', 'wind', 'load', 'fa'}:
                if self.resampling == True:
                    pred_resample, y_resample, results = self.resampling_for_combine(
                        X_test, y_test, act_test, X_cnn_test, X_lstm_test)
                else:
                    pred_resample, y_resample, results = self.without_resampling(
                        X_test, y_test, act_test, X_cnn_test, X_lstm_test)

            self.best_methods = results.nsmallest(4, 'mae').index.tolist()
            results = results.loc[self.best_methods]
            results['diff'] = results['mae'] - results['mae'].iloc[0]
            best_of_best = results.iloc[np.where(
                results['diff'] <= 0.02)].index.tolist()
            if len(best_of_best) == 1:
                best_of_best.extend(
                    [best_of_best[0], best_of_best[0], self.best_methods[1]])
            elif len(best_of_best) == 2:
                best_of_best.extend([best_of_best[0], best_of_best[0]])
            elif len(best_of_best) == 3:
                best_of_best.append(best_of_best[0])

            self.best_methods = best_of_best
            X_pred = np.array([])
            for method in sorted(self.best_methods):
                if X_pred.shape[0] == 0:
                    X_pred = pred_resample[method]
                else:
                    X_pred = np.hstack((X_pred, pred_resample[method]))
            X_pred[np.where(X_pred < 0)] = 0
            X_pred, y_resample = shuffle(X_pred, y_resample)
            self.weight_size = len(self.best_methods)
            self.model = dict()
            for combine_method in self.combine_methods:
                if combine_method == 'rls':
                    self.logger.info('RLS training')
                    self.logger.info('\n')
                    self.model[combine_method] = dict()
                    w = self.rls_fit(X_pred, y_resample)

                    self.model[combine_method]['w'] = w

                elif combine_method == 'bcp':
                    self.logger.info('BCP training')
                    self.logger.info('\n')
                    self.model[combine_method] = dict()
                    w = self.bcp_fit(X_pred, y_resample)
                    self.model[combine_method]['w'] = w

                elif combine_method == 'mlp':
                    self.logger.info('MLP training')
                    self.logger.info('\n')
                    cvs = []
                    for _ in range(3):
                        X_train1, X_test1, y_train1, y_test1 = train_test_split(
                            X_pred, y_resample, test_size=0.15)
                        X_train, X_val, y_train, y_val = train_test_split(
                            X_train1, y_train1, test_size=0.15)
                        cvs.append(
                            [X_train, y_train, X_val, y_val, X_test1, y_test1])
                    mlp_model = sklearn_model(
                        self.static_data,
                        self.model_dir,
                        self.rated,
                        'mlp',
                        self.n_jobs,
                        is_combine=True,
                        path_group=self.static_data['path_group'])
                    self.model[combine_method] = mlp_model.train(cvs)

                elif combine_method == 'bayesian_ridge':
                    self.logger.info('bayesian_ridge training')
                    self.logger.info('\n')
                    self.model[combine_method] = BayesianRidge()
                    self.model[combine_method].fit(X_pred, y_resample)

                elif combine_method == 'elastic_net':
                    self.logger.info('elastic_net training')
                    self.logger.info('\n')
                    self.model[combine_method] = ElasticNetCV(cv=5)
                    self.model[combine_method].fit(X_pred, y_resample)
                elif combine_method == 'ridge':
                    self.logger.info('ridge training')
                    self.logger.info('\n')
                    self.model[combine_method] = RidgeCV(cv=5)
                    self.model[combine_method].fit(X_pred, y_resample)
            self.logger.info('End of combine models training')
        else:
            self.combine_methods = ['average']
        self.istrained = True
        self.save(self.model_dir)

        return 'Done'
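The selection step in the method above keeps the four methods with the smallest MAE, treats anything within 0.02 of the best as a near-tie, and pads the shortlist back to four entries. A small sketch of the ranking part with a made-up results table (method names and numbers are illustrative only):

import pandas as pd

# Hypothetical per-method evaluation table (MAE on the held-out predictions).
results = pd.DataFrame({'mae': [0.031, 0.030, 0.045, 0.052, 0.060]},
                       index=['svm', 'rf', 'mlp', 'xgb', 'rbfnn'])

best_methods = results.nsmallest(4, 'mae').index.tolist()
results = results.loc[best_methods]
results['diff'] = results['mae'] - results['mae'].iloc[0]

# Methods whose MAE is within 0.02 of the best are kept as near-equivalents.
best_of_best = results.index[results['diff'] <= 0.02].tolist()
print(best_of_best)   # ['rf', 'svm', 'mlp'] with the numbers above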
    def fit(self, cvs):
        # logger = logging.getLogger('log_fs_boruta.log')
        # logger.setLevel(logging.INFO)
        # handler = logging.FileHandler(os.path.join(self.log_dir, 'log_fs_boruta.log'), 'w')
        # handler.setLevel(logging.INFO)
        #
        # # create a logging format
        # formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        # handler.setFormatter(formatter)
        #
        # # add the handlers to the logger
        # logger.addHandler(handler)

        print()
        print('Training the model (fitting to the training data)')
        # logger.info('Training the feature extraction ')
        X = np.vstack((cvs[0][0], cvs[0][2], cvs[0][4]))

        if len(cvs[0][1].shape) == 1 and len(cvs[0][5].shape) == 1:
            y = np.hstack((cvs[0][1], cvs[0][3], cvs[0][5]))
        else:
            y = np.vstack((cvs[0][1], cvs[0][3], cvs[0][5])).ravel()
        # Note: D holds the number of samples here, while N and N_tot hold the number of input features.
        self.D, self.N = X.shape
        self.N_tot = X.shape[1]

        ncpus = joblib.load(os.path.join(self.path_group, 'total_cpus.pickle'))
        gpu_status = joblib.load(
            os.path.join(self.path_group, 'gpu_status.pickle'))

        njobs = int(ncpus - gpu_status)
        cpu_status = njobs
        joblib.dump(cpu_status,
                    os.path.join(self.path_group, 'cpu_status.pickle'))

        regressor = sklearn_model(self.static_data,
                                  self.log_dir,
                                  1,
                                  'rf',
                                  njobs,
                                  path_group=self.path_group)
        if regressor.istrained == False:
            regressor.train(cvs, FS=True)

        # Update the tuned regressor's parameters
        estimator = regressor.model
        estimator.set_params(n_jobs=self.njobs)
        self.init_params = [regressor.best_params]
        # Define steps
        step1 = {'Constant Features': {'frac_constant_values': 0.999}}

        step2 = {'Correlated Features': {'correlation_threshold': 0.999}}

        step3 = {
            'Relevant Features': {
                'cv': 3,
                'estimator': estimator,
                'n_estimators': 500,
                'max_iter': 20,
                'verbose': 0,
                'random_state': 42
            }
        }

        step4 = {
            'RFECV Features': {
                'cv': 3,
                'estimator': estimator,
                'step': 1,
                'scoring': 'neg_root_mean_squared_error',
                'verbose': 50
            }
        }

        # Place the steps in a list in the order you want them executed
        steps = [step1, step2, step3]  # step4 (RFECV) is defined above but not applied here
        columns = ['other_' + str(i) for i in range(X.shape[1])]
        X_df = pd.DataFrame(X, columns=columns)
        # Initialize FeatureSelector()
        fs = FeatureSelector()

        # Apply feature selection methods in the order they appear in steps
        fs.fit(X_df, y.ravel(), steps)
        features = [
            i for i in range(len(X_df.columns))
            if X_df.columns[i] in fs.selected_features
        ]

        # Get selected features
        self.features = np.array(features)

        # logger.info('best score %s', str(best_score))
        # logger.info('Number of variables %s', str(self.features.shape[0]))
        # logger.info('Finish the feature extraction ')
        if self.features.shape[0] > 48:
            pca = self.reduce_dim(cvs)
        else:
            pca = None
            # logger.info('Number of variables %s', str(self.features.shape[0]))
            # logger.info('Finish the feature extraction ')
        return features, pca
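The FeatureSelector steps above (constant-feature removal, correlation filtering, Boruta-style relevance, optional RFECV) broadly correspond to standard scikit-learn tooling. A rough, self-contained sketch of a comparable chain using VarianceThreshold and RFECV; this is an assumed analogue, not the project's FeatureSelector:

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import RFECV, VarianceThreshold

rng = np.random.default_rng(42)
X = rng.random((300, 20))
y = 2.0 * X[:, 0] + rng.normal(scale=0.1, size=300)   # one informative column

# Analogue of the 'Constant Features' step: drop (near-)constant columns.
vt = VarianceThreshold(threshold=1e-6)
X_reduced = vt.fit_transform(X)

# Analogue of the 'RFECV Features' step: recursive elimination with 3-fold CV.
selector = RFECV(RandomForestRegressor(n_estimators=100, random_state=42),
                 step=1, cv=3, scoring='neg_root_mean_squared_error')
selector.fit(X_reduced, y)
selected_columns = np.flatnonzero(selector.support_)   # indices of the kept features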