Example #1
    def svm_model(self, data_kind='train_data'):
        self.data_kind = data_kind
        data_file = get_train_data_path(self.fc, self.fj, self.model_kind)
        df = pd.read_csv(data_file, encoding='utf-8', index_col=0, low_memory=False)
        print(df.shape)
        traindata = df.iloc[:, :].values
        x = traindata[:, :-1]
        y = traindata[:, -1]
        x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8)
        print('when training, train data number:', len(y_train))
        print('when training, test data number:', len(y_test))
        print('training model:', self.model_kind)
        raw_model = SVR()
        parameters = [
            {
                'C': [3, 7, 9, 15, 19],
                'gamma': [0.001, 1, 10],
                'kernel': ['rbf']
            }
        ]
        # 3-fold cross-validated grid search; GridSearchCV refits the best
        # estimator on the full training set (refit=True by default).
        gsc = GridSearchCV(raw_model, parameters, cv=3)
        gsc.fit(x_train, y_train)
        print('svm best parameters:', gsc.best_params_)
        print(self.model_path)
        joblib.dump(gsc, self.model_path + self.model_name)
        # Predict with the refitted best estimator, not the raw, unfitted SVR.
        pred = gsc.predict(x_test)

        self.save_result_dataframe(y_test, pred)
        self.set_var(true_v=y_test, pred_v=pred)
        self.show_save_figure(detal_idx=4)
        t_mean = self.cal_mean(self.true)
        p_mean = self.cal_mean(self.pred)
        self.save_result(true_mean=t_mean, pred_mean=p_mean, train_n=len(x_train), test_n=len(x_test))
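A minimal sketch of reusing the dumped model, assuming the same joblib import; model_path, model_name, and x_test stand in for the values used above:

import joblib

# predict() on a reloaded GridSearchCV delegates to the best estimator,
# which was refit on the full training set (refit=True by default).
loaded = joblib.load(model_path + model_name)
pred = loaded.predict(x_test)
print('best params of loaded model:', loaded.best_params_)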
Example #2
 def get_data(self):
     train_data = get_train_data_path(fc=self.fc,
                                      fj=self.fj,
                                      model_kind=self.model_kind)
     df = pd.read_csv(train_data, encoding='utf-8', index_col=0)
     data_set = df.iloc[:, :].values
     x_dataset = data_set[:, :-1]
     y_dataset = data_set[:, -1]
     self.x_train_mean = None
     self.x_train_std = None
     normalized_data = data_normalized(x_dataset)
     x_train, x_test, y_train, y_test = train_test_split(normalized_data,
                                                         y_dataset,
                                                         train_size=0.6)
     print(x_train[0], len(x_train))
     print(y_train[0], len(y_train))
     if self.data_kind == 'train':
         # Build overlapping sliding windows of length time_step and record
         # the index at which each mini-batch starts.
         self.train_x_batch = []
         self.train_y_batch = []
         self.batch_index = []
         for i in range(len(x_train) - self.time_step - 1):
             if i % self.batch_size == 0:
                 self.batch_index.append(i)
             x = x_train[i:i + self.time_step]
             y = y_train[i:i + self.time_step]
             self.train_x_batch.append(x.tolist())
             self.train_y_batch.append(
                 np.reshape(y, newshape=(self.time_step, self.output_size)))
         self.test_x_batch = []
         self.test_y_batch = []
         self.test_batch_index = []
         for i in range(len(x_test) - self.time_step - 1):
             if i % self.batch_size == 0:
                 self.test_batch_index.append(i)
             # Slice the test split here, not the training split.
             x = x_test[i:i + self.time_step]
             y = y_test[i:i + self.time_step]
             self.test_x_batch.append(x.tolist())
             self.test_y_batch.append(
                 np.reshape(y, newshape=(self.time_step, self.output_size)))
     elif self.data_kind == 'fault_test':
         fault_test_data = get_test_data_path()
         fault_df = pd.read_csv(fault_test_data,
                                encoding='utf-8',
                                index_col=0)
         fault_data_set = fault_df.iloc[:, :].values
         fault_x_dataset = fault_data_set[:, :-1]
         fault_y_dataset = fault_data_set[:, -1]
         fault_normalized_data = data_normalized(fault_x_dataset)
         print(len(fault_normalized_data), len(fault_y_dataset))
         self.train_x_batch = []
         self.train_y_batch = []
         self.batch_index = []
         for i in range(len(fault_normalized_data) - self.time_step - 1):
             if i % self.batch_size == 0:
                 self.batch_index.append(i)
             x = fault_normalized_data[i:i + self.time_step]
             y = fault_y_dataset[i:i + self.time_step]
             self.train_x_batch.append(x.tolist())
             self.train_y_batch.append(
                 np.reshape(y, newshape=(self.time_step, self.output_size)))
Example #3
 def train_lightgbm_model(self, data_kind='train'):
     self.data_kind = data_kind
     data_file = get_train_data_path(self.fc, self.fj, self.model_kind,
                                     self.params_kind)
     df = pd.read_csv(data_file,
                      index_col=0,
                      encoding='utf-8',
                      low_memory=False)
     #print(df.columns)
     print('df shape:', df.shape)
     traindata = df.iloc[:, :].values
     x = traindata[:, :-1]
     y = traindata[:, -1]
     x_train, x_test, y_train, y_test = train_test_split(
         x, y, train_size=0.8)  # hold out 20% of the rows for testing
     print(len(x_train), len(x_test))
     print('training lightgbm model')
     model = LGBMRegressor(boosting_type='gbdt',
                           num_leaves=self.num_leaves,
                           n_estimators=self.n_estimator,
                           max_depth=self.max_depth)
     model.fit(x_train, y_train)
     print(model.feature_importances_)
     # best_score_/best_iteration_ are only populated when fit() receives an
     # eval_set (best_iteration_ additionally needs early stopping).
     print(model.best_score_)
     print(model.best_iteration_)
     joblib.dump(model, self.model_path + self.model_name)
     pred = model.predict(x_test)
     self.set_var(true_v=y_test, pred_v=pred)
     self.show_save_figure(detal_idx=8)
     t_mean = self.cal_mean(self.true)
     p_mean = self.cal_mean(self.pred)
     self.save_result(true_mean=t_mean,
                      pred_mean=p_mean,
                      train_n=len(x_train),
                      test_n=len(x_test))
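As noted in the comment above, best_score_ and best_iteration_ are only informative when fit receives a validation set; a hedged sketch of that variant (parameter values are illustrative, and early_stopping_rounds is the pre-4.0 LightGBM spelling; newer releases pass a lightgbm.early_stopping callback instead):

from lightgbm import LGBMRegressor

model = LGBMRegressor(boosting_type='gbdt', num_leaves=31,
                      n_estimators=500, max_depth=-1)
# Track RMSE on the held-out split so best_score_/best_iteration_ are populated.
model.fit(x_train, y_train,
          eval_set=[(x_test, y_test)],
          eval_metric='rmse',
          early_stopping_rounds=50)
print(model.best_score_, model.best_iteration_)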
Example #4
 def get_data(self):
     data_file = get_train_data_path(self.fc, self.fj, self.model_kind)
     df = pd.read_csv(data_file,
                      encoding='utf-8',
                      index_col=0,
                      low_memory=False)
     print(df.shape)
     traindata = df.iloc[:, :].values
     x = traindata[:, :-1]
     y = traindata[:, -1]
     self.train_x, self.test_x, self.train_y, self.test_y = train_test_split(
         x, y, train_size=0.6)  # hold out 40% of the rows for testing
Example #5
 def xgboostmodel(self, data_kind='train_data'):
     self.data_kind = data_kind
     data_file = get_train_data_path(self.fc, self.fj, self.model_kind,
                                     self.params_kind)
     df = pd.read_csv(data_file,
                      encoding='utf-8',
                      index_col=0,
                      low_memory=False)
     logger.info(df.columns)
     logger.info(df.shape)
     traindata = df.iloc[:, :].values
     x = traindata[:, :-1]
     y = traindata[:, -1]
     x_train, x_test, y_train, y_test = train_test_split(
         x, y, train_size=0.8)  # hold out 20% of the rows for testing
     logger.info('when training, train data number: {}'.format(len(y_train)))
     logger.info('when training, test data number: {}'.format(len(y_test)))
     logger.info('training model: {}'.format(self.model_kind))
     # params={'booster':'gbtree','objective':'reg:squarederror','eval_metric':'rmse','seed':0,'n_jobs':10,'max_depth':self.max_depth,'n_estimators':self.n_estimator,'min_child_weight':self.min_child_weight,
     #         'verbosity':1,'learning_rate':0.05}
     raw_model = xgb.XGBRegressor(max_depth=self.max_depth,
                                  n_estimators=self.n_estimator,
                                  learning_rate=0.02,
                                  silent=False,  # deprecated in newer xgboost; 'verbosity' replaces it
                                  min_child_weight=self.min_child_weight,
                                  tree_method='gpu_hist')  # GPU histogram algorithm; needs a GPU-enabled build
     # raw_model = xgb.XGBRegressor(**params)
     raw_model.fit(x_train, y_train)
     logger.info(self.model_path)
     raw_model.save_model(self.model_path + self.model_file_name)
     pred = raw_model.predict(x_test)
     plot_importance(raw_model)
     if not os.path.exists(self.feature_importance_path):
         os.makedirs(self.feature_importance_path)
     # savefig defaults to PNG when the file name has no extension
     plt.savefig(self.feature_importance_path + self.fj_model_kind +
                 '_feature_importance')
     plt.show()
     plt.close()
     self.save_result_dataframe(y_test, pred)
     self.set_var(true_v=y_test, pred_v=pred)
     self.show_save_figure(detal_idx=4)
     t_mean = self.cal_mean(self.true)
     p_mean = self.cal_mean(self.pred)
     self.save_result(true_mean=t_mean,
                      pred_mean=p_mean,
                      train_n=len(x_train),
                      test_n=len(x_test))
Example #6
 def get_data(self):
     train_data = get_train_data_path()
     df = pd.read_csv(train_data, encoding='utf-8', index_col=0)
     data_set = df.iloc[:, :].values
     x_train = data_set[:, :-1]
     y_train = data_set[:, -1]
     # Global (scalar) z-score normalization across all features at once.
     normalized_data = (x_train - np.mean(x_train)) / np.std(x_train)
     tmp_x_batch = []
     tmp_y_batch = []
     # Each sample is a time_step-long window of features, labeled with the
     # target value at the window's last row.
     for i in range(len(data_set) - self.time_step - 1):
         x = normalized_data[i:i + self.time_step]
         y = y_train[i + self.time_step - 1]
         tmp_x_batch.append(x)
         tmp_y_batch.append(y)
     return tmp_x_batch, tmp_y_batch
Example #7
 def feature_data_filter(self):
     data_file = get_train_data_path(self.fc,
                                     self.fj,
                                     self.model_kind,
                                     params_kind=self.params_kind)
     df = pd.read_csv(data_file, encoding='utf-8', index_col=0)
     print(df.columns)
     # Filter abnormal rows for every configured parameter column.
     for f in self.params:
         df = self.recursive_filter_abnormal_data(df, f)
     out_file = self.mergedData_filtered_path + self.filtered_data_file
     if os.path.exists(out_file):
         print('remove file:' + out_file)
         os.remove(out_file)
     df.to_csv(out_file, encoding='utf-8')
Example #8
    def __init__(self, fc=None, fj=None, model_kind=None, params_kind=None):
        # self.dataFile = data_path + os.listdir(data_path)[0]
        self.dataFile = get_train_data_path(fc, fj, model_kind, params_kind)
        self.df = pd.read_csv(self.dataFile, encoding='utf-8', index_col=0)  # use the first column as the index
        result_path = cur_path + '/result/'
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        self.single_result_path = result_path + 'result_' + fc + '_' + fj + '_' + model_kind + '/'
        if not os.path.exists(self.single_result_path):
            os.makedirs(self.single_result_path)
        self.figure_path = self.single_result_path + '_' + params_kind + '_' + hour_minute + '/'
        if not os.path.exists(self.figure_path):
            os.makedirs(self.figure_path)
        self.model_number = model_kind.split('_')[-1]
        self.params = ParamsDict().model_params[params_kind][model_kind]
Example #9
    def adaboostmodel(self, data_kind='train'):
        self.data_kind = data_kind
        data_file = get_train_data_path(self.fc, self.fj, self.model_kind)
        df = pd.read_csv(data_file,
                         encoding='utf-8',
                         index_col=0,
                         low_memory=False)
        print(df.shape)
        traindata = df.iloc[:, :].values
        x = traindata[:, :-1]
        y = traindata[:, -1]
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, train_size=0.8)  # hold out 20% of the rows for testing
        print('when training, train data number:', len(y_train))
        print('when training, test data number:', len(y_test))
        print('training model:', self.model_kind)
        raw_model = AdaBoostRegressor(
            base_estimator=DecisionTreeRegressor(
                max_features=None,
                max_depth=self.max_depth,
                min_samples_split=20,
                min_samples_leaf=10,
                min_weight_fraction_leaf=0,
                max_leaf_nodes=None),
            learning_rate=0.01,
            loss='square',
            n_estimators=self.n_estimator)
        raw_model.fit(x_train, y_train)
        print(self.model_path)

        joblib.dump(raw_model, self.model_path + self.model_file_name)
        pred = raw_model.predict(x_test)

        self.save_result_dataframe(y_test, pred)
        self.set_var(true_v=y_test, pred_v=pred)
        self.show_save_figure(detal_idx=4)
        t_mean = self.cal_mean(self.true)
        p_mean = self.cal_mean(self.pred)
        self.save_result(true_mean=t_mean,
                         pred_mean=p_mean,
                         train_n=len(x_train),
                         test_n=len(x_test))
Example #10
    def __init__(self, job_name=None, fc=None, fj=None, model_kind=None):
        super().__init__()

        self.columns = ['pitch_Atech_hub_temp_1', 'pitch_Atech_cabinet_temp_1',
                        'pitch_position_1', 'wind_speed', 'rotor_speed',
                        'pitch_Atech_capacitor_temp_1']
        self.job_name = job_name + '_' + fc + '_' + fj
        self.model_folder_name = fj + '_' + model_kind
        self.model_name = fj
        self.fc = fc
        self.fj = fj
        self.model_kind = model_kind
        self.cur_path = cur_path
        self.init_param()
        self.fj_model_kind = fj + '_' + model_kind
        self.field_default = [0, 0, 0, 0, 0, 0]
        self.train_file_path = get_train_data_path(fc=fc, fj=fj, model_kind=model_kind)
        print(self.train_file_path)
        self.batch_size = 100
Example #11
import pandas as pd
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from model.get_data_path import get_train_data_path, get_test_data_path
from sklearn.model_selection import train_test_split
import os
from util.show_save_result import ShowAndSave

cur_path = os.path.abspath(os.path.dirname(__file__))
datafile = get_train_data_path()


class AdaboostModel(ShowAndSave):
    def __init__(self, params=None, jobname='adbmodel'):
        super().__init__()
        self.job_name = jobname
        self.cur_path = cur_path
        self.init_param()
        self.params = params

    def adaboostmodel(self):
        df = pd.read_csv(datafile, encoding='utf-8', index_col=0)
        traindata = df.iloc[:, :].values
        x = traindata[:, :-1]
        y = traindata[:, -1]
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, train_size=0.7)  # list
        raw_model = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(
            max_features=None,
            max_depth=None,
            min_samples_split=20,
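            # --- The listing is cut off above. The remaining arguments below are a
            # --- reconstruction mirroring the near-identical call in Example #9;
            # --- the n_estimators value is an assumption, not the original source.
            min_samples_leaf=10,
            min_weight_fraction_leaf=0,
            max_leaf_nodes=None),
            learning_rate=0.01,
            loss='square',
            n_estimators=100)
        raw_model.fit(x_train, y_train)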
Example #12
from model.get_data_path import get_train_data_path
import numpy as np
import tensorflow as tf
import pandas as pd
from util.data_normalized import data_normalized

HIDDEN_SIZE = 500
NUM_LAYERS = 2
TIMESTEPS = 20
TRAINING_STEPS = 10000
BATCH_SIZE = 1000
INPUT_SIZE = 9
LEARNING_RATE_BASE = 0.01
LEARNING_RATE_DECAY = 0.9
NUM_EXAMPLES = 50000
datafile = get_train_data_path('wfzc', 'A2', 'cap_temp_1', 'model_params_v3')
df = pd.read_csv(datafile, index_col=0)  # read the CSV directly instead of leaving a file handle open
data = df.iloc[:, :].values
MODEL_SAVE_PATH = "model_saved/"
MODEL_NAME = "model.ckpt"
LSTM_KEEP_PROB = 0.9


def lstm_model(x, dropout_keep_prob):
    # Stack NUM_LAYERS LSTM cells, each with dropout applied to its output.
    lstm_cells = [
        tf.nn.rnn_cell.DropoutWrapper(
            tf.nn.rnn_cell.BasicLSTMCell(HIDDEN_SIZE),
            output_keep_prob=dropout_keep_prob) for _ in range(NUM_LAYERS)
    ]
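    # --- Truncated in the listing. A minimal completion following the standard
    # --- TF 1.x stacked-LSTM regression pattern (a sketch, not the original code):
    cell = tf.nn.rnn_cell.MultiRNNCell(lstm_cells)
    # Run the stacked cell over the time dimension of x.
    outputs, _ = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
    # Regress on the output at the last time step.
    last = outputs[:, -1, :]
    predictions = tf.layers.dense(last, 1, activation=None)
    return predictions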
Example #13
import pandas as pd
import numpy as np
from model.get_data_path import get_train_data_path


def get_new_mergeddata(filename):
    df = pd.read_csv(filename, encoding='utf-8', index_col=0)
    data_set = df.iloc[:, :].values
    x_train = data_set[:, :-1]
    y_train = data_set[:, -1]
    # tmp_data_set=[]
    # for i in range(len(data_set)):
    #     if i % 2==0:
    #         tmp_data_set.append(data_set[i])
    # print(np.shape(np.array(tmp_data_set)))
    tmp_x_batch = []
    tmp_y_batch = []
    print(x_train[:3])
    print(y_train[:3])


filename = get_train_data_path()
get_new_mergeddata(filename)
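The loop that would fill tmp_x_batch and tmp_y_batch is missing from the listing. A plausible completed variant, mirroring the sliding-window scheme of Example #6 (the function name and time_step default are hypothetical):

def get_new_mergeddata_windows(filename, time_step=20):
    # Hypothetical reconstruction: window the features over time_step rows and
    # label each window with the target at its last row, as in Example #6.
    df = pd.read_csv(filename, encoding='utf-8', index_col=0)
    data_set = df.iloc[:, :].values
    x_data = data_set[:, :-1]
    y_data = data_set[:, -1]
    tmp_x_batch = []
    tmp_y_batch = []
    for i in range(len(data_set) - time_step - 1):
        tmp_x_batch.append(x_data[i:i + time_step])
        tmp_y_batch.append(y_data[i + time_step - 1])
    return np.array(tmp_x_batch), np.array(tmp_y_batch)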