def _build_data(self):
    """Load both CSV datasets, preprocess them and split off a test set.

    Reads the files at ``self.my_config.normal_data_path`` and
    ``self.my_config.characteristic_data_path``, passes their raw values
    through ``data_preprocess`` and ``get_full_train_data``, then calls
    ``train_test_split`` with ``test_size=0.4``.

    Sets ``self.X`` / ``self.Y`` to the training portion and
    ``self.X_test`` / ``self.Y_test`` to the held-out portion.
    """
    cfg = self.my_config

    # Load the two source tables and keep only their underlying arrays.
    normal_values = pd.read_csv(cfg.normal_data_path).values
    characteristic_values = pd.read_csv(cfg.characteristic_data_path).values

    # Preprocess both arrays together.
    normal_values, characteristic_values = data_preprocess(
        normal_values, characteristic_values
    )

    # Build the complete feature/label set before splitting it.
    self.X, self.Y = get_full_train_data(normal_values, characteristic_values)

    # The split is unseeded, so the partition differs from run to run.
    partitions = train_test_split(self.X, self.Y, test_size=0.4)
    self.X, self.X_test, self.Y, self.Y_test = partitions
def build_data(self):
    """Load both CSV datasets, preprocess them and split X into two parts.

    The configured paths (``self.config.normal_data_path`` and
    ``self.config.characteristic_data_path``) are each prefixed with ``'.'``
    before being handed to ``pd.read_csv``.

    Sets ``self.X`` / ``self.Y`` from ``get_full_train_data`` and
    ``self.X_text`` / ``self.X_editor`` from ``split_textEditor_data``.
    """
    cfg = self.config

    # Load the two source tables; the '.' prefix is applied to each path.
    normal_values = pd.read_csv('.' + cfg.normal_data_path).values
    characteristic_values = pd.read_csv('.' + cfg.characteristic_data_path).values

    # Preprocess both arrays together.
    normal_values, characteristic_values = data_preprocess(
        normal_values, characteristic_values
    )

    # NOTE: no train/test split happens here; the full set stays in X / Y.
    self.X, self.Y = get_full_train_data(normal_values, characteristic_values)

    # NOTE(review): presumably separates "text" and "editor" features of X
    # -- confirm against split_textEditor_data.
    text_part, editor_part = split_textEditor_data(self.X)
    self.X_text, self.X_editor = text_part, editor_part
def build(self):
    """Load both CSV datasets, preprocess them and split off a test set.

    Reads the files at ``self.config.normal_data_path`` and
    ``self.config.characteristic_data_path``, runs ``data_preprocess`` and
    ``get_full_train_data`` on their raw values, then calls
    ``train_test_split`` with ``test_size=0.3``.

    Sets ``self.X`` / ``self.Y`` to the training portion and
    ``self.test_X_final`` / ``self.test_Y_final`` to the held-out portion.
    """
    cfg = self.config

    # Load the two source tables and keep only their underlying arrays.
    normal_values = pd.read_csv(cfg.normal_data_path).values
    characteristic_values = pd.read_csv(cfg.characteristic_data_path).values

    # Preprocess both arrays together.
    normal_values, characteristic_values = data_preprocess(
        normal_values, characteristic_values
    )

    # Assemble the complete data set.
    self.X, self.Y = get_full_train_data(normal_values, characteristic_values)

    # Split into training and test partitions (unseeded).
    partitions = train_test_split(self.X, self.Y, test_size=0.3)
    self.X, self.test_X_final, self.Y, self.test_Y_final = partitions

    # Sanity check: features and labels must stay aligned after the split.
    assert len(self.X) == len(self.Y)
def _build_data(self):
    """Load both CSV datasets and prepare them for training.

    Reads the files at ``self.config.normal_data_path`` and
    ``self.config.characteristic_data_path`` and preprocesses their raw
    values with ``data_preprocess``.

    When ``self.config.use_cross_validation`` is truthy, only ``self.X`` and
    ``self.Y`` are set (from ``get_full_train_data``). Otherwise
    ``self.train`` / ``self.test`` are set from ``get_train_and_test_data``
    and then ``self.x_train``, ``self.y_train``, ``self.x_test`` and
    ``self.y_test`` from ``split_data_label``.
    """
    cfg = self.config

    # Load the two source tables and keep only their underlying arrays.
    normal_values = pd.read_csv(cfg.normal_data_path).values
    characteristic_values = pd.read_csv(cfg.characteristic_data_path).values

    # Preprocess both arrays together.
    normal_values, characteristic_values = data_preprocess(
        normal_values, characteristic_values
    )

    if cfg.use_cross_validation:
        # Cross-validation mode: keep the whole data set in X / Y.
        self.X, self.Y = get_full_train_data(normal_values, characteristic_values)
        return

    # Fixed-split mode: obtain the training and test sets ...
    self.train, self.test = get_train_and_test_data(
        normal_values, characteristic_values
    )

    # ... then separate the labels from the features in each of them.
    separated = split_data_label(self.train, self.test)
    self.x_train, self.y_train, self.x_test, self.y_test = separated