## Oversampling: flatten each padded (193, 2) sample into a 1-D vector so that
## imblearn's RandomOverSampler (which expects a 2-D feature matrix) can balance
## the classes, then reshape the resampled rows back to (193, 2).
import numpy as np
from collections import Counter
from tensorflow import keras
import dataing  ## data loading


def oversamp():
    x, y = dataing.data_make()  ## both x and y are numeric
    #print(x[317], y[317])
    x_set = np.array(x, dtype=object)  ## variable-length samples, so dtype=object is required
    y_set = np.array(y)
    '''print(x_set[0])
    print(x_set.shape)   # (516,)
    print(y_set.shape)   # (516, 10)'''
    max_length = 193
    X = keras.preprocessing.sequence.pad_sequences(x_set, maxlen=max_length, dtype='float64',
                                                   padding='post', value=[0.0, 0.0]).tolist()
    ## flatten each (193, 2) sample into a single vector of length 386
    x = []
    for i in X:
        k = [n for a in i for n in a]
        x.append(k)
    #print(x[0])
    y = y_set[:, 0].tolist()  ### keep a single label (the first column)
    #print('y_set', len(y_set))
    print(Counter(y))
    ## class balance before oversampling:
    ## class 0 {1: 330, 0: 187}, class 1 {1: 393, 0: 124}, class 2 {1: 347, 0: 170},
    ## class 3 {0: 430, 1: 87},  class 4 {1: 316, 0: 201}, class 5 {0: 482, 1: 35},
    ## class 6 {0: 444, 1: 73},  class 7 {0: 432, 1: 85},  class 8 {0: 422, 1: 95},
    ## class 9 {0: 502, 1: 15}
    from imblearn.over_sampling import RandomOverSampler
    ros = RandomOverSampler(random_state=10)
    x_resampled, y_resampled = ros.fit_resample(x, y)
    print(sorted(Counter(y_resampled).items()))
    #print(x_resampled[0])
    #print(len(y_resampled))
    result = []
    for i in x_resampled:
        b = np.array(i).reshape(193, 2).tolist()  # reshape back to (time steps, features)
        result.append(b)
    #print(type(result), type(y_resampled))  ## both returned values are equal-length lists
    return result, y_resampled
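# Hedged usage sketch (not part of the original file): one way the balanced output of
# oversamp() might be turned into numpy arrays for a Keras model. The shapes assume the
# (193, 2) padded layout used above; the variable names below are illustrative only.
if __name__ == '__main__':
    x_bal, y_bal = oversamp()
    X_bal = np.array(x_bal, dtype='float64')  # expected shape: (n_samples, 193, 2)
    Y_bal = np.array(y_bal, dtype='float64')  # expected shape: (n_samples,)
    print(X_bal.shape, Y_bal.shape)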
import numpy as np
import pandas as pd
import random
from sklearn import metrics
import tensorflow.keras.backend as K
from tensorflow import keras
from tensorflow.keras.layers import Dense
# project-local modules
import dataing            ## data loading
import data_same_length   ## pad samples to a fixed length
import loss_function

# 1. Load the data and shuffle the sample order
x, y = dataing.data_make()
train_set = []
for i in range(len(x)):
    train_set.append([x[i], y[i]])
random.shuffle(train_set)  ### shuffle the samples
x_set = [e[0] for e in train_set]  # features
y_set = [f[1] for f in train_set]  # labels

## maximum training sequence length (reported by the dataing module); currently 276
max_length = 276
## keras.preprocessing.sequence.pad_sequences truncates or pads the sequences to a
## common length and returns a numpy array
X = keras.preprocessing.sequence.pad_sequences(x_set, maxlen=max_length, dtype='float64', padding='post',
import numpy as np
import pandas as pd
import random
from sklearn import metrics
import tensorflow.keras.backend as K
# project-local modules
import dataing            ## data loading
import data_same_length   ## pad samples to a fixed length
import loss_function

## class balance: class 0 {1: 330, 0: 187}, class 1 {1: 393, 0: 124}, class 2 {1: 347, 0: 170},
## class 3 {0: 430, 1: 87},  class 4 {1: 316, 0: 201}, class 5 {0: 482, 1: 35},
## class 6 {0: 444, 1: 73},  class 7 {0: 432, 1: 85},  class 8 {0: 422, 1: 95},
## class 9 {0: 502, 1: 15}

# Load the data
x, y = dataing.data_make()           ## variable-length features
#x, y = dataing.data_make_samelen()  ## fixed-length features
train_set = []
for i in range(len(x)):
    random.shuffle(x[i])  # shuffle the time steps within each sample
    #print(x[i])
    #a = x[i].copy()
    train_set.append([x[i], y[i]])
random.shuffle(train_set)  ### shuffle the sample order
x_set1 = [e[0] for e in train_set]  # features
y_set1 = [f[1] for f in train_set]  # labels
'''x1 = x.copy()
y1 = np.array(y)[:, 0].tolist()
print(len(x))
# -*- coding: utf-8 -*-
#@Author : lynch
# Import the packages required by this project
import dataing            ## data loading
import data_same_length   ## pad samples to a fixed length
import tensorflow as tf
from tensorflow.keras.layers import Dense
import numpy as np
import pandas as pd
import random

# Load the data
x, y = dataing.data_make()  ## reads 'training_set.csv' and 'training_label.csv'; samples are variable-length
train_set = []
for i in range(len(x)):
    train_set.append([x[i], y[i]])
random.shuffle(train_set)  ### shuffle the sample order
x_1 = [e[0] for e in train_set]  # features
y_1 = [f[1] for f in train_set]  # labels
'''# Normalize the sample lengths
length_x = []
for i in x_set:
    length_x.append(len(i))
max_length = max(length_x)
print('maximum training sequence length:', max_length)
x_set = data_same_length.same_length(x_set, max_length)
print(x_set[0])
print(len(x_set[0]))'''
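# Hedged sketch (not from the original repo): one way the data_same_length.same_length
# helper referenced in the commented-out block above could behave, assuming it pads each
# variable-length sample with [0.0, 0.0] rows up to max_length and truncates longer ones.
# The actual module may pad or truncate differently; this is for illustration only.
def same_length_sketch(samples, max_length, pad_value=(0.0, 0.0)):
    padded = []
    for sample in samples:
        sample = list(sample)[:max_length]                         # truncate overly long samples
        sample += [list(pad_value)] * (max_length - len(sample))   # pad short samples at the end
        padded.append(sample)
    return padded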