def StandardScaler(dataX):
    """Standardize a matrix column by column.

    Each entry ``x[i][j]`` is mapped to
    ``(x[i][j] - pack[j][0]) / pack[j][1]`` where ``pack`` is the result of
    ``m.MeanAndVar`` for the matrix.

    NOTE(review): judging by the helper's name, ``pack[j]`` is presumably a
    (mean, variance) pair for column ``j``; confirm whether the second entry
    is a variance or a standard deviation.

    Args:
        dataX: a ``mat`` instance, or a list that ``mat`` can wrap.

    Returns:
        A new ``mat`` holding the scaled values; the input is not modified
        because the computation runs on a deep copy.

    Raises:
        TypeError: if ``dataX`` is neither a ``mat`` nor a list.
    """
    if isinstance(dataX, mat):
        source = dataX
    elif isinstance(dataX, list):
        source = mat(dataX)
    else:
        raise TypeError("输入矩阵!!类型!!错误!!")

    x = copy.deepcopy(source)
    pack = m.MeanAndVar(x)

    scaled_rows = []
    for i in range(x.row):
        scaled_row = []
        for j in range(x.col):
            value = float(x.mat[i][j])
            scaled_row.append((value - pack[j][0]) / pack[j][1])
        scaled_rows.append(scaled_row)
    return mat(scaled_rows)
def MinMaxScaler(dataX):
    """Rescale a matrix column by column using its range.

    Each entry ``x[i][j]`` is mapped to
    ``(x[i][j] - pack[j][1]) / (pack[j][0] - pack[j][1])`` where ``pack`` is
    the result of ``m.MaxAndMin`` for the matrix.

    NOTE(review): from the helper's name and the formula, ``pack[j]`` is
    presumably a (max, min) pair for column ``j`` — confirm against
    ``m.MaxAndMin``. A column whose two statistics are equal makes the
    denominator zero.

    Args:
        dataX: a ``mat`` instance, or a list that ``mat`` can wrap.

    Returns:
        A new ``mat`` holding the rescaled values; the input is not modified
        because the computation runs on a deep copy.

    Raises:
        TypeError: if ``dataX`` is neither a ``mat`` nor a list.
    """
    if isinstance(dataX, mat):
        source = dataX
    elif isinstance(dataX, list):
        source = mat(dataX)
    else:
        raise TypeError("输入矩阵!!类型!!错误!!")

    x = copy.deepcopy(source)
    pack = m.MaxAndMin(x)

    rescaled_rows = []
    for i in range(x.row):
        rescaled_row = []
        for j in range(x.col):
            value = float(x.mat[i][j])
            rescaled_row.append(
                (value - pack[j][1]) / (pack[j][0] - pack[j][1]))
        rescaled_rows.append(rescaled_row)
    return mat(rescaled_rows)
def Normalizer(dataX):
    """Normalize a matrix column by column using sums of squares.

    Each entry ``x[i][j]`` is mapped to ``x[i][j] ** 2 / add[j]`` where
    ``add[j]`` is the sum of the squared entries in row ``j`` of ``x.T``
    (column ``j`` of the input, assuming ``T`` is the transpose).

    Args:
        dataX: a ``mat`` instance, or a list that ``mat`` can wrap.

    Returns:
        A new ``mat`` holding the normalized values; the input is not
        modified because the computation runs on a deep copy.

    Raises:
        TypeError: if ``dataX`` is neither a ``mat`` nor a list.
    """
    if isinstance(dataX, mat):
        source = dataX
    elif isinstance(dataX, list):
        source = mat(dataX)
    else:
        raise TypeError("输入矩阵!!类型!!错误!!")

    x = copy.deepcopy(source)
    xT = x.T

    # One sum of squares per row of the transposed matrix.
    add = []
    for line in xT.mat:
        add.append(sum([value ** 2 for value in line]))

    normalized_rows = []
    for i in range(x.row):
        normalized_rows.append(
            [float(x.mat[i][j] ** 2) / add[j] for j in range(x.col)])
    return mat(normalized_rows)
def BridgeLinear(data_X, data_y, k=1, bias=False):
    """Fit a ridge regression and return the coefficient matrix.

    The result is ``(X^T X + kI)^-1 X^T y`` computed with the project's
    ``mat`` / ``m`` helpers.

    Args:
        data_X: feature rows as a list (one inner list per sample).
        data_y: label rows as a list, with as many rows as ``data_X``.
        k: regularization strength multiplied into the penalty matrix.
        bias: when ``True``, a column of ones is stacked onto ``X`` so the
            model learns an intercept.

    Returns:
        Whatever ``m.dot`` returns for the product above.

    Raises:
        TypeError: if ``data_X`` or ``data_y`` fails the type check.
        Exception: if ``X`` and ``y`` have a different number of rows.
    """
    # NOTE(review): because ``not`` binds tighter than ``or``, these checks
    # reject ``mat`` inputs as well as every non-list; confirm whether
    # ``not (list or mat)`` was intended before relaxing them.
    if not isinstance(data_X, list) or isinstance(data_X, mat):
        raise TypeError("X特征矩阵数据类型不符合标准,[○・`Д´・ ○]")
    if not isinstance(data_y, list) or isinstance(data_y, mat):
        raise TypeError("y标签数据类型不符合标准,[○・`Д´・ ○]")

    X = mat(copy.deepcopy(data_X))
    y = mat(data_y)

    if bias == True:
        # The intercept column needs exactly one entry per sample row.
        # The previous length (X.col + 1) only matched the row count when
        # rows == cols + 1.
        ones_column = [[1] for _ in range(X.row)]
        X = m.column_stack(X.mat, ones_column)

    if X.row != y.row:
        raise Exception("行数不一样啦,衰仔!")

    n = mat(X).col
    # NOTE(review): a ridge penalty is normally k times the identity;
    # confirm that ``mat.ones(n)`` yields an identity rather than an
    # all-ones matrix.
    kI = mat.dotk(k, mat.ones(n))
    return m.dot((m.dot(X.T, X) + kI).I, X.T, y)
def __train(self):
    """Compute per-label counts, label scores and per-label statistics.

    Fills ``self._labelNum`` with ``(label, count)`` pairs,
    ``self._labelScore`` with ``(label, count / irow)`` pairs, and appends
    one ``MeanAndVar`` result per label to ``self._weight``.
    """
    # Occurrence count of every known label among the training outputs.
    output_tuples = list(map(tuple, self._outputs))
    self._labelNum = [
        (label, output_tuples.count(label)) for label in self._class
    ]

    # Share of the training rows carried by each label.
    total_rows = float(self.irow)
    self._labelScore = [
        (self._class[idx], pair[1] / total_rows)
        for idx, pair in enumerate(self._labelNum)
    ]

    # Copy of the inputs with the matching output appended as the last
    # element of every row.
    combined = copy.deepcopy(self._inputs)
    for idx in range(self.irow):
        combined[idx].append(self._outputs[idx])

    for label, _count in self._labelNum:
        # Rows whose appended output equals this label.
        group = [
            combined[idx]
            for idx in range(self.irow)
            if combined[idx][-1] == list(label)
        ]

        # Transpose, drop the appended label column, transpose back so only
        # the feature values remain.
        columns = [list(column) for column in zip(*group)]
        columns.pop(-1)
        features = [list(row) for row in zip(*columns)]

        self._weight.append(MeanAndVar(mat(features)))
def predict(self, inputs, Debug=False):
    """Run a forward pass through the network for the given input matrix.

    Args:
        inputs: a ``mat`` of input rows.
        Debug: when true, compute the forward pass with numpy's ``tanh``
            instead of ``self.active`` and return the numpy result.

    Returns:
        With ``Debug`` false, a ``mat`` of output activations; with
        ``Debug`` true, the value returned by ``np.tanh``.

    Raises:
        TypeError: if ``inputs`` is not a ``mat``.
        Exception: if ``Debug`` is true and numpy cannot be imported.
    """
    if not isinstance(inputs, mat):
        raise TypeError("类型得是mat,_(:з」∠)_")
    data = copy.deepcopy(inputs)

    if Debug:
        # Only a failed import is reported as "numpy unavailable". Errors
        # in the computation itself propagate unchanged, so they are no
        # longer mislabelled as a loading problem.
        try:
            import numpy as np
        except ImportError:
            raise Exception("测试用numpy库加载失败,o(╥﹏╥)o")
        ah = list(np.tanh(m.dot(data, mat(self.wi)).mat))
        ao = np.tanh(m.dot(mat(ah), mat(self.wo)).mat)
        return ao

    # Input layer -> hidden layer.
    hidden_net = m.dot(data, mat(self.wi)).mat
    hidden_out = mat([list(map(self.active, row)) for row in hidden_net])
    # Hidden layer -> output layer.
    output_net = m.dot(hidden_out, mat(self.wo)).mat
    return mat([list(map(self.active, row)) for row in output_net])
# ao = mat([list(map(self.active, i)) for i in xwiwo]) return xwiwo
# NOTE(review): the line above is the tail of a definition whose start lies
# outside this view; it is kept verbatim.

if __name__ == '__main__':
    # Demo: train a BP network on random data and print its predictions.

    # Feature matrix: 100 rows x 8 columns; the original author notes the
    # values are normalized by default (between 0 and 1).
    # Alternative toy data:
    # data_X = [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1], [1, 0, 1], [1, 1, 0], [1, 1, 1]]
    features = m.random.rand((100, 8))

    # Label column for the features: 100 rows x 1 column drawn with the
    # (-5, 16) bounds argument.
    # Alternative toy labels:
    # data_y = [[0], [1], [0], [1], [1], [1], [1]]
    labels = m.random.randint((100, 1), (-5, 16))

    # BP network: 8 input columns, 6 hidden columns, 1 output column, tanh
    # activation, 5000 iterations, gradient step (learnRate) 0.001,
    # correction parameter (B) 0.002.
    network = BPNN(8, 6, 1, active="tanh", iterations=5000,
                   learnRate=0.001, B=0.002)

    # info: show the iteration progress; show: display the error plot;
    # bias: when True the input matrix needs one extra column.
    feature_matrix = mat(features)
    label_matrix = mat(labels)
    network.fit(feature_matrix, label_matrix, info=True, show=True, bias=False)

    # Store the weight matrices under the weights folder (needs pandas).
    network.saveWeights()

    # Print the basic information of the algorithm.
    print(network)

    # Predict; Debug=True goes through numpy, otherwise the hand-written
    # implementation is used.
    unseen = m.random.rand((20, 8))
    print(network.predict(unseen, Debug=False))
if __name__ == '__main__':
    # region BP test (disabled)
    # # Create the feature matrix first; normalized by default (values
    # # between 0 and 1).
    # data_X = m.random.rand((100, 8))
    # # data_X = [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1], [1, 0, 1], [1, 1, 0], [1, 1, 1]]
    #
    # # Create the label vector for the features.
    # data_y = m.random.rand((100, 1))
    # # data_y = [[0], [1], [0], [1], [1], [1], [1]]
    #
    # # Create the BP instance: 8 input columns, 6 hidden columns, 1 output
    # # column, 5000 iterations, gradient step 0.01, correction parameter 0.02.
    # nn = BPNN(8, 6, 1, active="tanh", iterations=5000, learnRate=0.01, B=0.02)
    # # Fit features and labels. info: show the iteration progress; show:
    # # display the error plot; bias: when True the input matrix needs one
    # # extra column.
    # nn.fit(mat(data_X), mat(data_y), info=True, show=True, bias=False)
    # # Store the weight matrices under the weights folder (needs pandas).
    # nn.saveWeights()
    # # Print the basic information of the algorithm.
    # print(nn)
    # # Predict; Debug=True goes through numpy, otherwise the hand-written
    # # implementation is used.
    # print(nn.predict(m.random.rand((20, 8)), Debug=False))
    # endregion

    # Naive Bayes smoke test: fit on seven samples with two-column labels,
    # then print the predictions for six query rows.
    nb = NaiveBayes()
    nb.fit(
        mat([[1, 2, 3], [2, 3, 4], [1, 1, 1], [2, 2, 3], [5, 6, 7],
             [1, 2, -1], [1, 0, 1.5]]),
        mat([[1, 2], [3, 4], [3, 4], [0, 1], [1, 2], [0, 1], [-1, 2]]))
    # The call form of print works on Python 3, where the former print
    # statement was a SyntaxError; with a single argument it prints the
    # same on Python 2.
    print(nb.predict(
        mat([[1, 2, 3], [2, 3, 4], [0, 1, 1], [5, 6, 7], [2, 2, 3],
             [1, 0, 1.5]])))