def step(self, input_: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    # forget gate
    forget_gate_input = np.dot(self.params["Wf"], input_)
    forget_gate_hidden = np.dot(self.params["Uf"], self.hidden_state)
    forget_gate = sigmoid(forget_gate_input + forget_gate_hidden + self.params["bf"])
    # input gate
    input_gate_input = np.dot(self.params["Wi"], input_)
    input_gate_hidden = np.dot(self.params["Ui"], self.hidden_state)
    input_gate = sigmoid(input_gate_input + input_gate_hidden + self.params["bi"])
    # output gate
    output_gate_input = np.dot(self.params["Wo"], input_)
    output_gate_hidden = np.dot(self.params["Uo"], self.hidden_state)
    output_gate = sigmoid(output_gate_input + output_gate_hidden + self.params["bo"])
    # candidate cell state
    cell_state_input = np.dot(self.params["Wc"], input_)
    cell_state_hidden = np.dot(self.params["Uc"], self.hidden_state)
    cell_state = tanh(cell_state_input + cell_state_hidden + self.params["bc"])
    # blend the old cell state with the candidate, then emit the new hidden state
    self.cell_state = (forget_gate * self.cell_state) + (input_gate * cell_state)
    self.hidden_state = output_gate * tanh(self.cell_state)
    return self.hidden_state, self.cell_state

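# The recurrent snippets in this file call free functions sigmoid() and tanh()
# (and dtanh() in the backward pass further below). A minimal element-wise
# NumPy sketch of those helpers, assuming that is what the originals do:
import numpy as np

def sigmoid(x: np.ndarray) -> np.ndarray:
    # logistic function, applied element-wise
    return 1.0 / (1.0 + np.exp(-x))

def tanh(x: np.ndarray) -> np.ndarray:
    # element-wise hyperbolic tangent
    return np.tanh(x)

def dtanh(x: np.ndarray) -> np.ndarray:
    # derivative of tanh, i.e. 1 - tanh(x)^2
    return 1.0 - np.tanh(x) ** 2
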
def step(self, input_: np.ndarray) -> np.ndarray:
    # update gate
    update_gate_input = np.dot(self.params["Wz"], input_)
    update_gate_hidden = np.dot(self.params["Uz"], self.hidden_state)
    update_gate = sigmoid(update_gate_input + update_gate_hidden + self.params["bz"])
    # reset gate
    reset_gate_input = np.dot(self.params["Wr"], input_)
    reset_gate_hidden = np.dot(self.params["Ur"], self.hidden_state)
    reset_gate = sigmoid(reset_gate_input + reset_gate_hidden + self.params["br"])
    # hidden state proposal
    proposal_input = np.dot(self.params["Wh"], input_)
    proposal_hidden = np.dot(self.params["Uh"], (self.hidden_state * reset_gate))
    proposal = tanh(proposal_input + proposal_hidden + self.params["bh"])
    # new hidden state
    self.hidden_state = ((1 - update_gate) * proposal) + (update_gate * self.hidden_state)
    return self.hidden_state

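# A minimal shape sketch (an assumption, not taken from the original class):
# the params dict and hidden state that the GRU step above expects, using a
# column-vector convention with illustrative sizes.
import numpy as np

hidden_size, input_size = 4, 3
rng = np.random.default_rng(0)

params = {}
for gate in ("z", "r", "h"):
    params["W" + gate] = 0.1 * rng.standard_normal((hidden_size, input_size))   # input-to-hidden weights
    params["U" + gate] = 0.1 * rng.standard_normal((hidden_size, hidden_size))  # hidden-to-hidden weights
    params["b" + gate] = np.zeros((hidden_size, 1))                             # gate biases

hidden_state = np.zeros((hidden_size, 1))   # h_0, what self.hidden_state would start as
x_t = rng.standard_normal((input_size, 1))  # a single input column vector for one step
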
def forward_bn(self, x, bn_mode='train'):
    """Forward pass with batch-normalization layers."""
    net_inputs = []
    net_outputs = []
    caches = []
    net_inputs.append(x)
    net_outputs.append(x)
    caches.append(x)
    for i in range(1, self.weight_num):
        # BN is applied to the inputs of every hidden layer; the input and output layers are not normalized
        x = x @ self.params['w' + str(i)].T
        net_inputs.append(x)
        # BN can be viewed as a trainable layer inserted between a hidden unit's pre-activation and its activation
        x, cache = self.batch_norm(x, i, bn_mode)
        caches.append(cache)
        x = tanh(x)
        net_outputs.append(x)
    out = x @ self.params['w' + str(self.weight_num)].T
    net_inputs.append(out)
    out = softmax(out)
    net_outputs.append(out)
    return {'net_inputs': net_inputs, 'net_outputs': net_outputs, 'cache': caches}, out

def forward(self, x, dropout_prob=None):
    """Forward pass over one mini-batch."""
    net_inputs = []   # pre-activation input of each layer
    net_outputs = []  # activated output of each layer
    net_d = []        # dropout masks
    # append the input layer as-is so that layer indices line up
    net_inputs.append(x)
    net_outputs.append(x)
    net_d.append(np.ones(x.shape[1:]))  # no dropout on the input layer
    for i in range(1, self.weight_num):  # one weight matrix fewer than there are layers
        x = x @ self.params['w' + str(i)].T
        net_inputs.append(x)
        x = tanh(x)
        if dropout_prob:  # drop units only during training
            x, d_temp = dropout(x, dropout_prob)
            net_d.append(d_temp)
        net_outputs.append(x)
    out = x @ self.params['w' + str(self.weight_num)].T
    net_inputs.append(out)
    out = softmax(out)
    net_outputs.append(out)
    return {'net_inputs': net_inputs, 'net_outputs': net_outputs, 'd': net_d}, out

def activation_forward(self, Z, activation="tanh"): if activation is "tanh": return [Z, tanh(Z)] if activation is "relu": return [Z, relu(Z)] if activation is "sigmoid": return [Z, sigmoid(Z)]
def G_base(name, x, batch):
    if name == 'G1':
        first_out_channels = 128
    else:
        first_out_channels = 16
    with tf.variable_scope(name):
        conv1 = conv('conv1', x, 7 * 7, first_out_channels, 1, 3, False)
        ins1 = ins_norm('ins1', conv1)
        relu1 = relu('relu1', ins1)
        conv2 = conv('conv2', relu1, 3 * 3, first_out_channels * 2, 2, 0, True)
        ins2 = ins_norm('ins2', conv2)
        relu2 = relu('relu2', ins2)
        conv3 = conv('conv3', relu2, 3 * 3, first_out_channels * 4, 2, 0, True)
        ins3 = ins_norm('ins3', conv3)
        relu3 = relu('relu3', ins3)
        conv4 = conv('conv4', relu3, 3 * 3, first_out_channels * 8, 2, 0, True)
        ins4 = ins_norm('ins4', conv4)
        relu4 = relu('relu4', ins4)
        conv5 = conv('conv5', relu4, 3 * 3, first_out_channels * 16, 2, 0, True)
        ins5 = ins_norm('ins5', conv5)
        relu5 = relu('relu5', ins5)
        x_in = relu5
        if name == 'G1':
            for i in range(9):
                res_name = 'res' + str(i + 1)  # avoid shadowing the outer-scope name
                x_in = res_block(res_name, x_in)
        up1 = conv_trans('up1', x_in, 3 * 3, first_out_channels * 8, 2, batch, True)
        ins_up1 = ins_norm('ins_up1', up1)
        relu_up1 = relu('relu_up1', ins_up1)
        up2 = conv_trans('up2', relu_up1, 3 * 3, first_out_channels * 4, 2, batch, True)
        ins_up2 = ins_norm('ins_up2', up2)
        relu_up2 = relu('relu_up2', ins_up2)
        up3 = conv_trans('up3', relu_up2, 3 * 3, first_out_channels * 2, 2, batch, True)
        ins_up3 = ins_norm('ins_up3', up3)
        relu_up3 = relu('relu_up3', ins_up3)
        up4 = conv_trans('up4', relu_up3, 3 * 3, first_out_channels, 2, batch, True)
        ins_up4 = ins_norm('ins_up4', up4)
        relu_up4 = relu('relu_up4', ins_up4)
        conv_end = conv('conv_end', relu_up4, 7 * 7, 3, 1, 3, False)
        tanh_end = tanh('tanh_end', conv_end)
        return tanh_end, relu_up4

def cell_state(self, x: np.ndarray):
    """
    Candidate cell state gate of the LSTM layer.
    :param x: stacked column vector of the previous hidden state and the current input
    :return: tanh-squashed candidate cell state
    """
    return tanh(np.dot(self.W_cell_state.value, x) + self.b_cell_state.value)

def activationFunction(self, z):
    if self.activ == Activations.SIGMOID.value:
        return actvtn.sigmoid(z)
    elif self.activ == Activations.SOFTMAX.value:
        return actvtn.softmax(z)
    elif self.activ == Activations.TANH.value:
        return actvtn.tanh(z)
    else:
        return z  # linear / identity activation

def forward_propagation(self, X, parameters):
    # use the parameters that were passed in instead of silently falling back to self.parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    # hidden layer uses tanh, output layer uses sigmoid
    Z1 = np.dot(W1, X) + b1
    A1 = tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)
    cache = {"Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}
    return A2, cache

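# Illustrative driver (an assumption, not part of the original class): the same
# two-layer forward pass written as a standalone function, exercised with
# randomly initialized parameters for a hypothetical 2-3-1 network.
import numpy as np

def forward_propagation_standalone(X, parameters):
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]
    Z1 = np.dot(W1, X) + b1          # hidden pre-activation
    A1 = np.tanh(Z1)                 # hidden activation
    Z2 = np.dot(W2, A1) + b2         # output pre-activation
    A2 = 1.0 / (1.0 + np.exp(-Z2))   # sigmoid output
    return A2

rng = np.random.default_rng(0)
parameters = {
    "W1": 0.01 * rng.standard_normal((3, 2)), "b1": np.zeros((3, 1)),
    "W2": 0.01 * rng.standard_normal((1, 3)), "b2": np.zeros((1, 1)),
}
X = rng.standard_normal((2, 5))  # 2 features, 5 examples
print(forward_propagation_standalone(X, parameters).shape)  # -> (1, 5)
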
def backward(self, dh_out: np.ndarray):
    """
    Backward pass through the unrolled LSTM.
    :param dh_out: gradient of the loss w.r.t. the hidden state at every time step
    :return: gradient w.r.t. the (one-hot) inputs at every time step
    """
    dh_from_next = np.zeros((self.hidden_size, 1))
    dC_from_next = np.zeros((self.hidden_size, 1))
    dx_out = np.empty((self.seq_size[0], self.vac_size, 1))
    # walk the cached time steps in reverse
    for idx in range(len(self.cache.keys()) - 2, -1, -1):
        x_oh, ft, it, C_hat, out, h, C = self.cache[idx]
        C_prev = self.cache[idx - 1][-1]
        dh_out_cur = dh_out[idx]
        dh_out_cur += dh_from_next
        # gradients flowing into the gates
        out.grad = dh_out_cur * tanh(C.value)
        C.grad = dC_from_next + dh_out_cur * out.value * dtanh(C.value)
        C_hat.grad = C.grad * it.value
        it.grad = C.grad * C_hat.value
        ft.grad = C.grad * C_prev.value
        # push the gradients back through the gate non-linearities
        ft.grad = ft.value * (1 - ft.value) * ft.grad
        out.grad = out.value * (1 - out.value) * out.grad
        C_hat.grad = (1 - C_hat.value * C_hat.value) * C_hat.grad
        it.grad = it.value * (1 - it.value) * it.grad
        # gradient w.r.t. the stacked [h_prev; x] vector
        x_oh.grad = np.dot(self.W_output.value.T, out.grad) + \
            np.dot(self.W_cell_state.value.T, C_hat.grad) + \
            np.dot(self.W_input.value.T, it.grad) + \
            np.dot(self.W_forget.value.T, ft.grad)
        # accumulate parameter gradients
        self.W_output.grad += np.dot(out.grad, x_oh.value.T)
        self.b_output.grad += out.grad
        self.W_cell_state.grad += np.dot(C_hat.grad, x_oh.value.T)
        self.b_cell_state.grad += C_hat.grad
        self.W_input.grad += np.dot(it.grad, x_oh.value.T)
        self.b_input.grad += it.grad
        self.W_forget.grad += np.dot(ft.grad, x_oh.value.T)
        self.b_forget.grad += ft.grad
        # split the stacked gradient back into its hidden-state and input parts
        dh_from_next = x_oh.grad[:self.hidden_size, :]
        dx_out[idx] = x_oh.grad[self.hidden_size:, :]
        dC_from_next = ft.value * C.grad
    return dx_out

def build_G(self, x_bound, x_label, x_feat, x_k, x_b):
    with tf.variable_scope('G'):
        # fuse: modulate the feature map and concatenate it with the other inputs
        x_feat_act = tf.add(tf.multiply(x_feat, x_k), x_b)
        x_concat = tf.concat([x_bound, x_label, x_feat_act], 3)
        input_downsampled = tf.nn.avg_pool(x_concat, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")
        # G1
        _, G1_relu_up4 = G_base('G1', input_downsampled, self.batch)
        # G2_1
        G2_1_conv1 = conv('G2_1_conv1', x_concat, 7 * 7, 64, 1, None, True)
        G2_1_ins1 = ins_norm('G2_1_ins1', G2_1_conv1)
        G2_1_relu1 = relu('G2_1_relu1', G2_1_ins1)
        G2_1_conv2 = conv('G2_1_conv2', G2_1_relu1, 3 * 3, 128, 2, 1, False)
        G2_1_ins2 = ins_norm('G2_1_ins2', G2_1_conv2)
        G2_1_relu2 = relu('G2_1_relu2', G2_1_ins2)
        # fuse the output of G1 with the output of G2_1 (128 channels)
        G_add = tf.add(G1_relu_up4, G2_1_relu2, name='G_Add')
        # G2_2: residual blocks
        for i in range(3):
            name = 'G2_2_res' + str(i + 1)
            G_add = res_block(name, G_add, channels=128)
        G2_2_trans = conv_trans('G2_2_trans', G_add, 3 * 3, 64, 2, self.batch, True)
        G2_2_ins2 = ins_norm('G2_2_ins2', G2_2_trans)
        G2_2_relu2 = relu('G2_2_relu2', G2_2_ins2)
        # final convolution
        G2_2_conv_end = conv('G2_2_conv_end', G2_2_relu2, 7 * 7, 3, 1, None, True)
        G2_2_tanh_end = tanh('G2_2_tanh_end', G2_2_conv_end)
        return G2_2_tanh_end

def forward(self, seq: np.ndarray):
    """
    Forward pass over a whole input sequence.
    :param seq: sequence of (one-hot) input column vectors
    :return: hidden states for every step, or only the last one if return_seq is False
    """
    h = Param(np.zeros((self.hidden_size, 1)))
    C = Param(np.zeros((self.hidden_size, 1)))
    ft = Param(np.zeros((self.hidden_size, 1)))
    it = Param(np.zeros((self.hidden_size, 1)))
    C_hat = Param(np.zeros((self.hidden_size, 1)))
    out = Param(np.zeros((self.hidden_size, 1)))
    self.seq_size = seq.shape
    self.cache = {}
    self.cache[-1] = (Param(h.value), Param(C.value))
    output = np.empty((self.seq_size[0], self.hidden_size, 1))
    for idx, x in enumerate(seq):
        # stack the previous hidden state on top of the current input
        x_oh = Param(np.row_stack((h.value, x)))
        ft.value = self.forget_gate(x=x_oh.value)
        it.value = self.input_gate(x=x_oh.value)
        C_hat.value = self.cell_state(x=x_oh.value)
        C.value = ft.value * C.value + it.value * C_hat.value
        out.value = self.output_gate(x=x_oh.value)
        h.value = out.value * tanh(C.value)
        output[idx] = h.value
        # cache copies of every intermediate value for the backward pass
        self.cache[idx] = (Param(x_oh.value), Param(ft.value), Param(it.value),
                           Param(C_hat.value), Param(out.value), Param(h.value), Param(C.value))
    if self.return_seq:
        return output
    else:
        return output[-1]

def activation_forward(self, input, W, b, activation_type):
    '''
    :param input: the input of the current layer
    :param W: the weights of the current layer
    :param b: biases of the current layer
    :param activation_type: type of activation function used in the forward propagation
    :return:
        - A --> the output of the activation function
        - packet_of_packets --> tuple of 2 elements which will be used in backward propagation:
            1- linear packet: contains (input, weights, biases) of the current layer
            2- activation packet: contains (Z), the input to the activation function
    '''
    if activation_type == "sigmoid":
        Z, linear_packet = self.identity_forward(input, W, b)  # Z = input * w + b
        temp = activations.Sigmoid()
        A, activation_packet = temp.forward(Z)  # A = sig(Z)
    elif activation_type == "relu":
        Z, linear_packet = self.identity_forward(input, W, b)
        temp = activations.relu()
        A, activation_packet = temp.forward(Z)
    elif activation_type == "leaky_relu":
        Z, linear_packet = self.identity_forward(input, W, b)
        temp = activations.leaky_relu()
        A, activation_packet = temp.forward(Z)
    elif activation_type == "tanh":
        Z, linear_packet = self.identity_forward(input, W, b)
        temp = activations.tanh()
        A, activation_packet = temp.forward(Z)
    elif activation_type == "softmax":
        Z, linear_packet = self.identity_forward(input, W, b)
        A, activation_packet = activations.Softmax().forward(Z)
    elif activation_type == "linear":
        Z, linear_packet = self.identity_forward(input, W, b)
        A, activation_packet = Z, Z
    else:
        raise ValueError("ERROR : Activation Function is Not Determined")
    packet_of_packets = linear_packet, activation_packet
    return A, packet_of_packets

def neural_network(test=False):
    # trains the network if test == False, runs the nn on test data if test == True
    if test == True:
        # weight matrices must be initialized first before they can be loaded
        w0_test = np.loadtxt('w0.txt')
        w1_test = np.loadtxt('w1.txt')
        w2_test = np.loadtxt('w2.txt')
        # we care about values & train_vec to vectorize user inputs
        x_train, win_train, train_mat, values, train_vec = input_param('DeckParameters', TrainingData, 0, number_training_examples)
        train_deck1 = str(input('Enter the name of the first deck: \n'))
        train_deck2 = str(input('\nEnter the name of the second deck: \n'))
        train_deck3 = str(input('\nEnter the name of the third deck: \n'))
        test_decks = [train_deck1, train_deck2, train_deck3]
        # substitutes each commander name with its corresponding parameters - to parameterize the deck data
        for i in range(len(test_decks)):
            test_decks[i] = (values[values[:, 0] == str(test_decks[i])]).tolist()
            del test_decks[i][0][0]
        x_test = [float(n) for n in list(itertools.chain.from_iterable(list(itertools.chain.from_iterable(test_decks))))]
        # divides x_test into the features of each corresponding deck and feeds them through the network
        x_test_deck1 = x_test[0:number_features]
        x_test_deck2 = x_test[number_features:2 * number_features]
        x_test_deck3 = x_test[2 * number_features:3 * number_features]
        # sends the data through the nn
        maxValue = 0
        for deck_test in [x_test_deck1, x_test_deck2, x_test_deck3]:
            l0_test = np.array(deck_test, dtype=np.float128).reshape(len(deck_test), 1)
            a1_test = tanh(np.dot(w0_test, l0_test))
            a2_test = tanh(np.dot(w1_test, a1_test))
            a3_test = sigmoid(np.dot(w2_test, a2_test))
            deck_test.append(max(a3_test))  # to compare maximum a3_test values
            # saves the a3_test value and the name of the predicted deck for display later
            if deck_test[-1] > maxValue:
                maxValue = deck_test[-1]
                best_deck_index = [x_test_deck1, x_test_deck2, x_test_deck3].index(deck_test)
        # to be completely honest, not sure why the code only works when this is initialized here
        test_decks_name = [train_deck1, train_deck2, train_deck3]
        print('The winner is predicted to be: ' + str(test_decks_name[best_deck_index]) +
              ' with a confidence of ' + str(maxValue))
    elif test == False:
        # for training the nn - run when initialized with test=False
        # initializes weight matrices - reshape to ensure compliance
        x, win, decklist, values, train_vec = input_param('DeckParameters', TrainingData, 0, number_training_examples)
        '''
        w0, w1, w2 = initialize(x, layer1_nodes, output_nodes)
        w0 = w0.reshape(layer0_nodes, int(len(x)))
        w1 = w1.reshape(layer1_nodes, layer1_nodes)
        w2 = w2.reshape(output_nodes, layer1_nodes)
        '''
        # loads previous weight matrices - use if not initializing weights
        w0 = np.loadtxt('w0.txt')
        w1 = np.loadtxt('w1.txt')
        w2 = np.loadtxt('w2.txt')
        # functional albeit inelegant iteration. The first loop iterates through the number of matches,
        # the second loop is a training loop (updating the weights every iteration)
        deck = -1
        for train_match in range(number_training_examples):
            deck += 1
            for i in range(100):
                # converts the x1 input to a matrix
                x, win, decklist, values, train_vec = input_param('DeckParameters', TrainingData, deck, number_training_examples)
                l0 = np.array(x, dtype=np.float128).reshape(len(x), 1)
                a1 = tanh(np.dot(w0, l0))
                a2 = tanh(np.dot(w1, a1))
                a3 = sigmoid(np.dot(w2, a2))
                # index of where in win a 1 shows up for the corresponding match
                win_index = list(win[deck]).index(1)
                # print('The winner is predicted to be: ' + str(decklist[0][list(a3).index(max(a3))]))
                # print('The actual winner of this match was: ' + str(decklist[0][win_index]))
                # begin backpropagation
                # calculate the error corresponding to the output layer
                win_0T = np.transpose([win[deck]])
                l3_error = win_0T - a3
                l3D = l3_error * sigmoid(a3, deriv=True)
                # calculate the error corresponding to l2 (second hidden layer)
                l2_error = np.dot(w2.T, l3D)
                l2D = l2_error * tanh(a2, deriv=True)
                l1_error = np.dot(w1.T, l2D)
                l1D = l1_error * tanh(a1, deriv=True)
                '''
                # if you want to view the error decreasing through the iterations
                if (i % 1000) == 0:
                    print("Error: " + str(np.mean(np.abs(l3_error))))
                '''
                # print(np.dot(l1D, win_0T.T))
                # updating the weights
                w0, w1, w2 = update_weights(w0, w1, w2, win_0T, a1, a2, l1D, l2D, l3D)
            print('The winner is predicted to be: ' + str(decklist[deck][list(a3).index(max(a3))]) +
                  ' with a confidence of ' + str(max(list(a3))))
            print('The actual winner of this match was: ' + str(decklist[deck][win_index]) + '\n')
        # saves the updated weight matrices so they can be loaded for future training
        np.savetxt('w0.txt', w0)
        np.savetxt('w1.txt', w1)
        np.savetxt('w2.txt', w2)