def _step(x_t, ct_1, ht_1, Wi, Wf, Wo, Wc, Whi, Whf, Who, Whc, bi, bf, bo, bc):
    i = sigmoid(T.dot(x_t, Wi) + T.dot(ht_1, Whi) + bi)
    f = sigmoid(T.dot(x_t, Wf) + T.dot(ht_1, Whf) + bf)
    o = sigmoid(T.dot(x_t, Wo) + T.dot(ht_1, Who) + bo)
    c = tanh(T.dot(x_t, Wc) + T.dot(ht_1, Whc) + bc)
    c_new = i * c + f * ct_1
    h_new = o * tanh(c_new)
    return c_new, h_new

def _step(x_t, ct_1, ht_1, W, Wh, b, dim):
    tmp = T.dot(x_t, W) + T.dot(ht_1, Wh) + b
    i = sigmoid(_slice(tmp, 0, dim))
    f = sigmoid(_slice(tmp, 1, dim))
    o = sigmoid(_slice(tmp, 2, dim))
    c = tanh(_slice(tmp, 3, dim))
    c_new = i * c + f * ct_1
    h_new = o * tanh(c_new)
    return c_new, h_new

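# A minimal sketch of the _slice helper that the fused-weight steps above and below assume.
# A 1-D version is defined inside the lstm_numpy snippet further down; this variant also
# handles batched (n_samples, 4 * dim) and (n_steps, n_samples, 4 * dim) pre-activations.
def _slice(_x, n, dim):
    # pick the n-th block of width `dim` out of the concatenated i/f/o/c pre-activation
    if _x.ndim == 3:
        return _x[:, :, n * dim:(n + 1) * dim]
    if _x.ndim == 2:
        return _x[:, n * dim:(n + 1) * dim]
    return _x[n * dim:(n + 1) * dim]
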
def _step_index(x_t, ct_1, ht_1, Wi, Wf, Wo, Wc, Whi, Whf, Who, Whc, bi, bf, bo, bc):
    # x_t: array of type int32
    # use indexing on the Wi, Wf, Wo and Wc matrices instead of computing the product with
    # the one-hot representation of the input, for computational and memory efficiency
    i = sigmoid(Wi[x_t] + T.dot(ht_1, Whi) + bi)
    f = sigmoid(Wf[x_t] + T.dot(ht_1, Whf) + bf)
    o = sigmoid(Wo[x_t] + T.dot(ht_1, Who) + bo)
    c = tanh(Wc[x_t] + T.dot(ht_1, Whc) + bc)
    c_new = i * c + f * ct_1
    h_new = o * tanh(c_new)
    return c_new, h_new

def backward_pass(self, y_true, d_next, cache):
    Wsx, Wsh, bs, \
    Wix, Wih, bi, \
    Wfx, Wfh, bf, \
    Wox, Woh, bo, \
    Why, by = self.get_weights_and_biases()

    # unpacking state variables from [t + 1]
    dh_next, ds_next = d_next

    # recovering variables from the forward pass
    x, h, h_old, s, s_old, s_bar, i, f, o, y, prob = cache

    # ~ output as probabilities
    dy = np.copy(prob)
    dy[y_true] -= 1

    # ~ output
    dWhy = dy @ h.T
    dby = dy

    # ~ hidden state
    delta = Why.T @ dy
    dh = dh_next + delta

    # ~ state
    ds = dh * o * (1 - u.tanh(s) ** 2) + ds_next
    ds_bar = ds * i * (1 - s_bar ** 2)

    # ~ gates
    di = ds * s_bar * (i * (1 - i))
    df = ds * s_old * (f * (1 - f))
    do = dh * u.tanh(s) * (o * (1 - o))

    # calculating gradients
    dh_acc = 0
    grad = dict(Why=dWhy, by=dby)
    for d, W, dWx, dWh, db in zip([di, df, do, ds_bar],
                                  [Wih, Wfh, Woh, Wsh],
                                  ['Wix', 'Wfx', 'Wox', 'Wsx'],
                                  ['Wih', 'Wfh', 'Woh', 'Wsh'],
                                  ['bi', 'bf', 'bo', 'bs']):
        grad[dWx] = d @ x.T
        grad[dWh] = d @ h_old.T
        grad[db] = d
        dh_acc += W.T @ d

    # saving current derivatives for [t - 1]
    dh_next = dh_acc
    ds_next = ds * f
    state = (dh_next, ds_next)
    return grad, state

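# A minimal sketch (not the original module) of the small activation helpers the NumPy
# snippets in this collection import as `u`, `ut`, or `utils`. The definitions below are the
# standard ones implied by the call sites: dsigmoid/dtanh take the already-activated value.
# Note the Coursera-style linear_activation_forward variants below assume a different utils
# whose sigmoid/tanh/relu also return a cache.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def dsigmoid(y):
    # derivative of the sigmoid expressed in terms of its output y = sigmoid(x)
    return y * (1.0 - y)

def tanh(x):
    return np.tanh(x)

def dtanh(y):
    # derivative of tanh expressed in terms of its output y = tanh(x)
    return 1.0 - y ** 2

def softmax(x):
    # numerically stable softmax, matching the column-vector usage in the RNN snippets
    e = np.exp(x - np.max(x))
    return e / np.sum(e)
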
def _step_index(x_t, ct_1, ht_1, W, Wh, b, dim):
    # x_t: array of type int32
    # use indexing on the W matrix instead of computing the dot product with the
    # one-hot representation of the input, for computational and memory efficiency
    tmp = W[x_t] + T.dot(ht_1, Wh) + b
    i = sigmoid(_slice(tmp, 0, dim))
    f = sigmoid(_slice(tmp, 1, dim))
    o = sigmoid(_slice(tmp, 2, dim))
    c = tanh(_slice(tmp, 3, dim))
    c_new = i * c + f * ct_1
    h_new = o * tanh(c_new)
    return c_new, h_new

def forward(self, input_data, h_prev, C_prev):
    z = np.row_stack((h_prev, input_data))
    f = utils.sigmoid(np.dot(self.W_f.v, z) + self.b_f.v)
    i = utils.sigmoid(np.dot(self.W_i.v, z) + self.b_i.v)
    C_bar = utils.tanh(np.dot(self.W_C.v, z) + self.b_C.v)

    C = f * C_prev + i * C_bar
    o = utils.sigmoid(np.dot(self.W_o.v, z) + self.b_o.v)
    h = o * utils.tanh(C)

    v = np.dot(self.W_v.v, h) + self.b_v.v
    y = np.exp(v) / np.sum(np.exp(v))  # softmax

    return z, f, i, C_bar, C, o, h, v, y

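# A minimal usage sketch for the cell above, meant to pair with the `backward` snippet
# further down. `cell`, `h_size`, `x_size`, and `inputs` are placeholders introduced here,
# not names from the original code.
import numpy as np

h_prev = np.zeros((h_size, 1))
C_prev = np.zeros((h_size, 1))
caches = []
for x_t in inputs:  # each x_t: (x_size, 1) column vector, e.g. a one-hot character
    z, f, i, C_bar, C, o, h, v, y = cell.forward(x_t, h_prev, C_prev)
    caches.append((z, f, i, C_bar, C, o, h, v, y))
    h_prev, C_prev = h, C
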
def linear_activation_forward(A_prev, W, b, activation):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid", "tanh" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value
    cache -- a python tuple containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """
    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = utils.sigmoid(Z)
    elif activation == "tanh":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = utils.tanh(Z)
    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = utils.relu(Z)

    assert A.shape == (W.shape[0], A_prev.shape[1])
    cache = (linear_cache, activation_cache)

    return A, cache

def expmap(self, u, p, c):
    sqrt_c = c ** 0.5
    u_norm = u.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm)
    second_term = (tanh(sqrt_c / 2 * self._lambda_x(p, c) * u_norm)
                   * u
                   / (sqrt_c * u_norm))
    gamma_1 = self.mobius_add(p, second_term, c)
    return gamma_1

def classify(self, x):
    # Add the bias for all inputs
    data_set = np.concatenate((np.ones(1).T, np.array(x)), axis=0)
    for layer in range(0, len(self.weights)):
        sum_value = np.dot(data_set, self.weights[layer])
        data_set = utils.tanh(sum_value)
    return data_set

def expmap0(self, u, c):
    sqrt_c = c ** 0.5
    u_norm = torch.clamp_max(
        torch.clamp_min(u.norm(dim=-1, p=2, keepdim=True), self.min_norm),
        self.max_norm)
    gamma_1 = tanh(sqrt_c * u_norm) * u / (sqrt_c * u_norm)
    return gamma_1

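# A small sanity-check sketch for expmap0 above: since tanh(.) < 1, the mapped point has norm
# tanh(sqrt(c) * ||u||) / sqrt(c), which stays strictly inside the Poincare ball of radius
# 1 / sqrt(c). `manifold` (an object exposing expmap0, min_norm, max_norm) and the sizes are
# placeholders, not names from the original code.
import torch

u = torch.randn(4, 8)
c = 1.0
y = manifold.expmap0(u, c)
assert (y.norm(dim=-1) < 1.0 / c ** 0.5 + 1e-5).all()
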
def _step(m_, x_,  # sequences
          h_, c_,  # outputs_info
          pctx_, context, Wd_att, U_att, c_att, W_sel, b_sel, U,  # non_sequences
          dp_=None, dp_att_=None):
    preact = tensor.dot(h_, U)
    preact += x_
    # preact += tensor.dot(ctx_, Wc)

    i = _slice(preact, 0, dim)
    f = _slice(preact, 1, dim)
    o = _slice(preact, 2, dim)
    if options['use_dropout']:
        i = i * _slice(dp_, 0, dim)
        f = f * _slice(dp_, 1, dim)
        o = o * _slice(dp_, 2, dim)
    i = tensor.nnet.sigmoid(i)
    f = tensor.nnet.sigmoid(f)
    o = tensor.nnet.sigmoid(o)
    c = tensor.tanh(_slice(preact, 3, dim))

    c = f * c_ + i * c
    c = m_[:, None] * c + (1. - m_)[:, None] * c_
    h = o * tensor.tanh(c)
    h = m_[:, None] * h + (1. - m_)[:, None] * h_

    # attention
    pstate_ = tensor.dot(h, Wd_att)
    pctx_ = pctx_ + pstate_[:, None, :]
    pctx_ = tanh(pctx_)
    alpha = tensor.dot(pctx_, U_att) + c_att
    alpha_pre = alpha
    alpha_shp = alpha.shape
    alpha = tensor.nnet.softmax(alpha.reshape([alpha_shp[0], alpha_shp[1]]))  # softmax
    ctx_ = (context * alpha[:, :, None]).sum(1)  # (m, ctx_dim)

    if options['selector']:
        sel_ = tensor.nnet.sigmoid(tensor.dot(h_, W_sel) + b_sel)
        sel_ = sel_.reshape([sel_.shape[0]])
        ctx_ = sel_[:, None] * ctx_

    rval = [h, c, alpha, ctx_, sel_, pstate_, pctx_, i, f, o, preact, alpha_pre]
    return rval

def training(self, data_set, correct_output, n=0.2, epochs=1000):
    """
    Trains the NeuralNetwork with the data set.

    Args:
        data_set: matrix with the vectors containing the inputs.
        correct_output: the expected output for each training example.
        n: the learning rate.
        epochs: number of times that the network runs over the data set.
    """
    # File to write the error history
    f = open("graphics/error_output.txt", "w")
    data_set = self.insert_bias(data_set)
    last_errors = []

    for epoch in range(epochs):
        if epoch % 1000 == 0:
            print "Epoch: {}".format(epoch)

        random_index = np.random.randint(data_set.shape[0])

        # layer_data: [w0, w1, w2, output]
        layer_data = [data_set[random_index]]

        # Calculate output for hidden layers
        for layer in range(len(self.weights)):
            dot_value = np.dot(layer_data[layer], self.weights[layer])
            activation = utils.tanh(dot_value)
            layer_data.append(activation)
        # layer_data now contains: [ [outputs from input_layer(inputs)],
        # [outputs from hidden layer(s)], [output from output layer] ]

        # Calculate the error for the output layer
        error = correct_output[random_index] - layer_data[-1]
        average_error = abs(np.average(error))
        last_errors.append(average_error)
        if len(last_errors) == 10:
            last_errors_average = np.average(last_errors)
            f.write("{} {}\n".format(epoch, last_errors_average))
            if last_errors_average < 0.001:
                print last_errors_average
                break
            last_errors = []

        deltas = [error * utils.dtanh(layer_data[-1])]

        # Calculate deltas
        for l in range(len(layer_data) - 2, 0, -1):
            deltas.append(
                deltas[-1].dot(self.weights[l].T) * utils.dtanh(layer_data[l])
            )
        deltas.reverse()

        # Backpropagate: update the weights for all the layers
        for i in range(len(self.weights)):
            layer = np.atleast_2d(layer_data[i])
            delta = np.atleast_2d(deltas[i])
            self.weights[i] += n * layer.T.dot(delta)

    f.close()

def output(self):
    """ Generate output of this layer """
    self.x = self.prev_layer.output()
    if self.theta is None:
        self.theta = self.rng.uniform(size=(self.n_neuron, len(self.x)))
        self.b = self.rng.uniform(size=(self.n_neuron, ))
    return tanh(numpy.dot(self.theta, self.x) + self.b)

def output(self):
    """ Generate output of this layer """
    self.x = self.prev_layer.output()
    if self.theta is None:
        self.theta = self.rng.uniform(size=(self.n_neuron, len(self.x)))
        self.b = self.rng.uniform(size=(self.n_neuron,))
    return tanh(numpy.dot(self.theta, self.x) + self.b)

def mobius_matvec(self, m, x, c):
    sqrt_c = c ** 0.5
    x_norm = x.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm)
    mx = x @ m.transpose(-1, -2)
    mx_norm = mx.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm)
    res_c = tanh(mx_norm / x_norm * artanh(sqrt_c * x_norm)) * mx / (mx_norm * sqrt_c)
    cond = (mx == 0).prod(-1, keepdim=True, dtype=torch.uint8)
    res_0 = torch.zeros(1, dtype=res_c.dtype, device=res_c.device)
    res = torch.where(cond, res_0, res_c)
    return res

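# A minimal sketch of the artanh helper used by mobius_matvec above; the project's own helper
# is not shown in this collection. Clamping keeps the argument strictly inside (-1, 1) so the
# logarithm stays finite near the ball boundary.
import torch

def artanh(x, eps=1e-15):
    x = x.clamp(-1 + eps, 1 - eps)
    return 0.5 * torch.log((1 + x) / (1 - x))
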
def lstm_numpy(x, W, U, b):
    z = numpy.dot(x, W) + b
    n_hidden = b.shape[0] // 4
    h = numpy.zeros((x.shape[0], n_hidden), dtype=x.dtype)
    prev_h = numpy.zeros(n_hidden, dtype=x.dtype)
    prev_c = numpy.zeros(n_hidden, dtype=x.dtype)

    def _slice(_x, n, dim):
        return _x[n * dim:(n + 1) * dim]

    for n in range(len(h)):
        preact = numpy.dot(prev_h, U) + z[n]
        i = utils.sigmoid(_slice(preact, 0, n_hidden))
        f = utils.sigmoid(_slice(preact, 1, n_hidden))
        o = utils.sigmoid(_slice(preact, 2, n_hidden))
        c = utils.tanh(_slice(preact, 3, n_hidden))
        c = f * prev_c + i * c
        h[n] = o * utils.tanh(c)
        prev_c = c
        prev_h = h[n]
    return h

def _step(x):
    # attention
    pstate = T.dot(x, Wd_att)
    pstate = pctx + pstate[:, None, :]
    pstate = tanh(pstate)  # n * f * ctx_dim
    alpha = T.dot(pstate, U_att) + c_att  # n * f * 1
    alpha_shp = alpha.shape
    alpha = T.nnet.softmax(alpha.reshape([alpha_shp[0], alpha_shp[1]]))  # softmax
    ctx_ = (context * alpha[:, :, None]).sum(1)  # (n, ctx_dim)
    rval = [alpha, ctx_]
    return rval

def backward(self, target, dh_next, dC_next, C_prev,
             z, f, i, C_bar, C, o, h, v, y):
    dv = np.copy(y)
    dv[target] -= 1

    self.W_v.d += np.dot(dv, h.T)
    self.b_v.d += dv

    dh = np.dot(self.W_v.v.T, dv)
    dh += dh_next

    do = dh * utils.tanh(C)
    do = utils.dsigmoid(o) * do
    self.W_o.d += np.dot(do, z.T)
    self.b_o.d += do

    dC = np.copy(dC_next)
    dC += dh * o * utils.dtanh(utils.tanh(C))
    dC_bar = dC * i
    dC_bar = utils.dtanh(C_bar) * dC_bar
    self.W_C.d += np.dot(dC_bar, z.T)
    self.b_C.d += dC_bar

    di = dC * C_bar
    di = utils.dsigmoid(i) * di
    self.W_i.d += np.dot(di, z.T)
    self.b_i.d += di

    df = dC * C_prev
    df = utils.dsigmoid(f) * df
    self.W_f.d += np.dot(df, z.T)
    self.b_f.d += df

    dz = (np.dot(self.W_f.v.T, df)
          + np.dot(self.W_i.v.T, di)
          + np.dot(self.W_C.v.T, dC_bar)
          + np.dot(self.W_o.v.T, do))
    dh_prev = dz[:self.h_size, :]
    dC_prev = f * dC

    return dh_prev, dC_prev

def output(self):
    """ Generate output of this layer """
    self.x_imgs = self.prev_layer.output()
    return numpy.asarray(map(
        lambda i: tanh(self.b[i] + reduce(
            lambda res, j: res + conv2d(self.x_imgs[j], self.theta[i]),
            self.connections[i],
            0
        )),
        xrange(0, len(self.connections))
    ))

def lstm_numpy(x, W, U, b):
    z = numpy.dot(x, W) + b
    n_hidden = b.shape[0] // 4
    h = numpy.zeros((x.shape[0], n_hidden), dtype=x.dtype)
    prev_h = numpy.zeros(n_hidden, dtype=x.dtype)
    prev_c = numpy.zeros(n_hidden, dtype=x.dtype)

    def _slice(_x, n, dim):
        return _x[n * dim:(n + 1) * dim]

    for n in range(len(h)):
        preact = numpy.dot(prev_h, U) + z[n]
        i = utils.sigmoid(_slice(preact, 0, n_hidden))
        f = utils.sigmoid(_slice(preact, 1, n_hidden))
        o = utils.sigmoid(_slice(preact, 2, n_hidden))
        c = utils.tanh(_slice(preact, 3, n_hidden))
        c = f * prev_c + i * c
        h[n] = o * utils.tanh(c)
        prev_c = c
        prev_h = h[n]
    return h

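# A minimal usage sketch for lstm_numpy above. The shapes follow directly from the code:
# W maps the input to the concatenated i/f/o/c pre-activations and U does the same for the
# recurrent state. The sizes are placeholders, not values from the original project.
import numpy

n_steps, n_in, n_hidden = 5, 3, 4
x = numpy.random.randn(n_steps, n_in).astype('float32')
W = numpy.random.randn(n_in, 4 * n_hidden).astype('float32')
U = numpy.random.randn(n_hidden, 4 * n_hidden).astype('float32')
b = numpy.zeros(4 * n_hidden, dtype='float32')
h = lstm_numpy(x, W, U, b)  # h.shape == (n_steps, n_hidden)
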
def forward_pass(self, x_index, state):
    Wsx, Wsh, bs, \
    Wix, Wih, bi, \
    Wfx, Wfh, bf, \
    Wox, Woh, bo, \
    Why, by = self.get_weights_and_biases()

    h_old, s_old = state

    # ~ input vector
    x = np.zeros((self.V, 1))
    x[x_index] = 1.0

    # ~ gates
    i = u.sigmoid(Wix @ x + Wih @ h_old + bi)
    o = u.sigmoid(Wox @ x + Woh @ h_old + bo)
    f = u.sigmoid(Wfx @ x + Wfh @ h_old + bf)

    # ~ state
    s_bar = u.tanh(Wsx @ x + Wsh @ h_old + bs)
    s = f * s_old + i * s_bar

    # ~ hidden state
    h = o * u.tanh(s)

    # ~ output
    y = Why @ h + by

    # ~ output as probabilities
    prob = u.softmax(y)

    # saving variables for backpropagation
    cache = (x, h, h_old, s, s_old, s_bar, i, f, o, y, prob)
    state = (h, s)
    return y, state, cache

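# A minimal sketch of one truncated-BPTT pass over a short sequence, wiring together the
# forward_pass above and the backward_pass shown earlier. `model`, `hidden_size`, and the
# toy indices are placeholders introduced here, not names from the original code.
import numpy as np

seq = [3, 1, 4, 1, 5]                 # input token indices
targets = seq[1:] + [0]               # next-token targets
state = (np.zeros((hidden_size, 1)), np.zeros((hidden_size, 1)))
caches = []
for x_index in seq:                   # forward through time
    y, state, cache = model.forward_pass(x_index, state)
    caches.append(cache)

d_next = (np.zeros((hidden_size, 1)), np.zeros((hidden_size, 1)))
grads = None
for y_true, cache in zip(reversed(targets), reversed(caches)):  # backward through time
    grad, d_next = model.backward_pass(y_true, d_next, cache)
    grads = grad if grads is None else {k: grads[k] + grad[k] for k in grads}
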
def predict(self, input):
    L = np.shape(input)[0]
    az = np.zeros((L, self.Nhidden))
    ar = np.zeros((L, self.Nhidden))
    ahhat = np.zeros((L, self.Nhidden))
    ah = np.zeros((L, self.Nhidden))

    a1 = tanh(np.dot(input, self.w1) + self.b1)

    # first time step: the previous hidden state is all zeros
    x = np.concatenate((np.zeros((self.Nhidden)), a1[0, :]))
    az[0, :] = sigm(np.dot(x, self.wz) + self.bz)
    ar[0, :] = sigm(np.dot(x, self.wr) + self.br)
    ahhat[0, :] = tanh(np.dot(x, self.wh) + self.bh)
    ah[0, :] = az[0, :] * ahhat[0, :]

    for i in range(1, L):
        x = np.concatenate((ah[i - 1, :], a1[i, :]))
        az[i, :] = sigm(np.dot(x, self.wz) + self.bz)
        ar[i, :] = sigm(np.dot(x, self.wr) + self.br)
        x = np.concatenate((ar[i, :] * ah[i - 1, :], a1[i, :]))
        ahhat[i, :] = tanh(np.dot(x, self.wh) + self.bh)
        ah[i, :] = (1 - az[i, :]) * ah[i - 1, :] + az[i, :] * ahhat[i, :]

    a2 = tanh(np.dot(ah, self.w2) + self.b2)
    return [a1, az, ar, ahhat, ah, a2]

def _forward_propagation(self, X):
    W1 = self.params['W1']
    b1 = self.params['b1']
    W2 = self.params['W2']
    b2 = self.params['b2']

    Z1 = np.dot(W1, X) + b1
    A1 = tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)

    self.caches['Z1'] = Z1
    self.caches['A1'] = A1
    self.caches['Z2'] = Z2
    self.caches['A2'] = A2
    return A2

def linear_activation_forward(A_prev, W, b, activation):
    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A = ut.sigmoid(Z)
    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A = ut.relu(Z)
    elif activation == "tanh":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A = ut.tanh(Z)

    cache = (linear_cache, Z)
    return A, cache

def feedforward(self, x):
    """
    :param x: robot.sensor_values()
    :return: Vr, Vl
    """
    self.input = x[0]

    # hidden layer (input ~+ prev_hidden)
    self.input = np.round(self.input, 5)
    prev_values = np.round(self.layer_1_values[-1], 5)
    self.layer_1 = (utils.sigmoid(np.dot(self.input, self.synapse_0)
                                  + np.dot(prev_values, self.synapse_h))
                    if RNN else
                    utils.sigmoid(np.dot(self.input, self.synapse_0)))

    # output layer
    self.layer_1 = np.round(self.layer_1, 5)
    self.output = utils.tanh(np.dot(self.layer_1, self.synapse_1))

    # store hidden layer so we can use it in the next time step
    self.layer_1_values.append(copy.deepcopy(self.layer_1))

    return np.round(self.output, 5)

def step(prev, elems):
    # gather previous internal state and output state
    if options['use_dropout']:
        m_, x_, dp_ = elems
    else:
        m_, x_ = elems
    h_, c_, _, _, _ = prev

    preact = tf.matmul(h_, U, name="MatMul_preact")  # (64,512)*(512,2048) = (64,2048) or (m,2048) in sampling
    preact = preact + x_

    i = _slice(preact, 0, dim)  # (64,512), columns 0-511, or (m,512) in sampling
    f = _slice(preact, 1, dim)  # (64,512), columns 512-1023, or (m,512) in sampling
    o = _slice(preact, 2, dim)  # (64,512), columns 1024-1535, or (m,512) in sampling
    if options['use_dropout']:
        i = i * _slice(dp_, 0, dim)
        f = f * _slice(dp_, 1, dim)
        o = o * _slice(dp_, 2, dim)
    i = tf.sigmoid(i)
    f = tf.sigmoid(f)
    o = tf.sigmoid(o)
    c = tf.tanh(_slice(preact, 3, dim))  # (64,512), columns 1536-2047, or (m,512) in sampling

    c = f * c_ + i * c
    c = m_[:, None] * c + (1. - m_)[:, None] * c_  # (m,1)*(m,512) + (m,1)*(m,512) = (m,512) in sampling
    h = o * tf.tanh(c)  # (m,512)*(m,512) = (m,512) in sampling
    h = m_[:, None] * h + (1. - m_)[:, None] * h_

    # attention
    pstate_ = tf.matmul(h, Wd_att)  # (64,512)*(512,2048) = (64,2048) or (m,2048) in sampling
    pctx_t = pctx_ + pstate_[:, None, :]  # (1,28,2048) + (m,1,2048) = (m,28,2048)
    pctx_t = tanh(pctx_t)
    alpha = batch_matmul(pctx_t, U_att) + c_att  # (64,28,2048)*(2048,1) + (1,) = (64,28,1) or (m,28,1) in sampling
    alpha_pre = alpha
    alpha_shape = tf.shape(alpha)
    alpha = tf.nn.softmax(tf.reshape(alpha, [alpha_shape[0], alpha_shape[1]]))  # softmax, (64,28) or (m,28) in sampling
    ctx_ = tf.reduce_sum((context * alpha[:, :, None]), 1)  # (m, ctx_dim): (64,28,2048)*(64,28,1) summed over axis 1 = (64,2048)

    if options['selector']:
        sel_ = tf.sigmoid(tf.matmul(h_, W_sel) + b_sel)  # (64,512)*(512,1)+(scalar) = (64,1) or (m,1) in sampling
        sel_shape = tf.shape(sel_)
        sel_ = tf.reshape(sel_, [sel_shape[0]])  # (64,) or (m,) in sampling
        ctx_ = sel_[:, None] * ctx_  # (64,1)*(64,2048) = (64,2048) or (m,2048) in sampling
    else:
        sel_ = tf.zeros(shape=(n_samples,), dtype=tf.float32)

    rval = [h, c, alpha, ctx_, sel_]
    return rval

def dis(self, x, training):
    x = tf.reshape(x, shape=[-1, self.shape, self.shape, 3])
    scope = 'dis_'
    layer = lrelu(conv2d(x, self.weights[scope + 'w_conv1']) + self.biases[scope + 'b_conv1'])
    for i in range(1, 4):
        conv = prelu(conv2d(layer, self.weights[scope + 'w_conv' + str(i + 1)])
                     + self.biases[scope + 'b_conv' + str(i + 1)],
                     scope + 'w_conv' + str(i + 1))
        conv = maxpool2d(conv)
        conv = tf.nn.dropout(conv, self.keep_rate)
        layer = conv

    fc = tf.reshape(layer, [-1, int(self.shape / 8) * int(self.shape / 8) * 256])
    fc = lrelu(tf.matmul(fc, self.weights[scope + 'w_fc']) + self.biases[scope + 'b_fc'])
    fc = tf.nn.dropout(fc, self.keep_rate)

    output = tf.matmul(fc, self.weights[scope + 'out']) + self.biases[scope + 'out']
    output = (tanh(output) + 1.0) * 0.5  # squash the logits into (0, 1)
    return output

def _step(m_, x_,            # sequences
          h_, c_, a_, ctx_,  # outputs_info
          dp_=None           # non_sequences
          ):
    # attention
    pstate_ = T.dot(h_, Wd_att)
    pstate_ = pctx_ + pstate_[:, None, :]
    pstate_ = tanh(pstate_)
    alpha = T.dot(pstate_, U_att) + c_att
    alpha_shp = alpha.shape
    alpha = T.nnet.softmax(alpha.reshape([alpha_shp[0], alpha_shp[1]]))  # softmax
    ctx_ = (context * alpha[:, :, None]).sum(1)  # (m, ctx_dim)
    if options['selector']:
        sel_ = T.nnet.sigmoid(T.dot(h_, W_sel) + b_sel)
        sel_ = sel_.reshape([sel_.shape[0]])
        ctx_ = sel_[:, None] * ctx_

    preact = T.dot(h_, U)
    preact += x_
    preact += T.dot(ctx_, Wc)

    i = _slice(preact, 0, dim)
    f = _slice(preact, 1, dim)
    o = _slice(preact, 2, dim)
    if options['use_dropout']:
        i *= _slice(dp_, 0, dim)
        f *= _slice(dp_, 1, dim)
        o *= _slice(dp_, 2, dim)
    i = T.nnet.sigmoid(i)
    f = T.nnet.sigmoid(f)
    o = T.nnet.sigmoid(o)
    c = T.tanh(_slice(preact, 3, dim))

    c = f * c_ + i * c
    c = m_[:, None] * c + (1. - m_)[:, None] * c_
    h = o * T.tanh(c)
    h = m_[:, None] * h + (1. - m_)[:, None] * h_

    rval = [h, c, alpha, ctx_]
    return rval

                # (snippet begins mid-call; the opening of the engine play() call is truncated)
                tuple(pgn.read_game(f).mainline())).board(),
                limit=engine.Limit(time=.1),
                info=engine.INFO_SCORE)
        except AttributeError:
            break
        for kwd, x in zip(
                kwds.values(),
                (bitboard(board, dtype=int),
                 moves.index(
                     (play_result.move if board.turn else chess.Move(
                         *(len(chess.SQUARES) - np.array(
                             (play_result.move.from_square,
                              play_result.move.to_square)) - 1),
                         promotion=play_result.move.promotion)).uci()),
                 tanh(play_result.info["score"].relative.score(
                     mate_score=7625), k=.0025))):
            kwd.append(x)
    except (AttributeError, IndexError, ValueError):
        continue
    if checkpoint and not len(kwds["X"]) % checkpoint:
        savez()
    savez()
    await uci_protocol.quit()


async def main() -> None:
    semaphore = asyncio.Semaphore(value=3)
    await asyncio.gather(
        *(synchronize(semaphore)(fetch)(file, checkpoint=10000)
          for file in glob.glob(_path("../data/*.pgn"))))

def build_sampler(self, tfparams, options, use_noise, ctx0, ctx_mask, x,
                  bo_init_state_sampler, to_init_state_sampler,
                  bo_init_memory_sampler, to_init_memory_sampler, mode=None):
    # ctx: #frames x ctx_dim
    ctx_ = ctx0
    counts = tf.reduce_sum(ctx_mask, axis=-1)  # scalar
    ctx = ctx_
    ctx_mean = tf.reduce_sum(ctx, axis=0) / counts  # (2048,)
    ctx = tf.expand_dims(ctx, 0)  # (1,28,2048)

    # initial state/cell
    bo_init_state = self.layers.get_layer('ff')[1](tfparams, ctx_mean, options,
                                                   prefix='ff_state', activ='tanh')  # (512,)
    bo_init_memory = self.layers.get_layer('ff')[1](tfparams, ctx_mean, options,
                                                    prefix='ff_memory', activ='tanh')  # (512,)
    to_init_state = tf.zeros(shape=(options['lstm_dim'],), dtype=tf.float32)  # DOUBT: constant or not? # (512,)
    to_init_memory = tf.zeros(shape=(options['lstm_dim'],), dtype=tf.float32)  # (512,)
    init_state = [bo_init_state, to_init_state]
    init_memory = [bo_init_memory, to_init_memory]

    print 'building f_init...',
    f_init = [ctx0] + init_state + init_memory
    print 'done'

    init_state = [bo_init_state_sampler, to_init_state_sampler]
    init_memory = [bo_init_memory_sampler, to_init_memory_sampler]

    # if it's the first word, the embedding should be all zero
    emb = tf.cond(
        tf.reduce_any(x[:, None] < 0),
        lambda: tf.zeros(shape=(1, tfparams['Wemb'].shape[1]), dtype=tf.float32),
        lambda: tf.nn.embedding_lookup(tfparams['Wemb'], x))  # (m,512)

    bo_lstm = self.layers.get_layer('lstm_cond')[1](tfparams,
                                                    emb,
                                                    options,
                                                    prefix='bo_lstm',
                                                    mask=None,
                                                    context=ctx,
                                                    context_mean=tf.expand_dims(ctx_mean, 0),
                                                    one_step=True,
                                                    init_state=init_state[0],
                                                    init_memory=init_memory[0],
                                                    use_noise=use_noise,
                                                    mode=mode)
    to_lstm = self.layers.get_layer('lstm')[1](tfparams,
                                               bo_lstm[0],
                                               mask=None,
                                               one_step=True,
                                               init_state=init_state[1],
                                               init_memory=init_memory[1],
                                               prefix='to_lstm')
    next_state = [bo_lstm[0], to_lstm[0]]
    next_memory = [bo_lstm[1], to_lstm[0]]

    bo_lstm_h = bo_lstm[0]  # (1,512)
    to_lstm_h = to_lstm[0]  # (1,512)
    alphas = bo_lstm[2]     # (1,28)
    ctxs = bo_lstm[3]       # (1,2048)
    betas = bo_lstm[4]      # (1,)
    if options['use_dropout']:
        bo_lstm_h = self.layers.dropout_layer(bo_lstm_h, use_noise)
        to_lstm_h = self.layers.dropout_layer(to_lstm_h, use_noise)

    # compute word probabilities
    logit = self.layers.get_layer('ff')[1](tfparams, bo_lstm_h, options,
                                           prefix='ff_logit_bo', activ='linear')  # (1,512)*(512,512) = (1,512)
    if options['prev2out']:
        logit += emb
    if options['ctx2out']:
        to_lstm_h *= (1 - betas[:, None])  # (1,512)*(1,1) = (1,512)
        ctxs_beta = self.layers.get_layer('ff')[1](tfparams, ctxs, options,
                                                   prefix='ff_logit_ctx', activ='linear')  # (1,2048)*(2048,512) = (1,512)
        ctxs_beta += self.layers.get_layer('ff')[1](tfparams, to_lstm_h, options,
                                                    prefix='ff_logit_to', activ='linear')  # (1,512)+((1,512)*(512,512)) = (1,512)
        logit += ctxs_beta
    logit = utils.tanh(logit)  # (1,512)
    if options['use_dropout']:
        logit = self.layers.dropout_layer(logit, use_noise)

    # (1,n_words)
    logit = self.layers.get_layer('ff')[1](tfparams, logit, options,
                                           prefix='ff_logit', activ='linear')  # (1,512)*(512,vocab_size) = (1,vocab_size)
    next_probs = tf.nn.softmax(logit)
    # next_sample = trng.multinomial(pvals=next_probs).argmax(1)  # INCOMPLETE, DOUBT: why is multinomial needed?
    next_sample = tf.multinomial(next_probs, 1)  # draw samples with the given probabilities, (1,1)
    next_sample_shape = tf.shape(next_sample)
    next_sample = tf.reshape(next_sample, [next_sample_shape[0]])

    # next word probability
    print 'building f_next...',
    f_next = [next_probs, next_sample] + next_state + next_memory
    print 'done'

    return f_init, f_next

def build_sampler(self, tparams, options, use_noise, trng, mode=None):
    # context: #annotations x dim
    ctx0 = tensor.matrix('ctx_sampler', dtype='float32')
    # ctx0.tag.test_value = numpy.random.uniform(size=(50,1024)).astype('float32')
    ctx_mask = tensor.vector('ctx_mask', dtype='float32')
    # ctx_mask.tag.test_value = numpy.random.binomial(n=1,p=0.5,size=(50,)).astype('float32')
    ctx0_c = tensor.matrix('ctx_sampler_c', dtype='float32')
    ctx_mask_c = tensor.vector('ctx_mask_c', dtype='float32')

    ctx_ = ctx0
    counts = ctx_mask.sum(-1)
    ctx = ctx_
    ctx_mean = ctx.sum(0) / counts

    ctx_c_ = ctx0_c
    counts_c = ctx_mask_c.sum(-1)
    ctx_c = ctx_c_
    ctx_mean_c = ctx_c.sum(0) / counts_c

    # ctx_mean = ctx.mean(0)
    ctx = ctx.dimshuffle('x', 0, 1)

    # initial state/cell
    bo_init_state = self.layers.get_layer('ff')[1](tparams, ctx_mean, options,
                                                   prefix='ff_state', activ='tanh')
    bo_init_memory = self.layers.get_layer('ff')[1](tparams, ctx_mean, options,
                                                    prefix='ff_memory', activ='tanh')
    bo_init_state_c = self.layers.get_layer('ff')[1](tparams, ctx_mean_c, options,
                                                     prefix='ff_state_c', activ='tanh')
    bo_init_memory_c = self.layers.get_layer('ff')[1](tparams, ctx_mean_c, options,
                                                      prefix='ff_memory_c', activ='tanh')
    bo_init_state += bo_init_state_c
    bo_init_memory += bo_init_memory_c
    to_init_state = tensor.alloc(0., options['dim'])
    to_init_memory = tensor.alloc(0., options['dim'])
    init_state = [bo_init_state, to_init_state]
    init_memory = [bo_init_memory, to_init_memory]

    print 'Building f_init...',
    f_init = theano.function([ctx0, ctx_mask, ctx0_c, ctx_mask_c],
                             [ctx0] + init_state + init_memory,
                             name='f_init',
                             on_unused_input='ignore',
                             profile=False,
                             mode=mode)
    print 'Done'

    x = tensor.vector('x_sampler', dtype='int64')
    init_state = [
        tensor.matrix('bo_init_state', dtype='float32'),
        tensor.matrix('to_init_state', dtype='float32')
    ]
    init_memory = [
        tensor.matrix('bo_init_memory', dtype='float32'),
        tensor.matrix('to_init_memory', dtype='float32')
    ]

    # if it's the first word, emb should be all zero
    emb = tensor.switch(x[:, None] < 0,
                        tensor.alloc(0., 1, tparams['Wemb'].shape[1]),
                        tparams['Wemb'][x])

    bo_lstm = self.layers.get_layer('lstm_cond')[1](tparams,
                                                    emb,
                                                    options,
                                                    prefix='bo_lstm',
                                                    mask=None,
                                                    context=ctx,
                                                    context_c=ctx_c,
                                                    one_step=True,
                                                    init_state=init_state[0],
                                                    init_memory=init_memory[0],
                                                    trng=trng,
                                                    use_noise=use_noise,
                                                    mode=mode)
    to_lstm = self.layers.get_layer('lstm')[1](tparams,
                                               bo_lstm[0],
                                               mask=None,
                                               one_step=True,
                                               init_state=init_state[1],
                                               init_memory=init_memory[1],
                                               prefix='to_lstm')
    next_state = [bo_lstm[0], to_lstm[0]]
    next_memory = [bo_lstm[1], to_lstm[0]]

    bo_lstm_h = bo_lstm[0]
    to_lstm_h = to_lstm[0]
    alphas = bo_lstm[2]
    alphas_c = bo_lstm[3]
    ctxs = bo_lstm[4]
    ctxs_c = bo_lstm[5]
    weight = bo_lstm[6]
    if options['use_dropout']:
        bo_lstm_h = self.layers.dropout_layer(bo_lstm_h, use_noise, trng)
        to_lstm_h = self.layers.dropout_layer(to_lstm_h, use_noise, trng)

    logit = self.layers.get_layer('ff')[1](tparams, bo_lstm_h, options,
                                           prefix='ff_logit_bo', activ='linear')
    if options['prev2out']:
        logit += emb
    if options['ctx2out']:
        betas = weight[:, 2]
        # betas = betas.reshape([betas.shape[1],betas.shape[2]])
        to_lstm_h *= betas[:, None]
        ctxs_beta = self.layers.get_layer('ff')[1](tparams, ctxs, options,
                                                   prefix='ff_logit_ctx', activ='linear')
        ctxs_beta_c = self.layers.get_layer('ff')[1](tparams, ctxs_c, options,
                                                     prefix='ff_logit_ctx_c', activ='linear')
        to_lstm_h = self.layers.get_layer('ff')[1](tparams, to_lstm_h, options,
                                                   prefix='ff_logit_to', activ='linear')
        logit = logit + ctxs_beta + ctxs_beta_c + to_lstm_h
    logit = utils.tanh(logit)
    if options['use_dropout']:
        logit = self.layers.dropout_layer(logit, use_noise, trng)

    logit = self.layers.get_layer('ff')[1](tparams, logit, options,
                                           prefix='ff_logit', activ='linear')
    logit_shp = logit.shape
    next_probs = tensor.nnet.softmax(logit)
    next_sample = trng.multinomial(pvals=next_probs).argmax(1)

    # next word probability
    print 'building f_next...'
    f_next = theano.function([x, ctx0, ctx_mask, ctx0_c, ctx_mask_c] + init_state + init_memory,
                             [next_probs, next_sample] + next_state + next_memory,
                             name='f_next',
                             profile=False,
                             mode=mode,
                             on_unused_input='ignore')
    print 'Done'

    return f_init, f_next

def build_model(self, tfparams, options, x, mask, ctx, ctx_mask):
    use_noise = tf.Variable(False, dtype=tf.bool, trainable=False, name="use_noise")
    x_shape = tf.shape(x)
    n_timesteps = x_shape[0]
    n_samples = x_shape[1]

    # get word embeddings
    emb = tf.nn.embedding_lookup(tfparams['Wemb'], x, name="inputs_emb_lookup")  # (num_steps,64,512)
    emb_shape = tf.shape(emb)
    indices = tf.expand_dims(tf.range(1, emb_shape[0]), axis=1)
    emb_shifted = tf.scatter_nd(indices, emb[:-1], emb_shape)
    emb = emb_shifted

    # count num_frames==28
    with tf.name_scope("ctx_mean"):
        with tf.name_scope("counts"):
            counts = tf.expand_dims(
                tf.reduce_sum(ctx_mask, axis=-1, name="reduce_sum_ctx_mask"), 1)  # (64,1)
        ctx_ = ctx
        ctx0 = ctx_  # (64,28,2048)
        ctx_mean = tf.reduce_sum(ctx0, axis=1, name="reduce_sum_ctx") / counts  # mean pooling of {vi}, (64,2048)

    # initial state/cell
    with tf.name_scope("init_state"):
        init_state = self.layers.get_layer('ff')[1](tfparams, ctx_mean, options,
                                                    prefix='ff_state', activ='tanh')  # (64,512)
    with tf.name_scope("init_memory"):
        init_memory = self.layers.get_layer('ff')[1](tfparams, ctx_mean, options,
                                                     prefix='ff_memory', activ='tanh')  # (64,512)

    # hstltm = self.layers.build_hlstm(['bo_lstm','to_lstm'], inputs, n_timesteps, init_state, init_memory)
    with tf.name_scope("bo_lstm"):
        bo_lstm = self.layers.get_layer('lstm_cond')[1](tfparams,
                                                        emb,
                                                        options,
                                                        prefix='bo_lstm',
                                                        mask=mask,
                                                        context=ctx0,
                                                        context_mean=ctx_mean,
                                                        one_step=False,
                                                        init_state=init_state,
                                                        init_memory=init_memory,
                                                        use_noise=use_noise)
    with tf.name_scope("to_lstm"):
        to_lstm = self.layers.get_layer('lstm')[1](tfparams,
                                                   bo_lstm[0],
                                                   mask=mask,
                                                   one_step=False,
                                                   prefix='to_lstm')

    bo_lstm_h = bo_lstm[0]  # (t,64,512)
    to_lstm_h = to_lstm[0]  # (t,64,512)
    alphas = bo_lstm[2]     # (t,64,28)
    ctxs = bo_lstm[3]       # (t,64,2048)
    betas = bo_lstm[4]      # (t,64,)
    if options['use_dropout']:
        bo_lstm_h = self.layers.dropout_layer(bo_lstm_h, use_noise)
        to_lstm_h = self.layers.dropout_layer(to_lstm_h, use_noise)

    # compute word probabilities
    logit = self.layers.get_layer('ff')[1](tfparams, bo_lstm_h, options,
                                           prefix='ff_logit_bo', activ='linear')  # (t,64,512)*(512,512) = (t,64,512)
    if options['prev2out']:
        logit += emb
    if options['ctx2out']:
        to_lstm_h *= (1 - betas[:, :, None])  # (t,64,512)*(t,64,1)
        ctxs_beta = self.layers.get_layer('ff')[1](tfparams, ctxs, options,
                                                   prefix='ff_logit_ctx', activ='linear')  # (t,64,2048)*(2048,512) = (t,64,512)
        ctxs_beta += self.layers.get_layer('ff')[1](tfparams, to_lstm_h, options,
                                                    prefix='ff_logit_to', activ='linear')  # (t,64,512)+((t,64,512)*(512,512)) = (t,64,512)
        logit += ctxs_beta
    logit = utils.tanh(logit)  # (t,64,512)
    if options['use_dropout']:
        logit = self.layers.dropout_layer(logit, use_noise)

    # (t,m,n_words)
    logit = self.layers.get_layer('ff')[1](tfparams, logit, options,
                                           prefix='ff_logit', activ='linear')  # (t,64,512)*(512,vocab_size) = (t,64,vocab_size)
    logit_shape = tf.shape(logit)
    # (t*m, n_words)
    probs = tf.nn.softmax(
        tf.reshape(logit, [logit_shape[0] * logit_shape[1], logit_shape[2]]))  # (t*64, vocab_size)

    # cost
    x_flat = tf.reshape(x, [x_shape[0] * x_shape[1]])  # (t*m,)
    x_flat_shape = tf.shape(x_flat)
    gather_indices = tf.stack([tf.range(x_flat_shape[0]), x_flat], axis=1)  # (t*m,2)
    cost = -tf.log(tf.gather_nd(probs, gather_indices) + 1e-8)  # (t*m,): pick the prob of each word at each timestep
    cost = tf.reshape(cost, [x_shape[0], x_shape[1]])  # (t,m)
    cost = tf.reduce_sum((cost * mask), axis=0)  # (m,): sum across all timesteps for each element in the batch

    extra = [probs, alphas, betas]
    return use_noise, cost, extra

def build_model(self, tparams, options):
    trng = RandomStreams(1234)
    use_noise = theano.shared(numpy.float32(0.))

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype='float32')
    # context: #samples x #annotations x dim
    ctx = tensor.tensor3('ctx', dtype='float32')
    mask_ctx = tensor.matrix('mask_ctx', dtype='float32')
    ctx_c = tensor.tensor3('ctx_c', dtype='float32')
    mask_ctx_c = tensor.matrix('mask_ctx_c', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # index into the word embedding matrix, shift it forward in time
    emb = tparams['Wemb'][x.flatten()].reshape(
        [n_timesteps, n_samples, options['dim_word']])
    emb_shifted = tensor.zeros_like(emb)
    emb_shifted = tensor.set_subtensor(emb_shifted[1:], emb[:-1])
    emb = emb_shifted

    counts = mask_ctx.sum(-1).dimshuffle(0, 'x')
    ctx_ = ctx
    ctx_c_ = ctx_c
    ctx0 = ctx_
    ctx_mean = ctx0.sum(1) / counts
    ctx0_c = ctx_c_
    ctx_mean_c = ctx0_c.sum(1) / counts

    # initial state/cell
    init_state = self.layers.get_layer('ff')[1](tparams, ctx_mean, options,
                                                prefix='ff_state', activ='tanh')
    init_memory = self.layers.get_layer('ff')[1](tparams, ctx_mean, options,
                                                 prefix='ff_memory', activ='tanh')
    init_state_c = self.layers.get_layer('ff')[1](tparams, ctx_mean_c, options,
                                                  prefix='ff_state_c', activ='tanh')
    init_memory_c = self.layers.get_layer('ff')[1](tparams, ctx_mean_c, options,
                                                   prefix='ff_memory_c', activ='tanh')
    init_state += init_state_c
    init_memory += init_memory_c

    # decoder
    bo_lstm = self.layers.get_layer('lstm_cond')[1](tparams,
                                                    emb,
                                                    options,
                                                    prefix='bo_lstm',
                                                    mask=mask,
                                                    context=ctx0,
                                                    context_c=ctx0_c,
                                                    one_step=False,
                                                    init_state=init_state,
                                                    init_memory=init_memory,
                                                    trng=trng,
                                                    use_noise=use_noise)
    to_lstm = self.layers.get_layer('lstm')[1](tparams,
                                               bo_lstm[0],
                                               mask=mask,
                                               one_step=False,
                                               prefix='to_lstm')

    bo_lstm_h = bo_lstm[0]
    to_lstm_h = to_lstm[0]
    alphas = bo_lstm[2]
    alphas_c = bo_lstm[3]
    ctxs = bo_lstm[4]
    ctxs_c = bo_lstm[5]
    weight = bo_lstm[6]
    if options['use_dropout']:
        bo_lstm_h = self.layers.dropout_layer(bo_lstm_h, use_noise, trng)
        to_lstm_h = self.layers.dropout_layer(to_lstm_h, use_noise, trng)

    # compute word probabilities
    logit = self.layers.get_layer('ff')[1](tparams, bo_lstm_h, options,
                                           prefix='ff_logit_bo', activ='linear')
    if options['prev2out']:
        logit += emb
    if options['ctx2out']:
        betas = weight[:, :, 2]
        # betas = betas.reshape([betas.shape[1],betas.shape[2]])
        to_lstm_h *= betas[:, :, None]
        ctxs_beta = self.layers.get_layer('ff')[1](tparams, ctxs, options,
                                                   prefix='ff_logit_ctx', activ='linear')
        ctxs_beta_c = self.layers.get_layer('ff')[1](tparams, ctxs_c, options,
                                                     prefix='ff_logit_ctx_c', activ='linear')
        to_lstm_h = self.layers.get_layer('ff')[1](tparams, to_lstm_h, options,
                                                   prefix='ff_logit_to', activ='linear')
        logit = logit + ctxs_beta + ctxs_beta_c + to_lstm_h
    logit = utils.tanh(logit)
    if options['use_dropout']:
        logit = self.layers.dropout_layer(logit, use_noise, trng)

    # (t,m,n_words)
    logit = self.layers.get_layer('ff')[1](tparams, logit, options,
                                           prefix='ff_logit', activ='linear')
    logit_shp = logit.shape
    # (t*m, n_words)
    probs = tensor.nnet.softmax(
        logit.reshape([logit_shp[0] * logit_shp[1], logit_shp[2]]))

    # cost
    x_flat = x.flatten()  # (t*m,)
    cost = -tensor.log(probs[tensor.arange(x_flat.shape[0]), x_flat] + 1e-8)
    cost = cost.reshape([x.shape[0], x.shape[1]])
    cost = (cost * mask).sum(0)

    extra = [probs, alphas, alphas_c, weight[:, :, 0], weight[:, :, 1]]
    return trng, use_noise, x, mask, ctx, mask_ctx, ctx_c, mask_ctx_c, cost, extra

def get_next_step(self, cur, prevtime=0):
    """
    Given the node `cur` the random walk is currently at (neighbours may be connected
    by multiple edges), choose the next step.

    Output: instead of returning alias tables (J, q), directly return the next node,
    its timestamp, and the edge key.
    """
    G = self.G
    tmp_key = []
    tmp_node = []
    tmp_time = []
    unnormalized_probs_t = []
    unnormalized_probs_a = []
    cur_nbrs = list(G.neighbors(cur))

    if self.time_biased_type == "simple_graph":  # DeepWalk
        for nbr in cur_nbrs:
            tmp_node.append(nbr)
            unnormalized_probs_t.append(1)
        if len(unnormalized_probs_t) > 0:
            idx = weight_choice(unnormalized_probs_t)
            next_node = tmp_node[idx]
            next_time = 0
            next_key = 0
            return next_node, next_time, next_key
        else:
            return None, None, None  # no eligible next node
    else:
        for nbr in cur_nbrs:
            nbr_key = list(G.get_edge_data(cur, nbr))  # keys of the edges between cur and nbr
            for k in nbr_key:
                t = k
                a = G[cur][nbr][k]['weight']
                if self.time_biased_type == "no_time_limit":
                    unnormalized_probs_t.append(1)
                elif t >= prevtime:
                    unnormalized_probs_a.append(a)
                    if self.time_biased_type == "time_uniform":
                        unnormalized_probs_t.append(1)
                    elif self.time_biased_type == "time_close_raw":
                        unnormalized_probs_t.append(self.max_time - t + 1)
                    elif self.time_biased_type == "time_close_exp":
                        unnormalized_probs_t.append(t - prevtime)
                    else:
                        unnormalized_probs_t.append(t - prevtime + 1)
                    tmp_time.append(t)
                    tmp_node.append(nbr)
                    tmp_key.append(k)

        if self.time_biased_type == "time_close_linear":
            unnormalized_probs_t = linear_rank_mapping(unnormalized_probs_t, order='descending')
        elif self.time_biased_type == "time_far_linear":
            unnormalized_probs_t = linear_rank_mapping(unnormalized_probs_t)
        elif self.time_biased_type == "time_freq_tanh":
            unnormalized_probs_t = tanh(unnormalized_probs_t)
        elif self.time_biased_type == "time_close_exp":
            unnormalized_probs_t = softmax(unnormalized_probs_t)

        if self.amount_biased == "amount_linear":
            unnormalized_probs_a = linear_rank_mapping(unnormalized_probs_a)
        elif self.amount_biased == "amount_tanh":
            unnormalized_probs_a = tanh(unnormalized_probs_a)
        elif self.amount_biased == "amount_exp":
            unnormalized_probs_a = softmax(unnormalized_probs_a)

        if len(unnormalized_probs_t) > 0:  # at least one eligible next node
            if self.amount_biased != "amount_uniform":
                unnormalized_probs = combine_probs(unnormalized_probs_t,
                                                   unnormalized_probs_a,
                                                   self.alpha)
            else:
                unnormalized_probs = unnormalized_probs_t
            selected = weight_choice(unnormalized_probs)
            next_node = tmp_node[selected]
            next_time = tmp_time[selected]
            next_key = tmp_key[selected]
            return next_node, next_time, next_key
        else:
            return None, None, None  # no eligible next node

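# A minimal sketch of the weight_choice helper used above (not shown in this collection).
# From its call sites it takes a list of unnormalized weights and returns an index that is
# then used to pick from tmp_node, so a proportional random draw is assumed here.
import random

def weight_choice(weights):
    # draw an index with probability proportional to its weight
    total = sum(weights)
    r = random.uniform(0, total)
    acc = 0.0
    for idx, w in enumerate(weights):
        acc += w
        if acc >= r:
            return idx
    return len(weights) - 1
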