import random


def turn(field):
    # Pick a random free cell on the 3x3 board; lib.check(field, col, row) > 0
    # means the cell is already occupied.
    col = random.randint(0, 2)
    row = random.randint(0, 2)
    while lib.check(field, col, row) > 0:
        col = random.randint(0, 2)
        row = random.randint(0, 2)
    return col, row
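# A minimal usage sketch for turn(). The stub below is hypothetical: it only
# assumes the contract visible above, i.e. lib.check(field, col, row) returns
# a value > 0 for an occupied cell and 0 for a free one on a 3x3 board.
class _StubLib:
    @staticmethod
    def check(field, col, row):
        return field[row][col]  # 0 = free, >0 = occupied


if __name__ == "__main__":
    lib = _StubLib()
    field = [[1, 0, 2],
             [0, 1, 0],
             [2, 0, 0]]
    col, row = turn(field)
    assert field[row][col] == 0  # turn() only ever returns a free cell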
import numpy as np
import torch


def prime(x, proj_matrix):
    # x: shape (B, len, dim)
    # proj_matrix: shape (dim, proj_dim)
    _, m = proj_matrix.shape
    # compute the offset in log space: log(exp(||x||^2 / 2) * sqrt(m) * sqrt(2))
    norm_x_squared = torch.norm(x, dim=-1).pow(2) * 0.5
    log_sqrt_m = 0.5 * np.log(m)
    log_sqrt_2 = 0.5 * np.log(2)
    offset = norm_x_squared + log_sqrt_m + log_sqrt_2
    offset = offset.unsqueeze(-1)
    check(offset, [x.shape[0], x.shape[1], 1])
    u = torch.matmul(x, proj_matrix)
    pos = torch.exp(u - offset)
    neg = torch.exp(-u - offset)
    # last dim is the feature dim
    out = torch.cat([pos, neg], dim=-1)
    return out
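# prime() computes the FAVOR+ positive random-feature map used in
# Performer-style attention: phi(x) = exp(-||x||^2 / 2) / sqrt(2m)
# * [exp(x @ W), exp(-x @ W)], with the normalisers folded into `offset` in
# log space. A minimal usage sketch; the check() helper below is a
# hypothetical shape assertion (the real one is defined elsewhere in this
# codebase), and a plain Gaussian projection stands in for the orthogonal
# matrix used by the attention module.
def check(tensor, shape):
    # hypothetical helper: assert that a tensor has the expected shape
    assert list(tensor.shape) == shape, f"{list(tensor.shape)} != {shape}"


x = torch.randn(4, 10, 16)    # (batch, length, dim)
proj = torch.randn(16, 32)    # (dim, proj_dim)
feats = prime(x, proj)
check(feats, [4, 10, 64])     # positive and negative features, concatenated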
from flask import jsonify, request


def sensitive_words_check():
    # `text` instead of `input` to avoid shadowing the builtin; the default
    # '' also covers a missing form field
    text = request.form.get('q', '')
    if text == '':
        return jsonify(result='Please enter something!', keywords='empty')
    mode = request.form.get('m', 'default')
    if mode == 'custom':
        ref = 'static/sensitive_words.txt'
    else:
        ref = 'static/default.txt'
    result, reg_keywords, string_keywords = check(text, ref)
    return jsonify(result='\n'.join(result),
                   reg_keywords='|'.join(reg_keywords),
                   string_keywords=string_keywords)
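# A minimal sketch of how the handler above might be wired up; the route path
# and the app object are assumptions, and check(text, ref) is assumed to
# return (matched_lines, regex_keywords, string_keywords) as unpacked above.
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/check', view_func=sensitive_words_check,
                 methods=['POST'])  # hypothetical route

# Example request against the hypothetical endpoint:
#   curl -X POST -d 'q=some text' -d 'm=custom' http://localhost:5000/check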
# module-level imports assumed by this method
import numpy as np
import torch
import torch.nn.functional as F
from torch import einsum


def forward(self, x, q):
    key_indices = x[:, 0, :]
    value_indices = x[:, 1, :]
    bs, sl = value_indices.shape
    d, e = self.d, self.e
    check(value_indices, [bs, sl])
    check(key_indices, [bs, sl])
    check(q, [bs, 1])

    # embed keys and values and concatenate them; the value embedding is
    # frozen, hence the no_grad block and the index offset
    with torch.no_grad():
        v_emb = self.embeddingV(value_indices - self.n_values)
        check(v_emb, [bs, sl, self.n_values])
    k_emb = self.embeddingK(key_indices)
    check(k_emb, [bs, sl, e])
    x = torch.cat([v_emb, k_emb], dim=-1)
    check(x, [bs, sl, e + self.n_values])

    # embed the query
    q = self.embeddingK(q)
    check(q, [bs, 1, e])

    # compute k, v for every position in parallel
    K = self.W_k(x)
    V = v_emb
    check(K, [bs, sl, d])
    check(V, [bs, sl, self.n_values])

    # compute Q from q
    Q = self.W_q(q)
    check(Q, [bs, 1, d])

    # compute attention coefficients
    A = einsum("bli,bni->bln", K, Q) / np.sqrt(d)
    check(A, [bs, sl, 1])

    # softmax over the keys, then sum the weighted values
    A = F.softmax(A, dim=1)
    y_hat = einsum("bln,bld->bd", A, V)
    check(y_hat, [bs, self.n_values])
    return y_hat
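# The forward pass above is ordinary softmax attention with a single query:
#
#   A[b, l] = softmax_l( <K[b, l, :], Q[b, 0, :]> / sqrt(d) )
#   y_hat[b, :] = sum_l A[b, l] * V[b, l, :]
#
# so y_hat is a convex combination of the (frozen) value embeddings, weighted
# by how well each key matches the query.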
# module-level imports assumed by this method
import torch
from torch import cat, einsum, tanh
from torch.nn.functional import elu, layer_norm, relu


def forward(self, x, q):
    key_indices = x[:, 0, :]
    value_indices = x[:, 1, :]
    bs, sl = value_indices.shape
    d, e = self.d, self.e
    check(value_indices, [bs, sl])
    check(key_indices, [bs, sl])
    check(q, [bs, 1])

    # embed keys and values and concatenate them; the value embedding is
    # frozen, hence the no_grad block and the index offset
    with torch.no_grad():
        v_emb = self.embeddingV(value_indices - self.n_values)
        check(v_emb, [bs, sl, self.n_values])
    k_emb = self.embeddingK(key_indices)
    check(k_emb, [bs, sl, e])
    x = torch.cat([v_emb, k_emb], dim=-1)
    check(x, [bs, sl, e + self.n_values])

    # embed the query
    q = self.embeddingK(q)
    check(q, [bs, 1, e])

    # compute k, v for every position in parallel
    K = self.W_k(x)
    V = v_emb
    check(K, [bs, sl, d])
    check(V, [bs, sl, self.n_values])

    # compute optional variables (F is not used by the update rules below)
    betas = self.W_b(x)
    F = self.W_f(x)
    check(betas, [bs, sl, 1])
    check(F, [bs, sl, d])

    # compute Q from q
    Q = self.W_q(q)
    check(Q, [bs, 1, d])

    # ===== Linear Attentions =====
    if self.attention_type == 'linear':
        K = elu(K, alpha=1.) + 1.
        Q = elu(Q, alpha=1.) + 1.
        check(K, [bs, sl, d])
        check(Q, [bs, 1, d])
    elif self.attention_type == 'tanh':
        K = tanh(K)
        Q = tanh(Q)
        check(K, [bs, sl, d])
        check(Q, [bs, 1, d])
    elif self.attention_type == 'favor':
        m = self.arg
        # Omega: generate a random projection matrix
        Omega = orthogonal_random_matrix_(d, m, x.device)
        check(Omega, [d, m])
        K = prime(K, Omega)
        Q = prime(Q, Omega)
        check(K, [bs, sl, m * 2])
        check(Q, [bs, 1, m * 2])
    elif self.attention_type == 'dpfp':
        check(K, [bs, sl, d])
        check(Q, [bs, 1, d])
        nu = self.arg
        r = lambda x: relu(x)  # relu or exp

        def dpfp(x, nu):
            x = cat([r(x), r(-x)], dim=-1)
            x_rolled = cat([x.roll(shifts=j, dims=-1)
                            for j in range(1, nu + 1)], dim=-1)
            x_repeat = cat([x] * nu, dim=-1)
            return x_repeat * x_rolled

        K = dpfp(K, nu)
        Q = dpfp(Q, nu)
        check(K, [bs, sl, d * 2 * nu])
        check(Q, [bs, 1, d * 2 * nu])
    else:
        raise Exception(
            f"attention not implemented for \"{self.attention_type}\"")

    # ===== Update Rules =====
    p = Q.shape[-1]
    check(V, [bs, sl, self.n_values])
    check(K, [bs, sl, p])
    check(Q, [bs, 1, p])
    check(betas, [bs, sl, 1])

    if self.update_rule == "sum":
        # sum the outer products of every v and k
        VK = einsum("blv,blk->bvk", V, K)
        # sum the keys to normalise
        Z = K.sum(dim=1)
    elif self.update_rule == "fwm":
        # fast weight memory update rule as done by Schlag et al. (2021)
        betas = torch.sigmoid(betas)
        check(betas, [bs, sl, 1])
        # the first update has no old part
        v = V[:, 0, :]
        k = K[:, 0, :]
        beta = betas[:, 0, :]
        W = einsum("bv,bk->bvk", v, k * beta)
        for i in range(1, sl):
            v = V[:, i, :]
            k = K[:, i, :]
            beta = betas[:, i, :]
            old_v = einsum("bvk,bk->bv", W, k)
            W = W - einsum("bv,bk->bvk", old_v, k)
            new_v = beta * v + (1. - beta) * old_v
            W = W + einsum("bv,bk->bvk", new_v, k)
            # keep the norm of W bounded
            scale = relu(W.view(bs, -1).norm(dim=-1) - 1) + 1
            W = W / scale.reshape(bs, 1, 1)
        VK = W
    elif self.update_rule == "ours":
        betas = torch.sigmoid(betas)
        check(betas, [bs, sl, 1])
        W = torch.zeros(bs, self.n_values, p).to(K.device)
        for i in range(0, sl):
            v = V[:, i, :]
            k = K[:, i, :]
            beta = betas[:, i, :]
            # normaliser; assumes a non-negative feature map
            n = k.sum(dim=-1, keepdim=True)
            # slow implementation
            v_bar = einsum("bdp,bp->bd", W, k / n)
            W = W - einsum("bd,bp->bdp", v_bar, k / n)
            new_v = beta * v + (1. - beta) * v_bar
            W = W + einsum("bd,bp->bdp", new_v, k / n)
        VK = W
    else:
        raise NotImplementedError("Invalid update_rule: ", self.update_rule)
    check(VK, [bs, self.n_values, p])

    # ===== Inference / Query Memory =====
    if self.update_rule == "sum":
        check(Z, [bs, p])
        new_V = einsum("bvp,blp->blv", VK, Q) / (
            einsum("bp,blp->bl", Z, Q).unsqueeze(-1) + 1e-6)
    elif self.update_rule == "fwm":
        new_V = einsum("bvp,blp->blv", VK, Q)
        new_V = layer_norm(new_V, [self.n_values], weight=None, bias=None)
    elif self.update_rule == "ours":
        n = torch.sum(Q, dim=-1, keepdim=True) + 1e-6
        new_V = einsum("bvp,blp->blv", VK, Q / n)
    check(new_V, [bs, 1, self.n_values])
    y_hat = new_V.squeeze(1)
    check(y_hat, [bs, self.n_values])
    return y_hat
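# The "sum" update rule above is plain linear attention written as an
# outer-product fast weight memory: VK = sum_l v_l k_l^T and Z = sum_l k_l,
# so querying gives (VK q) / (Z . q), the usual linear-attention
# normalisation. A self-contained sanity check of that identity (all names
# below are local to this sketch):
import torch
from torch import einsum

bs, sl, dv, dk = 2, 5, 3, 4
V = torch.rand(bs, sl, dv)
K = torch.rand(bs, sl, dk) + 0.1   # positive features, as after elu(.) + 1
Q = torch.rand(bs, 1, dk) + 0.1

# fast-weight form
VK = einsum("blv,blk->bvk", V, K)
Z = K.sum(dim=1)
out_fw = einsum("bvp,blp->blv", VK, Q) / (
    einsum("bp,blp->bl", Z, Q).unsqueeze(-1) + 1e-6)

# attention form: weights proportional to <k_l, q>, normalised over positions
A = einsum("blk,bnk->bln", K, Q)
out_attn = einsum("bln,blv->bnv", A / (A.sum(dim=1, keepdim=True) + 1e-6), V)

torch.testing.assert_close(out_fw, out_attn)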
def work():
    regularExpression = data.regularExpression

    # insert the explicit concatenation operator '·' where needed
    lenRE = len(regularExpression)
    temp = "("
    for id in range(lenRE - 1):
        temp += regularExpression[id]
        if lib.check(regularExpression[id], regularExpression[id + 1]) == 1:
            temp += '·'
    temp += regularExpression[lenRE - 1] + ")"
    regularExpression = temp

    # convert to reverse Polish notation (shunting-yard)
    stk = []
    inversePolishexpression = ""
    for x in regularExpression:
        if lib.checkLetter(x) == 1:
            inversePolishexpression += x
            continue
        if x == '(':
            stk.append(x)
            continue
        if x == ')':
            while True:
                if stk[-1] == '(':
                    stk.pop()
                    break
                inversePolishexpression += stk[-1]
                stk.pop()
            continue
        while True:
            if stk[-1] == '(':
                stk.append(x)
                break
            if lib.comparePriority(stk[-1], x) == 1:
                inversePolishexpression += stk[-1]
                stk.pop()
            else:
                stk.append(x)
                break
    print("inversePolishexpression is %s" % inversePolishexpression)
    data.inversePolishexpression = inversePolishexpression

    # build the NFA (Thompson-style construction; ' ' marks an epsilon edge)
    id = 1
    stk = []
    for x in inversePolishexpression:
        if lib.checkLetter(x) == 1:
            # single letter: two states joined by one labelled edge
            stk.append(lib.NFA())
            stk[-1].startID = id
            stk[-1].ID.append(id)
            id += 1
            stk[-1].acceptID = id
            stk[-1].ID.append(id)
            stk[-1].side[id - 1] = []
            stk[-1].side[id] = []
            stk[-1].side[id - 1].append([id, x])
            id += 1
        elif x == '·':
            # concatenation
            object2 = stk.pop()
            object1 = stk.pop()
            for sid in object2.ID:
                object1.ID.append(sid)
            for sid in object2.side:
                object1.side[sid] = object2.side[sid]
            object1.side[id] = []
            object1.side[id].append([object1.startID, ' '])
            object1.side[object1.acceptID].append([object2.startID, ' '])
            object1.startID = id
            object1.ID.append(id)
            id += 1
            object1.side[id] = []
            object1.side[object2.acceptID].append([id, ' '])
            object1.acceptID = id
            object1.ID.append(id)
            id += 1
            stk.append(object1)
            del object2
        elif x == '|':
            # union
            object2 = stk.pop()
            object1 = stk.pop()
            for sid in object2.ID:
                object1.ID.append(sid)
            for sid in object2.side:
                object1.side[sid] = object2.side[sid]
            object1.side[id] = []
            object1.side[id].append([object1.startID, ' '])
            object1.side[id].append([object2.startID, ' '])
            object1.startID = id
            object1.ID.append(id)
            id += 1
            object1.side[id] = []
            object1.side[object1.acceptID].append([id, ' '])
            object1.side[object2.acceptID].append([id, ' '])
            object1.acceptID = id
            object1.ID.append(id)
            id += 1
            stk.append(object1)
            del object2
        else:
            # Kleene star
            object1 = stk.pop()
            object1.side[object1.acceptID].append([object1.startID, ' '])
            object1.side[id] = []
            object1.side[id].append([object1.startID, ' '])
            object1.startID = id
            object1.ID.append(id)
            id += 1
            object1.side[id] = []
            object1.side[object1.acceptID].append([id, ' '])
            object1.acceptID = id
            object1.ID.append(id)
            id += 1
            object1.side[object1.startID].append([object1.acceptID, ' '])
            stk.append(object1)
    NFA = stk.pop()
    data.NFA = NFA
    temp = {}
    temp[NFA.acceptID] = 1
    lib.generateGraph("NFA", NFA.ID, NFA.startID, temp, NFA.side)

    # subset construction: NFA -> DFA
    visited = {}
    DFA = lib.DFA()
    array = []
    queue = []
    top = 0
    end = 0

    def findPoint(id):
        # collect the epsilon-closure of a state into `array`
        if array.count(id):
            return
        array.append(id)
        for x in NFA.side[id]:
            if x[1] == ' ':
                findPoint(x[0])

    findPoint(NFA.startID)
    array.sort()
    for i in range(len(array)):
        array[i] = str(array[i])
    string = ",".join(array)
    visited[string] = 1
    queue.append(string)
    end += 1
    DFA.ID.append(string)
    DFA.startID = string
    DFA.side[string] = []
    while top < end:
        temp = queue[top].split(",")
        for i in range(len(temp)):
            temp[i] = int(temp[i])
        for x in range(26):
            # move on letter chr(x + ord('a')), then close under epsilon
            array = []
            for y in temp:
                for z in NFA.side[y]:
                    if z[1] == chr(x + ord('a')):
                        findPoint(z[0])
            array.sort()
            for i in range(len(array)):
                array[i] = str(array[i])
            string = ",".join(array)
            if len(string) == 0:
                continue
            if visited.get(string, 0):
                DFA.side[queue[top]].append([string, chr(x + ord('a'))])
                continue
            visited[string] = 1
            DFA.ID.append(string)
            DFA.side[string] = []
            DFA.side[queue[top]].append([string, chr(x + ord('a'))])
            queue.append(string)
            end += 1
        top += 1
    for x in DFA.ID:
        array = x.split(",")
        for i in range(len(array)):
            array[i] = int(array[i])
        if array.count(NFA.acceptID):
            DFA.acceptID[x] = 1
    print(NFA.acceptID)
    print(visited)
    print(DFA.side)
    print(DFA.acceptID)
    print(len(DFA.ID))
    data.DFA = DFA
    lib.generateGraph("DFA", DFA.ID, DFA.startID, DFA.acceptID, DFA.side)

    # minimize the DFA by partition refinement
    queue = []
    for i in range(len(DFA.ID)):
        queue.append([])
    # move accepting states behind the non-accepting ones
    for i in range(len(DFA.ID)):
        for j in range(i + 1, len(DFA.ID)):
            if DFA.acceptID.get(DFA.ID[i], 0):
                DFA.ID[i], DFA.ID[j] = DFA.ID[j], DFA.ID[i]
    # `array` holds the (sorted) end index of every partition block
    array = []
    mark = {}
    if len(DFA.ID) - len(DFA.acceptID):
        mark[len(DFA.ID) - len(DFA.acceptID)] = 1
        array.append(len(DFA.ID) - len(DFA.acceptID))
    array.append(len(DFA.ID))
    mark[len(DFA.ID)] = 1
    array.sort()
    for x in queue:
        for i in range(26):
            x.append(-1)
    # has: state -> index of the partition block it currently belongs to
    has = {}
    id = 0
    for i in range(len(DFA.ID)):
        if i >= array[id]:
            id += 1
        has[DFA.ID[i]] = id
    print(has)
    while True:
        # transition signature of every state: target block per letter
        for i in range(len(DFA.ID)):
            for y in DFA.side[DFA.ID[i]]:
                queue[i][ord(y[1]) - ord('a')] = has[y[0]]
        # sort the states of every block by signature so equal ones are adjacent
        pre = 0
        for x in array:
            for i in range(pre, x):
                for j in range(i + 1, x):
                    flag = 0
                    for r in range(26):
                        if queue[i][r] != queue[j][r]:
                            # lexicographic comparison (the original only
                            # tested '>', which is not a valid ordering)
                            flag = 1 if queue[i][r] > queue[j][r] else 0
                            break
                    if flag:
                        queue[i], queue[j] = queue[j], queue[i]
                        DFA.ID[i], DFA.ID[j] = DFA.ID[j], DFA.ID[i]
            pre = x
        # split blocks wherever adjacent signatures differ
        flag = 0
        for i in range(1, len(queue)):
            if queue[i - 1] != queue[i]:
                if mark.get(i, 0) == 0:
                    mark[i] = 1
                    array.append(i)
                    flag = 1
        array.sort()
        id = 0
        for i in range(len(DFA.ID)):
            if i >= array[id]:
                id += 1
            has[DFA.ID[i]] = id
        if flag == 0:
            break
        print(has)
    data.queue = queue
    print(queue)
    # rebuild the DFA over the partition blocks
    DFA.startID = has[DFA.startID]
    temp = DFA.acceptID
    DFA.acceptID = {}
    for x in temp:
        DFA.acceptID[has[x]] = 1
    mark = {}
    temp = []
    for i in range(len(DFA.ID)):
        if mark.get(has[DFA.ID[i]], 0):
            continue
        mark[has[DFA.ID[i]]] = 1
        temp.append(queue[i])
    DFA.side = {}
    print(temp)
    for i in range(len(array)):
        DFA.side[i] = []
        for j in range(26):
            if temp[i][j] == -1:
                continue
            DFA.side[i].append([temp[i][j], chr(j + ord('a'))])
    DFA.ID = []
    for i in range(len(array)):
        DFA.ID.append(i)
    print('---------------')
    print(DFA.ID)
    print(DFA.acceptID)
    print(DFA.startID)
    print(DFA.side)
    data.minDFA = DFA
    lib.generateGraph("minDFA", DFA.ID, DFA.startID, DFA.acceptID, DFA.side)
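# A minimal sketch of how work() might be driven; `data` and `lib` are the
# module-level objects the function already assumes. The pattern below is
# only an example input: lib.check() is assumed to decide whether an explicit
# concatenation dot belongs between two adjacent symbols, and
# lib.checkLetter() whether a symbol is an input letter.
data.regularExpression = "(a|b)*abb"   # hypothetical example input
work()
# Afterwards data.NFA, data.DFA and data.minDFA hold the three automata, and
# lib.generateGraph() has rendered "NFA", "DFA" and "minDFA".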
import time

import numpy as np
import torch


def train(dataloader, model, steps, lr, device, batch_size, log_every,
          test_every, test_sequences, stop_criterion, log_folder):
    model.to(device)
    optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

    sum_duration = 0
    count = 0
    best_loss = np.inf
    strike = 0
    max_strike = 10

    file_name = model.get_name() + "_" + dataloader.get_name() + ".csv"
    csv_writer = CsvWriter(column_names=["step", "eval-loss"],
                           path=log_folder,
                           file_name=file_name)
    print("Logging to ... ", csv_writer.csv_file)

    seq_len = dataloader.seq_len
    for step in range(1, steps + 1):
        model.train()

        # get batch
        batch_x, batch_q, batch_y = dataloader.get_batch(batch_size, device)
        check(batch_x, [batch_size, 2, seq_len])

        # forward pass
        start_time = time.time()
        batch_y_hat = model(batch_x, batch_q)
        check(batch_y_hat, [batch_size, model.n_values])
        check(batch_y, [batch_size, 1])

        # get target vectors
        with torch.no_grad():
            # due to the somewhat awkward constraint of not training the
            # value embeddings, the values have their own untrained
            # embedding, which requires us to properly offset the indices.
            batch_y = model.embeddingV(batch_y - model.n_values).squeeze(1)
            check(batch_y, [batch_size, model.n_values])

        # reconstruction loss
        loss = 0.5 * (batch_y - batch_y_hat).pow(2)
        loss = loss.sum(dim=-1).mean()
        nan_checker(loss)

        # gradient descent step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        sum_duration += time.time() - start_time
        count += 1

        # terminal log
        if step % log_every == 0:
            print("step {:5}: loss={:.4f}".format(step, loss))

        # evaluation
        if step % test_every == 0:
            losses = []
            test_seconds = []
            test_count = 0
            for _ in range(test_sequences):
                model.eval()

                # get eval batches; here the batch size is in fact the
                # number of keys due to the get_all_queries call
                full_batch_x, full_batch_q, full_batch_y = \
                    dataloader.get_all_queries(device)

                # split the large eval batch into batches no larger than the
                # training batch_size
                full_bs = full_batch_x.shape[0]
                n_splits = full_bs // batch_size
                rest = full_bs % batch_size
                if rest > 0:
                    splits = [batch_size] * n_splits + [rest]
                else:
                    splits = [batch_size] * n_splits
                batches_x = torch.split(full_batch_x, splits, dim=0)
                batches_q = torch.split(full_batch_q, splits, dim=0)
                batches_y = torch.split(full_batch_y, splits, dim=0)

                for i in range(len(splits)):
                    batch_x = batches_x[i]
                    batch_q = batches_q[i]
                    batch_y = batches_y[i]
                    bs = batch_x.shape[0]

                    # eval forward pass
                    test_start_time = time.time()
                    batch_y_hat = model(batch_x, batch_q)
                    test_seconds.append(time.time() - test_start_time)
                    test_count += 1
                    check(batch_y_hat, [bs, model.n_values])
                    check(batch_y, [bs, 1])

                    batch_y = model.embeddingV(
                        batch_y - model.n_values).squeeze(1)
                    check(batch_y, [bs, model.n_values])

                    loss = 0.5 * (batch_y - batch_y_hat).pow(2)
                    loss = loss.sum(dim=-1).mean()
                    losses.append(loss.cpu().detach().numpy())

            loss_mean = np.mean(losses)
            if loss_mean < best_loss:
                best_loss = loss_mean
                strike = 0
            else:
                strike = strike + 1

            print("train batches/s={:.1f} test batches/s={:.1f}"
                  .format(count / sum_duration,
                          test_count / np.sum(test_seconds)))
            if device == "cuda":
                # megabytes (mega = 1000**2)
                print("peak memory allocation={:.1f} * 1000^2 bytes "
                      "(megabytes)".format(
                          torch.cuda.max_memory_allocated(0) / 1000 ** 2))
            print("test loss={:.4f} \n".format(loss_mean))
            sum_duration = count = 0
            csv_writer.write((step, loss_mean))

            # stop training if the criterion is met or there was no progress
            if loss_mean <= stop_criterion or strike > max_strike:
                break
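# A minimal sketch of a train() invocation; the model and dataloader classes
# are hypothetical -- any objects exposing the attributes used above
# (get_batch, get_all_queries, get_name, embeddingV, n_values, seq_len) fit.
model = AttentionModel()            # hypothetical model class
dataloader = KeyValueDataloader()   # hypothetical dataloader class
train(dataloader, model,
      steps=10_000, lr=1e-3,
      device="cuda" if torch.cuda.is_available() else "cpu",
      batch_size=32, log_every=100, test_every=500,
      test_sequences=10, stop_criterion=1e-3, log_folder="logs")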