Example #1
def turn(field):
    col = random.randint(0, 2)
    row = random.randint(0, 2)
    while lib.check(field, col, row) > 0:
        col = random.randint(0, 2)
        row = random.randint(0, 2)
    return col, row
Example #2
def prime(x, proj_matrix):
    # x: shape (B, len, dim)
    # proj_matrix: shape (dim, proj_dim)
    _, m = proj_matrix.shape

    # log-space normalisation offset: ||x||^2 / 2 + log(sqrt(m)) + log(sqrt(2))
    norm_x_squared = torch.norm(x, dim=-1).pow(2) * 0.5
    log_sqrt_m = 0.5 * np.log(m)
    log_sqrt_2 = 0.5 * np.log(2)
    offset = norm_x_squared + log_sqrt_m + log_sqrt_2
    offset = offset.unsqueeze(-1)
    check(offset, [x.shape[0], x.shape[1], 1])

    u = torch.matmul(x, proj_matrix)
    pos = torch.exp(u - offset)
    neg = torch.exp(-u - offset)

    # last dim is feat.
    out = torch.cat([pos, neg], dim=-1)
    return out
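A minimal usage sketch for prime above, on random tensors; the plain Gaussian projection matrix and the local check helper are illustrative stand-ins (assumptions), not the original project's orthogonal_random_matrix_ and check.
import numpy as np
import torch

def check(t, shape):
    # illustrative stand-in for the project's shape assertion
    assert list(t.shape) == shape, (t.shape, shape)

B, L, dim, proj_dim = 2, 5, 16, 8
x = torch.randn(B, L, dim)
proj_matrix = torch.randn(dim, proj_dim) / np.sqrt(dim)  # assumed i.i.d. Gaussian features

feats = prime(x, proj_matrix)
check(feats, [B, L, 2 * proj_dim])   # positive and negative halves are concatenated
print(bool((feats > 0).all()))       # exp() keeps every feature strictly positive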
Example #3
def sensitive_words_check():
    input = request.form.get('q')
    if not input:
        return jsonify(result='Please enter something!', keywords='empty')
    mode = request.form.get('m', 'default')
    if mode == 'custom':
        ref = 'static/sensitive_words.txt'
    else:
        ref = 'static/default.txt'
    result, reg_keywords, string_keywords = check(input, ref)
    return jsonify(result='\n'.join(result), reg_keywords='|'.join(reg_keywords), string_keywords=string_keywords)
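A self-contained sketch of how this view might be wired up and exercised with Flask's test client; the /check route, the stub check matcher, and the word-list paths are assumptions for illustration, not the original project's code.
from flask import Flask, request, jsonify

app = Flask(__name__)

def check(text, ref):
    # stand-in matcher: pretend nothing sensitive was found
    return ['no sensitive words found'], [], []

@app.route('/check', methods=['POST'])
def sensitive_words_check():
    q = request.form.get('q')
    if not q:
        return jsonify(result='Please enter something!', keywords='empty')
    mode = request.form.get('m', 'default')
    ref = 'static/sensitive_words.txt' if mode == 'custom' else 'static/default.txt'
    result, reg_keywords, string_keywords = check(q, ref)
    return jsonify(result='\n'.join(result),
                   reg_keywords='|'.join(reg_keywords),
                   string_keywords=string_keywords)

with app.test_client() as client:
    resp = client.post('/check', data={'q': 'hello world', 'm': 'default'})
    print(resp.get_json())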
Example #4
    def forward(self, x, q):
        key_indecies = x[:, 0, :]
        value_indecies = x[:, 1, :]
        bs = value_indecies.shape[0]
        sl = value_indecies.shape[1]
        d, e = self.d, self.e
        check(value_indecies, [bs, sl])
        check(key_indecies, [bs, sl])
        check(q, [bs, 1])

        # embed words and keys and concatenate them
        with torch.no_grad():
            v_emb = self.embeddingV(value_indecies - self.n_values)
            check(v_emb, [bs, sl, self.n_values])

        k_emb = self.embeddingK(key_indecies)
        check(k_emb, [bs, sl, e])

        x = torch.cat([v_emb, k_emb], dim=-1)
        check(x, [bs, sl, e + self.n_values])

        # embed the query
        q = self.embeddingK(q)
        check(q, [bs, 1, e])

        # compute k,v for each pos in parallel
        K = self.W_k(x)
        V = v_emb
        check(K, [bs, sl, d])
        check(V, [bs, sl, self.n_values])

        # compute Q from q
        Q = self.W_q(q)
        check(Q, [bs, 1, d])

        # compute attention coefs
        A = einsum("bli,bni->bln", K, Q) / np.sqrt(d)
        check(A, [bs, sl, 1])

        # softmax and weighted values
        A = F.softmax(A, dim=1)  # normalise over keys
        y_hat = einsum("bln,bld->bd", A, V)  # sum weighted values
        check(y_hat, [bs, self.n_values])

        return y_hat
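For reference, a self-contained sketch of the read-out above: scaled dot-product attention with a single query over the key/value memory, run on random stand-in tensors.
import numpy as np
import torch
import torch.nn.functional as F
from torch import einsum

bs, sl, d, n_values = 2, 7, 16, 10
K = torch.randn(bs, sl, d)          # one key per position
V = torch.randn(bs, sl, n_values)   # one value per position
Q = torch.randn(bs, 1, d)           # a single query

A = einsum("bli,bni->bln", K, Q) / np.sqrt(d)   # [bs, sl, 1] attention logits
A = F.softmax(A, dim=1)                         # normalise over the sl keys
# note: the letter d below indexes the value dimension (n_values), not d=16
y_hat = einsum("bln,bld->bd", A, V)             # weighted sum of values -> [bs, n_values]

assert y_hat.shape == (bs, n_values)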
Example #5
    def forward(self, x, q):
        key_indecies = x[:, 0, :]
        value_indecies = x[:, 1, :]
        bs, sl = value_indecies.shape
        d, e = self.d, self.e
        check(value_indecies, [bs, sl])
        check(key_indecies, [bs, sl])
        check(q, [bs, 1])

        # embed words and keys and concatenate them
        with torch.no_grad():
            v_emb = self.embeddingV(value_indecies - self.n_values)
            check(v_emb, [bs, sl, self.n_values])

        k_emb = self.embeddingK(key_indecies)
        check(k_emb, [bs, sl, e])

        x = torch.cat([v_emb, k_emb], dim=-1)
        check(x, [bs, sl, e + self.n_values])

        # embed the query
        q = self.embeddingK(q)
        check(q, [bs, 1, e])

        # compute k,v for each pos in parallel
        K = self.W_k(x)
        V = v_emb
        check(K, [bs, sl, d])
        check(V, [bs, sl, self.n_values])

        # compute optional variables
        betas = self.W_b(x)
        F = self.W_f(x)
        check(betas, [bs, sl, 1])
        check(F, [bs, sl, d])

        # compute Q from q
        Q = self.W_q(q)
        check(Q, [bs, 1, d])

        # ===== Linear Attentions =====
        if self.attention_type == 'linear':
            K = elu(K, alpha=1.) + 1.
            Q = elu(Q, alpha=1.) + 1.
            check(K, [bs, sl, d])
            check(Q, [bs, 1, d])

        elif self.attention_type == 'tanh':
            K = tanh(K)
            Q = tanh(Q)
            check(K, [bs, sl, d])
            check(Q, [bs, 1, d])

        elif self.attention_type == 'favor':
            m = self.arg
            # Omega: generate random projection matrix
            Omega = orthogonal_random_matrix_(d, m, x.device)
            check(Omega, [d, m])

            K = prime(K, Omega)
            Q = prime(Q, Omega)
            check(K, [bs, sl, m * 2])
            check(Q, [bs, 1, m * 2])

        elif self.attention_type == 'dpfp':
            check(K, [bs, sl, d])
            check(Q, [bs, 1, d])
            nu = self.arg
            r = lambda x: relu(x)  # relu or exp

            def dpfp(x, nu):
                x = cat([r(x), r(-x)], dim=-1)
                x_rolled = cat(
                    [x.roll(shifts=j, dims=-1) for j in range(1, nu + 1)],
                    dim=-1)
                x_repeat = cat([x] * nu, dim=-1)
                return x_repeat * x_rolled

            K = dpfp(K, nu)
            Q = dpfp(Q, nu)

            check(K, [bs, sl, d * 2 * nu])
            check(Q, [bs, 1, d * 2 * nu])
        else:
            raise Exception(
                f"attention not implemented for \"{self.attention_type}\"")

        # ===== Update Rules =====
        p = Q.shape[-1]
        check(V, [bs, sl, self.n_values])
        check(K, [bs, sl, p])
        check(Q, [bs, 1, p])
        check(betas, [bs, sl, 1])

        if self.update_rule == "sum":
            # sum outerproducts of every v and k
            VK = einsum("blv,blk->bvk", V, K)

            # sum keys to normalise
            Z = K.sum(dim=1)

        elif self.update_rule == "fwm":
            # fast weight memory update rule as done by Schlag et. al. (2021)
            betas = torch.sigmoid(betas)
            check(betas, [bs, sl, 1])

            # first update has no old part
            v = V[:, 0, :]
            k = K[:, 0, :]
            beta = betas[:, 0, :]
            W = einsum("bv,bk->bvk", v, k * beta)

            for i in range(1, sl):
                v = V[:, i, :]
                k = K[:, i, :]
                beta = betas[:, i, :]

                old_v = einsum("bvk,bk->bv", W, k)
                W = W - einsum("bv,bk->bvk", old_v, k)
                new_v = beta * v + (1. - beta) * old_v
                W = W + einsum("bv,bk->bvk", new_v, k)

                scale = relu(W.view(bs, -1).norm(dim=-1) - 1) + 1
                W = W / scale.reshape(bs, 1, 1)
            VK = W

        elif self.update_rule == "ours":
            betas = torch.sigmoid(betas)
            check(betas, [bs, sl, 1])

            W = torch.zeros(bs, self.n_values, p).to(K.device)

            for i in range(0, sl):
                v = V[:, i, :]
                k = K[:, i, :]
                beta = betas[:, i, :]
                n = k.sum(dim=-1, keepdim=True)

                # slow implementation
                v_bar = einsum("bdp,bp->bd", W, k / n)
                W = W - einsum("bd,bp->bdp", v_bar, k / n)

                new_v = beta * v + (1. - beta) * v_bar
                W = W + einsum("bd,bp->bdp", new_v, k / n)
            VK = W

        else:
            raise NotImplementedError(
                f"Invalid update_rule: {self.update_rule}")
        check(VK, [bs, self.n_values, p])

        # ===== Inference / Query Memory =====
        if self.update_rule == "sum":
            check(Z, [bs, p])
            new_V = einsum("bvp,blp->blv", VK, Q) / (
                einsum("bp,blp->bl", Z, Q).unsqueeze(-1) + 1e-6)

        elif self.update_rule == "fwm":
            new_V = einsum("bvp,blp->blv", VK, Q)
            new_V = layer_norm(new_V, [
                self.n_values,
            ],
                               weight=None,
                               bias=None)

        elif self.update_rule == "ours":
            n = torch.sum(Q, dim=-1, keepdim=True) + 1e-6
            new_V = einsum("bvp,blp->blv", VK, Q / n)

        check(new_V, [bs, 1, self.n_values])
        y_hat = new_V.squeeze(1)
        check(y_hat, [bs, self.n_values])

        return y_hat
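A standalone sketch of the DPFP feature map from the 'dpfp' branch above, applied to random stand-in tensors: it concatenates relu(x) and relu(-x), then multiplies element-wise with nu rolled copies, so every feature is non-negative and keys can later be normalised by their sum.
import torch
from torch import cat
from torch.nn.functional import relu

def dpfp(x, nu):
    x = cat([relu(x), relu(-x)], dim=-1)
    x_rolled = cat([x.roll(shifts=j, dims=-1) for j in range(1, nu + 1)], dim=-1)
    x_repeat = cat([x] * nu, dim=-1)
    return x_repeat * x_rolled

bs, sl, d, nu = 2, 5, 8, 3
K = torch.randn(bs, sl, d)
phi = dpfp(K, nu)
assert phi.shape == (bs, sl, 2 * d * nu)
assert (phi >= 0).all()   # non-negative features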
Example #6
def work():

    regularExpression = data.regularExpression

    # insert the explicit concatenation operator '·'
    lenRE = len(regularExpression)
    temp = "("
    for id in range(lenRE - 1):
        temp += regularExpression[id]
        if (lib.check(regularExpression[id], regularExpression[id + 1]) == 1):
            temp += '·'
    temp += regularExpression[lenRE - 1] + ")"
    regularExpression = temp
    stk = []
    inversePolishexpression = ""

    # convert to a reverse Polish (postfix) expression
    for x in regularExpression:
        if (lib.checkLetter(x) == 1):
            inversePolishexpression += x
            continue
        if (x == '('):
            stk.append(x)
            continue
        if (x == ')'):
            while (1):
                if (stk[-1] == '('):
                    stk.pop()
                    break
                inversePolishexpression += stk[-1]
                stk.pop()
            continue
        while (1):
            if (stk[-1] == '('):
                stk.append(x)
                break
            if (lib.comparePriority(stk[-1], x) == 1):
                inversePolishexpression += stk[-1]

                stk.pop()
            else:
                stk.append(x)
                break
    print("inversePolishexpression is %s" % (inversePolishexpression))
    data.inversePolishexpression = inversePolishexpression

    # build the NFA
    id = 1
    stk = []
    for x in inversePolishexpression:
        if (lib.checkLetter(x) == 1):
            stk.append(lib.NFA())
            stk[-1].startID = id
            stk[-1].ID.append(id)
            id += 1
            stk[-1].acceptID = id
            stk[-1].ID.append(id)
            stk[-1].side[id - 1] = []
            stk[-1].side[id] = []
            stk[-1].side[id - 1].append([id, x])
            id += 1
        elif (x == '·'):
            object2 = stk.pop()
            object1 = stk.pop()
            for x in object2.ID:
                object1.ID.append(x)
            for x in object2.side:
                object1.side[x] = object2.side[x]
            object1.side[id] = []
            object1.side[id].append([object1.startID, ' '])
            object1.side[object1.acceptID].append([object2.startID, ' '])
            object1.startID = id
            object1.ID.append(id)
            id += 1
            object1.side[id] = []
            object1.side[object2.acceptID].append([id, ' '])
            object1.acceptID = id
            object1.ID.append(id)
            id += 1
            stk.append(object1)
            del object2
        elif (x == '|'):
            object2 = stk.pop()
            object1 = stk.pop()
            for x in object2.ID:
                object1.ID.append(x)
            for x in object2.side:
                object1.side[x] = object2.side[x]
            object1.side[id] = []
            object1.side[id].append([object1.startID, ' '])
            object1.side[id].append([object2.startID, ' '])
            object1.startID = id
            object1.ID.append(id)
            id += 1
            object1.side[id] = []
            object1.side[object1.acceptID].append([id, ' '])
            object1.side[object2.acceptID].append([id, ' '])
            object1.acceptID = id
            object1.ID.append(id)
            id += 1
            stk.append(object1)
            del object2
        else:
            object1 = stk.pop()
            object1.side[object1.acceptID].append([object1.startID, ' '])
            object1.side[id] = []
            object1.side[id].append([object1.startID, ' '])
            object1.startID = id
            object1.ID.append(id)
            id += 1
            object1.side[id] = []
            object1.side[object1.acceptID].append([id, ' '])
            object1.acceptID = id
            object1.ID.append(id)
            id += 1
            object1.side[object1.startID].append([object1.acceptID, ' '])
            stk.append(object1)
    NFA = stk[-1]
    stk.pop()
    data.NFA = NFA
    temp = {}
    temp[NFA.acceptID] = 1
    lib.generateGraph("NFA", NFA.ID, NFA.startID, temp, NFA.side)

    # build the DFA (subset construction)
    visble = {}
    DFA = lib.DFA()
    array = []
    queue = []
    top = 0
    end = 0

    def findPoint(id):
        if (array.count(id)): return
        array.append(id)
        for x in NFA.side[id]:
            if (x[1] == ' '):
                findPoint(x[0])

    findPoint(NFA.startID)
    array.sort()
    for i in range(0, array.__len__()):
        array[i] = str(array[i])
    string = ",".join(array)
    visble[string] = 1
    queue.append(string)
    end += 1
    DFA.ID.append(string)
    DFA.startID = string
    DFA.side[string] = []

    while (top < end):
        temp = queue[top].split(",")
        for i in range(0, temp.__len__()):
            temp[i] = int(temp[i])
        for x in range(0, 26):
            array = []
            for y in temp:
                for z in NFA.side[y]:
                    if (z[1] == chr(x + ord('a'))):
                        findPoint(z[0])
            array.sort()
            for i in range(0, array.__len__()):
                array[i] = str(array[i])
            string = ",".join(array)
            if (string.__len__() == 0): continue
            if (visble.get(string, 0)):
                DFA.side[queue[top]].append([string, chr(x + ord('a'))])
                continue
            visble[string] = 1
            DFA.ID.append(string)
            DFA.side[string] = []
            DFA.side[queue[top]].append([string, chr(x + ord('a'))])
            queue.append(string)
            end += 1
        top += 1

    for x in DFA.ID:
        array = x.split(",")
        for i in range(0, array.__len__()):
            array[i] = int(array[i])
        if (array.count(NFA.acceptID)): DFA.acceptID[x] = 1

    print(NFA.acceptID)
    print(visble)
    print(DFA.side)
    print(DFA.acceptID)
    print(DFA.ID.__len__())
    data.DFA = DFA
    lib.generateGraph("DFA", DFA.ID, DFA.startID, DFA.acceptID, DFA.side)

    # convert the DFA to a minimised DFA
    queue = []
    for i in range(0, DFA.ID.__len__()):
        queue.append([])
    for i in range(0, DFA.ID.__len__()):
        for j in range(i + 1, DFA.ID.__len__()):
            if (DFA.acceptID.get(DFA.ID[i], 0)):
                DFA.ID[i], DFA.ID[j] = DFA.ID[j], DFA.ID[i]
    array = []
    dict = {}

    if (DFA.ID.__len__() - DFA.acceptID.__len__()):
        dict[DFA.ID.__len__() - DFA.acceptID.__len__()] = 1
        array.append(DFA.ID.__len__() - DFA.acceptID.__len__())
    array.append(DFA.ID.__len__())
    dict[DFA.ID.__len__()] = 1
    array.sort()
    for x in queue:
        for i in range(0, 26):
            x.append(-1)
    has = {}
    id = 0
    for i in range(0, DFA.ID.__len__()):
        if (i >= array[id]): id += 1
        has[DFA.ID[i]] = id
    print(has)
    while (1):
        for i in range(0, DFA.ID.__len__()):
            for y in DFA.side[DFA.ID[i]]:
                queue[i][ord(y[1]) - ord('a')] = has[y[0]]
        pre = 0
        for x in array:
            for i in range(pre, x):
                for j in range(i + 1, x):
                    flag = 0
                    for r in range(0, 26):
                        if (queue[i][r] > queue[j][r]):
                            flag = 1
                            break
                    if (flag):
                        queue[i], queue[j] = queue[j], queue[i]
                        DFA.ID[i], DFA.ID[j] = DFA.ID[j], DFA.ID[i]
            pre = x
        flag = 0
        for i in range(1, queue.__len__()):
            if (queue[i - 1] != queue[i]):
                if (dict.get(i, 0) == 0):
                    dict[i] = 1
                    array.append(i)
                    flag = 1
        array.sort()
        id = 0
        for i in range(0, DFA.ID.__len__()):
            if (i >= array[id]): id += 1
            has[DFA.ID[i]] = id
        if (flag == 0): break
    print(has)
    data.queue = queue
    print(queue)

    DFA.startID = has[DFA.startID]
    temp = DFA.acceptID
    DFA.acceptID = {}
    for x in temp:
        DFA.acceptID[has[x]] = 1
    dict = {}
    temp = []
    for i in range(0, DFA.ID.__len__()):
        if (dict.get(has[DFA.ID[i]], 0)): continue
        dict[has[DFA.ID[i]]] = 1
        temp.append(queue[i])
    DFA.side = {}
    print(temp)

    for i in range(0, array.__len__()):
        DFA.side[i] = []
        for j in range(0, 26):
            if (temp[i][j] == -1): continue
            DFA.side[i].append([temp[i][j], chr(j + ord('a'))])

    DFA.ID = []
    for i in range(0, array.__len__()):
        DFA.ID.append(i)
    print('---------------')
    print(DFA.ID)
    print(DFA.acceptID)
    print(DFA.startID)
    print(DFA.side)
    data.minDFA = DFA
    lib.generateGraph("minDFA", DFA.ID, DFA.startID, DFA.acceptID, DFA.side)
Example #7
def train(dataloader, model, steps, lr, device, batch_size, log_every,
          test_every, test_sequences, stop_criterion, log_folder):
    model.to(device)
    optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
    sum_duration = 0
    count = 0
    best_loss = np.inf
    strike = 0
    max_strike = 10
    file_name = model.get_name() + "_" + dataloader.get_name() + ".csv"
    csv_writer = CsvWriter(column_names=["step", "eval-loss"],
                           path=log_folder,
                           file_name=file_name)
    print("Logging to ... ", csv_writer.csv_file)
    seq_len = dataloader.seq_len
    
    for step in range(1, steps+1):
        model.train()

        # get batch
        batch_x, batch_q, batch_y = dataloader.get_batch(batch_size, device)
        check(batch_x, [batch_size, 2, seq_len])

        # forward pass
        start_time = time.time()
        batch_y_hat = model(batch_x, batch_q)
        check(batch_y_hat, [batch_size, model.n_values])
        check(batch_y, [batch_size, 1])

        # get target vectors
        with torch.no_grad():
            # due to the somewhat awkward constraint of not training the value embeddings,
            # the values have their own untrained embedding, which requires us to offset the indices properly.
            batch_y = model.embeddingV(batch_y - model.n_values).squeeze(1)
        check(batch_y, [batch_size, model.n_values])

        # reconstruction loss
        loss = 0.5 * (batch_y - batch_y_hat).pow(2)
        loss = loss.sum(dim=-1).mean()
        nan_checker(loss)

        # gradient descent step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        sum_duration += time.time() - start_time
        count += 1

        # terminal log
        if step % log_every == 0 and step != 0:
            print("step {:5}: loss={:.4f}".format(step, loss))

        # evaluation
        if step % test_every == 0 and step != 0:
            losses = []
            test_seconds = []
            test_count = 0
            for _ in range(test_sequences):
                model.eval()

                # get eval batches
                full_batch_x, full_batch_q, full_batch_y = dataloader.get_all_queries(device)
                # here batch size is in fact the number of keys due to the get_all_queries call

                # split large eval batch into batches no larger than training batch_size
                full_bs = full_batch_x.shape[0]
                n_splits = full_bs // batch_size
                rest = full_bs % batch_size
                if rest > 0:
                    splits = [batch_size] * n_splits + [rest]
                else:
                    splits = [batch_size] * n_splits

                batches_x = torch.split(full_batch_x, splits, dim=0)
                batches_q = torch.split(full_batch_q, splits, dim=0)
                batches_y = torch.split(full_batch_y, splits, dim=0)

                for i in range(len(splits)):
                    batch_x = batches_x[i]
                    batch_q = batches_q[i]
                    batch_y = batches_y[i]
                    bs = batch_x.shape[0]

                    # eval forward pass
                    test_start_time = time.time()
                    batch_y_hat = model(batch_x, batch_q)
                    test_seconds.append(time.time() - test_start_time)
                    test_count += 1
                    check(batch_y_hat, [bs, model.n_values])
                    check(batch_y, [bs, 1])

                    batch_y = model.embeddingV(batch_y - model.n_values).squeeze(1)
                    check(batch_y, [bs, model.n_values])
                    loss = 0.5 * (batch_y - batch_y_hat).pow(2)
                    loss = loss.sum(dim=-1).mean()

                    losses.append(loss.cpu().detach().numpy())

            loss_mean = np.mean(losses)
            if loss_mean < best_loss:
                best_loss = loss_mean
                strike = 0
            else:
                strike = strike + 1

            print("train batches/s={:.1f}  test batches/s={:.1f}"
                  .format(count/sum_duration, test_count/np.sum(test_seconds)))
            if device == "cuda":
                print("peak memory allocation={:.1f} * 1000^2 bytes (megabytes)"
                      .format(torch.cuda.max_memory_allocated(0) / 1000**2))  # megabytes (mega = 1000**2)
            print("test loss={:.4f} \n".format(loss_mean))
            sum_duration = count = 0
            csv_writer.write((step, loss_mean))

            # stop training if criterion is met or there was no progress
            if loss_mean <= stop_criterion or strike > max_strike:
                break
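A small sketch of the evaluation-batch splitting used in the loop above, assuming only torch: a large evaluation batch is cut into chunks of at most batch_size rows with torch.split, and any remainder forms one final, smaller chunk.
import torch

batch_size = 32
full_batch_x = torch.randn(100, 2, 16)   # e.g. 100 evaluation rows

n_splits = full_batch_x.shape[0] // batch_size
rest = full_batch_x.shape[0] % batch_size
splits = [batch_size] * n_splits + ([rest] if rest > 0 else [])

batches_x = torch.split(full_batch_x, splits, dim=0)
print([b.shape[0] for b in batches_x])   # [32, 32, 32, 4]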