Example #1
    def index2sa(self, index):
        A = self.getA()
        S = self.getS()
        maxdepth = self.max_depth

        # Total number of non-empty words over A of length at most maxdepth
        len_A = cumlen(len(A), maxdepth)

        idx_S = index // len_A
        idx_A = index % len_A

        # Calculate length "bins" to see how many times we repeat A
        lens = [cumlen(len(A), i) for i in range(1, maxdepth + 1)]

        # Find the length bin that idx_A falls into: n_A is the word length
        n_A = 1
        for l in lens:
            if idx_A >= l:
                n_A += 1

        # Offset of idx_A within its own length bin
        idx_An = idx_A - ([0] + lens)[n_A - 1]

        a = index_product(idx_An, A, n_A)
        s = tuple(S[idx_S])

        return s + a
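
The method above depends on helpers that are not shown in these examples: cumlen, index_product, and (in Example #3 below) product_index. The following is a minimal sketch of what they presumably compute, inferred from how they are called here; the names match the examples, but the bodies are assumptions, not the library's actual code.

    def cumlen(base, n):
        # Assumed: the number of non-empty words over an alphabet of size
        # `base` with length at most n, i.e. base + base**2 + ... + base**n
        return sum(base ** i for i in range(1, n + 1))

    def index_product(idx, A, n):
        # Assumed: mixed-radix (base len(A)) decoding of idx into the
        # idx-th word of length n over A
        word = []
        for _ in range(n):
            idx, r = divmod(idx, len(A))
            word.append(A[r])
        return tuple(reversed(word))

    def product_index(a, A):
        # Assumed inverse of index_product: the position of word `a`
        # among all words of length len(a) over A
        idx = 0
        for symbol in a:
            idx = idx * len(A) + A.index(symbol)
        return idx

Any definitions with these input/output contracts make index2sa and sa2index inverses; the exact enumeration order of A^n does not matter as long as index_product and product_index agree with each other.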
Example #2
    def test_equivalence(self, test_sul: SUL) -> Tuple[bool, Iterable]:
        A = self.getA()
        S = self.getS()
        E = self.getE()

        n_cols = len(S) * cumlen(len(A), self.max_depth)
        n_rows = len(E)

        mat = lil_matrix((n_rows, n_cols))

        if hasattr(test_sul, 'cache'):
            # Fill mat with cached entries
            print("TODO: implement filling in cached entries")


        while mat.getnnz() < n_cols * n_rows:

            # Calculate row "interestingness"
            row_counts = mat.getnnz(axis=1) + 1
            row_smallestgroupsize = np.zeros(n_rows)
            for i in range(n_rows):
                # Size of the smallest group of identical responses in this row
                _, count = np.unique(mat[i, :].data[0], return_counts=True)
                row_smallestgroupsize[i] = np.min(count) if len(count) > 0 else 1

            # Note: the row_counts factors cancel, so this is 1 / row_smallestgroupsize
            row_interestingness = (row_counts / row_smallestgroupsize) / row_counts

            # Which row is the most interesting
            row_idx = np.argmax(row_interestingness)
            e = E[row_idx]
            print("Most interesting:", row_idx)

            # Find a random, unfilled spot in this row
            # TODO: find a better solution for this,
            # finding a spot with a lot of columns could take long
            col_idx = random.randint(0, n_cols - 1)
            while col_idx in mat[row_idx, :].rows[0]:
                col_idx = (col_idx + 1) % n_cols

            sa = self.index2sa(col_idx)

            print(row_idx, col_idx)

            print(mat.toarray())

            equivalent, counterexample, output = self._are_equivalent(test_sul, sa + e)
            if not equivalent:
                return equivalent, counterexample
            else:
                mat[row_idx, col_idx] = 1 if output else -1

        # Every cell was filled without finding a counterexample
        return True, None
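
The row access pattern above (mat[i, :].data[0] for the stored values, mat[row_idx, :].rows[0] for the occupied columns) relies on scipy's lil_matrix internals. A small standalone illustration with toy values, not taken from the algorithm:

    import numpy as np
    from scipy.sparse import lil_matrix

    mat = lil_matrix((2, 5))
    mat[0, 1] = 1
    mat[0, 3] = -1

    row = mat[0, :]        # a 1 x 5 lil_matrix slice
    print(row.rows[0])     # occupied column indices: [1, 3]
    print(row.data[0])     # stored values:           [1.0, -1.0]

    # np.unique over the stored values yields the response groups per row
    unique, count = np.unique(row.data[0], return_counts=True)
    print(unique, count)   # [-1.  1.] [1 1]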
Example #3
    def sa2index(self, s, a):
        A = self.getA()
        S = self.getS()
        maxdepth = self.max_depth

        s_idx = S.index(s)
        a_idx = product_index(a, A)

        a_n = len(a)

        # Offset of the length-a_n bin: the number of shorter non-empty words
        # over A, i.e. len(A) ** 1 + ... + len(A) ** (a_n - 1), matching the
        # cumulative bins used in index2sa
        lens = [len(A) ** i for i in range(1, maxdepth + 1)]
        cum_A = sum(lens[0:a_n - 1])

        return s_idx * cumlen(len(A), maxdepth) + a_idx + cum_A
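
Under the helper assumptions sketched after Example #1, sa2index and index2sa should be mutual inverses, with every (s, a) pair mapping to a distinct flat index. A quick hypothetical harness (the instance name `learner` and its populated S, A, and max_depth are assumptions):

    from itertools import product

    A = learner.getA()
    S = learner.getS()
    n_total = len(S) * cumlen(len(A), learner.max_depth)

    seen = set()
    for s in S:
        for n in range(1, learner.max_depth + 1):
            for a in product(A, repeat=n):
                index = learner.sa2index(s, a)
                assert learner.index2sa(index) == tuple(s) + a
                seen.add(index)

    # Every flat index in [0, n_total) is hit exactly once
    assert seen == set(range(n_total))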
Example #4
    def test_equivalence(self, test_sul: SUL) -> Tuple[bool, Iterable]:
        A = self.getA()
        S = self.getS()
        E = self.getE()

        n_cols = len(S) * cumlen(len(A), self.max_depth)
        n_rows = len(E)

        # Keep track of unique responses and their counts per row
        response_mem = [Counter() for _ in range(n_rows)]

        unique_responses = set()

        # Keep track of the total number of queries asked
        n_queries = 0

        if hasattr(test_sul, 'cache'):
            # Fill response_mem with cached entries
            print("TODO: implement filling in cached entries")

        # One format-preserving permutation of the column indices per row
        randomizers = [FormatPreserving(n_cols, os.urandom(128))
                       for _ in range(n_rows)]

        col_idxes = np.zeros(n_rows, dtype=int)

        tracking = set()

        while n_queries < n_cols * n_rows:
            n_uniq = len(unique_responses)

            # ---- Calculate row "interestingness" using information entropy
            interestingness = [0] * n_rows

            if n_uniq > 1:
                for row_idx, row_counter in enumerate(response_mem):
                    # If no responses seen yet, check out this row a bit more
                    if len(row_counter) == 0:
                        interestingness[row_idx] = 1
                    # We also need at least a few values to reduce the chance of getting stuck
                    elif sum(row_counter.values()) < 50:
                        interestingness[row_idx] = 1
                    # If a row is completely filled, it is not interesting anymore
                    elif sum(row_counter.values()) == n_cols:
                        interestingness[row_idx] = 0
                    # Else calculate the entropy
                    else:
                        row_counts = np.array(list(row_counter.values()))
                        row_entropy = entropy(row_counts / sum(row_counts), base=n_uniq)
                        interestingness[row_idx] = row_entropy if row_entropy > 0 else 1
            else:
                interestingness = [1] * n_rows

            print(response_mem)
            print(interestingness)

            row_idx = np.argmax(interestingness)

            # Pick a random spot from the interesting row
            col_idx = randomizers[row_idx].fpe(col_idxes[row_idx])
            col_idxes[row_idx] = (col_idxes[row_idx] + 1) % n_cols
            tracking.add(col_idx)

            sa = self.index2sa(col_idx)
            e = E[row_idx]

            equivalent, counterexample, output = self._are_equivalent(
                test_sul, sa + e)
            response_mem[row_idx][output] += 1
            n_queries += 1
            unique_responses.add(output)

            if not equivalent:
                return equivalent, counterexample

        # print(tracking)
        # print(set(range(n_cols)))

        return True, None
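
The entropy scoring above can be illustrated in isolation: a row whose responses are split evenly scores high (it still separates behaviors), while a row dominated by one response scores low. A toy example using the same scipy.stats.entropy and collections.Counter as the method; the counts are made up:

    from collections import Counter

    import numpy as np
    from scipy.stats import entropy

    rows = [
        Counter({True: 50, False: 50}),  # evenly split responses
        Counter({True: 98, False: 2}),   # almost all one response
    ]
    n_uniq = 2

    for row_counter in rows:
        row_counts = np.array(list(row_counter.values()))
        print(entropy(row_counts / sum(row_counts), base=n_uniq))
    # ~1.0 for the balanced row, ~0.14 for the skewed one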