Example #1
 def __init__(self, X={}, D={}, acsiom=None, P={}):
     # X: terminal symbols, D: nonterminal symbols,
     # acsiom: start symbol (axiom), P: production rules
     self.X = Set()
     self.D = Set()
     self.add_terminal_symbols(X)
     self.add_nonterminal_symbols(D)
     self.set_acsiom(acsiom)
     self.set_prod_rules(P)
Example #2
    def toNFA(self):
        """
        convert Grammar to NFA
            1. rule A -> aB convert to δ[Ā, a] = B̄
            2. rule A -> a  convert to δ[Ā, a] = qf
            3. S̄ ∈ F if there is a rule S -> ε
        """
        import NFA

        Q = Set(f"{A}̄" for A in self.N).union(Set("qf"))
        q0 = f"{self.S}̄"
        δ = NFA.δ(Q=Q, Σ=self.Σ)
        F = Set("S̄", "qf") if "ε" in self.P["S"] else Set("qf")

        M = NFA(Q, self.Σ, δ, q0, F)

        for A in self.N:
            for rule in self.P[A]:
                if len(rule) == 2:
                    a, B = rule
                    δ[f"{A}̄", a].add(f"{B}̄")

                elif len(rule) == 1 and rule != "ε":
                    a = rule
                    δ[f"{A}̄", a].add("qf")

        return M
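The same construction can be written with plain dicts and sets. A minimal standalone sketch (grammar_to_nfa and the sample grammar are illustrative, not part of the original API):

from collections import defaultdict

def grammar_to_nfa(P, S):
    # P maps a nonterminal to a set of right-hand sides ("aB", "a" or "ε")
    delta = defaultdict(set)          # (state, symbol) -> set of states
    final = {"qf"}
    if "ε" in P.get(S, set()):
        final.add(S)                  # the start state accepts iff S -> ε exists
    for A, rules in P.items():
        for rule in rules:
            if len(rule) == 2:        # A -> aB  becomes  δ[A, a] ∋ B
                a, B = rule
                delta[A, a].add(B)
            elif rule != "ε":         # A -> a   becomes  δ[A, a] ∋ qf
                delta[A, rule].add("qf")
    return delta, S, final

delta, q0, F = grammar_to_nfa({"S": {"aS", "bA"}, "A": {"b"}}, "S")
print(dict(delta))   # three transitions: S -a-> S, S -b-> A, A -b-> qf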
Example #3
    def remove_ε(self):
        import CFG
        import re

        Nε = self.Nε

        N = self.N
        S = self.S
        P = CFG.P()

        for A in self.N:
            P[A] = self.P[A] - Set("ε")
            # iterate over a snapshot: new options are added to P[A] inside
            for opt in list(P[A]):
                for subset in all_subsets(Nε):
                    new_opt = re.sub(f"[ε{''.join(subset)}]", "", str(opt))
                    if new_opt != "":
                        P[A].add(new_opt)

        for opt in self.P[self.S]:
            if all(X in Nε for X in opt):
                N |= Set("S'")
                S = "S'"
                P["S'"] = f"ε | {self.S}"
                break

        G = CFG(N, self.Σ, P, S)
        return G
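The regex substitution above removes every occurrence of the selected nullable symbols at once; the textbook construction enumerates deletions per occurrence. A small standalone helper for the per-occurrence version (expand_nullable is illustrative, not part of the original API):

from itertools import combinations

def expand_nullable(rule, nullable_syms):
    # all variants of rule with any subset of its nullable occurrences removed
    variants = set()
    positions = [i for i, c in enumerate(rule) if c in nullable_syms]
    for r in range(len(positions) + 1):
        for subset in combinations(positions, r):
            v = "".join(c for i, c in enumerate(rule) if i not in subset)
            if v:
                variants.add(v)
    return variants

print(expand_nullable("AbA", {"A"}))   # {'AbA', 'bA', 'Ab', 'b'}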
Example #4
    def __init__(self,
                 worker_num=10,
                 chunk_size=10000,
                 log_interval=600,
                 data_dir='data',
                 log_dir='log'):
        self.chunk_size = chunk_size
        self.log_interval = log_interval
        self.urls = Queue()
        self.results = Queue()
        self.url_cache = Set()
        self.name_cache = Set()
        self.black_urls = Set()
        self.black_cache = Dict()
        self.chunk_num = 0
        self.parser = HtmlParser(home='https://baike.baidu.com')

        self.last = 0
        self.state = 1

        if not os.path.exists(data_dir):
            os.mkdir(data_dir)
        if not os.path.exists(log_dir):
            os.mkdir(log_dir)
        self.data_dir = data_dir
        self.log_dir = log_dir

        self.writer = Thread(target=self._write)
        self.logger = Timer(log_interval, self._log)
        self.spiders = [Thread(target=self._scrap) for _ in range(worker_num)]
Example #5
    def _reduce(self, imax=None):
        from utils import roman
        import DFA

        def init(i):
            """
            split the states into two groups
            I  - non-accepting
            II - accepting
            """
            groups = {
                qi: roman(1) if qi not in self.F else roman(2)
                for qi in self.Q
            }

            δ = DFA.δ(Q=self.Q, Σ=self.Σ)
            for qi in (self.Q - self.F) + self.F:
                for a in self.Σ:
                    δ[qi, a] = groups.get(self.δ[qi, a])

            if imax is None or i < imax:
                return step(i + 1, groups, δ)
            return groups, δ

        def step(i, groups, δ):
            new_groups = {}
            force_move = 0
            numerated_patterns = []
            # g ranges over the current groups; it must not shadow the
            # recursion counter i
            for g in range(len(Set(groups.values()))):
                for qi, val in groups.items():
                    target = δ.group()[qi]
                    if val == roman(g + 1):
                        if target not in numerated_patterns:
                            numerated_patterns.append(target)
                        index = numerated_patterns.index(target)
                        new_groups[qi] = roman(index + 1 + force_move)
                force_move += len(numerated_patterns)
                numerated_patterns = []

            new_δ = DFA.δ()
            for qi in self.Q:
                for a in self.Σ:
                    new_δ[qi, a] = new_groups.get(self.δ[qi, a])

            # keep refining while the partition or the table is still changing
            if (imax is None or i < imax) and (groups != new_groups or δ != new_δ):
                return step(i + 1, new_groups, new_δ)
            return new_groups, new_δ

        groups, new_δ = init(0)

        Q = Set(groups.values())
        q0 = groups[self.q0]

        δ = DFA.δ()
        for (qi, a), target in new_δ.items():
            δ[groups[qi], a] = target

        F = Set(groups[qf] for qf in self.F)
        return DFA(Q, self.Σ, δ, q0, F)
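For reference, the same partition refinement can be sketched over plain dicts; minimize_groups and the four-state DFA below are illustrative, not the original API:

def minimize_groups(Q, Sigma, delta, F):
    # start from the accepting / non-accepting split, then refine until stable
    groups = {q: int(q in F) for q in Q}
    while True:
        # a state's signature: its own group plus the groups of its successors
        signature = {q: (groups[q],
                         tuple(groups.get(delta.get((q, a))) for a in sorted(Sigma)))
                     for q in Q}
        ids = {sig: n for n, sig in enumerate(sorted(set(signature.values()), key=repr))}
        new_groups = {q: ids[signature[q]] for q in Q}
        if len(set(new_groups.values())) == len(set(groups.values())):
            return new_groups     # partition stable: the groups are the new states
        groups = new_groups

delta = {(0, "a"): 1, (1, "a"): 2, (2, "a"): 3, (3, "a"): 2}
print(minimize_groups({0, 1, 2, 3}, {"a"}, delta, {2, 3}))   # merges states 2 and 3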
Example #6
def test_grammar():
    P = G.P({
        "S": "aA | bC | a | ε",
        "A": "bB | aA | b | c",
        "B": "aB | bC | aC | cA | c",
        "C": "a | b | aA | bB"
    })
    A = G(Set("S", "A", "C", "B"), Set("a", "b", "c"), P, "S")
    B = A.toNFA()
Example #7
    def _canonize(self):
        import DFA

        Q = Set(chr(ord('A') + i) for i in range(len(self.Q)))
        letterMapping = dict(zip(self.δ.reachables(self.q0), Q))

        δ = DFA.δ()
        for (qi, a), target in self.δ.items():
            δ[letterMapping[qi], a] = letterMapping[target]

        q0 = letterMapping[self.q0]
        F = Set(letterMapping[qf] for qf in self.F)

        return DFA(Q, self.Σ, δ, q0, F)
Example #8
    def toTopDownAnalyzer(self):
        import PDA
        L = self.remove_left_recursion()

        δ = PDA.δ()
        M = TopDownAnalyzer(Set("q"), L.Σ, L.N.union(L.Σ), δ, "q", L.S, Set())

        for A in L.N:
            for rule in L.P[A]:
                δ["q", "ε", A].add(("q", rule))

        for a in L.Σ:
            δ["q", a, a].add(("q", "ε"))

        return M
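The table built above, δ["q", "ε", A] = {("q", rule)} and δ["q", a, a] = {("q", "ε")}, can be simulated by blind expansion. A naive standalone sketch (accepts and the sample grammar are illustrative); it assumes left recursion was already removed, since otherwise the expansion need not terminate, hence the step limit:

def accepts(P, S, w, limit=10000):
    configs = [(0, (S,))]                 # (input position, stack as a tuple)
    for _ in range(limit):
        if not configs:
            return False
        pos, stack = configs.pop()
        if not stack:
            if pos == len(w):             # empty stack and input consumed
                return True
            continue
        top, rest = stack[0], stack[1:]
        if top in P:                      # expand:  δ[q, ε, A] = {(q, α)}
            for rule in P[top]:
                body = () if rule == "ε" else tuple(rule)
                configs.append((pos, body + rest))
        elif pos < len(w) and w[pos] == top:
            configs.append((pos + 1, rest))   # match:  δ[q, a, a] = {(q, ε)}
    return False

print(accepts({"S": {"aSb", "ε"}}, "S", "aabb"))   # True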
Example #9
    def __setitem__(self, key, value):
        # a production string like "aA | b | ε" is split on "|" into a Set of Rules
        if isinstance(value, str):
            super().__setitem__(
                key, Set(map(Rule, map(str.strip, value.split("|")))))

        elif isinstance(value, Set):
            super().__setitem__(key, value)
Example #10
    def __setitem__(self, key, val):
        # δ[q, a] = targets; a longer key δ[q, a1, a2, ...] = (v1, v2, ...)
        # fans out into the pairwise entries δ[q, ai] = vi
        if len(key) == 2:
            super().__setitem__(tuple(map(str, key)), Set(map(str, val)))

        elif len(key) > 2 and len(key) == len(val) + 1:
            for k, v in zip(key[1:], val):
                self.__setitem__((key[0], k), v)
Example #11
 def do():
     nonlocal i, N
     i = i + 1
     N[i] = N[i - 1].union(
         Set(A for A in self.N for p in self.P[A]
             if re.sub("[∅" + "".join(N[i - 1]) +
                       "]", "", str(p)).islower()))
Example #12
 def do():
     nonlocal i, V
     i = i + 1
     V[i] = V[i - 1].union(
         Set(sym for sym in self.N.union(self.Σ)
             if any(sym in rule for A in V[i - 1] if A.isupper()
                    for rule in self.P[A])))
Example #13
 def calc_potentials():
     potentials = {}
     for A in N:
         potentials.setdefault(A, Set())
         for rule in P[A]:
             if rule[0] in N:
                 potentials[A].add(rule[0])
     return potentials
Example #14
    def remove_left_recursion(self):
        def calc_potentials():
            potentials = {}
            for A in N:
                potentials.setdefault(A, Set())
                for rule in P[A]:
                    if rule[0] in N:
                        potentials[A].add(rule[0])
            return potentials

        N = self.N.copy()
        P = self.P.copy()

        for i, A in enumerate(N):
            for B in N[:i + 1]:
                if B not in calc_potentials()[A]:
                    continue

                if A == B:
                    α = [rule for rule in P[A] if rule.startswith(B)]
                    β = [rule for rule in P[A] if not rule.startswith(B)]
                    N = Set(f"{A}'").union(N)
                    P[f"{A}'"] |= Set(Rule(rule[1:]) for rule in α) | Set(
                        Rule(rule[1:] + f"{A}'") for rule in α)
                    P[A] = Set(Rule(rule) for rule in β) | Set(
                        Rule(rule + f"{A}'") for rule in β)

                else:
                    α = [rule for rule in P[A] if rule.startswith(B)]
                    β = [rule for rule in P[A] if not rule.startswith(B)]
                    P[A] = Set(Rule(rule) for rule in β) | Set(
                        Rule(ruleB + rule[1:]) for rule in α for ruleB in P[B])

        return CFG(N, self.Σ.copy(), P, self.S)
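The A == B branch is the classic split of immediate left recursion: A -> Aα | β becomes A -> βA' with A' -> αA' | ε (the method above uses the ε-free variant, adding each rule both with and without the trailing A'). A minimal standalone version of the ε formulation, with illustrative names:

def remove_immediate_left_recursion(A, rules):
    alpha = [r[1:] for r in rules if r.startswith(A)]       # A -> Aα
    beta = [r for r in rules if not r.startswith(A)]        # A -> β
    if not alpha:
        return {A: set(rules)}
    A1 = A + "'"
    return {A: {b + A1 for b in beta},
            A1: {a + A1 for a in alpha} | {"ε"}}

print(remove_immediate_left_recursion("E", ["E+T", "T"]))
# {'E': {"TE'"}, "E'": {"+TE'", 'ε'}}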
Example #15
    def toNFA(self):
        """
        convert EFA to DFA
            remove ε steps
        """
        import NFA

        δ = NFA.δ()
        for qi in self.Q:
            for a in self.Σ:
                if a == "ε":
                    continue

                step1 = self.δ.get((qi, a), Set())

                step2 = Set()
                for s in self.δ.Dε(qi):
                    step2 |= self.δ.get((s, a), Set())

                step3 = Set()
                for s in step1 | step2:
                    step3 |= self.δ.Dε(s)

                δ[qi, a] = tuple(step3)

        Σ = self.Σ - Set("ε")
        F = (self.F if self.δ.Dε(self.q0).intercept(self.F) == Set() else
             self.F.union(Set(self.q0)))

        return NFA(self.Q, Σ, δ, self.q0, F)
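The three steps can be written with plain dicts and sets; eps_closure and remove_eps below are an illustrative sketch, not the original API:

def eps_closure(delta, q):
    # all states reachable from q via ε steps only
    seen, todo = {q}, [q]
    while todo:
        for t in delta.get((todo.pop(), "ε"), set()):
            if t not in seen:
                seen.add(t)
                todo.append(t)
    return seen

def remove_eps(Q, Sigma, delta, q0, F):
    new_delta = {}
    for q in Q:
        for a in Sigma - {"ε"}:
            targets = set()
            for s in eps_closure(delta, q):      # steps 1+2: leading ε*, then a
                targets |= delta.get((s, a), set())
            new_delta[q, a] = {t for s in targets
                               for t in eps_closure(delta, s)}   # step 3: trailing ε*
    new_F = F | ({q0} if eps_closure(delta, q0) & F else set())
    return new_delta, new_F

d = {("q0", "ε"): {"q1"}, ("q1", "a"): {"q2"}}
nd, F = remove_eps({"q0", "q1", "q2"}, {"a", "ε"}, d, "q0", {"q2"})
print(nd["q0", "a"])   # {'q2'}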
Example #16
    def toBottomUpAnalyzer(self):
        import PDA
        # L = self.remove_left_recursion()
        L = self

        δ = PDA.δ()
        M = BottomUpAnalyzer(Set("q", "r"), L.Σ, L.N | L.Σ | Set("⊥"), δ, "q",
                             "⊥", Set("r"))

        for A in L.N:
            for rule in L.P[A]:
                δ["q", "ε", rule].add(("q", A))

        for a in L.Σ:
            δ["q", a, "ε"].add(("q", a))

        ô["q", "ε", "⊥S"].add(("r", "ε"))

        return M
Example #17
 def Dε(self, q0):
     """
     ε-closure: every state reachable from q0 via ε steps only
     """
     result = Set()
     stack = [q0]
     while len(stack) > 0:
         q = stack.pop(0)
         result.add(q)
         targets = self.__getitem__((q, "ε"))
         for target in targets:
             if target not in result:
                 stack.append(target)
             result.add(target)
     return result
Example #18
    def __init__(self, Q, Σ, δ, q0, F):
        """

        Q  : set of states
        Σ  : finite alphabet
        δ  : Q × Σ → Q transition function
        q0 : q0 ∈ Q initial state
        F  : F ⊆ Q set of accepting states

        """

        super().__init__(Q, Σ, δ, Set(q0), F)
        self.q0 = str(q0)
Example #19
def test_dfa():
    δ = DFA.δ()
    A = DFA(Set(1, 2, 3, 4, 5, 6, 7), Set("a", "b"), δ, 1, Set(3, 5, 6))

    δ[1, "a"] = 2
    δ[1, "b"] = "-"
    δ[2, "a"] = 3
    δ[2, "b"] = 4
    δ[3, "a"] = 6
    δ[3, "b"] = 5
    δ[4, "a"] = 3
    δ[4, "b"] = 2
    δ[5, "a"] = 6
    δ[5, "b"] = 3
    δ[6, "a"] = 2
    δ[6, "b"] = "-"
    δ[7, "a"] = 6
    δ[7, "b"] = 1

    # A.table()
    B = A.minimize()
    B.diagram()
Example #20
    def remove_primitive_rules(self):
        """
        remove all rules of type A → B where A,B ∈ N
        """
        import CFG

        P = CFG.P()

        for A in self.N:
            NA = self.Nx(A)
            P[A] = Set(rule for B in NA for rule in self.P[B]
                       if not self.isprimitive(rule))

        return CFG(self.N, self.Σ, P, self.S)
Example #21
    def Nx(self, x):
        """
        closure of x under primitive rules (chains A -> B)
        """
        i = 0
        N = {0: Set(x)}

        def do():
            nonlocal i, N
            i = i + 1
            N[i] = N[i - 1].union(
                Set(rule for A in N[i - 1] for rule in self.P[A]
                    if self.isprimitive(rule)))

        do()
        while N[i] != N[i - 1]:
            do()

        Nx = N[i]
        return Nx
Example #22
    def reachables(self, q0):
        """
        return a list of all reachable qi states from the state q0
        """

        result = Set()
        stack = [q0]

        while len(stack) > 0:
            q = stack.pop(0)
            result.add(q)
            for a in self.Σ:
                target = self.__getitem__((q, a))
                if target is not None:
                    if target not in result:
                        stack.append(target)
                    result.add(target)

        return result
Example #23
        def resolve(A, B):
            for _ in range(len(P[A])):
                rule = list(P[A].pop(0))
                if rule[0].islower():
                    for i, c in enumerate(rule[1:]):
                        if c.islower() and len(c) == 1:
                            rule[i + 1] = f"{c}̄"

                    rule = "".join(rule)
                    P[A].add(Rule(rule))

                elif rule[0] == B:
                    rules = Set()
                    for rule1 in P[B]:
                        rules.add(Rule(rule1 + "".join(rule[1:])))
                    P[A] |= rules

                else:
                    P[A].add(Rule(rule))
Example #24
    def V(self):
        """
        get all reachable symbols
        """
        i = 0
        V = {0: Set(self.S)}

        def do():
            nonlocal i, V
            i = i + 1
            V[i] = V[i - 1].union(
                Set(sym for sym in self.N.union(self.Σ)
                    if any(sym in rule for A in V[i - 1] if A.isupper()
                           for rule in self.P[A])))

        do()
        while V[i] != V[i - 1]:
            do()

        V = V[i]
        return V
Example #25
    def Ne(self):
        """
        get all normalised nonterminals
        """

        import re
        i = 0
        N = {0: Set()}

        def do():
            nonlocal i, N
            i = i + 1
            N[i] = N[i - 1].union(
                Set(A for A in self.N for p in self.P[A]
                    if re.sub("[∅" + "".join(N[i - 1]) +
                              "]", "", str(p)).islower()))

        do()
        while N[i] != N[i - 1]:
            do()

        Ne = N[i]
        return Ne
Example #26
    def CYK_parser(self, w):
        import numpy as np

        n = len(w)
        C = np.empty((n, n), dtype=Set)
        for i in range(n):
            for j in range(n):
                C[i, j] = Set()

        for d in self.P:
            for i in range(n):
                # for k in range(0, n-i, -1):
                for k in range(n-i):
                    if self.check_prod_rule((d, w[i:i+k+1])):
                        C[i, i+k].add(d)

        for m in range(2, n+1):
            for i in range(1, n-m+2):
                j = i + m - 1
                for rule in self.get_nonterm_prod_rules():
                    for k in range(i, j):
                        lrule, rrule = rule[1][0], rule[1][1]
                        if lrule in C[i-1, k-1] and rrule in C[k, j-1]:
                            C[i-1, j-1].add(rule[0])
        return C
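For comparison, a standalone CYK over a CNF grammar given as two plain dicts, unary (terminal → heads) and binary ((B, C) → heads); the names and the tiny grammar are illustrative:

def cyk(unary, binary, w):
    n = len(w)
    C = [[set() for _ in range(n)] for _ in range(n)]
    for i, a in enumerate(w):
        C[i][i] = set(unary.get(a, ()))       # length-1 substrings
    for span in range(2, n + 1):              # substring length
        for i in range(n - span + 1):
            j = i + span - 1
            for k in range(i, j):             # split point
                for B in C[i][k]:
                    for D in C[k + 1][j]:
                        C[i][j] |= binary.get((B, D), set())
    return C[0][n - 1]                        # heads deriving the whole word

# S -> AB, A -> a, B -> b  recognizes exactly "ab"
print("S" in cyk({"a": {"A"}, "b": {"B"}}, {("A", "B"): {"S"}}, "ab"))   # True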
Example #27
        def step(i, groups, δ):
            new_groups = {}
            force_move = 0
            numerated_patterns = []
            # g ranges over the current groups; it must not shadow the
            # recursion counter i
            for g in range(len(Set(groups.values()))):
                for qi, val in groups.items():
                    target = δ.group()[qi]
                    if val == roman(g + 1):
                        if target not in numerated_patterns:
                            numerated_patterns.append(target)
                        index = numerated_patterns.index(target)
                        new_groups[qi] = roman(index + 1 + force_move)
                force_move += len(numerated_patterns)
                numerated_patterns = []

            new_δ = DFA.δ()
            for qi in self.Q:
                for a in self.Σ:
                    new_δ[qi, a] = new_groups.get(self.δ[qi, a])

            # keep refining while the partition or the table is still changing
            if (imax is None or i < imax) and (groups != new_groups or δ != new_δ):
                return step(i + 1, new_groups, new_δ)
            return new_groups, new_δ
Example #28
    def Nε(self):
        """
        get all nonterminals that can derive ε
        """

        import re
        i = 0
        N = {0: Set()}

        def do():
            nonlocal i, N
            i = i + 1
            N[i] = N[i - 1].union(
                Set(A for A in self.N for p in self.P[A]
                    if re.sub("[∅ε" + "".join(N[i - 1]) +
                              "]", "", str(p)) == ""))

        do()
        while N[i] != N[i - 1]:
            do()

        Nε = N[i]
        return Nε
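The same fixpoint in plain Python: a nonterminal is nullable once some rule for it consists only of ε and already-nullable symbols (nullable and the sample grammar are illustrative):

def nullable(P):
    N = set()
    changed = True
    while changed:
        changed = False
        for A, rules in P.items():
            if A not in N and any(all(c in N or c == "ε" for c in rule)
                                  for rule in rules):
                N.add(A)
                changed = True
    return N

print(nullable({"S": {"AB"}, "A": {"ε", "a"}, "B": {"ε"}}))   # {'S', 'A', 'B'}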
Example #29
    def toGNF(self):
        """
        each rule must be of format
        A → aB1B2B3...Bn   (a ∈ Σ, B1,B2,B3,...,Bn ∈ N)
        """
        import CFG

        G = self.remove_left_recursion()
        N = Set(reversed(G.N.copy()))
        P = G.P.copy()

        def resolve(A, B):
            for _ in range(len(P[A])):
                rule = list(P[A].pop(0))
                if rule[0].islower():
                    for i, c in enumerate(rule[1:]):
                        if c.islower() and len(c) == 1:
                            rule[i + 1] = f"{c}̄"

                    rule = "".join(rule)
                    P[A].add(Rule(rule))

                elif rule[0] == B:
                    rules = Set()
                    for rule1 in P[B]:
                        rules.add(Rule(rule1 + "".join(rule[1:])))
                    P[A] |= rules

                else:
                    P[A].add(Rule(rule))

        for i, A in enumerate(N):
            for B in N[:i + 1]:
                resolve(A, B)

        return CFG(N, self.Σ, P, self.S)
Example #30
def turn_to_HomskyForm(gramm):
    import copy
    # build a new grammar in Chomsky normal form; deep-copy the argument so
    # the original grammar is not mutated
    new_grammar = copy.deepcopy(gramm)
    # 1) delete long rules: split every rule longer than two symbols into
    # a chain of binary rules over fresh nonterminals
    for j in range(len(new_grammar.P)):
        keys_list = list(new_grammar.P)
        rules = new_grammar.P[keys_list[j]]
        # iterate over a snapshot because rules is mutated in the loop body
        for m, long_rule in enumerate(list(rules)):
            if type(long_rule) == tuple and len(long_rule) > 2:
                k = len(long_rule)
                for i in range(1, k-2):
                    newNonTerminal = chr(65+j) + str(i+m)
                    new_grammar.D.add(newNonTerminal)
                    new_grammar.D.add(chr(65+j) + str(i+1+m))
                    new_grammar.P[newNonTerminal] = [(long_rule[i], chr(65+j) + str(i+1+m))]
                newlastNonTerminal = chr(65+j) + str(k-2+m)
                new_grammar.P[newlastNonTerminal] = [(long_rule[k-2], long_rule[k-1])]
                new_grammar.D.add(newlastNonTerminal)
                rules.append((long_rule[0], chr(65+j) + str(1+m)))
                rules.remove(long_rule)

    # 2) delete epsilon-rules:
    # first find the directly nullable non-terminals (rules A -> eps)
    S = Set()  # set of epsilon non-terminals
    for element in new_grammar.P.copy():
        for rule in new_grammar.P[element]:
            if rule == 'eps':
                S.add(element)
    # iterate to a fixed point: A is nullable if some rule for A derives
    # only nullable symbols
    while True:
        old = S.copy()
        for element in new_grammar.P.copy():
            for rule in new_grammar.P[element]:
                if type(rule) == tuple:
                    if rule[0] in S and rule[1] in S:
                        S.add(element)
                else:
                    if rule in S:
                        S.add(element)
        if S == old:
            break
    # now eliminate them: add each rule variant with a nullable symbol dropped
    new_P = new_grammar.P
    for element in new_P.copy():
        rules = new_P[element]
        for rule in rules:
            if type(rule) == tuple:
                for symbol in rule:
                    if symbol in S and len(rule) > 1:
                        temp = list(rule)
                        temp.remove(symbol)
                        new_rule = tuple(temp)
                        rules.append(new_rule)

        delete_reps = set(rules)
        new_P[element] = list(delete_reps)
    for element in new_P.copy():
        if 'eps' in new_P[element]:
            new_P[element].remove('eps')
        if new_P[element] == []:
            del new_P[element]
    new_grammar.P = new_P
    # 3) delete the chain (unit) production rules:
    # first find all unit pairs:
    def unit_pairs_set(D, P):
        # transitive closure of pairs (A, B) where B is reachable from A
        # through unit rules A -> B
        the_set = list((i, i) for i in D)
        for element in P:
            for rule in P[element]:
                if rule in D:
                    for item in the_set:
                        if item[1] == element and (item[0], rule) not in the_set:
                            the_set.append((item[0], rule))
        return the_set
    pairs_set = unit_pairs_set(new_grammar.D, new_grammar.P)
    for pair in pairs_set:
        if pair[0] != pair[1] and pair[1] in new_grammar.P.get(pair[0], []):
            new_grammar.P[pair[0]].remove(pair[1])
            new_grammar.P[pair[0]] = new_grammar.P[pair[0]] + new_grammar.P.get(pair[1], [])
            new_grammar.P[pair[0]] = list(set(new_grammar.P[pair[0]]))
    for element in new_grammar.P:
        for rule in new_grammar.P[element]:
            if type(rule) == tuple and len(rule) == 1:
                new_grammar.P[element][new_grammar.P[element].index(rule)] = rule[0]
    # 4) delete useless elements:
    # delete non-generating non-terminals
    set_of_generatings = set()
    set_of_generatings.add(new_grammar.acsiom)
    for element in new_grammar.P:
        for rule in new_grammar.P[element]:
            if type(rule) == tuple:
                if rule[0] not in new_grammar.D and rule[1] not in new_grammar.D:
                    set_of_generatings.add(element)
            else:
                if rule not in new_grammar.D:
                    set_of_generatings.add(element)
    # iterate to a fixed point over the generating set
    while True:
        old = set_of_generatings.copy()
        for element in new_grammar.P:
            for rule in new_grammar.P[element]:
                if type(rule) == tuple:
                    if (rule[0] in set_of_generatings or rule[0] in new_grammar.X) and (rule[1] in set_of_generatings or rule[1] in new_grammar.X):
                        set_of_generatings.add(element)
                else:
                    if rule in set_of_generatings:
                        set_of_generatings.add(element)
        if set_of_generatings == old:
            break
    # delete unreachable non-terminals: breadth-first search from the axiom;
    # use an explicit worklist because a set must not grow while iterated
    found_elements = Set()
    found_elements.add(new_grammar.acsiom)
    worklist = [new_grammar.acsiom]
    while worklist:
        item = worklist.pop()
        if item in new_grammar.P:
            for rule in new_grammar.P[item]:
                symbols = rule if type(rule) == tuple else (rule,)
                for term in symbols:
                    if term in new_grammar.D and term not in found_elements:
                        found_elements.add(term)
                        worklist.append(term)

    # remove non-generating symbols from the rules
    for element in new_grammar.P:
        for rule in list(new_grammar.P[element]):
            if type(rule) == tuple:
                for term in rule:
                    if term not in set_of_generatings and term in new_grammar.D:
                        new_list = list(rule)
                        new_list.remove(term)
                        new_grammar.P[element].remove(rule)
                        if not (len(new_list) == 1 and new_list[0] == element):
                            new_grammar.P[element].append(new_list[0])
                        break
            else:
                if rule not in set_of_generatings and rule in new_grammar.D:
                    new_grammar.P[element].remove(rule)

    for element in new_grammar.P.copy():
        if element not in found_elements:
            del new_grammar.P[element]
    # final touch: replace each terminal occurring inside a binary rule with
    # a fresh nonterminal Z_i
    for item in new_grammar.X:
        S1 = 'Z' + str(new_grammar.X.index(item))
        for element in new_grammar.P.copy():
            for rule in new_grammar.P[element]:
                if type(rule) == tuple and item in rule and len(rule) > 1:
                    new_list = list(rule)
                    for i in range(2):
                        if rule[i] == item:
                            new_list[i] = S1
                            new_grammar.D.add(S1)
                    new_grammar.P[element][new_grammar.P[element].index(rule)] = tuple(new_list)
                    new_grammar.P[S1] = [item]
    return new_grammar
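Step 1 in isolation, over plain dicts and tuples: every rule A -> X1 X2 ... Xk with k > 2 is split into a chain of binary rules over fresh nonterminals (split_long_rules and the N1, N2, ... naming scheme are illustrative, not the original code):

def split_long_rules(P):
    new_P, counter = {}, 0
    for A, rules in P.items():
        new_P.setdefault(A, [])
        for rule in rules:
            if isinstance(rule, tuple) and len(rule) > 2:
                symbols, head = list(rule), A
                while len(symbols) > 2:       # peel one symbol per fresh nonterminal
                    counter += 1
                    fresh = f"N{counter}"
                    new_P.setdefault(head, []).append((symbols[0], fresh))
                    head, symbols = fresh, symbols[1:]
                new_P.setdefault(head, []).append(tuple(symbols))
            else:
                new_P[A].append(rule)
    return new_P

print(split_long_rules({"S": [("a", "B", "C", "D")]}))
# {'S': [('a', 'N1')], 'N1': [('B', 'N2')], 'N2': [('C', 'D')]}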