Example #1
0
    def run(self):
        """Minimise the grammar/lexer pair round after round.

        Each round asks ``self._sin.find_ambiguity`` for an ambiguity in the
        current grammar/lexer pair, hands the result to ``AmbiParse.parse``
        and passes four numbered target paths inside ``self._sin.td``
        (``<n>.acc``, ``<n>.lex``, ``<n>.sen``, ``<n>.ambs``) to
        ``write_cfg_lex`` and ``write_stat``.  The ``.acc``/``.lex`` targets
        become the input of the next round.

        NOTE(review): the loop has no exit condition of its own (the bounded
        variant is commented out), so it only ends when the ``assert`` fails
        or a callee raises; the trailing ``return`` is unreachable as
        written -- confirm that this is intended.
        """
        grammar, lexer = self._sin.gp, self._sin.lp
        latest = None
        round_no = 1

        # bounded variant, currently disabled:
        #while n < self._sin.mincnt:
        while True:
            amb, sen, ptrees = self._sin.find_ambiguity(
                grammar, lexer, self._sin.backend)
            assert amb
            latest = AmbiParse.parse(lexer, self._sin.lex_ws, ptrees, sen)

            # targets for the minimised cfg, lex, sentence and ambiguity files
            out_gp, out_lp, out_sen, out_amb = [
                os.path.join(self._sin.td, pattern % round_no)
                for pattern in ("%s.acc", "%s.lex", "%s.sen", "%s.ambs")]
            print("currgp: %s, _gp: %s " % (grammar, out_gp))
            self.write_cfg_lex(latest, out_gp, out_lp, out_sen, out_amb)
            self.write_stat(out_gp, out_lp, out_sen, out_amb)

            # the files just written feed the next round
            grammar, lexer = out_gp, out_lp
            round_no += 1

        return grammar, lexer, latest.amb_str
Example #2
0
    def run(self, td):
        """Minimise for ``self.ambimin.mincnt`` rounds, then filter the result.

        Every round looks for an ambiguity in the current grammar/lexer pair,
        lets ``AmbiParse.parse`` process the trees and passes its ``min_cfg``
        to ``MiniUtils.write_cfg_lex`` with the targets ``<n>.acc`` and
        ``<n>.lex`` inside ``td``.  Afterwards the last grammar is handed to
        ``self.ambidxt.filter``.

        Returns a ``(grammar path, lexer path)`` pair: the filtered grammar
        when the filter produced one, otherwise the last minimised grammar.
        """
        grammar = self.ambimin.gp
        lexer = self.ambimin.lp
        step = 1

        while step <= self.ambimin.mincnt:
            amb, _sen, trees = self.find_ambiguity(grammar, lexer)
            assert amb
            parsed = AmbiParse.parse(self, trees)
            next_gp = os.path.join(td, "%s.acc" % step)
            next_lp = os.path.join(td, "%s.lex" % step)
            print("currgp: %s, gp: %s " % (grammar, next_gp))
            MiniUtils.write_cfg_lex(parsed.min_cfg, next_gp, lexer, next_lp)
            self.write_stat(next_gp)

            grammar, lexer = next_gp, next_lp
            step += 1

        # run ambidexter on the minimised grammar; the stat entry is written
        # whether or not the filter returned a grammar
        filtered = self.ambidxt.filter(grammar, str(self.ambimin.duration))
        self.write_stat(filtered)

        if filtered is None:
            return grammar, lexer
        return filtered, lexer
Example #3
0
    def run_accent(self, sen, gp, lp):
        """Parse ``sen`` with an Accent parser built from ``gp`` + ``lp``.

        The parse trees are handed to ``AmbiParse.parse`` and the result is
        passed to ``write_cfg_lex`` together with two fresh ``.acc`` / ``.lex``
        paths inside ``self._sin.td``.

        The target paths are reserved with ``tempfile.mkstemp`` rather than
        the deprecated ``tempfile.mktemp``: ``mktemp`` only returns a name,
        so another process could create a file with that name before it is
        written.  ``mkstemp`` creates the (empty) file atomically.

        Returns ``(grammar path, lexer path, AmbiParse result)``.
        """
        import os  # local import: only needed to close the mkstemp descriptors

        parser = Accent.compile(gp, lp)
        ptrees = Accent.run(parser, sen)
        ambi_parse = AmbiParse.parse(lp, self._sin.lex_ws, ptrees)

        # mkstemp returns an open descriptor; only the path is needed here
        fd, _gp = tempfile.mkstemp('.acc', dir=self._sin.td)
        os.close(fd)
        fd, _lp = tempfile.mkstemp('.lex', dir=self._sin.td)
        os.close(fd)
        self.write_cfg_lex(ambi_parse, _gp, _lp)

        return _gp, _lp, ambi_parse
Example #4
0
    def run_accent(self, sen, gp, lp, td):
        """Parse ``sen`` with an Accent parser built from ``gp`` + ``lp``.

        The Accent output is handed to ``AmbiParse.parse``; its ``min_cfg``
        is passed to ``MiniUtils.write_cfg_lex`` together with two fresh
        ``.acc`` / ``.lex`` paths inside ``td``.

        The target paths are reserved with ``tempfile.mkstemp`` rather than
        the deprecated ``tempfile.mktemp``: ``mktemp`` only returns a name,
        so another process could create a file with that name before it is
        written.  ``mkstemp`` creates the (empty) file atomically.

        Returns ``(grammar path, lexer path)``.
        """
        import os  # local import: only needed to close the mkstemp descriptors

        parser = Accent.compile(gp, lp)
        out = Accent.run(parser, sen)
        ambiparse = AmbiParse.parse(self, out)

        # mkstemp returns an open descriptor; only the path is needed here
        fd, _gp = tempfile.mkstemp('.acc', dir=td)
        os.close(fd)
        fd, _lp = tempfile.mkstemp('.lex', dir=td)
        os.close(fd)
        MiniUtils.write_cfg_lex(ambiparse.min_cfg, _gp, lp, _lp)

        return _gp, _lp
Example #5
0
    def run_accent(self, sen, gf, lf):
        """Parse ``sen`` with an Accent parser compiled from ``gf`` + ``lf``.

        Returns the result of ``AmbiParse.parse`` for the parse trees Accent
        produced.  Nothing is written to disk by this method itself.
        """
        trees = Accent.run(Accent.compile(gf, lf), sen)
        return AmbiParse.parse(lf, self._sin.lex_ws, trees, sen)
Example #6
0
    def run(self, td):
        """Minimise, filter with AmbiDexter, then search the filtered grammar.

        1. For ``self.ambimin.mincnt`` rounds: find an ambiguity, let
           ``AmbiParse.parse`` process the trees and pass its ``min_cfg`` to
           ``MiniUtils.write_cfg_lex`` with the targets ``<n>.acc`` and
           ``<n>.lex`` inside ``td``.
        2. Hand the last grammar to ``self.ambidxt.filter``; if that returns
           ``None`` the last minimised pair is returned.
        3. Pass the filtered grammar through ``fix_sym_tokens_bug`` and call
           ``self.ambidxt.ambiguous`` with whatever is left of
           ``self.ambimin.ambi_duration`` after the filter run.
        4. If a sentence comes back, feed it to ``run_accent`` and return the
           pair it produced; otherwise return the fixed grammar with the
           current lexer.

        Returns a ``(grammar path, lexer path)`` pair.
        """
        grammar, lexer = self.ambimin.gp, self.ambimin.lp
        step = 1

        while step <= self.ambimin.mincnt:
            amb, _sen, trees = self.find_ambiguity(grammar, lexer)
            assert amb
            parsed = AmbiParse.parse(self, trees)
            next_gp = os.path.join(td, "%s.acc" % step)
            next_lp = os.path.join(td, "%s.lex" % step)
            print("currgp: %s, gp: %s " % (grammar, next_gp))
            MiniUtils.write_cfg_lex(parsed.min_cfg, next_gp, lexer, next_lp)
            self.write_stat(next_gp)

            grammar, lexer = next_gp, next_lp
            step += 1

        # run the ambidexter filter on the minimised grammar, timing the call
        filter_opts = ['-h',
                       '-%s' % self.ambimin.fltr,
                       '-%s' % self.ambimin.fltr_cfg_outfmt]
        started = time.time()
        filtered = self.ambidxt.filter(
            grammar, str(self.ambimin.ambi_duration), filter_opts)
        elapsed = time.time() - started
        self.write_stat(filtered)

        if filtered is None:
            return grammar, lexer

        print("%s %s" % ("=> filtered grammar : ", filtered))
        fixed = self.fix_sym_tokens_bug(grammar, filtered, td)

        # run ambidexter on the filtered grammar with the time that is left
        # NOTE(review): nothing guards against a zero or negative remainder
        # when the filter used up the whole budget -- confirm the callee copes.
        remaining = self.ambimin.ambi_duration - elapsed
        print("%s %s" % ("time remaining: ", remaining))
        _ambisen, accsen = self.ambidxt.ambiguous(
            fixed, lexer, str(remaining), ['-pg', '-ik', '0'])
        print("%s %s" % ("accsen: ", accsen))

        if accsen is None:
            # AmbiDexter didn't find anything
            self.write_stat(None)
            return fixed, lexer

        acc_gp, acc_lp = self.run_accent(accsen, fixed, lexer, td)
        self.write_stat(acc_gp)
        return acc_gp, acc_lp
Example #7
0
    def run(self, td):
        """Minimise the grammar once and run ambidexter on the result.

        An ambiguity is looked up for ``self.ambimin.gp`` / ``self.ambimin.lp``,
        ``AmbiParse.parse`` processes the trees and its ``min_cfg`` is passed
        to ``MiniUtils.write_cfg_lex`` with the targets ``0.acc`` and
        ``0.lex`` inside ``td``.  ``self.ambidexter`` is then run on the new
        grammar.

        Returns ``(grammar path, lexer path, sentence)``; the sentence is
        ``None`` when ambidexter reports a non-zero result code.

        Raises ``AssertionError`` when no ambiguity is found for the input.
        """
        gp, lp = self.ambimin.gp, self.ambimin.lp
        amb, sen, trees = self.find_ambiguity(gp, lp)
        assert amb
        ambi_parse = AmbiParse.parse(self, trees)
        # save the minimised cfg, lex to target files
        _gp = os.path.join(td, "%s.acc" % 0)
        _lp = os.path.join(td, "%s.lex" % 0)
        print("gp: %s, _gp: %s " % (gp, _gp))
        MiniUtils.write_cfg_lex(ambi_parse.min_cfg, _gp, lp, _lp)

        # run ambidexter on the minimised grammar
        sen, r = self.ambidexter(_gp)
        if r != 0:
            return _gp, _lp, None

        # second element is the lexer path, matching the failure branch
        # (previously the grammar path was returned twice)
        return _gp, _lp, sen
Example #8
0
    def run(self):
        """Shrink the grammar by dropping rule alternatives one at a time.

        Starting from ``self.mingp`` / ``self.minlp``, every pass parses the
        current lexer and grammar files and walks the rules that have more
        than one alternative.  For each alternative a grammar without it is
        built (``cfg_minus_alt``); if ``valid_cfg`` accepts it, it is pruned
        (``prune_cfg``), written to ``pruned.<n>.acc`` in ``self._sin.td``
        and checked with ``self._sin.find_ambiguity``.  The first candidate
        that is still ambiguous is minimised into ``min.<n>.acc`` /
        ``min.<n>.lex`` and becomes the input of a new pass.  The search
        stops after a pass in which no candidate was ambiguous.

        Returns ``(grammar path, lexer path, ambiguity string)`` of the last
        accepted grammar.
        """
        currgp = self.mingp
        currlp = self.minlp
        currparse = self._sin.ambi_parse
        n = 1
        found = True

        while found:
            found = False
            # read through context managers so the handles are closed
            # deterministically instead of being left to the garbage collector
            with open(currlp, "r") as lexf:
                lex = Lexer.parse(lexf.read())
            with open(currgp, "r") as cfgf:
                cfg = CFG.parse(lex, cfgf.read())
            # work on rules with no of alts > 1
            keys = [r.name for r in cfg.rules if len(r.seqs) > 1]
            for key in keys:
                seqs = cfg.get_rule(key).seqs
                # only the alternative's index is needed
                for i in range(len(seqs)):
                    _cfg = self.cfg_minus_alt(cfg, key, i)
                    if not self.valid_cfg(_cfg):
                        continue

                    # we could minimise lex first before pruning
                    _cfg_p = self.prune_cfg(_cfg, lex)
                    _gf, _lf = "%s.acc" % n, "%s.lex" % n
                    _gp = os.path.join(self._sin.td, "pruned.%s" % _gf)
                    CFG.write(_cfg_p, _gp)
                    n += 1
                    amb, _, ptrees = self._sin.find_ambiguity(
                        _gp, currlp, self._sin.backend, self._sin.mint)
                    if not amb:
                        continue

                    ambi_parse = AmbiParse.parse(currlp, self._sin.lex_ws,
                                                 ptrees)
                    __gp = os.path.join(self._sin.td, "min.%s" % _gf)
                    __lp = os.path.join(self._sin.td, "min.%s" % _lf)
                    self.write_cfg_lex(ambi_parse, __gp, __lp)
                    self.write_stat(__gp, __lp)
                    found = True
                    currparse = ambi_parse
                    currgp = __gp
                    currlp = __lp
                    break

                # a smaller grammar was accepted: restart from its files
                if found:
                    break

        return currgp, currlp, currparse.amb_str
Example #9
0
    def run(self):
        """Minimise for ``self._sin.mincnt`` rounds, then consult AmbiDexter.

        Every round looks for an ambiguity in the current grammar/lexer pair,
        lets ``AmbiParse.parse`` process the trees and passes the targets
        ``<n>.acc`` / ``<n>.lex`` inside ``self._sin.td`` to ``write_cfg_lex``
        and ``write_stat``.  Afterwards ``self.ambidxt.ambiguous`` is run on
        the last grammar; if it returns a sentence, that sentence is
        converted with ``to_accent`` and fed to ``run_accent`` to minimise
        the grammar further.

        Returns ``(grammar path, lexer path, ambiguity string)``.

        Raises ``AssertionError`` when a round finds no ambiguity.
        """
        currgp = self.mingp
        currlp = self.minlp
        currparse = self._sin.ambi_parse
        n = 1

        while n <= self._sin.mincnt:
            amb, sen, ptrees = self._sin.find_ambiguity(currgp, currlp,
                                                        self._sin.backend)
            assert amb
            ambi_parse = AmbiParse.parse(currlp, self._sin.lex_ws, ptrees)
            # save the minimised cfg, lex to target files
            _gp = os.path.join(self._sin.td, "%s.acc" % n)
            _lp = os.path.join(self._sin.td, "%s.lex" % n)
            print("currgp: %s, _gp: %s " % (currgp, _gp))
            self.write_cfg_lex(ambi_parse, _gp, _lp)
            self.write_stat(_gp, _lp)

            currgp = _gp
            currlp = _lp
            currparse = ambi_parse
            n += 1

        # run ambidexter on the minimised grammar
        ambisen = self.ambidxt.ambiguous(currgp, str(self._sin.ambit))
        if ambisen is not None:
            accsen = self.to_accent(ambisen, currlp)
            print("%s %s" % ("ambisen: ", ambisen))
            print("%s %s" % ("accsen: ", accsen))
            # pass the string from ambidexter to accent,
            # to minimise the grammar even further
            _gp, _lp, _ambip = self.run_accent(accsen, currgp, currlp)
            self.write_stat(_gp, _lp, '*')
            return _gp, _lp, _ambip.amb_str

        # AmbiDexter didn't find anything
        self.write_stat(None, None, '*')
        # currparse is the parse of the last round, or the initial parse when
        # the loop ran zero times (where ``ambi_parse`` would be unbound)
        return currgp, currlp, currparse.amb_str
Example #10
0
    def run(self):
        """Shrink the grammar by dropping rule alternatives in random order.

        Starting from ``self.mingp`` / ``self.minlp``, every pass parses the
        current lexer and grammar files, obtains the (rule, alternative)
        combinations from ``rule_alts_combs`` and shuffles them.  For each
        combination a grammar without that alternative is built
        (``cfg_minus_alt``); if ``valid_cfg`` accepts it, it is pruned
        (``prune_cfg``), written to ``pruned.<n>.acc`` in ``self._sin.td``
        and checked with ``self._sin.find_ambiguity``.  The first candidate
        that is still ambiguous is minimised into ``min.<n>.acc`` /
        ``min.<n>.lex`` and becomes the input of a new pass.  The search
        stops after a pass in which no candidate was ambiguous.

        Returns ``(grammar path, lexer path, ambiguity string)`` of the last
        accepted grammar.
        """
        currgp = self.mingp
        currlp = self.minlp
        currparse = self._sin.ambi_parse
        n = 1
        found = True

        while found:
            found = False
            # read through context managers so the handles are closed
            # deterministically instead of being left to the garbage collector
            with open(currlp, 'r') as lexf:
                lex = Lexer.parse(lexf.read())
            with open(currgp, 'r') as cfgf:
                cfg = CFG.parse(lex, cfgf.read())
            combs = self.rule_alts_combs(cfg)
            random.shuffle(combs)
            while combs:
                key, i = combs.pop()
                _cfg = self.cfg_minus_alt(cfg, key, i)
                if not self.valid_cfg(_cfg):
                    continue

                # we could minimise lex first before pruning
                _cfg_p = self.prune_cfg(_cfg, lex)
                _gf, _lf = "%s.acc" % n, "%s.lex" % n
                _gp = os.path.join(self._sin.td, "pruned.%s" % _gf)
                CFG.write(_cfg_p, _gp)
                n += 1
                amb, _, ptrees = self._sin.find_ambiguity(
                    _gp, currlp, self._sin.backend, self._sin.mint)
                if not amb:
                    continue

                ambi_parse = AmbiParse.parse(currlp, self._sin.lex_ws, ptrees)
                __gp = os.path.join(self._sin.td, "min.%s" % _gf)
                __lp = os.path.join(self._sin.td, "min.%s" % _lf)
                self.write_cfg_lex(ambi_parse, __gp, __lp)
                self.write_stat(__gp, __lp)
                # a smaller grammar was accepted: restart from its files
                found = True
                currparse = ambi_parse
                currgp = __gp
                currlp = __lp
                break

        return currgp, currlp, currparse.amb_str