def _apply_unmatched_penalty(score, not_matched, abs_pen):
    """Return *score* reduced by the penalty for unmatched words.

    With no unmatched word the score is returned unchanged.  Otherwise a
    truthy *abs_pen* subtracts a flat 2.5, and a falsy one subtracts 0.5
    per unmatched word.
    """
    if not_matched < 1:
        return score
    if abs_pen:
        return score - 2.5
    return score - 0.5 * not_matched


def rescore(grammar, abs_pen=None):
    """Rescore every ``*.nb.scores.txt`` file found in the current directory.

    Each non-blank input line is split on whitespace into a hypothesis id,
    the hypothesis words and two trailing numeric fields.  The base score is
    ``float(first) + 0.001 * float(second)`` of those two fields.  Every
    sub-grammar produced by ``ebnf.parse(grammar)`` is searched in the
    hypothesis; words that appear in none of the matches are penalised (see
    ``_apply_unmatched_penalty``).  One ``"<id> <score>"`` line per
    hypothesis is written to ``<digits>.nb.rescored-abs.txt``, where
    ``<digits>`` is every digit of the input file name joined together.

    Args:
        grammar: Passed unchanged to ``ebnf.parse``.
        abs_pen: If truthy, apply a single flat penalty as soon as one word
            is unmatched instead of a per-word penalty.

    Raises:
        ValueError / IndexError: on a non-blank line that has fewer than
            three fields or non-numeric score fields.
    """
    g = ebnf.parse(grammar)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Loaded grammar from {}".format(grammar))

    for nbest in glob.glob('*.nb.scores.txt'):
        fn = basename(nbest)
        n = ''.join(re.findall(r'\d+', fn))
        with open(nbest) as nb, \
                open('{}.nb.rescored-abs.txt'.format(n), 'w+') as rescored:
            # Count the hypotheses in Python rather than shelling out to
            # `wc -l`: no shell quoting problems with odd file names and no
            # dependency on an external tool.
            total = sum(1 for raw in nb if raw.strip())
            nb.seek(0)

            sentence_no = 0
            for raw in nb:
                fields = raw.split()
                if not fields:
                    # A blank (e.g. trailing) line carries no hypothesis.
                    continue
                sentence_no += 1

                lsc, asc = fields[-2:]
                fields = fields[:-2]
                score = float(lsc) + 0.001 * float(asc)
                hyp_id = fields[0]
                hyp = ' '.join(fields[1:])
                print("Parsing sentence {} of {}...".format(sentence_no, total))

                # find partial parses of hyp and rescore
                matches = [g[subg].searchString(hyp).asList() for subg in g]
                matches = list(flatten(matches))
                not_matched = len([word for word in hyp.split()
                                   if word not in matches])

                score = _apply_unmatched_penalty(score, not_matched, abs_pen)
                rescored.write("{} {}\n".format(hyp_id, score))
'"', character - '"', {character - '"'}, '"'; meta_identifier = letter, {letter | digit}; integer = digit, {digit}; *) ''' table = {} #~ table['character'] = Word(printables, exact=1) #~ table['letter'] = Word(alphas + '_', exact=1) #~ table['digit'] = Word(nums, exact=1) table['terminal_string'] = sglQuotedString table['meta_identifier'] = Word(alphas+"_", alphas+"_"+nums) table['integer'] = Word(nums) print('Parsing EBNF grammar with EBNF parser...') parsers = ebnf.parse(grammar, table) ebnf_parser = parsers['syntax'] commentcharcount = 0 commentlocs = set() def tallyCommentChars(s,l,t): global commentcharcount,commentlocs # only count this comment if we haven't seen it before if l not in commentlocs: charCount = ( len(t[0]) - len(list(filter(str.isspace, t[0]))) ) commentcharcount += charCount commentlocs.add(l) return l,t #ordinarily, these lines wouldn't be necessary, but we are doing extra stuff with the comment expression ebnf.ebnfComment.setParseAction( tallyCommentChars )
def __init__(self, bnf):
    """Store the result of ``ebnf.parse(bnf)`` and an empty ``odict``.

    Args:
        bnf: Passed unchanged to ``ebnf.parse``; the result is kept in
            ``self.bnf``.
    """
    parsed = ebnf.parse(bnf)
    self.bnf = parsed
    # Starts empty; nothing in this method fills it.
    self.attlist = odict()
def get_parser(debug=True):
    """Return the ``CLIPS_program`` parser, building and caching it if needed.

    The parser table is rebuilt when nothing is cached in
    ``ClipsEbnf._CACHED_CLIPS_EBNF`` yet, or when ``debug`` differs from the
    value stored in ``ClipsEbnf._DEBUG`` by the previous build.  The grammar
    is read from ``clips.ebnf`` located next to this module.

    Args:
        debug: Forwarded to ``ebnf.parse``.  When truthy, the names of the
            loaded predicates, functions and actions are printed and the
            call then blocks on ``raw_input()`` until a line is read.

    Returns:
        The entry stored under ``'CLIPS_program'`` in the cached parser table.

    NOTE(review): takes no ``self``/``cls`` yet uses ``ClipsEbnf`` class
    attributes -- presumably a static method; confirm against the class.
    """
    # (Re)compile the parser if it has not been initialised yet, or if a
    # parser with a debug state different from the current one is requested.
    if ClipsEbnf._CACHED_CLIPS_EBNF is None \
            or debug != ClipsEbnf._DEBUG:

        from icse.predicates.Predicate import PositivePredicate, TestPredicate,\
            NegativePredicate, NccPredicate
        from icse.predicates.NotEq import NotEq
        from icse.predicates.Eq import Eq
        import os

        # Read the grammar inside a `with` block so the handle is always
        # closed (the file used to be left open).
        with open(os.path.dirname(__file__)+'/clips.ebnf', 'r') as grammar_file:
            grammar = grammar_file.read()

        # Hand-written parsers for the terminal rules of the grammar.
        table = {}
        table['float'] = pp.Regex(r'\d+(\.\d*)?([eE]\d+)?').setParseAction(lambda s,l,t:float(t[0]))
        table['integer'] = pp.Word(pp.nums).setParseAction(lambda s,l,t:int(t[0][:]))
        table['string'] = pp.Word(pp.printables)
        table['symbol'] = pp.Word("".join( [ c for c in string.printable if c not in string.whitespace and c not in "\"'()&?|<~;" ] ))
        table['variable_symbol'] = pp.Word('?', "".join( [ c for c in string.printable if c not in string.whitespace and c not in "\"'()&?|<~;" ] ), 2)
        table['variable_undef'] = pp.Literal('?')
        table['quoted_text'] = pp.Combine(("'" + pp.CharsNotIn("'") + "'" ^
                                           '"' + pp.CharsNotIn('"') + '"'))

        import icse.actions as actions
        #table['action_name'] = pp.Combine(pp.oneOf(" ".join(actions.Proxy.get_actions().keys())) + pp.Optional( pp.Literal(" ").suppress()) )
        table['action_name'] = pp.Combine(pp.oneOf(actions.Proxy.get_actions().keys()) + pp.FollowedBy( pp.White() | pp.Literal(")") ) + pp.Optional( pp.Literal(" ").suppress()) )

        import icse.functions as functions
        table['function_name'] = pp.Combine(pp.oneOf(" ".join(functions.Proxy.get_functions().keys())) + pp.Literal(" ").suppress())

        import icse.predicates as predicates
        table['predicate_name'] = pp.Combine(pp.oneOf(" ".join(predicates.Proxy.get_predicates().keys())) + pp.Literal(" ").suppress())

        table['MYCLIPS_directive'] = pp.Regex(r'\;\@(?P<command>\w+)\((?P<params>.+?)\)').setParseAction(lambda s,l,t: ('myclips-directive', (t['command'], t['params'])))

        parsers = ebnf.parse(grammar, table, debug)

        # Parse actions that reshape the raw tokens of each rule.
        #parsers['comment'].setParseAction(lambda s,l,t:t[0][1:-1])
        parsers['number'].setParseAction(lambda s,l,t:t[0][0])
        parsers['rule_property'].setParseAction(lambda s,l,t: tuple([t[1], t[2][0]]))
        parsers['declaration'].setParseAction(lambda s,l,t: ('declare', dict(t[1][:])))
        parsers['comment'].setParseAction(lambda s,l,t: ('description', t[0][1:-1]))
        parsers['rule_name'].setParseAction(lambda s,l,t: ('name', t[0]))
        parsers['conditional_element_group'].setParseAction(lambda s,l,t: ('lhs', t[0][:]))
        parsers['action_group'].setParseAction(lambda s,l,t: ('rhs', t[0][:]))
        parsers['defrule_construct'].setParseAction(lambda s,l,t: ('defrule', dict([x for x in t if isinstance(x, tuple)])))
        parsers['constant'].setParseAction(lambda s,l,t:(Eq, t[0]))
        parsers['variable_symbol'].setParseAction(lambda s,l,t:(Variable, t[0][1:]))
        parsers['variable_undef'].setParseAction(lambda s,l,t:(Variable, None))
        parsers['pattern_CE'].setParseAction(lambda s,l,t:(PositivePredicate, t[1][:]))
        parsers['not_term'].setParseAction(lambda s,l,t:(NotEq, t[1][1]) if t[1][0] == Eq else (Variable.withPredicate(NotEq), t[1][1]) if t[1][0] == Variable else (NotEq, t[1]) )
        parsers['test_CE'].setParseAction(lambda s,l,t:(TestPredicate.withPredicate(t[2]), t[3][:]))
        parsers['assigned_pattern_CE'].setParseAction(lambda s,l,t:(PositivePredicate, t[2][1][:], t[0][1]))
        parsers['and_CE'].setParseAction(lambda s,l,t: [t[1][:]] )
        parsers['not_CE'].setParseAction(lambda s,l,t: (NegativePredicate, t[1][1]) if t[1][0] == PositivePredicate else (NccPredicate, t[1]))
        parsers['predicate_name'].setParseAction(lambda s,l,t: _get_predicate_from_string(t[0]) )
        parsers['function_name'].setParseAction(lambda s,l,t: _get_function_from_string(t[0]) )
        # Register the handler for function calls: it tries to rewrite the
        # call as if it were a constant when all of its terms are constants.
        parsers['function_call'].setParseAction(ClipsEbnf._try_rewrite_staticfunction)
        parsers['term_function_call'].setParseAction(lambda s,l,t: t[0] if len(t) == 1 else t[1] )
        parsers['deffacts_name'].setParseAction(lambda s,l,t: ('name', t[0]))
        parsers['rhs_pattern'].setParseAction(lambda s,l,t: [t[1][:]])
        parsers['rhs_pattern_group'].setParseAction(lambda s,l,t: ('facts', t[0][:]))
        parsers['deffacts_construct'].setParseAction(lambda s,l,t: ('deffacts', dict([x for x in t if isinstance(x, tuple)]).get('facts')))
        parsers['action_quoted_text'].setParseAction(lambda s,l,t: "".join(t) )
        parsers['action_call'].setParseAction(lambda s,l,t: (t[1],t[2][:]) )
        parsers['action_name'].setParseAction(lambda s,l,t: _get_action_from_string(t[0]) )
        parsers['setstrategy_construct'].setParseAction(lambda s,l,t: ('set-strategy', _get_strategy_from_string(t[1]) ))

        #parsers['MYCLIPS_directive'].setParseAction(lambda s,l,t: ('myclips-directive', (t[0], t[1][:]) ))

        # A ';' comment runs to the end of the line, unless it starts with
        # ';@' (that prefix is matched by the MYCLIPS_directive rule above).
        clipsComment = ( ";" + pp.NotAny('@') + pp.SkipTo("\n") ).setName("clips_comment")

        parsers['CLIPS_program'].setParseAction(lambda s,l,t: t[0][:])
        parsers['CLIPS_program'].ignore(clipsComment)

        if debug:
            # Show information about the loaded predicates, functions and
            # actions.  Single-argument print(...) behaves the same on
            # Python 2 and Python 3.
            print("Predicati caricati:")
            print("\t" + "\n\t".join(predicates.Proxy.get_predicates().keys()))
            print("Funzioni caricate:")
            print("\t" + "\n\t".join(functions.Proxy.get_functions().keys()))
            print("Azioni caricate:")
            print("\t" + "\n\t".join(actions.Proxy.get_actions().keys()))
            # Python 2 builtin: blocks until a line is read from stdin.
            raw_input()

        ClipsEbnf._CACHED_CLIPS_EBNF = parsers
        ClipsEbnf._DEBUG = debug

    return ClipsEbnf._CACHED_CLIPS_EBNF['CLIPS_program']
#!/usr/bin/env python3
from pyparsing import *
import ebnf

ParserElement.enablePackrat()

# Raw strings: @"..." or @@"..."@@ (the latter may span several lines).
singleRawString = QuotedString(quoteChar='@"', endQuoteChar='"')
doubleRawString = QuotedString(quoteChar='@@"', endQuoteChar='"@@', multiline=True)
rawString = singleRawString | doubleRawString

# Hand-written parsers for the terminal rules, handed to ebnf.parse.
table = {
    # Non-greedy trick from http://pyparsing.wikispaces.com/share/view/178079
    # (the negative lookahead rejects input that starts with the text END).
    "Identifier": ~Literal('END') + Word(alphas, alphanums + "_"),
    # Decimal, or a 0b / 0o / 0x prefix followed by digits of that base;
    # underscores are accepted among the digits.
    "Number": Word(nums, nums + "_e.") ^ ("0b" + Word("01_")) ^ ("0o" + Word("01234567_")) ^ ("0x" + Word("0123456789abcdefABCDEF_")),
    "StringLiteral": Regex(r'"(?:[^"\r\n\\]|(?:\\\((?:[^")]|"[^"]*")*\))|(?:\\.))*"') | rawString,
    "restOfLine": restOfLine,
    "ExecBody": Regex(r".*?;"),
}

# Read the grammar inside a `with` block so the file handle is closed
# deterministically instead of being left to the garbage collector.
with open("contrib/grammar/neon.ebnf") as grammar_file:
    parsers = ebnf.parse(grammar_file.read(), table)
from pyparsing import *
import ebnf

ParserElement.enablePackrat()

# Raw strings: @"..." or @@"..."@@ (the latter may span several lines).
singleRawString = QuotedString(quoteChar='@"', endQuoteChar='"')
doubleRawString = QuotedString(quoteChar='@@"', endQuoteChar='"@@', multiline=True)
rawString = singleRawString | doubleRawString

# Hand-written parsers for the terminal rules, handed to ebnf.parse.
table = {
    # Non-greedy trick from http://pyparsing.wikispaces.com/share/view/178079
    # (the negative lookahead rejects input that starts with the text END).
    "Identifier": ~Literal('END') + Word(alphas, alphanums + "_"),
    # Decimal, a 0b / 0o / 0x prefix followed by digits of that base, or the
    # form 0#<digits>#<alphanumerics>; underscores are accepted among digits.
    "Number": Word(nums, nums + "_e.") ^ ("0b" + Word("01_")) ^ ("0o" + Word("01234567_")) ^ ("0x" + Word("0123456789abcdefABCDEF_")) ^ ("0#" + Word(nums) + "#" + Word(alphanums + "_")),
    "StringLiteral": Regex(r'"(?:[^"\r\n\\]|(?:\\\((?:[^")]|"[^"]*")*\))|(?:\\.))*"') | rawString,
    "restOfLine": restOfLine,
    "ExecBody": Regex(r".*?;"),
}

# Read the grammar inside a `with` block so the file handle is closed
# deterministically instead of being left to the garbage collector.
with open("contrib/grammar/neon.ebnf") as grammar_file:
    parsers = ebnf.parse(grammar_file.read(), table)