def generateRules(self):
    """Rebuild ``self._rules`` from freshly learned rules plus edited ones.

    Rules are learned via ``fieldlearner.learn`` from the source field, the
    marked-up events and the counter examples held on this object.  Rules
    already in ``self._rules`` that report ``isEdited()`` are preserved; a
    learned rule whose pattern equals the pattern of a preserved rule is
    dropped.  The resulting list holds the surviving learned rules first,
    followed by the preserved edited rules.
    """
    candidates = fieldlearner.learn(self._sourceField, self._markedUpEvents, self._counterExamples)

    # Edited rules are kept across regeneration.
    preserved = [existing for existing in self._rules if existing.isEdited()]

    # Start from an empty rule list and refill it below.
    self._rules = []

    for candidate in candidates:
        regex = candidate.getPattern()
        values = candidate.getFieldValues()

        # Skip a learned rule when an edited rule already has the same pattern.
        # NOTE(review): edited rules are queried through `_pattern()` while
        # learned rules use `getPattern()` -- confirm `_pattern` is really
        # callable on Rule objects.
        if any(kept._pattern() == regex for kept in preserved):
            continue

        # No edited rule matched: wrap the learned pattern in a new Rule.
        self._rules.append(Rule(regex, values))

    # Edited rules always go after the newly learned ones.
    self._rules.extend(preserved)
pos = 0 vals = splitExampleValues(exampleSet) # PREVENT ABUSE if len(vals) > MAX_FIELDS: addMessage(args, _("Too many fields specified for extraction. Using first %s values.") % MAX_FIELDS, CWARN) vals = vals[:MAX_FIELDS] for example in vals: pos += 1 llog("EXAMPLE: %s" % example) if example in raw: llog("FOUND: example %s raw %s" % (example, raw)) markedEvent["FIELDNAME%s" % pos] = example markedEvents[i] = markedEvent for i, me in markedEvents.items(): llog("ME: %s" % me) rules = mfl.learn("_raw", markedEvents, counterExamples) regexes = [rule.getPattern() for rule in rules] llog("EXAMPLES: %s %s" % (examples, type(examples))) llog("REGEXES: %s" % regexes) # for id, e in markedEvents.items(): # for rule in rules: # extractions = rule.findExtractions(e) # regexes, extractions = mfl.learn(events, examples, args['counterexamples']) if len(regexes) > 0: regex = regexes[0] except Exception, e: llog("PROBLEM: %s" % e) import traceback llog(traceback.format_exc())
raw = event markedEvent["_event"] = { sourceField : raw } for exampleSet in examples: # !! hack pos = 0 vals = splitExampleValues(exampleSet) for example in vals: pos += 1 llog("EXAMPLE: %s" % example) if example in raw: llog("FOUND: example %s raw %s" % (example, raw)) markedEvent["FIELDNAME%s" % pos] = example markedEvents[i] = markedEvent for i, me in markedEvents.items(): llog("ME: %s" % me) rules = mfl.learn("_raw", markedEvents, counterExamples) regexes = [rule.getPattern() for rule in rules] llog("EXAMPLES: %s %s" % (examples, type(examples))) llog("REGEXES: %s" % regexes) # for id, e in markedEvents.items(): # for rule in rules: # extractions = rule.findExtractions(e) # regexes, extractions = mfl.learn(events, examples, args['counterexamples']) if len(regexes) > 0: regex = regexes[0] except Exception, e: llog("PROBLEM: %s" % e) import traceback llog(traceback.format_exc())