Example #1
0
File: lens.py Project: sucof/pulsar
 def match(self):
     """Match the message against the template, token by token.

     Returns the list of values found at field positions (template
     tokens equal to ""), or None when the token counts differ or a
     non-field template token differs from the URL-quoted message value.
     """
     # choose the scanner according to the configured lexer type
     if self.lexerType == LEXER_TOKENS:
         scanned = scanTokens(self.msg, self.ws)
     else:
         scanned = scanNgrams(self.msg)
     if len(scanned) != len(self.template.content):
         # a different number of tokens can never match
         return None
     captured = []
     for expected, entry in zip(self.template.content, scanned):
         value = entry[1]
         if expected == "":
             # an empty template token marks a field: keep its value
             captured.append(value)
         elif urllib.quote(value) != expected:
             # fixed token disagrees with the template
             return None
     return captured
Example #2
0
 def match(self):
     """Match the message against the template, token by token.

     Returns the list of values found at field positions (template
     tokens equal to ""), or None when the token counts differ or a
     non-field template token differs from the URL-quoted message value.
     """
     # NOTE: self.lexerType apparently arrives as a boolean here, so the
     # former comparison against LEXER_TOKENS always selected the n-gram
     # scanner and matching failed on the token count mismatch.  The
     # token scanner is therefore chosen when lexerType equals False.
     if self.lexerType == False:
         scanned = scanTokens(self.msg, self.ws)
     else:
         scanned = scanNgrams(self.msg)
     if len(scanned) != len(self.template.content):
         # a different number of tokens can never match
         return None
     captured = []
     for expected, entry in zip(self.template.content, scanned):
         value = entry[1]
         if expected == "":
             # an empty template token marks a field: keep its value
             captured.append(value)
         elif urllib.quote(value) != expected:
             # fixed token disagrees with the template
             return None
     return captured
Example #3
0
File: lens.py Project: sucof/pulsar
 def match(self):
     """Collect field values and the message/template distance.

     Returns a tuple (fields, d): fields holds the scanned values that
     line up with empty template tokens, and d is the result of
     distance() between the raw message and the joined template content.
     No token-count check is done; zip() stops at the shorter sequence.
     """
     # choose the scanner according to the configured lexer type
     if self.lexerType == LEXER_TOKENS:
         scanned = scanTokens(self.msg, self.ws)
     else:
         scanned = scanNgrams(self.msg)
     # pair each template token with the value of its scanned token
     paired = [(expected, entry[1])
               for expected, entry in zip(self.template.content, scanned)]
     # an empty template token marks a field
     captured = [value for expected, value in paired if expected == ""]
     # distance between the message and the concatenated template
     template_text = ''.join(self.template.content)
     return captured, distance(self.msg, template_text)
Example #4
0
 def match(self):
     """Collect field values and the message/template distance.

     Returns a tuple (fields, d): fields holds the scanned values that
     line up with empty template tokens, and d is the result of
     distance() between the raw message and the joined template content.
     No token-count check is done; zip() stops at the shorter sequence.
     """
     # token scanner for LEXER_TOKENS, n-gram scanner for anything else
     scanned = (scanTokens(self.msg, self.ws)
                if self.lexerType == LEXER_TOKENS
                else scanNgrams(self.msg))
     captured = []
     for expected, entry in zip(self.template.content, scanned):
         value = entry[1]
         # an empty template token marks a field
         if expected == "":
             captured.append(value)
     # distance between the message and the concatenated template
     return captured, distance(self.msg, ''.join(self.template.content))
Example #5
0
 def match(self):
     """Match the message against the template, token by token.

     Returns the list of values found at field positions (template
     tokens equal to ""), or None when the token counts differ or a
     non-field template token differs from the URL-quoted message value.
     """
     # tokenize the message with the scanner the lexer type asks for
     if self.lexerType == LEXER_TOKENS:
         scanned = scanTokens(self.msg, self.ws)
     else:
         scanned = scanNgrams(self.msg)
     if len(scanned) != len(self.template.content):
         # no match due to different token count
         return None
     captured = []
     for expected, entry in zip(self.template.content, scanned):
         value = entry[1]
         if expected == "":
             # field position: remember the value and move on
             captured.append(value)
             continue
         if urllib.quote(value) != expected:
             # fixed token disagrees with the template
             return None
     return captured
Example #6
0
 def getTokensForMsg(self, msgIndex):
     """Return the scanned tokens of the message stored at msgIndex.

     scanTokens (with self.whitespace) is used when self.ngram equals 0,
     scanNgrams otherwise.
     """
     if self.ngram == 0:
         return scanTokens(self.messages[msgIndex], self.whitespace)
     return scanNgrams(self.messages[msgIndex])
Example #7
0
 def getTokensForMsg(self, msgIndex):
     """Return the scanned tokens of the message stored at msgIndex.

     scanTokens (with self.whitespace) is used when self.ngram equals 0,
     scanNgrams otherwise.
     """
     useTokenScanner = self.ngram == 0
     if not useTokenScanner:
         # any non-zero ngram setting selects the n-gram scanner
         return scanNgrams(self.messages[msgIndex])
     return scanTokens(self.messages[msgIndex], self.whitespace)