예제 #1
0
class BaseTemplate:
    """Wrapper around a ``Template`` that can render a regex-like pattern.

    ``Template`` and ``PLACE_HOLDER`` are defined outside this block; the
    wrapped object is expected to provide ``learn``, ``extract`` and
    ``as_text``.
    """

    def __init__(self, tolerance):
        """Create the underlying ``Template`` with the given tolerance."""
        self.template = Template(tolerance=tolerance)

    def learn(self, inputs):
        """Feed every item of ``inputs`` to the underlying template.

        An empty (or ``None``) ``inputs`` is a no-op. Always returns ``None``.
        """
        if not inputs:
            return None
        # Explicit loop instead of a bare ``map`` call: ``map`` is lazy on
        # Python 3, so the original never invoked ``learn`` there.
        for sample in inputs:
            self.template.learn(sample)
        return None

    def extract(self, sample):
        """Return whatever the underlying template extracts from ``sample``."""
        return self.template.extract(sample)

    def pattern(self, urls):
        """Render the template as text with one capture group per column.

        Each column's group is chosen by ``checkFieldType`` across all
        ``urls``: ``(\\d+)`` when every observed value parses as an int,
        ``(.+)`` otherwise. Placeholders are filled left to right, one per
        column.
        """
        tmp = self.template.as_text(PLACE_HOLDER)
        cols = {}
        for url in urls:
            cols = self.checkFieldType(url, cols)
        for col in range(len(cols)):
            replacement = cols[col]
            # count=1 so each column fills only the next placeholder; a
            # callable replacement keeps backslashes such as ``\d`` literal
            # instead of having ``re`` parse them as escapes.
            tmp = re.sub(PLACE_HOLDER, lambda _match: replacement, tmp, count=1)
        return tmp

    def checkFieldType(self, url, cols):
        """Update ``cols`` with the capture group needed for each field.

        ``cols`` maps a zero-based field index to a regex fragment. A column
        starts as ``(\\d+)`` and is widened to ``(.+)`` as soon as one of its
        values is not an integer. The dict is mutated in place and returned.
        """
        for col, field in enumerate(self.extract(url)):
            cols.setdefault(col, r'(\d+)')
            try:
                int(field)
            except (TypeError, ValueError):
                # Non-numeric value seen: the column needs the wide pattern.
                cols[col] = '(.+)'
        return cols
예제 #2
0
 def create(self, tolerance, *inputs):
     """
     Build a Template with the given tolerance, have it learn each of
     the given inputs in order, and return it.
     """
     template = Template(tolerance=tolerance)
     teach = template.learn
     for sample in inputs:
         teach(sample)
     return template
예제 #3
0
 def create(self, tolerance, *inputs):
     """
     Build a Template with the given tolerance, have it learn each of
     the given inputs in order, and return it.
     """
     template = Template(tolerance=tolerance)
     teach = template.learn
     for sample in inputs:
         teach(sample)
     return template
예제 #4
0
 def __init__(self, tolerance):
     """Store a new Template built with the given tolerance."""
     template = Template(tolerance=tolerance)
     self.template = template