class BaseTemplate: def __init__(self, tolerance): self.template = Template(tolerance=tolerance) def learn(self, inputs): if not inputs: return None map(self.template.learn, inputs) def extract(self, sample): return self.template.extract(sample) def pattern(self, urls): tmp = self.template.as_text(PLACE_HOLDER) cols = {} for url in urls: cols = self.checkFieldType(url, cols) for col in xrange(0, len(cols)): tmp = re.sub(PLACE_HOLDER, cols[col], tmp) return tmp def checkFieldType(self, url, cols): col = 0 fields = self.extract(url) for field in fields: if col not in cols: cols[col] = '(\d+)' try: isInt = int(field) except: cols[col] = '(.+)' return cols
def create(self, tolerance, *inputs): """ "Helper method that returns a Template with the given tolerance and inputs. """ t = Template(tolerance=tolerance) for i in inputs: t.learn(i) return t
def __init__(self, tolerance): self.template = Template(tolerance=tolerance)