Ejemplo n.º 1
0
 def flatten(self, row):
     """Yield rows in which numbered source columns are folded into fixed names.

     If ``self.flatteninfo`` is falsy, ``row`` itself (not a copy) is yielded
     once and the generator ends.

     Otherwise every entry of ``self.flatteninfo`` is read as a mapping with:

     * ``'original'`` - a column-name pattern containing a placeholder that
       the module-level ``template`` pattern matches. The two leading and two
       trailing characters of the placeholder are stripped and the rest is
       parsed as the starting integer (presumably ``{{N}}`` - confirm against
       the definition of ``template``).
     * ``'new'`` - the key under which the value is stored in the output row.
     * ``'extracol'`` (optional) - if truthy, a key that receives the
       resolved source column name.

     Each pass deep-copies ``row``, resolves every pattern with its current
     number, copies the values across and then advances the numbers. The
     generator stops, discarding the partially built row, as soon as any
     resolved column name is absent from ``row``.

     Raises:
         ValueError: if an ``'original'`` pattern has no placeholder.
     """
     if not self.flatteninfo:
         yield row
         return
     # -1 marks "starting number not yet read from the placeholder".
     next_numbers = [-1 for _entry in self.flatteninfo]
     while True:
         expanded = copy.deepcopy(row)
         for idx, spec in enumerate(self.flatteninfo):
             pattern_name = spec['original']
             found = template.search(pattern_name)
             if not found:
                 raise ValueError('Column name for flattening lacks an incrementing number!')
             placeholder = found.group()
             if counters_started(next_numbers, idx):
                 number_text = '%d' % next_numbers[idx]
             else:
                 # First pass: use the digits exactly as written in the
                 # placeholder and seed the counter from them.
                 number_text = placeholder[2:-2]
                 next_numbers[idx] = int(number_text)
             source_column = pattern_name.replace(placeholder, number_text)
             if source_column not in row:
                 # No more numbered columns: end the whole generator.
                 return
             expanded[spec['new']] = row[source_column]
             extra_key = spec.get('extracol')
             if extra_key:
                 expanded[extra_key] = source_column
             next_numbers[idx] += 1
         yield expanded

 def counters_started(numbers, position):
     """Return True when the counter at ``position`` has left its -1 sentinel."""
     return numbers[position] != -1
Ejemplo n.º 2
0
def get_url(url, **kwargs):
    """Expand a template placeholder in ``url`` by evaluating it as Python.

    Each keyword argument is turned into an assignment ``name=value`` where
    the value is interpolated with ``%s``, i.e. it is treated as Python
    *source text*, not as an object (pass ``"'abc'"`` to bind the string
    ``abc``). The first placeholder found in ``url`` by the module-level
    ``template`` pattern is stripped of its two leading and two trailing
    characters, evaluated as an expression with those names in scope, and
    every occurrence of that placeholder text in ``url`` is replaced by the
    result.

    Args:
        url: URL that may contain a placeholder.
        **kwargs: names made available to the placeholder expression; each
            value is formatted into source code and executed.

    Returns:
        ``url`` with the placeholder substituted, or ``url`` unchanged when
        no placeholder is found.

    Raises:
        TypeError: if the placeholder expression does not evaluate to a
            ``str`` (raised by ``str.replace``).
        Exception: whatever the executed or evaluated code raises.
    """
    # SECURITY: exec/eval run arbitrary code taken from ``kwargs`` and from
    # the URL template. Only call this with trusted configuration, never with
    # user-supplied input.
    #
    # The assignments and the expression share one explicit namespace.
    # Relying on the function's implicit locals (exec followed by eval with
    # no namespace arguments) is not guaranteed to work: since Python 3.13
    # each implicit locals() is an independent snapshot, so names created by
    # exec would be invisible to the later eval.
    namespace = {}
    module_globals = globals()
    for name, source in kwargs.items():
        exec('%s=%s' % (name, source), module_globals, namespace)
    match = template.search(url)
    if match:
        template_string = match.group()
        replace_string = eval(template_string[2:-2], module_globals, namespace)
        url = url.replace(template_string, replace_string)
    return url
Ejemplo n.º 3
0
 def get_adm(admcol, i):
     """Store the admin value for level ``i`` from the current row and resolve it.

     This is a closure: ``self``, ``row``, ``adms`` and ``scrapername`` come
     from the enclosing scope, and ``template`` from module level.

     If ``admcol`` contains a placeholder matched by ``template``, the text
     between its two leading and two trailing characters is parsed as an
     integer index into ``self.headers`` and the header found there is used
     as the column name instead.

     Args:
         admcol: column name, or a placeholder holding a header index.
         i: position in ``adms`` to fill.

     Returns:
         ``False`` when the cell in ``row`` is falsy; otherwise the result of
         ``self.admininfo.get_adm(adms, i, scrapername)`` after ``adms[i]``
         has been set to the stripped cell value.
     """
     found = template.search(admcol)
     if found:
         placeholder = found.group()
         header_index = int(placeholder[2:-2])
         admcol = self.headers[header_index]
     cell = row[admcol]
     if cell:
         adms[i] = row[admcol].strip()
         return self.admininfo.get_adm(adms, i, scrapername)
     return False