def flatten(self, row):
    """Yield one expanded copy of *row* per numbered group of columns.

    When ``self.flatteninfo`` is empty, *row* itself is yielded once and the
    generator ends. Otherwise each entry of ``self.flatteninfo`` is read as a
    mapping with the keys ``'original'`` (a column-name pattern containing a
    placeholder matched by the module-level ``template``), ``'new'`` (the key
    to write the value under) and optionally ``'extracol'`` (a key that
    receives the concrete source column name).

    The text inside the placeholder (the match minus its first and last two
    characters - presumably ``{{N}}``-style delimiters, confirm against
    ``template``) gives the starting number for that entry; the number is
    incremented by one for every row produced. Generation stops as soon as
    any computed column name is missing from *row*; a row that was only
    partially filled at that point is not yielded.

    Raises:
        ValueError: if an ``'original'`` name has no placeholder, or (from
            ``int``) if the placeholder's inner text is not an integer.
    """
    if not self.flatteninfo:
        yield row
        return
    # -1 marks "starting number not read from the placeholder yet".
    next_number = [-1] * len(self.flatteninfo)
    while True:
        expanded = copy.deepcopy(row)
        for pos, spec in enumerate(self.flatteninfo):
            pattern_name = spec['original']
            found = template.search(pattern_name)
            if not found:
                raise ValueError('Column name for flattening lacks an incrementing number!')
            placeholder = found.group()
            if next_number[pos] != -1:
                number_text = '%d' % next_number[pos]
            else:
                # First pass: keep the placeholder's inner text verbatim for
                # the lookup and remember its integer value for later passes.
                number_text = placeholder[2:-2]
                next_number[pos] = int(number_text)
            source_col = pattern_name.replace(placeholder, number_text)
            if source_col not in row:
                # Ran out of numbered columns: drop the partial row and stop.
                return
            expanded[spec['new']] = row[source_col]
            name_key = spec.get('extracol')
            if name_key:
                expanded[name_key] = source_col
            next_number[pos] += 1
        yield expanded
def get_url(url, **kwargs):
    """Fill the first template placeholder in *url* with an evaluated value.

    Every keyword argument is bound as a variable by executing
    ``<name>=<value>``, so each value is interpreted as Python source (a
    literal string must carry its own quotes, e.g. ``today='"2020-01-01"'``).
    The first match of the module-level ``template`` pattern in *url* is then
    evaluated - minus its first and last two characters - with those
    variables in scope, and the result replaces every occurrence of the
    matched text.

    Args:
        url: URL that may contain a placeholder expression.
        **kwargs: Variable names mapped to Python source for their values.

    Returns:
        The URL with the placeholder substituted, or *url* unchanged when the
        pattern does not match.
    """
    # SECURITY: exec/eval run arbitrary code. This is only acceptable while
    # both the URL template and the keyword values come from trusted
    # configuration - verify against callers before exposing to user input.
    #
    # An explicit namespace is shared by exec and eval: relying on the
    # implicit function locals stopped working in Python 3.13 (PEP 667),
    # where each implicit locals() is an independent snapshot and names
    # bound by exec would no longer be visible to the later eval.
    namespace = {}
    for name, value in kwargs.items():
        exec('%s=%s' % (name, value), globals(), namespace)
    match = template.search(url)
    if match:
        template_string = match.group()
        replace_string = eval(template_string[2:-2], globals(), namespace)
        url = url.replace(template_string, replace_string)
    return url
def get_adm(admcol, i):
    """Read an admin value from the current row and resolve it at level *i*.

    ``self``, ``row``, ``adms``, ``scrapername`` and ``template`` are free
    variables supplied by the surrounding scope. If *admcol* contains a
    placeholder matched by ``template``, the placeholder's inner text (the
    match minus its first and last two characters) is parsed as an integer
    index into ``self.headers`` to obtain the real column name.

    Returns:
        ``False`` when the row's value for the column is empty; otherwise
        the result of ``self.admininfo.get_adm(adms, i, scrapername)`` after
        storing the stripped value in ``adms[i]``.
    """
    found = template.search(admcol)
    if found:
        placeholder = found.group()
        header_index = int(placeholder[2:-2])
        admcol = self.headers[header_index]
    value = row[admcol]
    if not value:
        return False
    adms[i] = row[admcol].strip()
    return self.admininfo.get_adm(adms, i, scrapername)