cups = line[i:j].strip() all_errors = {} while c_line: m = tuple(self.slices(c_line, length_c)) m = map(lambda s: s.strip(), m) consums = build_dict(self.headers_cons, m) consums['name'] = cups result, errors = self.measures_adapter.load(consums) if errors: logger.error(errors) all_errors.update(errors) measures.append(result) start += step c_line = line[start:start + step].strip() return measures, all_errors def parse_line(self, line): line = unicode(line.decode(self.encoding)) all_errors = {} ps, ps_errors = self.parse_ps(line) measures, measures_errors = self.parse_measures(line) parsed = {'ps': ps, 'measures': measures, 'orig': line} if ps_errors: all_errors.update(ps_errors) if measures_errors: all_errors.update(measures_errors) return parsed, all_errors register(Iberdrola)
self.fields = self.fields_ps def parse_line(self, line): slinia = tuple(unicode(line.decode(self.encoding)).split(self.delimiter)) slinia = map(lambda s: s.strip(), slinia) parsed = {'ps': {}, 'measures': {}, 'orig': line} data = build_dict(self.headers_ps, slinia) result, errors = self.adapter.load(data) if errors: logger.error(errors) parsed['ps'] = result return parsed, errors register(Endesa) class EndesaCons(Parser): delimiter = ';' pattern = '(SEVILLANA|FECSA|ERZ|UNELCO|GESA).INF2.SEG0[1-5].(zip|ZIP)$' encoding = "iso-8859-15" def __init__(self, strict=False): self.schema = EndesaMeasuresSchema(strict=strict) self.adapter = EndesaMeasuresAdapter(strict=strict) self.measures_adapter = self.adapter self.fields = [] self.headers = [] for f in sorted(self.schema.fields,
# passar previament la linia pel csv reader # per que agafi be els camps tot i les comes dins del camp direccio # per fer-ho cal passar-la a StringIO l = StringIO.StringIO(line) reader = csv.DictReader(l, fieldnames=self.headers_ps, delimiter=',') linia = reader.next() # nomes n'hi ha una parsed = {'ps': {}, 'orig': line} result, errors = self.adapter.load(linia) if errors: logger.error(errors) parsed['ps'] = result return parsed, errors register(Cnmc) class CnmcCons(Parser): # En el cas de les mesures, usem Schema per mantenir el format i # perque no hi trobarem mes comes que les delimiters pattern = '[0-9]{4}-[0-9]{2}-[0-9]{2}_electricidad_consumos.csv' encoding = "UTF-8" delimiter = ',' def __init__(self, strict=False): self.schema = CnmcMeasuresSchema(strict=strict) self.adapter = CnmcMeasuresAdapter(strict=strict) self.measures_adapter = self.adapter self.fields = [] self.headers = []
def parse_line(self, line): line = unicode(line.decode(self.encoding)) slinia = tuple(self.slices(line, self.slices_ps)) slinia = map(lambda s: s.strip(), slinia) pslist = slinia[0:len(self.fields_ps)] # Llista dels valors del tros que agafem dins dels sips data = build_dict(self.headers_ps, pslist) result, errors = self.ps_adapter.load(data) if errors: logger.error(errors) parsed = {'ps': result, 'measures': [], 'orig': line} return parsed, errors register(Hidrocantabrico) class HidrocantabricoMeasures(Parser): pattern = 'HIDROCANTABRICO_CO.*\.(zip|TXT)$' encoding = "iso-8859-15" def __init__(self, strict=False): self.schema = HidrocantabricoMeasuresSchema(strict=strict) self.adapter = HidrocantabricoMeasuresAdapter(strict=strict) self.measures_adapter = self.adapter self.fields = [] self.headers = [] self.measures_slices = [] for f in sorted(self.schema.fields, key=lambda f: self.schema.fields[f].metadata['position']):