def stream_parse_datetime(stream, field, input_timezone, output_timezone, output_date=None, output_time=None, output_hour=None, on_error=BabeBase.ON_ERROR_WARN):
    """Parse the datetime string held in `field` on each data row, localize it
    to `input_timezone`, convert it to `output_timezone`, and attach the
    requested derived fields to the row.

    Parameters:
        stream: iterable of StreamHeader / StreamMeta / data rows.
        field: name of the row attribute containing the datetime string.
        input_timezone, output_timezone: tz names understood by `timezone()`
            (presumably pytz-style names — TODO confirm against callers).
        output_time: if set, field name receiving the converted datetime.
        output_date: if set, field name receiving the converted date.
        output_hour: if set, field name receiving the converted hour (int).
        on_error: one of the BabeBase.ON_ERROR_* policies applied when a row
            fails to parse (warn / fail / skip / emit None values).

    Yields the (possibly extended) header, meta rows unchanged, and rewritten
    data rows.

    Fix vs. original: `except Exception, e:` is Python 2-only syntax and is a
    SyntaxError on Python 3; the sibling definition in this file already uses
    `except Exception as e:`.
    """
    input_tz = timezone(input_timezone)
    output_tz = timezone(output_timezone)
    header = None
    for row in stream:
        if isinstance(row, StreamHeader):
            # Extend the header only with requested fields not already present.
            added_fields = [f for f in (output_time, output_date, output_hour)
                            if f and f not in row.fields]
            header = row.insert(None, added_fields) if added_fields else row
            yield header
        elif isinstance(row, StreamMeta):
            yield row
        else:
            try:
                time_value = input_tz.localize(parse_datetime(getattr(row, field)))
                time_value_ext = time_value.astimezone(output_tz)
                d = row._asdict()
                if output_time:
                    d[output_time] = time_value_ext
                if output_date:
                    # .date() is exactly datetime.date(year, month, day).
                    d[output_date] = time_value_ext.date()
                if output_hour:
                    d[output_hour] = time_value_ext.hour
                yield header.t(**d)
            except Exception as e:
                if on_error == BabeBase.ON_ERROR_WARN:
                    BabeBase.log_warn("parse_time", row, e)
                elif on_error == BabeBase.ON_ERROR_FAIL:
                    raise  # re-raise, preserving the original traceback
                elif on_error == BabeBase.ON_ERROR_SKIP:
                    pass
                elif on_error == BabeBase.ON_ERROR_NONE:
                    # Emit the row with None in every requested output field.
                    d = row._asdict()
                    for k in (output_time, output_date, output_hour):
                        if k:
                            d[k] = None
                    yield header.t(**d)
def join(stream, join_stream, key, join_key, add_fields=None, on_error=BabeBase.ON_ERROR_WARN):
    """Hash-join `stream` against `join_stream`.

    `join_stream` is fully consumed first and indexed by `join_key` (the FIRST
    row seen for each key wins). Each data row of `stream` whose `key` value
    matches is extended with the joined fields.

    Parameters:
        stream: the left-hand stream being decorated.
        join_stream: the right-hand stream, read up to its StreamFooter.
        key: attribute name on `stream` rows used for lookup.
        join_key: attribute name on `join_stream` rows used for indexing.
        add_fields: explicit list of fields to copy from the joined row; when
            None, every join_stream field except `join_key` is copied.
        on_error: BabeBase.ON_ERROR_* policy applied when a row has no match
            (warn / raise / fill with None / skip).

    Yields the extended header, meta rows unchanged, and joined data rows.
    NOTE(review): a data row arriving before the header would hit `fields`
    unbound — assumes the stream contract guarantees header-first.
    """
    d = {}
    join_header = None
    for row in join_stream:
        if isinstance(row, StreamHeader):
            join_header = row
        elif isinstance(row, StreamFooter):
            break
        else:
            # setdefault keeps the existing entry: first occurrence wins.
            d.setdefault(getattr(row, join_key), row)
    for row in stream:
        if isinstance(row, StreamHeader):
            if add_fields:
                fields = add_fields
            else:
                fields = [field for field in join_header.fields if field != join_key]
            header = row.insert(typename=None, fields=fields)
            yield header
        elif isinstance(row, StreamMeta):
            yield row
        else:
            k = getattr(row, key)
            if k in d:
                dd = row._asdict()
                jrow = d[k]
                for field in fields:
                    dd[field] = getattr(jrow, field)
                yield header.t(**dd)
            else:
                if on_error == BabeBase.ON_ERROR_WARN:
                    BabeBase.log_warn("join", row, "Not matching value for key")
                elif on_error == BabeBase.ON_ERROR_FAIL:
                    raise Exception("No matching value for key %s" % k)
                elif on_error == BabeBase.ON_ERROR_NONE:
                    # Emit the row with None in every joined field.
                    dd = row._asdict()
                    for f in fields:
                        dd[f] = None
                    yield header.t(**dd)
                elif on_error == BabeBase.ON_ERROR_SKIP:
                    pass
def stream_parse_datetime(stream, field, input_timezone, output_timezone, output_date=None, output_time=None, output_hour=None, on_error=BabeBase.ON_ERROR_WARN):
    """Parse the datetime string in `field` on every data row, localize it to
    `input_timezone`, convert to `output_timezone`, and populate whichever of
    `output_time` / `output_date` / `output_hour` are requested. Rows that
    fail to parse are handled per the `on_error` BabeBase.ON_ERROR_* policy.
    """
    src_tz = timezone(input_timezone)
    dst_tz = timezone(output_timezone)
    requested = (output_time, output_date, output_hour)
    header = None
    for item in stream:
        if isinstance(item, StreamHeader):
            # Only fields that are requested AND missing get added to the header.
            new_fields = [name for name in requested if name and name not in item.fields]
            header = item.insert(None, new_fields) if new_fields else item
            yield header
            continue
        if isinstance(item, StreamMeta):
            yield item
            continue
        try:
            localized = src_tz.localize(parse_datetime(getattr(item, field)))
            converted = localized.astimezone(dst_tz)
            values = item._asdict()
            if output_time:
                values[output_time] = converted
            if output_date:
                values[output_date] = datetime.date(converted.year, converted.month, converted.day)
            if output_hour:
                values[output_hour] = converted.hour
            yield header.t(**values)
        except Exception as e:
            if on_error == BabeBase.ON_ERROR_WARN:
                BabeBase.log_warn("parse_time", item, e)
            elif on_error == BabeBase.ON_ERROR_FAIL:
                raise e
            elif on_error == BabeBase.ON_ERROR_SKIP:
                pass
            elif on_error == BabeBase.ON_ERROR_NONE:
                # Emit the row with None in each requested output field.
                values = item._asdict()
                for name in requested:
                    if name:
                        values[name] = None
                yield header.t(**values)
def join(stream, join_stream, key, join_key, add_fields=None, on_error=BabeBase.ON_ERROR_WARN):
    """Hash-join `stream` against `join_stream`: the right-hand stream is
    indexed by `join_key` (first row per key wins), then each data row of the
    left-hand stream is extended with fields from its matching row. Missing
    matches are handled per the `on_error` BabeBase.ON_ERROR_* policy.
    """
    lookup = {}
    join_header = None
    for item in join_stream:
        if isinstance(item, StreamHeader):
            join_header = item
        elif isinstance(item, StreamFooter):
            break
        else:
            value = getattr(item, join_key)
            if value not in lookup:
                lookup[value] = item
    for item in stream:
        if isinstance(item, StreamHeader):
            if add_fields:
                fields = add_fields
            else:
                # Copy everything from the join stream except the key itself.
                fields = [name for name in join_header.fields if name != join_key]
            header = item.insert(typename=None, fields=fields)
            yield header
        elif isinstance(item, StreamMeta):
            yield item
        else:
            value = getattr(item, key)
            if value in lookup:
                matched = lookup[value]
                data = item._asdict()
                for name in fields:
                    data[name] = getattr(matched, name)
                yield header.t(**data)
            elif on_error == BabeBase.ON_ERROR_WARN:
                BabeBase.log_warn("join", item, "Not matching value for key")
            elif on_error == BabeBase.ON_ERROR_FAIL:
                raise Exception("No matching value for key %s" % value)
            elif on_error == BabeBase.ON_ERROR_NONE:
                # No match: emit the row with None in every joined field.
                data = item._asdict()
                for name in fields:
                    data[name] = None
                yield header.t(**data)
            elif on_error == BabeBase.ON_ERROR_SKIP:
                pass