Beispiel #1
0
def stream_parse_datetime(stream,
                          field,
                          input_timezone,
                          output_timezone,
                          output_date=None,
                          output_time=None,
                          output_hour=None,
                          on_error=BabeBase.ON_ERROR_WARN):
    """Parse a date/time field on every row and emit timezone-converted values.

    For each data row, the value of ``field`` is parsed with
    ``parse_datetime``, localized with ``timezone(input_timezone)`` and then
    converted to ``timezone(output_timezone)``. The converted value is stored
    in whichever of the output fields were requested.

    Args:
        stream: Iterable of ``StreamHeader``, other ``StreamMeta`` objects and
            data rows (rows must provide ``_asdict()``).
        field: Name of the row attribute holding the value to parse.
        input_timezone: Argument passed to ``timezone()`` for the source zone
            (presumably a pytz zone name -- confirm against the imports).
        output_timezone: Argument passed to ``timezone()`` for the target zone.
        output_date: Optional field name receiving the converted date.
        output_time: Optional field name receiving the converted datetime.
        output_hour: Optional field name receiving the converted hour.
        on_error: What to do with a row that fails to parse or convert:
            ``ON_ERROR_WARN`` logs through ``BabeBase.log_warn`` and drops the
            row, ``ON_ERROR_FAIL`` re-raises the error, ``ON_ERROR_SKIP``
            drops the row silently, ``ON_ERROR_NONE`` yields the row with
            every requested output field set to ``None``.

    Yields:
        The header (extended with any output fields it did not already
        contain), every other ``StreamMeta`` object unchanged, and one row
        built with ``header.t`` per successfully processed data row.

    Raises:
        Exception: Whatever the parsing/conversion raised, when ``on_error``
            is ``BabeBase.ON_ERROR_FAIL``.
    """
    input_tz = timezone(input_timezone)
    output_tz = timezone(output_timezone)
    header = None
    for row in stream:
        if isinstance(row, StreamHeader):
            # Only add output fields that the incoming header lacks.
            added_fields = [
                f for f in [output_time, output_date, output_hour]
                if f and f not in row.fields
            ]
            if added_fields:
                header = row.insert(None, added_fields)
            else:
                header = row
            yield header
        elif isinstance(row, StreamMeta):
            yield row
        else:
            try:
                time_value = input_tz.localize(
                    parse_datetime(getattr(row, field)))
                time_value_ext = time_value.astimezone(output_tz)
                d = row._asdict()
                if output_time:
                    d[output_time] = time_value_ext
                if output_date:
                    # Calendar date in the output timezone.
                    d[output_date] = time_value_ext.date()
                if output_hour:
                    d[output_hour] = time_value_ext.hour
                yield header.t(**d)
            except Exception as e:
                if on_error == BabeBase.ON_ERROR_WARN:
                    BabeBase.log_warn("parse_time", row, e)
                elif on_error == BabeBase.ON_ERROR_FAIL:
                    # Bare raise keeps the original traceback intact.
                    raise
                elif on_error == BabeBase.ON_ERROR_SKIP:
                    pass
                elif on_error == BabeBase.ON_ERROR_NONE:
                    d = row._asdict()
                    for k in [output_time, output_date, output_hour]:
                        if k:
                            d[k] = None
                    yield header.t(**d)
Beispiel #2
0
def join(stream,
         join_stream,
         key,
         join_key,
         add_fields=None,
         on_error=BabeBase.ON_ERROR_WARN):
    """Enrich the rows of ``stream`` with fields looked up in ``join_stream``.

    ``join_stream`` is read first, up to its first ``StreamFooter``, and
    indexed by the ``join_key`` attribute of its rows; when a key occurs
    several times the first row wins. Each data row of ``stream`` is then
    matched on its ``key`` attribute and extended with the joined fields.

    Args:
        stream: Main iterable of header, meta objects and data rows.
        join_stream: Iterable providing the rows to look values up in.
        key: Attribute name used to match rows of ``stream``.
        join_key: Attribute name used to index rows of ``join_stream``.
        add_fields: Field names to copy from the matching join row. When
            falsy, every field of the join header except ``join_key`` is used.
        on_error: Handling of rows without a match: ``ON_ERROR_WARN`` logs
            through ``BabeBase.log_warn`` and drops the row, ``ON_ERROR_FAIL``
            raises, ``ON_ERROR_NONE`` yields the row with the joined fields
            set to ``None``, ``ON_ERROR_SKIP`` drops the row silently.

    Yields:
        The extended header, other ``StreamMeta`` objects unchanged, and one
        row built with ``header.t`` per emitted data row.

    Raises:
        Exception: For an unmatched key when ``on_error`` is
            ``BabeBase.ON_ERROR_FAIL``.
    """
    # Build the lookup table; setdefault keeps the first row seen per key.
    lookup = {}
    join_header = None
    for candidate in join_stream:
        if isinstance(candidate, StreamHeader):
            join_header = candidate
            continue
        if isinstance(candidate, StreamFooter):
            break
        lookup.setdefault(getattr(candidate, join_key), candidate)

    for row in stream:
        if isinstance(row, StreamHeader):
            if add_fields:
                fields = add_fields
            else:
                fields = [
                    name for name in join_header.fields if name != join_key
                ]
            header = row.insert(typename=None, fields=fields)
            yield header
            continue
        if isinstance(row, StreamMeta):
            yield row
            continue

        value = getattr(row, key)
        if value in lookup:
            match = lookup[value]
            merged = row._asdict()
            for name in fields:
                merged[name] = getattr(match, name)
            yield header.t(**merged)
        elif on_error == BabeBase.ON_ERROR_WARN:
            BabeBase.log_warn("join", row, "Not matching value for key")
        elif on_error == BabeBase.ON_ERROR_FAIL:
            raise Exception("No matching value for key %s" % value)
        elif on_error == BabeBase.ON_ERROR_NONE:
            merged = row._asdict()
            for name in fields:
                merged[name] = None
            yield header.t(**merged)
        elif on_error == BabeBase.ON_ERROR_SKIP:
            # Unmatched row is dropped without any message.
            continue
def stream_parse_datetime(stream,
                          field,
                          input_timezone,
                          output_timezone,
                          output_date=None,
                          output_time=None,
                          output_hour=None,
                          on_error=BabeBase.ON_ERROR_WARN):
    """Convert a parsed date/time field to another timezone, row by row.

    The ``field`` attribute of each data row is run through
    ``parse_datetime``, localized with ``timezone(input_timezone)`` and
    converted to ``timezone(output_timezone)``. The result is written to the
    requested output fields: the full value to ``output_time``, its
    year/month/day as a ``datetime.date`` to ``output_date`` and its hour to
    ``output_hour``.

    Args:
        stream: Iterable of ``StreamHeader``, other ``StreamMeta`` objects and
            data rows exposing ``_asdict()``.
        field: Attribute name of the value to parse.
        input_timezone: Value handed to ``timezone()`` for the source zone.
        output_timezone: Value handed to ``timezone()`` for the target zone.
        output_date: Optional name of the field receiving the date.
        output_time: Optional name of the field receiving the datetime.
        output_hour: Optional name of the field receiving the hour.
        on_error: Reaction to a failing row: ``ON_ERROR_WARN`` logs with
            ``BabeBase.log_warn`` and drops the row, ``ON_ERROR_FAIL``
            re-raises, ``ON_ERROR_SKIP`` drops the row silently,
            ``ON_ERROR_NONE`` yields the row with the requested output fields
            set to ``None``.

    Yields:
        The (possibly extended) header, other meta objects unchanged, and
        rows built through ``header.t``.
    """
    source_tz = timezone(input_timezone)
    target_tz = timezone(output_timezone)
    # Candidate output fields, in the order they are appended to the header.
    targets = [output_time, output_date, output_hour]
    header = None
    for row in stream:
        if isinstance(row, StreamHeader):
            missing = []
            for name in targets:
                if name and name not in row.fields:
                    missing.append(name)
            header = row.insert(None, missing) if missing else row
            yield header
            continue
        if isinstance(row, StreamMeta):
            yield row
            continue

        try:
            parsed = parse_datetime(getattr(row, field))
            converted = source_tz.localize(parsed).astimezone(target_tz)
            values = row._asdict()
            if output_time:
                values[output_time] = converted
            if output_date:
                values[output_date] = datetime.date(converted.year,
                                                    converted.month,
                                                    converted.day)
            if output_hour:
                values[output_hour] = converted.hour
            yield header.t(**values)
        except Exception as e:
            if on_error == BabeBase.ON_ERROR_WARN:
                BabeBase.log_warn("parse_time", row, e)
            elif on_error == BabeBase.ON_ERROR_FAIL:
                raise e
            elif on_error == BabeBase.ON_ERROR_SKIP:
                # Failing row is dropped without any message.
                continue
            elif on_error == BabeBase.ON_ERROR_NONE:
                values = row._asdict()
                for name in targets:
                    if name:
                        values[name] = None
                yield header.t(**values)
Beispiel #4
0
def join(stream, join_stream, key, join_key, add_fields=None,
         on_error=BabeBase.ON_ERROR_WARN):
    """Join ``stream`` against an in-memory index built from ``join_stream``.

    Rows of ``join_stream`` are consumed until its first ``StreamFooter`` and
    stored under their ``join_key`` attribute (first occurrence kept). Data
    rows of ``stream`` are then looked up by their ``key`` attribute and
    extended with the selected fields of the matching join row.

    Args:
        stream: Main iterable of header, meta objects and data rows.
        join_stream: Iterable whose rows supply the joined values.
        key: Attribute name read from rows of ``stream``.
        join_key: Attribute name read from rows of ``join_stream``.
        add_fields: Names of the fields to copy over; when falsy, all fields
            of the join header other than ``join_key``.
        on_error: Policy for rows without a match: ``ON_ERROR_WARN`` logs via
            ``BabeBase.log_warn`` and drops the row, ``ON_ERROR_FAIL`` raises,
            ``ON_ERROR_NONE`` yields the row with joined fields set to
            ``None``, ``ON_ERROR_SKIP`` drops the row silently.

    Yields:
        The extended header, other ``StreamMeta`` objects as-is, and rows
        built with ``header.t``.

    Raises:
        Exception: On an unmatched key when ``on_error`` is
            ``BabeBase.ON_ERROR_FAIL``.
    """
    rows_by_key = {}
    join_header = None
    for jrow in join_stream:
        if isinstance(jrow, StreamHeader):
            join_header = jrow
        elif isinstance(jrow, StreamFooter):
            break
        else:
            jkey = getattr(jrow, join_key)
            # Keep only the first row encountered for each key.
            if jkey not in rows_by_key:
                rows_by_key[jkey] = jrow

    for row in stream:
        if isinstance(row, StreamHeader):
            fields = add_fields
            if not fields:
                fields = [f for f in join_header.fields if f != join_key]
            header = row.insert(typename=None, fields=fields)
            yield header
        elif isinstance(row, StreamMeta):
            yield row
        else:
            k = getattr(row, key)
            # Stored values are always row objects, so None means "no match".
            jrow = rows_by_key.get(k)
            if jrow is not None:
                out = row._asdict()
                for f in fields:
                    out[f] = getattr(jrow, f)
                yield header.t(**out)
                continue

            if on_error == BabeBase.ON_ERROR_WARN:
                BabeBase.log_warn("join", row, "Not matching value for key")
            elif on_error == BabeBase.ON_ERROR_FAIL:
                raise Exception("No matching value for key %s" % k)
            elif on_error == BabeBase.ON_ERROR_NONE:
                out = row._asdict()
                for f in fields:
                    out[f] = None
                yield header.t(**out)
            elif on_error == BabeBase.ON_ERROR_SKIP:
                pass