def _serialize_datetime(val):
    """
    Serialize Bson and python datetime types

    The value is localized to the zone returned by ``tzlocal.get_localzone()``,
    converted to UTC and rendered with ``singer_strftime``.

    Args:
        val: datetime value

    Returns:
        serialized datetime value, or None when ``val`` is not a datetime

    Raises:
        MongoDBInvalidDatetimeError: when a bson datetime cannot be converted;
            the original exception is attached as the cause.
    """
    # NOTE(review): bson.datetime may simply be the stdlib datetime module, in
    # which case the second branch below is never reached -- confirm.
    if isinstance(val, bson.datetime.datetime):
        timezone = tzlocal.get_localzone()
        try:
            local_datetime = timezone.localize(val)
            utc_datetime = local_datetime.astimezone(pytz.UTC)
        except Exception as ex:
            if str(ex) == 'year is out of range' and val.year == 0:
                # NB: Since datetimes are persisted as strings, it doesn't
                # make sense to blow up on invalid Python datetimes (e.g.,
                # year=0). In this case we're formatting it as a string and
                # passing it along down the pipeline.
                return '{:04d}-{:02d}-{:02d}T{:02d}:{:02d}:{:02d}.{:06d}Z'.format(
                    val.year,
                    val.month,
                    val.day,
                    val.hour,
                    val.minute,
                    val.second,
                    val.microsecond,
                )

            # Chain the original error explicitly so the root cause stays
            # visible, matching safe_transform_datetime.
            raise MongoDBInvalidDatetimeError(
                'Found invalid datetime {}'.format(val)
            ) from ex

        return singer_strftime(utc_datetime)

    if isinstance(val, datetime.datetime):
        # Conversion errors on this branch propagate unchanged.
        timezone = tzlocal.get_localzone()
        local_datetime = timezone.localize(val)
        utc_datetime = local_datetime.astimezone(pytz.UTC)
        return singer_strftime(utc_datetime)

    return None
def safe_transform_datetime(value: datetime.datetime, path) -> str:
    """
    Convert a datetime from the local timezone to a UTC string, tolerating year 0

    Args:
        value: datetime value to transform
        path: keys/indexes leading to the value; joined with '.' in the error message

    Returns:
        utc datetime as string

    Raises:
        MongoDBInvalidDatetimeError: when the conversion fails for any reason
            other than the tolerated "year is out of range" case with year 0.
    """
    local_zone = tzlocal.get_localzone()

    try:
        as_utc = local_zone.localize(value).astimezone(pytz.UTC)
    except Exception as ex:
        year_zero = str(ex) == 'year is out of range' and value.year == 0

        if not year_zero:
            raise MongoDBInvalidDatetimeError(
                'Found invalid datetime at [{}]: {}'.format('.'.join(map(str, path)), value)
            ) from ex

        # Datetimes end up persisted as strings, so an invalid Python datetime
        # (year=0) is rendered by hand and passed down the pipeline instead of
        # aborting the run.
        return '{:04d}-{:02d}-{:02d}T{:02d}:{:02d}:{:02d}.{:06d}Z'.format(
            value.year,
            value.month,
            value.day,
            value.hour,
            value.minute,
            value.second,
            value.microsecond,
        )

    return singer_strftime(as_utc)
def transform_value(self, key, value):
    """
    Format the value of a datetime field; leave everything else untouched.

    Args:
        key: field name, checked against ``self.datetime_fields``
        value: field value; for datetime fields it is divided by 1000 before
            being passed to ``fromtimestamp`` (i.e. treated as milliseconds)

    Returns:
        ``singer_strftime`` output for truthy values of datetime fields,
        otherwise ``value`` unchanged
    """
    if key not in self.datetime_fields or not value:
        return value

    moment = datetime.datetime.fromtimestamp(value / 1000, pytz.utc)

    # reformat to use RFC3339 format
    return singer_strftime(moment)
def class_to_string(key_value: Any, key_type: str) -> str:
    """
    Render a key value as a string according to its declared type

    Supported types: datetime, bson Timestamp, bytes, int, Int64, float,
    ObjectId, str and UUID

    Args:
        key_value: The value to convert to string
        key_type: name of the value's type

    Returns:
        string equivalent of key value

    Raises:
        UnsupportedKeyTypeException: if key_type is not supported
    """
    if key_type == 'datetime':
        aware = key_value
        if aware.tzinfo is None:
            # Naive datetimes are localized to the local zone first.
            aware = tzlocal.get_localzone().localize(aware)
        return singer_strftime(aware.astimezone(pytz.UTC))

    if key_type == 'Timestamp':
        return '{}.{}'.format(key_value.time, key_value.inc)

    if key_type == 'bytes':
        encoded = base64.b64encode(key_value)
        return encoded.decode('utf-8')

    plain_types = ('int', 'Int64', 'float', 'ObjectId', 'str', 'UUID')
    if key_type not in plain_types:
        raise UnsupportedKeyTypeException('{} is not a supported key type'.format(key_type))

    return str(key_value)
def default(self, o):  # false positive complaint -> pylint: disable=E0202
    """
    Serialize BSON and Python types that the stock JSON encoder cannot handle

    Args:
        o: Object to serialize

    Returns:
        Serialized value; objects whose exact class has no encoder here are
        delegated to the parent class ``default``
    """
    encoders = {
        bson.objectid.ObjectId: str,
        uuid.UUID: str,
        bson.int64.Int64: str,
        bson.timestamp.Timestamp: lambda value: singer_strftime(value.as_datetime()),
        bytes: lambda value: base64.b64encode(value).decode('utf-8'),
        bson.decimal128.Decimal128: lambda val: val.to_decimal(),
        bson.regex.Regex: lambda val: {'pattern': val.pattern, 'flags': val.flags},
        bson.code.Code: lambda val: (
            {'value': str(val), 'scope': str(val.scope)} if val.scope else str(val)
        ),
        bson.dbref.DBRef: lambda val: {
            'id': str(val.id),
            'collection': val.collection,
            'database': val.database,
        },
        datetime.datetime: self._serialize_datetime,
        bson.datetime.datetime: self._serialize_datetime,
    }

    # Lookup is by exact class, so subclasses fall through to the parent.
    encoder = encoders.get(o.__class__)
    if encoder is None:
        return super(MongoDBJsonEncoder, self).default(o)

    return encoder(o)
def transform_value(self, key, value):
    """
    Format the value of a datetime field; leave everything else untouched.

    Args:
        key: field name, checked against ``self.datetime_fields``
        value: field value; for datetime fields it is parsed with ``parser.parse``

    Returns:
        ``singer_strftime`` output for truthy values of datetime fields,
        otherwise ``value`` unchanged
    """
    if key not in self.datetime_fields or not value:
        return value

    parsed = parser.parse(value)

    # The tzinfo is overwritten with UTC (no offset conversion is applied),
    # then the result is reformatted to use RFC3339 format.
    return singer_strftime(parsed.replace(tzinfo=pytz.utc))
def transform_date(datestr):
    """
    Parse a date string and re-render it with ``singer_strftime``.

    Strings starting with '=' are expected in the form ="YYYY-MM-DD HH:MM:SS"
    with optional fractional seconds, and are stamped as UTC. Any other
    string is handed to ``strptime_to_utc``.

    Args:
        datestr: date string to transform

    Returns:
        the date formatted by ``singer_strftime``
    """
    if not datestr.startswith('='):
        parsed = strptime_to_utc(datestr)
    else:
        # datestr might not include fractional seconds, in which case the
        # first pattern raises ValueError and the plain one is tried.
        try:
            naive = datetime.datetime.strptime(datestr, '="%Y-%m-%d %H:%M:%S.%f"')
        except ValueError:
            naive = datetime.datetime.strptime(datestr, '="%Y-%m-%d %H:%M:%S"')
        parsed = naive.replace(tzinfo=pytz.UTC)

    # reformat to use RFC3339 format
    return singer_strftime(parsed)
def transform_value(value: Any, path) -> Any:
    """
    transform values to json friendly ones

    Lists and dicts are transformed recursively; ``path`` is extended with the
    index or key of each child.

    Args:
        value: value to transform
        path: list of keys/indexes leading to ``value``

    Returns:
        transformed value, or ``value`` itself when no conversion applies
    """
    # NOTE(review): if bson.datetime is the stdlib datetime module, the
    # bson.datetime.datetime and datetime.datetime keys collide and the later
    # entry wins -- confirm which handler is intended.
    conversion = {
        list: lambda val, pat: [
            transform_value(item, pat + [idx]) for idx, item in enumerate(val)
        ],
        dict: lambda val, pat: {k: transform_value(v, pat + [k]) for k, v in val.items()},
        uuid.UUID: lambda val, _: class_to_string(val, 'UUID'),
        bson.objectid.ObjectId: lambda val, _: class_to_string(val, 'ObjectId'),
        bson.datetime.datetime: safe_transform_datetime,
        bson.timestamp.Timestamp: lambda val, _: singer_strftime(val.as_datetime()),
        bson.int64.Int64: lambda val, _: class_to_string(val, 'Int64'),
        bytes: lambda val, _: class_to_string(val, 'bytes'),
        datetime.datetime: lambda val, _: class_to_string(val, 'datetime'),
        bson.decimal128.Decimal128: lambda val, _: val.to_decimal(),
        bson.regex.Regex: lambda val, _: dict(pattern=val.pattern, flags=val.flags),
        bson.code.Code: lambda val, _: (
            dict(value=str(val), scope=str(val.scope)) if val.scope else str(val)
        ),
        bson.dbref.DBRef: lambda val, _: dict(
            id=str(val.id), collection=val.collection, database=val.database
        ),
    }

    # Resolve the handler through the MRO rather than by exact type: a
    # subclass of a supported type (e.g. a dict subclass) would pass an
    # isinstance() check but raise KeyError on a conversion[type(value)] lookup.
    for klass in type(value).__mro__:
        handler = conversion.get(klass)
        if handler is not None:
            return handler(value, path)

    return value
def safe_strftime(dt):
    """
    Format ``dt`` with the Singer strftime, falling back to a plain format string.

    Different implementations of the C strftime lib render years differently.
    When the lib is not the expected version the Singer output looks like

        4Y-01-01 12:00:00...

    so a result starting with '4Y' is treated as that failure mode and the
    value is formatted again with an alternative format string.
    """
    formatted = singer_strftime(dt)
    if not formatted.startswith('4Y'):
        return formatted

    return dt.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
def transform_date(value):
    """
    Convert a POSIX timestamp to a UTC string via ``singer_strftime``.

    Args:
        value: timestamp as accepted by ``datetime.fromtimestamp``

    Returns:
        the UTC datetime formatted by ``singer_strftime``
    """
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; building
    # the aware UTC datetime directly yields the same instant as the previous
    # utcfromtimestamp(...).replace(tzinfo=pytz.UTC).
    return singer_strftime(datetime.fromtimestamp(value, tz=pytz.UTC))