예제 #1
0
    def _serialize_datetime(val):
        """
        Render a BSON or Python datetime as a UTC string
        Args:
            val: datetime value; it is passed to ``localize`` of the zone
                returned by ``tzlocal.get_localzone()`` before being
                converted to UTC

        Returns: the ``singer_strftime`` rendering of the UTC datetime, a
            hand-formatted string for the year-0 special case, or None when
            ``val`` is not a datetime

        Raises: MongoDBInvalidDatetimeError when the conversion of a BSON
            datetime fails for any reason other than the year-0 case
        """
        if isinstance(val, bson.datetime.datetime):
            local_tz = tzlocal.get_localzone()
            try:
                as_utc = local_tz.localize(val).astimezone(pytz.UTC)
            except Exception as ex:
                year_zero = str(ex) == 'year is out of range' and val.year == 0
                if not year_zero:
                    raise MongoDBInvalidDatetimeError(
                        'Found invalid datetime {}'.format(val))

                # NB: datetimes are persisted as strings, so an invalid
                # Python datetime (e.g. year=0) is not worth failing on:
                # format it by hand and pass it along down the pipeline.
                parts = (val.year, val.month, val.day, val.hour, val.minute,
                         val.second, val.microsecond)
                return '{:04d}-{:02d}-{:02d}T{:02d}:{:02d}:{:02d}.{:06d}Z'.format(*parts)

            return singer_strftime(as_utc)

        if not isinstance(val, datetime.datetime):
            return None

        # Plain Python datetimes take the same local -> UTC route, but
        # without the year-0 fallback.
        local_tz = tzlocal.get_localzone()
        return singer_strftime(local_tz.localize(val).astimezone(pytz.UTC))
예제 #2
0
def safe_transform_datetime(value: datetime.datetime, path) -> str:
    """
    Convert a datetime from the local timezone to UTC and render it as a string
    Args:
        value: datetime value to transform
        path: iterable of path components locating ``value``; only used to
            build the dotted location shown in the error message

    Returns: utc datetime as string

    Raises: MongoDBInvalidDatetimeError when the conversion fails for any
        reason other than the year-0 special case
    """
    local_tz = tzlocal.get_localzone()
    try:
        as_utc = local_tz.localize(value).astimezone(pytz.UTC)
    except Exception as ex:
        year_zero = str(ex) == 'year is out of range' and value.year == 0
        if not year_zero:
            location = '.'.join(map(str, path))
            raise MongoDBInvalidDatetimeError(
                'Found invalid datetime at [{}]: {}'.format(location, value)
            ) from ex

        # NB: datetimes are persisted as strings, so an invalid Python
        # datetime (e.g. year=0) is not worth failing on: format it by hand
        # and pass it along down the pipeline.
        parts = (
            value.year,
            value.month,
            value.day,
            value.hour,
            value.minute,
            value.second,
            value.microsecond,
        )
        return '{:04d}-{:02d}-{:02d}T{:02d}:{:02d}:{:02d}.{:06d}Z'.format(*parts)

    return singer_strftime(as_utc)
예제 #3
0
    def transform_value(self, key, value):
        """
        Reformat the values of datetime fields, pass everything else through
        Args:
            key: field name, checked against ``self.datetime_fields``
            value: field value; for datetime fields it is divided by 1000
                and read as a POSIX timestamp (presumably epoch
                milliseconds - confirm against the source API)

        Returns: the ``singer_strftime`` rendering for a truthy datetime
            field value, otherwise ``value`` unchanged
        """
        # Falsy values (None, 0, '') are returned untouched, even for
        # datetime fields.
        if key not in self.datetime_fields or not value:
            return value

        as_utc = datetime.datetime.fromtimestamp(value/1000, pytz.utc)
        # reformat to use RFC3339 format
        return singer_strftime(as_utc)
예제 #4
0
def class_to_string(key_value: Any, key_type: str) -> str:
    """
    Build the string form of a key value according to its declared type
    The supported types are: datetime, bson Timestamp, bytes, int, Int64, float, ObjectId, str and UUID
    Args:
        key_value: The value to convert to string
        key_type: name of the value's type; selects the conversion

    Returns: string equivalent of key value
    Raises: UnsupportedKeyTypeException if key_type is not supported
    """
    # Types whose str() form is used as is.
    if key_type in ('int', 'Int64', 'float', 'ObjectId', 'str', 'UUID'):
        return str(key_value)

    if key_type == 'bytes':
        encoded = base64.b64encode(key_value)
        return encoded.decode('utf-8')

    if key_type == 'Timestamp':
        return '{}.{}'.format(key_value.time, key_value.inc)

    if key_type != 'datetime':
        raise UnsupportedKeyTypeException('{} is not a supported key type'.format(key_type))

    # Naive datetimes are first localized to the local zone; aware ones are
    # converted to UTC directly.
    aware = key_value
    if key_value.tzinfo is None:
        aware = tzlocal.get_localzone().localize(key_value)

    return singer_strftime(aware.astimezone(pytz.UTC))
예제 #5
0
    def default(self, o): # false positive complaint -> pylint: disable=E0202
        """
        Serialize the BSON and Python types listed in the converter table
        Args:
            o: Object to serialize

        Returns: Serialized value; an object whose exact class is not in the
            table is delegated to the parent class's ``default``
        """
        def _timestamp(val):
            return singer_strftime(val.as_datetime())

        def _bytes(val):
            return base64.b64encode(val).decode('utf-8')

        def _decimal(val):
            return val.to_decimal()

        def _regex(val):
            return {'pattern': val.pattern, 'flags': val.flags}

        def _code(val):
            # Code without a scope collapses to its plain string form.
            if val.scope:
                return {'value': str(val), 'scope': str(val.scope)}
            return str(val)

        def _dbref(val):
            return {'id': str(val.id), 'collection': val.collection, 'database': val.database}

        converters = {
            bson.objectid.ObjectId: str,
            uuid.UUID: str,
            bson.int64.Int64: str,
            bson.timestamp.Timestamp: _timestamp,
            bytes: _bytes,
            bson.decimal128.Decimal128: _decimal,
            bson.regex.Regex: _regex,
            bson.code.Code: _code,
            bson.dbref.DBRef: _dbref,
            datetime.datetime: self._serialize_datetime,
            bson.datetime.datetime: self._serialize_datetime
        }

        # The lookup is by exact class: subclasses of the listed types are
        # not matched and go to the parent encoder.
        converter = converters.get(o.__class__)
        if converter is None:
            return super(MongoDBJsonEncoder, self).default(o)

        return converter(o)
예제 #6
0
    def transform_value(self, key, value):
        """
        Reformat the values of datetime fields, pass everything else through
        Args:
            key: field name, checked against ``self.datetime_fields``
            value: field value; for datetime fields it is handed to
                ``parser.parse`` (presumably dateutil's parser - confirm)

        Returns: the ``singer_strftime`` rendering for a truthy datetime
            field value, otherwise ``value`` unchanged
        """
        # Falsy values (None, '') are returned untouched, even for datetime
        # fields.
        if key not in self.datetime_fields or not value:
            return value

        parsed = parser.parse(value)
        # The parsed result is tagged as UTC via replace(), not converted
        # with astimezone().
        tagged_utc = parsed.replace(tzinfo=pytz.utc)
        # reformat to use RFC3339 format
        return singer_strftime(tagged_utc)
예제 #7
0
def transform_date(datestr):
    """
    Parse a date string and re-render it with ``singer_strftime``

    Strings starting with ``=`` are parsed as ``="YYYY-MM-DD HH:MM:SS[.ffffff]"``
    and tagged as UTC; any other string is handed to ``strptime_to_utc``.

    Args:
        datestr: date string to transform

    Returns: the ``singer_strftime`` rendering of the parsed date
    """
    if not datestr.startswith('='):
        parsed = strptime_to_utc(datestr)
    else:
        # The seconds may come without a fractional part, in which case the
        # first format raises ValueError and the plain one is tried.
        try:
            naive = datetime.datetime.strptime(
                datestr, '="%Y-%m-%d %H:%M:%S.%f"')
        except ValueError:
            naive = datetime.datetime.strptime(
                datestr, '="%Y-%m-%d %H:%M:%S"')
        parsed = naive.replace(tzinfo=pytz.UTC)

    # reformat to use RFC3339 format
    return singer_strftime(parsed)
예제 #8
0
def transform_value(value: Any, path) -> Any:
    """
    Recursively transform values to json friendly ones
    Args:
        value: value to transform
        path: list of the dict keys / list indexes walked to reach ``value``;
            a new, extended list is built for each nested element and the
            list is forwarded to the converters

    Returns: transformed value, or ``value`` itself when no converter
        applies to its type

    """
    conversion = {
        list:
        lambda val, pat: [
            transform_value(item, pat + [idx]) for idx, item in enumerate(val)
        ],
        dict:
        lambda val, pat:
        {k: transform_value(v, pat + [k])
         for k, v in val.items()},
        uuid.UUID:
        lambda val, _: class_to_string(val, 'UUID'),
        bson.objectid.ObjectId:
        lambda val, _: class_to_string(val, 'ObjectId'),
        # NOTE(review): if ``bson.datetime`` is just the stdlib ``datetime``
        # module as imported by bson, this key and the ``datetime.datetime``
        # key below are the same class and the later entry silently replaces
        # this converter - confirm which one is intended.
        bson.datetime.datetime:
        safe_transform_datetime,
        bson.timestamp.Timestamp:
        lambda val, _: singer_strftime(val.as_datetime()),
        bson.int64.Int64:
        lambda val, _: class_to_string(val, 'Int64'),
        bytes:
        lambda val, _: class_to_string(val, 'bytes'),
        datetime.datetime:
        lambda val, _: class_to_string(val, 'datetime'),
        bson.decimal128.Decimal128:
        lambda val, _: val.to_decimal(),
        bson.regex.Regex:
        lambda val, _: dict(pattern=val.pattern, flags=val.flags),
        bson.code.Code:
        lambda val, _: dict(value=str(val), scope=str(val.scope))
        if val.scope else str(val),
        bson.dbref.DBRef:
        lambda val, _:
        dict(id=str(val.id), collection=val.collection, database=val.database),
    }

    # Exact type first, so every value that was converted before is still
    # converted by the very same entry.
    converter = conversion.get(type(value))
    if converter is None:
        # Instances of subclasses of a listed type (e.g. an OrderedDict) have
        # no entry of their own; looking them up by exact type used to raise
        # KeyError, so fall back to the first listed type they are an
        # instance of.
        converter = next(
            (func for known_type, func in conversion.items()
             if isinstance(value, known_type)),
            None,
        )

    if converter is None:
        return value

    return converter(value, path)
예제 #9
0
def safe_strftime(dt):
    """
    Format ``dt`` with the Singer strftime helper, guarding against a broken year

    Args:
        dt: datetime value to format

    Returns: the ``singer_strftime`` output, or the output of
        ``dt.strftime('%Y-%m-%dT%H:%M:%S.%fZ')`` when the former starts
        with the literal ``4Y``
    """
    # The C strftime implementations do not all render years the same way.
    # When the underlying lib is not the expected one, the Singer helper
    # produces something like
    #
    # 4Y-01-01 12:00:00...
    #
    # That failure mode is detected through the '4Y' prefix and an
    # alternative format string is used instead.
    rendered = singer_strftime(dt)
    if not rendered.startswith('4Y'):
        return rendered

    return dt.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
예제 #10
0
def transform_date(value):
    """
    Render a POSIX timestamp as a UTC date string

    Args:
        value: POSIX timestamp, as accepted by ``datetime.fromtimestamp``

    Returns: the ``singer_strftime`` rendering of the timezone-aware UTC
        datetime for ``value``
    """
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; asking
    # fromtimestamp() for UTC directly yields the same aware datetime as
    # utcfromtimestamp(value).replace(tzinfo=pytz.UTC).
    return singer_strftime(datetime.fromtimestamp(value, tz=pytz.UTC))