Beispiel #1
0
def _check_timedelta(td):
    """
    Convert strings to Timedelta objects
    Allows for both shortform and longform units, as well as any form of capitalization
    '2 Minutes'
    '2 minutes'
    '2 m'
    '1 Minute'
    '1 minute'
    '1 m'
    '1 units'
    '1 Units'
    '1 u'
    Shortform is fine if space is dropped
    '2m'
    '1u"
    If a pd.Timedelta object is passed, units will be converted to seconds due to the underlying representation
        of pd.Timedelta.
    If a pd.DateOffset object is passed, it will be converted to a Featuretools Timedelta if it has one
        temporal parameter. Otherwise, it will remain a pd.DateOffset.
    """
    if td is None:
        return td
    if isinstance(td, Timedelta):
        return td
    elif not isinstance(td, (int, float, str, pd.DateOffset, pd.Timedelta)):
        raise ValueError("Unable to parse timedelta: {}".format(td))
    if isinstance(td, pd.Timedelta):
        unit = 's'
        value = td.total_seconds()
        times = {unit: value}
        return Timedelta(times, delta_obj=td)
    elif isinstance(td, pd.DateOffset):
        # DateOffsets
        if td.__class__.__name__ == "DateOffset":
            times = dict()
            for td_unit, td_value in td.kwds.items():
                times[td_unit] = td_value
            return Timedelta(times, delta_obj=td)
        # Special offsets (such as BDay)
        else:
            unit = td.__class__.__name__
            value = td.__dict__['n']
            times = dict([(unit, value)])
            return Timedelta(times, delta_obj=td)
    else:
        pattern = '([0-9]+) *([a-zA-Z]+)$'
        match = re.match(pattern, td)
        value, unit = match.groups()
        try:
            value = int(value)
        except Exception:
            try:
                value = float(value)
            except Exception:
                raise ValueError(
                    "Unable to parse value {} from ".format(value) +
                    "timedelta string: {}".format(td))
        times = {unit: value}
        return Timedelta(times)
Beispiel #2
0
    def from_dictionary(cls, arguments, entityset, dependencies, primitive):
        """Rebuild a feature of type ``cls`` from its dictionary form.

        Args:
            arguments: mapping read for the keys "base_features",
                "relationship_path", "use_previous", "where" and "name",
                plus the optional key "feature_names".
            entityset: forwarded to ``Relationship.from_dictionary`` for
                every entry of ``arguments["relationship_path"]``.
            dependencies: mapping indexed by the names found in
                ``arguments["base_features"]`` and ``arguments["where"]``.
            primitive: passed through unchanged to ``cls``.

        Returns:
            The object produced by calling ``cls``, with its ``_names``
            attribute set to ``arguments.get("feature_names")``.
        """
        base_features = []
        for feature_name in arguments["base_features"]:
            base_features.append(dependencies[feature_name])

        relationships = []
        for serialized in arguments["relationship_path"]:
            relationships.append(Relationship.from_dictionary(serialized, entityset))

        # The parent dataframe is taken from the first relationship.
        parent_dataframe_name = relationships[0].parent_dataframe.ww.name
        # NOTE(review): the False flag is presumably the direction marker
        # RelationshipPath expects for each step -- confirm.
        path = RelationshipPath(
            [(False, relationship) for relationship in relationships]
        )

        # A falsy serialized window is forwarded as-is instead of being parsed.
        serialized_window = arguments["use_previous"]
        if serialized_window:
            use_previous = Timedelta.from_dictionary(serialized_window)
        else:
            use_previous = serialized_window

        # Same convention for the optional "where" feature name.
        where_name = arguments["where"]
        where = dependencies[where_name] if where_name else where_name

        feature = cls(
            base_features=base_features,
            parent_dataframe_name=parent_dataframe_name,
            primitive=primitive,
            relationship_path=path,
            use_previous=use_previous,
            where=where,
            name=arguments["name"],
        )
        feature._names = arguments.get("feature_names")
        return feature
Beispiel #3
0
    def from_dictionary(cls, arguments, entityset, dependencies,
                        primitives_deserializer):
        """Rebuild a feature of type ``cls`` from its dictionary form.

        Args:
            arguments: mapping read for the keys 'base_features',
                'relationship_path', 'primitive', 'use_previous', 'where'
                and 'name'.
            entityset: forwarded to ``Relationship.from_dictionary`` for
                every entry of ``arguments['relationship_path']``.
            dependencies: mapping indexed by the names found in
                ``arguments['base_features']`` and ``arguments['where']``.
            primitives_deserializer: object whose ``deserialize_primitive``
                method turns ``arguments['primitive']`` into the primitive
                handed to ``cls``.

        Returns:
            The object produced by calling ``cls``.
        """
        base_features = []
        for feature_name in arguments['base_features']:
            base_features.append(dependencies[feature_name])

        relationships = []
        for serialized in arguments['relationship_path']:
            relationships.append(
                Relationship.from_dictionary(serialized, entityset))

        # The parent dataframe is taken from the first relationship.
        parent_dataframe_name = relationships[0].parent_dataframe.ww.name
        # NOTE(review): the False flag is presumably the direction marker
        # RelationshipPath expects for each step -- confirm.
        path = RelationshipPath(
            [(False, relationship) for relationship in relationships])

        primitive = primitives_deserializer.deserialize_primitive(
            arguments['primitive'])

        # A falsy serialized window is forwarded as-is instead of being parsed.
        serialized_window = arguments['use_previous']
        if serialized_window:
            use_previous = Timedelta.from_dictionary(serialized_window)
        else:
            use_previous = serialized_window

        # Same convention for the optional 'where' feature name.
        where_name = arguments['where']
        where = dependencies[where_name] if where_name else where_name

        feature_kwargs = dict(
            base_features=base_features,
            parent_dataframe_name=parent_dataframe_name,
            primitive=primitive,
            relationship_path=path,
            use_previous=use_previous,
            where=where,
            name=arguments['name'],
        )
        return cls(**feature_kwargs)
Beispiel #4
0
def _check_timedelta(td, entity_id=None, related_entity_id=None):
    """
    Convert strings to Timedelta objects
    Allows for both shortform and longform units, as well as any form of capitalization
    '2 Minutes'
    '2 minutes'
    '2 m'
    '1 Minute'
    '1 minute'
    '1 m'
    '1 units'
    '1 Units'
    '1 u'
    Shortform is fine if space is dropped
    '2m'
    '1u"
    If a pd.Timedelta object is passed, units will be converted to seconds due to the underlying representation
        of pd .Timedelta.
    """
    if td is None:
        return td
    if isinstance(td, Timedelta):
        return td
    elif not (is_string(td) or isinstance(td, pd.Timedelta)
              or isinstance(td, (int, float))):
        raise ValueError("Unable to parse timedelta: {}".format(td))

    value = None
    try:
        value = int(td)
    except Exception:
        try:
            value = float(td)
        except Exception:
            pass
    if isinstance(td, pd.Timedelta):
        unit = 's'
        value = td.total_seconds()
    else:
        pattern = '([0-9]+) *([a-zA-Z]+)$'
        match = re.match(pattern, td)
        value, unit = match.groups()
        try:
            value = int(value)
        except Exception:
            try:
                value = float(value)
            except Exception:
                raise ValueError(
                    "Unable to parse value {} from ".format(value) +
                    "timedelta string: {}".format(td))
    return Timedelta(value, unit)
Beispiel #5
0
def _check_timedelta(td, entity_id=None, related_entity_id=None):
    """
    Convert strings to Timedelta objects
    Allows for both shortform and longform units, as well as any form of capitalization
    '2 Minutes'
    '2 minutes'
    '2 m'
    '1 Minute'
    '1 minute'
    '1 m'
    '1 units'
    '1 Units'
    '1 u'
    Shortform is fine if space is dropped
    '2m'
    '1u"
    When using generic units, can drop the unit
    1
    2
    '1'
    '2'
    When using observations, need to provide an entity as either a tuple or a separate arg
    ('2o', 'logs')
    ('2 o', 'logs')
    ('2 Observations', 'logs')
    ('2 observations', 'logs')
    ('2 observation', 'logs')
    If an entity is provided and no unit is provided, assume observations (instead of generic units)
    (2, 'logs')
    ('2', 'logs')



    """
    if td is None:
        return td
    if isinstance(td, Timedelta):
        if td.entity is not None and entity_id is not None and td.entity != entity_id:
            raise ValueError("Timedelta entity {} different from passed entity {}".format(td.entity, entity_id))
        if td.entity is not None and related_entity_id is not None and td.entity == related_entity_id:
            raise ValueError("Timedelta entity {} same as passed related entity {}".format(td.entity, related_entity_id))
        return td
    elif not isinstance(td, (basestring, tuple, int, float)):
        raise ValueError("Unable to parse timedelta: {}".format(td))

    # TODO: allow observations from an entity in string

    if isinstance(td, tuple):
        if entity_id is None:
            entity_id = td[1]
        td = td[0]

    value = None
    try:
        value = int(td)
    except Exception:
        try:
            value = float(td)
        except Exception:
            pass
    if value is not None and entity_id is not None:
        unit = 'o'
    elif value is not None:
        unit = 'u'
    else:
        pattern = '([0-9]+) *([a-zA-Z]+)$'
        match = re.match(pattern, td)
        value, unit = match.groups()
        try:
            value = int(value)
        except Exception:
            try:
                value = float(value)
            except Exception:
                raise ValueError("Unable to parse value {} from ".format(value) +
                                 "timedelta string: {}".format(td))
    return Timedelta(value, unit, entity=entity_id)