def numpy_datetime64_to_python_datetime(x, allow_none=False):
    import numpy as np
    if isinstance(x, np.datetime64):
        # For some reason, the following doesn't always work. Instead of a
        # Python datetime, an int may be returned. This may be due to a bug in
        # NumPy. This function detects this issue and employs an alternative
        # strategy to convert x to a Python datetime.
        r = x.astype(dt.datetime)
        if isinstance(r, dt.datetime):
            return r
        # Fallback: decompose the datetime64 into its calendar and
        # time-of-day components manually.
        year = x.astype('datetime64[Y]').astype(int) + 1970
        xm = x.astype('datetime64[M]')
        month = xm.astype(int) % 12 + 1
        days = (x - xm) / np.timedelta64(1, 'D')
        timeindays = days - int(days)
        day = int(days) + 1
        hour = int(timeindays * tc.HOURS_PER_DAY)
        timeindays -= hour / tc.HOURS_PER_DAY
        minute = int(timeindays * tc.MINUTES_PER_DAY)
        timeindays -= minute / tc.MINUTES_PER_DAY
        second = int(timeindays * tc.SECONDS_PER_DAY)
        timeindays -= second / tc.SECONDS_PER_DAY
        microsecond = int(timeindays * tc.MICROSECONDS_PER_DAY)
        r = dt.datetime(year, month, day, hour, minute, second, microsecond)
        # Compensate for floating-point rounding in the microsecond component.
        if microsecond % 10 == 9:
            r += dt.timedelta(microseconds=1)
        return r
    elif checks.is_iterable(x):
        return [numpy_datetime64_to_python_datetime(e, allow_none) for e in x]
    elif allow_none and x is None:
        return None
    raise ValueError('Unable to convert "%s" to Python datetime' % str(x))
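
# Illustrative usage (a sketch, not part of the module's API surface): a
# scalar np.datetime64 comes back as a dt.datetime, and iterables convert
# element-wise (day-resolution values go through the manual fallback, since
# their astype yields a dt.date rather than a dt.datetime).
#
# >>> import numpy as np
# >>> numpy_datetime64_to_python_datetime(np.datetime64('2019-01-02T03:04:05'))
# datetime.datetime(2019, 1, 2, 3, 4, 5)
# >>> numpy_datetime64_to_python_datetime([np.datetime64('2019-01-02')])
# [datetime.datetime(2019, 1, 2, 0, 0)]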
def split(self, purpose=('training', 'validation', 'test'), fraction=(.5, .25, .25)):
    logger = logging.getLogger()
    if not checks.is_iterable_not_string(purpose):
        purpose = [purpose]
    if not checks.is_iterable(fraction):
        fraction = [fraction]
    split_purposes = []
    split_starts_inclusive = []
    split_ends_exclusive = []
    count_remaining = len(self.input_working)
    fraction_done = 0.
    count_done = 0
    for p, f in zip(purpose, fraction):
        assert p in ('training', 'validation', 'test')
        split_purposes.append(p)
        # Rescale the requested fraction by what is still unallocated, so
        # each split is carved out of the remaining rows.
        next_count = int(count_remaining * f / (1. - fraction_done))
        split_starts_inclusive.append(count_done)
        count_done += next_count
        split_ends_exclusive.append(count_done)
        count_remaining -= next_count
        fraction_done += f
        logger.info('A %s set: [%d, %d)' % (
            split_purposes[-1], split_starts_inclusive[-1], split_ends_exclusive[-1]))
    self.__is_split = True
    self.__split_purposes = tuple(split_purposes)
    self.__split_starts_inclusive = tuple(split_starts_inclusive)
    self.__split_ends_exclusive = tuple(split_ends_exclusive)
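
# Worked example of the rescaling above (hypothetical numbers): with 100 rows
# and fraction=(.5, .25, .25), the splits come out as [0, 50), [50, 75) and
# [75, 100): 50 = int(100 * .5 / 1.), then 25 = int(50 * .25 / (1. - .5)),
# then 25 = int(25 * .25 / (1. - .75)).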
def to_python_datetime(x, allow_dates=True, date_for_times=dt.date.today(), allow_none=False, *args, **kwargs):
    # Note: the default for date_for_times is evaluated once, at import time,
    # as is usual for Python default arguments.
    import numpy as np
    import pandas as pd
    if isinstance(x, pd.Timestamp):
        return pandas_timestamp_to_python_datetime(x, *args, **kwargs)
    elif isinstance(x, np.datetime64):
        return numpy_datetime64_to_python_datetime(x, *args, **kwargs)
    elif isinstance(x, dt.datetime):
        return x
    elif date_for_times is not None and isinstance(x, dt.time):
        return dt.datetime.combine(date_for_times, x)
    elif allow_dates and isinstance(x, dt.date):
        return dt.datetime.combine(x, dt.time())
    elif checks.is_string(x):
        return str_to_datetime(x, *args, **kwargs)
    elif checks.is_iterable(x):
        return [to_python_datetime(e, allow_dates, date_for_times, allow_none, *args, **kwargs) for e in x]
    elif allow_none and x is None:
        return None
    raise ValueError('Unable to convert "%s" to Python datetime' % str(x))
def add_ln(self, column=None, prefix='ln(', suffix=')', exclude_column_re=None,
           include_column_re=None, exclude_columns_with_negative_values=True):
    logger = logging.getLogger()
    if column is None:
        column = self.__input_df.columns
    if not checks.is_iterable(column):
        column = [column]
    if exclude_column_re is not None:
        exclude_column_re = re.compile(exclude_column_re)
    if include_column_re is not None:
        include_column_re = re.compile(include_column_re)
    for c in column:
        if include_column_re is not None and not include_column_re.match(c):
            logger.info('- Excluding column due to include_column_re: %s' % c)
            continue
        if exclude_column_re is not None and exclude_column_re.match(c):
            logger.info('- Excluding column due to exclude_column_re: %s' % c)
            continue
        if exclude_columns_with_negative_values and any(self.__input_df[c] < 0.):
            logger.info('- Excluding column since it contains negative values: %s' % c)
            continue
        new_column_name = prefix + c + suffix
        logger.info('- Adding new ln column: %s' % new_column_name)
        self.__input_df[new_column_name] = self.__input_df[c].apply(np.log)
def mean_or_last(x):
    if isinstance(x, pd.DataFrame):
        return x.apply(mean_or_last)
    else:
        try:
            return np.mean(x)
        except Exception:
            return x[-1] if checks.is_iterable(x) else x
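
# Illustrative behaviour (a sketch): numeric collections are averaged, while
# non-numeric ones fall back to their last element.
#
# >>> mean_or_last([1., 2., 3.])
# 2.0
# >>> mean_or_last(['a', 'b'])
# 'b'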
def add_diff(self, column=None, prefix='diff(', suffix=')', exclude_column_re=None, include_column_re=None):
    logger = logging.getLogger()
    if column is None:
        column = self.__input_df.columns
    if not checks.is_iterable(column):
        column = [column]
    if exclude_column_re is not None:
        exclude_column_re = re.compile(exclude_column_re)
    if include_column_re is not None:
        include_column_re = re.compile(include_column_re)
    for c in column:
        if include_column_re is not None and not include_column_re.match(c):
            logger.info('- Excluding column due to include_column_re: %s' % c)
            continue
        if exclude_column_re is not None and exclude_column_re.match(c):
            logger.info('- Excluding column due to exclude_column_re: %s' % c)
            continue
        new_column_name = prefix + c + suffix
        logger.info('- Adding new diff column: %s' % new_column_name)
        self.__input_df[new_column_name] = self.__input_df[c].diff()
        # The leading row(s) of a diff are NaN; record how many rows must be
        # truncated from above.
        try:
            self.__truncate_from_above = max(
                self.__truncate_from_above,
                list(self.__input_df[new_column_name].isnull().values).index(False))
        except ValueError:
            self.__truncate_from_above = max(self.__truncate_from_above, len(self.__input_df))
def set_output(self, column, forecast_horizon=0, remove_from_input=None, difference_from_present=False):
    assert column is not None
    assert forecast_horizon is not None
    if not checks.is_iterable(forecast_horizon):
        forecast_horizon = [forecast_horizon]
    for fh in forecast_horizon:
        assert fh >= 0
    if remove_from_input is None:
        # If any forecast horizon is 0, the present value of the column is
        # part of the output, so remove it from the input by default.
        remove_from_input = not all(forecast_horizon)
    if difference_from_present:
        self.__output_df = pd.concat(
            [self.__input_df[column].shift(-fh) - self.__input_df[column] for fh in forecast_horizon],
            axis=1)
    else:
        self.__output_df = pd.concat(
            [self.__input_df[column].shift(-fh) for fh in forecast_horizon],
            axis=1)
    self.__output_df.columns = [
        'forecast(' + str(fh) + ',' + column + ')' if fh > 0 else column
        for fh in forecast_horizon]
    self.__output_base_df = self.__input_df[column].to_frame()
    if remove_from_input:
        del self.__input_df[column]
    self.__truncate_from_below = max(forecast_horizon)
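
# Illustrative effect (a sketch, assuming an input DataFrame with a 'price'
# column): set_output('price', forecast_horizon=[0, 1]) produces output
# columns 'price' and 'forecast(1,price)', the latter being 'price' shifted
# one step into the future; since horizon 0 is present, 'price' is removed
# from the input by default to avoid leaking the target.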
def pandas_timestamp_to_python_datetime(x, allow_none=False):
    import pandas as pd
    if isinstance(x, pd.Timestamp):
        return x.to_pydatetime()
    elif checks.is_iterable(x):
        return [pandas_timestamp_to_python_datetime(e, allow_none) for e in x]
    elif allow_none and x is None:
        return None
    raise ValueError('Unable to convert "%s" to Python datetime' % str(x))
def pandas_timedelta_to_python_timedelta(x, allow_none=False):
    import pandas as pd
    if isinstance(x, pd.Timedelta):
        return x.to_pytimedelta()
    elif checks.is_iterable(x):
        return [pandas_timedelta_to_python_timedelta(e, allow_none) for e in x]
    elif allow_none and x is None:
        return None
    raise ValueError('Unable to convert "%s" to Python timedelta' % str(x))
def __init__(self, obs_matrix):
    super().__init__()
    if not checks.is_numpy_array(obs_matrix) and not checks.is_iterable(obs_matrix):
        obs_matrix = (obs_matrix,)
    self._obs_matrix = npu.make_immutable(
        block_diag(*[npu.to_ndim_2(om, ndim_1_to_col=False, copy=False) for om in obs_matrix]))
    self._to_string_helper_KalmanFilterObsModel = None
    self._str_KalmanFilterObsModel = None
def numpy_timedelta64_to_python_timedelta(x, allow_none=False):
    import numpy as np
    import pandas as pd
    if isinstance(x, np.timedelta64):
        # Note: pd.to_timedelta no longer accepts the long-deprecated box
        # argument; for a scalar input it returns a pd.Timedelta directly.
        return pd.to_timedelta(x, errors='coerce').to_pytimedelta()
    elif checks.is_iterable(x):
        return [numpy_timedelta64_to_python_timedelta(e, allow_none) for e in x]
    elif allow_none and x is None:
        return None
    raise ValueError('Unable to convert "%s" to Python timedelta' % str(x))
def __init__(self, time, state_distr, process, weighting_func=None, particle_count=1000,
             observation_dim=1, random_state=None, predicted_observation_sampler=None,
             outlier_threshold=None, name=None, pype=None,
             pype_options=frozenset(filtering.FilterPypeOptions)):
    super().__init__(name)
    self._pype = pype
    self._pype_options = frozenset() if (pype_options is None or pype is None) else frozenset(pype_options)
    if not checks.is_iterable(process):
        process = (process,)
    process = checks.check_iterable_over_instances(process, proc.SolvedItoProcess)
    if weighting_func is None:
        weighting_func = KDEWeightingFunction()
    self._time = time
    self._observation_dim = observation_dim
    self._state_distr = state_distr
    self._processes = tuple(process)
    self._state_dim = sum([p.process_dim for p in self._processes])
    self._weighting_func = weighting_func
    self._particle_count = particle_count
    self._current_particle_idx = None
    self._random_state = rnd.random_state() if random_state is None else random_state
    self._predicted_observation_sampler = predicted_observation_sampler

    self._prior_particles = np.empty((self._particle_count, self._state_dim))
    self._resampled_particles = np.empty((self._particle_count, self._state_dim))
    self._unnormalised_weights = np.empty((self._particle_count,))
    self._weights = np.empty((self._particle_count,))
    self._resampled_particles_uptodate = False

    self._last_observation = None

    self._cached_prior_mean = None
    self._cached_prior_var = None
    self._cached_posterior_mean = None
    self._cached_posterior_var = None
    self._cached_resampled_mean = None
    self._cached_resampled_var = None

    self.log_likelihood = 0.0
    self.effective_sample_size = np.nan

    if self._predicted_observation_sampler is not None:
        self.predicted_observation_particles = None
        self.predicted_observation_kde = None
        self.predicted_observation = np.nan
        self.innovation = np.nan
        self.innovationvar = np.nan

    assert self._predicted_observation_sampler is not None or outlier_threshold is None
    self._outlier_threshold = outlier_threshold

    self._context = OrderedDict()

    self._initialise()
def to_python_float(x, allow_none=False, allow_ints=False, *args, **kwargs):
    if checks.is_some_float(x, allow_none):
        return float(x)
    elif allow_ints and checks.is_some_int(x, allow_none):
        return float(to_python_int(x))
    elif checks.is_string(x):
        return str_to_float(x, *args, **kwargs)
    elif checks.is_iterable(x):
        return [to_python_float(e, allow_none, allow_ints, *args, **kwargs) for e in x]
    elif allow_none and x is None:
        return None
    raise ValueError('Unable to convert "%s" to Python float' % str(x))
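
# Illustrative usage (a sketch, assuming the checks predicates behave as
# their names suggest): scalars convert directly, and iterables convert
# element-wise with the flags propagated.
#
# >>> to_python_float(1.5)
# 1.5
# >>> to_python_float([1, 2], allow_ints=True)
# [1.0, 2.0]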
def add_ma(self, window, column=None, prefix='ma(${WINDOW},', suffix=')',
           exclude_column_re=None, include_column_re=None):
    logger = logging.getLogger()
    checks.check_not_none(window)
    if not checks.is_iterable(window):
        window = [window]
    if column is None:
        column = self.__input_df.columns
    if not checks.is_iterable(column):
        column = [column]
    if exclude_column_re is not None:
        exclude_column_re = re.compile(exclude_column_re)
    if include_column_re is not None:
        include_column_re = re.compile(include_column_re)
    for c in column:
        if include_column_re is not None and not include_column_re.match(c):
            logger.info('- Excluding column due to include_column_re: %s' % c)
            continue
        if exclude_column_re is not None and exclude_column_re.match(c):
            logger.info('- Excluding column due to exclude_column_re: %s' % c)
            continue
        for w in window:
            c_prefix = prefix.replace('${WINDOW}', str(w))
            c_suffix = suffix.replace('${WINDOW}', str(w))
            new_column_name = c_prefix + c + c_suffix
            logger.info('- Adding new MA column: %s' % new_column_name)
            self.__input_df[new_column_name] = self.__input_df[c].rolling(window=w, center=False).mean()
            try:
                self.__truncate_from_above = max(
                    self.__truncate_from_above,
                    list(self.__input_df[new_column_name].isnull().values).index(False))
            except ValueError:
                self.__truncate_from_above = max(self.__truncate_from_above, len(self.__input_df))
def add_lag(self, lag, column=None, prefix='lag(${LAG},', suffix=')',
            exclude_column_re=None, include_column_re=None):
    logger = logging.getLogger()
    checks.check_not_none(lag)
    if not checks.is_iterable(lag):
        lag = [lag]
    if column is None:
        column = self.__input_df.columns
    if not checks.is_iterable(column):
        column = [column]
    if exclude_column_re is not None:
        exclude_column_re = re.compile(exclude_column_re)
    if include_column_re is not None:
        include_column_re = re.compile(include_column_re)
    for c in column:
        if include_column_re is not None and not include_column_re.match(c):
            logger.info('- Excluding column due to include_column_re: %s' % c)
            continue
        if exclude_column_re is not None and exclude_column_re.match(c):
            logger.info('- Excluding column due to exclude_column_re: %s' % c)
            continue
        for l in lag:
            c_prefix = prefix.replace('${LAG}', str(l))
            c_suffix = suffix.replace('${LAG}', str(l))
            new_column_name = c_prefix + c + c_suffix
            logger.info('- Adding new lag column: %s' % new_column_name)
            self.__input_df[new_column_name] = self.__input_df[c].shift(l)
            try:
                self.__truncate_from_above = max(
                    self.__truncate_from_above,
                    list(self.__input_df[new_column_name].isnull().values).index(False))
            except ValueError:
                self.__truncate_from_above = max(self.__truncate_from_above, len(self.__input_df))
def __init__(self, time, state_distr, process, name=None, pype=None,
             pype_options=frozenset(filtering.FilterPypeOptions)):
    super().__init__(name)
    self._pype = pype
    self._pype_options = frozenset() if (pype_options is None or pype is None) else frozenset(pype_options)
    if not checks.is_iterable(process):
        process = (process,)
    checks.check_instance(state_distr, N)
    process = checks.check_iterable_over_instances(process, proc.MarkovProcess)
    self._time = time
    self._state_distr = state_distr
    self._is_posterior = False
    self._processes = tuple(process)
    self._to_string_helper_KalmanFilter = None
    self._str_KalmanFilter = None
    if filtering.FilterPypeOptions.PRIOR_STATE in self._pype_options:
        self._pype.send(self.state)
def to_python_time(x, allow_datetimes=True, allow_none=False, *args, **kwargs):
    import numpy as np
    import pandas as pd
    if isinstance(x, dt.time):
        return x
    elif allow_datetimes and isinstance(x, dt.datetime):
        return x.time()
    elif allow_datetimes and isinstance(x, np.datetime64):
        return numpy_datetime64_to_python_datetime(x, *args, **kwargs).time()
    elif allow_datetimes and isinstance(x, pd.Timestamp):
        return pandas_timestamp_to_python_datetime(x, *args, **kwargs).time()
    elif isinstance(x, np.timedelta64):
        return numpy_timedelta64_to_python_time(x, allow_none)
    elif checks.is_string(x):
        return str_to_time(x, *args, **kwargs)
    elif checks.is_iterable(x):
        return [to_python_time(e, allow_datetimes, allow_none, *args, **kwargs) for e in x]
    elif allow_none and x is None:
        return None
    raise ValueError('Unable to convert "%s" to Python time' % str(x))
def to_python_timedelta(x, allow_none=False):
    import numpy as np
    import pandas as pd
    if isinstance(x, np.timedelta64):
        return numpy_timedelta64_to_python_timedelta(x, allow_none)
    elif isinstance(x, pd.Timedelta):
        return pandas_timedelta_to_python_timedelta(x, allow_none)
    elif isinstance(x, dt.timedelta):
        return x
    elif checks.is_iterable(x):
        return [to_python_timedelta(e, allow_none) for e in x]
    elif allow_none and x is None:
        return None
    else:
        # Final fallback: interpret a plain number as a number of seconds.
        try:
            return dt.timedelta(seconds=x)
        except Exception:
            pass
    raise ValueError('Unable to convert "%s" to Python timedelta' % str(x))
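
# Illustrative usage (a sketch): plain numbers are interpreted as seconds via
# the final fallback.
#
# >>> to_python_timedelta(90)
# datetime.timedelta(seconds=90)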
def __init__(self, filter, name, obs_model, observed_processes, *args, **kwargs):
    super().__init__(filter, name)
    if not checks.is_iterable(observed_processes):
        observed_processes = [observed_processes]
    observed_processes = tuple(
        checks.check_iterable_over_instances(observed_processes, proc.MarkovProcess))
    if obs_model is None:
        obs_model = ParticleFilterObsModel.create(
            np.eye(sum([p.process_dim for p in observed_processes])))
    self._obs_model = obs_model
    self._state_mean_rects = []
    self._state_cov_diag_rects = []
    for op in observed_processes:
        matched = False
        row = 0
        for ap in self.filter._processes:
            process_dim = ap.process_dim
            if op is ap:
                matched = True
                self._state_mean_rects.append(np.s_[row:row + process_dim, 0:1])
                self._state_cov_diag_rects.append(np.s_[row:row + process_dim, row:row + process_dim])
            row += process_dim
        if not matched:
            raise ValueError('Each observed process must match a particle filter\'s process')
    self._state_cov_rects = []
    for r in self._state_cov_diag_rects:
        startrow = r[0].start
        stoprow = r[0].stop
        rects = []
        for r1 in self._state_cov_diag_rects:
            startcol = r1[1].start
            stopcol = r1[1].stop
            rects.append(np.s_[startrow:stoprow, startcol:stopcol])
        self._state_cov_rects.append(rects)
def last(x):
    if isinstance(x, pd.DataFrame):
        return x.apply(last)
    else:
        return x[-1] if checks.is_iterable(x) else x
def __init__(self, fig, ax, auto_refresh, title, filter_name,
             process_prior_filter_states, process_posterior_filter_states,
             process_true_values, process_obs_results,
             state_indices=None, state_labels=None,
             observable_names=None, obs_indices=None, obs_labels=None,
             state_colours=_default_state_colours,
             true_value_colours=_default_true_value_colours,
             obs_colours=_default_obs_colours,
             *args, **kwargs):
    super().__init__(fig, ax, *args, **kwargs)
    self._process_prior_filter_states = process_prior_filter_states
    self._process_posterior_filter_states = process_posterior_filter_states
    self._process_true_values = process_true_values
    self._process_obs_results = process_obs_results
    if state_indices is not None:
        if not checks.is_iterable(state_indices):
            state_indices = (state_indices,)
        else:
            state_indices = tuple(state_indices)
    if state_labels is not None:
        if not checks.is_iterable(state_labels):
            state_labels = (state_labels,)
        else:
            state_labels = tuple(state_labels)
    checks.is_same_len_or_none(state_indices, state_labels)
    if observable_names is not None:
        checks.check_not_none(obs_indices)
        if not checks.is_iterable(observable_names):
            observable_names = (observable_names,)
        else:
            observable_names = tuple(observable_names)
    if obs_indices is not None:
        checks.check_not_none(observable_names)
        if not checks.is_iterable(obs_indices):
            obs_indices = (obs_indices,)
        else:
            obs_indices = tuple(obs_indices)
    if obs_labels is not None:
        if not checks.is_iterable(obs_labels):
            obs_labels = (obs_labels,)
        else:
            obs_labels = tuple(obs_labels)
    checks.is_same_len_or_none(observable_names, obs_indices, obs_labels)
    self._auto_refresh = auto_refresh
    self._title = title
    self._filter_name = filter_name
    self._state_indices = state_indices
    self._state_labels = state_labels
    self._observable_names = observable_names
    self._obs_indices = obs_indices
    self._obs_labels = obs_labels
    self._state_colours = state_colours
    self._true_value_colours = true_value_colours
    self._obs_colours = obs_colours
    self._state_and_true_value_plots_inited = False
    self._obs_plots_inited = False
    if self._state_indices is not None:
        self._init_state_and_true_value_plots()
    self._inited_obs_index_count = 0
    if self._observable_names is not None:
        if self._obs_labels is None:
            self._obs_labels = []
            for observable_name, obs_index in zip(self._observable_names, self._obs_indices):
                if self._observable_names.count(observable_name) == 1:
                    self._obs_labels.append(observable_name)
                else:
                    self._obs_labels.append('%s %d' % (observable_name, obs_index))
            self._obs_labels = tuple(self._obs_labels)
        self._actual_observable_names = self._observable_names
        self._actual_obs_indices = self._obs_indices
        self._actual_obs_labels = self._obs_labels
        self._init_obs_plots()
        self._obs_plots_inited = True
    else:
        self._actual_observable_names = []
        self._actual_obs_indices = []
        self._actual_obs_labels = []
def sparsen(df, aggregator=mean_or_last, date=None, time=None, datetime=None,
            bucket='date', new_bucket_column=None, fix_kind='last', fix_time=None,
            fix_points=10, min_fix_point_count=None, max_fix_point_count=None,
            min_min_fix_point_time=None, max_min_fix_point_time=None,
            min_max_fix_point_time=None, max_max_fix_point_time=None,
            already_sorted=False, aggregators_apply_to_df=False,
            exclude_original_temporal_columns=True, columns_to_exclude=None,
            return_extra_info=False):
    checks.is_at_least_one_not_none(datetime, date, time)

    if bucket == 'date':
        bucket = lambda x: conv.to_python_date(x, allow_datetimes=True)
    elif bucket == 'week':
        bucket = lambda x: tsatimes.first_day_of_week(x)

    columns_to_exclude = set() if columns_to_exclude is None else set(columns_to_exclude)

    if datetime is not None:
        checks.check_all_none(date, time)
        if isinstance(datetime, str):
            if exclude_original_temporal_columns:
                columns_to_exclude.add(datetime)
            if new_bucket_column is None and exclude_original_temporal_columns:
                new_bucket_column = datetime
            datetime = df[datetime].values
        temporals = datetime
    else:
        if isinstance(date, str):
            if exclude_original_temporal_columns:
                columns_to_exclude.add(date)
            if new_bucket_column is None and exclude_original_temporal_columns:
                new_bucket_column = date
            date = df[date].values
        if isinstance(time, str):
            if exclude_original_temporal_columns:
                columns_to_exclude.add(time)
            if new_bucket_column is None and exclude_original_temporal_columns:
                new_bucket_column = time
            time = df[time].values
        if date is not None and time is not None:
            temporals = [dt.datetime.combine(d, t) for d, t in zip(date, time)]
        elif date is not None:
            temporals = date
        else:  # time is not None
            temporals = time

    if new_bucket_column is None:
        new_bucket_column = 'bucket'

    if fix_kind in ('first', 'after'):
        comparison = 'ge'
    elif fix_kind == 'after_exclusive':
        comparison = 'gt'
    elif fix_kind in ('last', 'before'):
        comparison = 'le'
    elif fix_kind == 'before_exclusive':
        comparison = 'lt'
    else:
        raise ValueError('Unfamiliar fix_kind: "%s"' % str(fix_kind))

    if fix_kind in ('first', 'last'):
        checks.check_none(fix_time)
    else:
        checks.check_not_none(fix_time)

    numeric_fix_points = checks.is_some_number(fix_points)
    if not numeric_fix_points:
        fix_points = conv.to_python_timedelta(fix_points)

    grouping_df = pd.DataFrame({'temporals': temporals})
    grouped_df = grouping_df.groupby(bucket(temporals))

    columns = [new_bucket_column]
    data = {new_bucket_column: []}
    aggs = {}
    if checks.is_some_dict(aggregator):
        column_agg_pairs = aggregator.items()
    elif checks.is_iterable(aggregator):
        column_agg_pairs = aggregator
    else:
        column_agg_pairs = zip(df.columns, utils.xconst(aggregator))
    for column, agg in column_agg_pairs:
        if column not in columns_to_exclude:
            columns.append(column)
            data[column] = []
            aggs[column] = agg

    dates_with_no_points = []
    dates_with_fix_point_limits_breached = col.OrderedDict()
    fix_point_counts = col.OrderedDict()

    for bucket, group_df in grouped_df:
        if len(group_df) == 0:
            dates_with_no_points.append(bucket)
        if not already_sorted:
            group_df = group_df.copy()
            group_df.sort_values('temporals', inplace=True)
        if fix_kind == 'first':
            fix_time = group_df['temporals'].values[0]
        elif fix_kind == 'last':
            fix_time = group_df['temporals'].values[-1]
        if numeric_fix_points:
            # fix_points is a count: take the first/last fix_points points on
            # the appropriate side of fix_time.
            if comparison == 'ge':
                fix_point_indices = group_df.index[tsatimes.temporal_ge(group_df['temporals'], fix_time)][0:fix_points]
            elif comparison == 'gt':
                fix_point_indices = group_df.index[tsatimes.temporal_gt(group_df['temporals'], fix_time)][0:fix_points]
            elif comparison == 'le':
                fix_point_indices = group_df.index[tsatimes.temporal_le(group_df['temporals'], fix_time)][-fix_points:]
            else:  # comparison == 'lt'
                fix_point_indices = group_df.index[tsatimes.temporal_lt(group_df['temporals'], fix_time)][-fix_points:]
        else:
            # fix_points is a timedelta: take all points within that window of
            # fix_time on the appropriate side.
            if comparison == 'ge':
                fix_point_indices = group_df.index[
                    (tsatimes.temporal_ge(group_df['temporals'], fix_time)) &
                    (tsatimes.temporal_le(group_df['temporals'], tsatimes.plus_timedelta(fix_time, fix_points)))]
            elif comparison == 'gt':
                fix_point_indices = group_df.index[
                    (tsatimes.temporal_gt(group_df['temporals'], fix_time)) &
                    (tsatimes.temporal_le(group_df['temporals'], tsatimes.plus_timedelta(fix_time, fix_points)))]
            elif comparison == 'le':
                fix_point_indices = group_df.index[
                    (tsatimes.temporal_le(group_df['temporals'], fix_time)) &
                    (tsatimes.temporal_ge(group_df['temporals'], tsatimes.plus_timedelta(fix_time, -fix_points)))]
            else:  # comparison == 'lt'
                fix_point_indices = group_df.index[
                    (tsatimes.temporal_lt(group_df['temporals'], fix_time)) &
                    (tsatimes.temporal_ge(group_df['temporals'], tsatimes.plus_timedelta(fix_time, -fix_points)))]

        fix_point_limits_breached = set()
        if min_fix_point_count is not None and len(fix_point_indices) < min_fix_point_count:
            fix_point_limits_breached.add('min_fix_point_count')
        if max_fix_point_count is not None and len(fix_point_indices) > max_fix_point_count:
            fix_point_limits_breached.add('max_fix_point_count')
        if min_min_fix_point_time is not None:
            if checks.is_some_timedelta(min_min_fix_point_time):
                the_min_min_fix_point_time = fix_time + min_min_fix_point_time if comparison in ('ge', 'gt') else fix_time - min_min_fix_point_time
            else:
                the_min_min_fix_point_time = min_min_fix_point_time
            if tsatimes.temporal_lt(min(grouping_df['temporals'].values[fix_point_indices]), the_min_min_fix_point_time):
                fix_point_limits_breached.add('min_min_fix_point_time')
        if max_min_fix_point_time is not None:
            if checks.is_some_timedelta(max_min_fix_point_time):
                the_max_min_fix_point_time = fix_time + max_min_fix_point_time if comparison in ('ge', 'gt') else fix_time - max_min_fix_point_time
            else:
                the_max_min_fix_point_time = max_min_fix_point_time
            if tsatimes.temporal_gt(min(grouping_df['temporals'].values[fix_point_indices]), the_max_min_fix_point_time):
                fix_point_limits_breached.add('max_min_fix_point_time')
        if min_max_fix_point_time is not None:
            if checks.is_some_timedelta(min_max_fix_point_time):
                the_min_max_fix_point_time = fix_time + min_max_fix_point_time if comparison in ('ge', 'gt') else fix_time - min_max_fix_point_time
            else:
                the_min_max_fix_point_time = min_max_fix_point_time
            if tsatimes.temporal_lt(max(grouping_df['temporals'].values[fix_point_indices]), the_min_max_fix_point_time):
                fix_point_limits_breached.add('min_max_fix_point_time')
        if max_max_fix_point_time is not None:
            if checks.is_some_timedelta(max_max_fix_point_time):
                the_max_max_fix_point_time = fix_time + max_max_fix_point_time if comparison in ('ge', 'gt') else fix_time - max_max_fix_point_time
            else:
                the_max_max_fix_point_time = max_max_fix_point_time
            if tsatimes.temporal_gt(max(grouping_df['temporals'].values[fix_point_indices]), the_max_max_fix_point_time):
                fix_point_limits_breached.add('max_max_fix_point_time')

        if len(fix_point_limits_breached) > 0:
            dates_with_fix_point_limits_breached[bucket] = fix_point_limits_breached
        else:
            data[new_bucket_column].append(bucket)
            for column in columns[1:]:
                if column not in columns_to_exclude:
                    arg = df.iloc[fix_point_indices] if aggregators_apply_to_df else df.iloc[fix_point_indices][column].values
                    data[column].append(aggs[column](arg))
            fix_point_counts[bucket] = len(fix_point_indices)

    df = pd.DataFrame(data, columns=columns)
    if return_extra_info:
        return {
            'df': df,
            'dates_with_no_points': dates_with_no_points,
            'dates_with_fix_point_limits_breached': dates_with_fix_point_limits_breached,
            'fix_point_counts': fix_point_counts
        }
    else:
        return df
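
# Minimal usage sketch (hypothetical DataFrame and column names, assuming the
# conventions above): bucket by calendar date, anchor at each bucket's last
# point, and aggregate the last up-to-10 points of each column.
#
# sparse_df = sparsen(
#     quotes_df,                 # a DataFrame with a 'timestamp' column
#     datetime='timestamp',      # bucketed by calendar date (bucket='date')
#     fix_kind='last',           # anchor at each bucket's last point
#     fix_points=10)             # a count, so the last 10 points are used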
def numpy_timedelta64_to_python_time(x, allow_none=False):
    if checks.is_iterable(x):
        return [numpy_timedelta64_to_python_time(e, allow_none) for e in x]
    return (dt.datetime.min + numpy_timedelta64_to_python_timedelta(x, allow_none)).time()
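
# Illustrative usage (a sketch): a timedelta64 is interpreted as a
# time-of-day offset from midnight.
#
# >>> import numpy as np
# >>> numpy_timedelta64_to_python_time(np.timedelta64(90, 'm'))
# datetime.time(1, 30)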