def filter_instance_by_date(instance: Instance, start_date: str, end_date: str) -> Instance:
    """Return a copy of ``instance`` restricted to ``start_date``..``end_date``.

    The rows are selected with a label slice on ``data.loc``. For a
    ``TimedeltaIndex`` the offsets are first added to the instance's
    ``start_date`` so they can be sliced by date, and afterwards the index is
    rebased so that the first retained row has offset zero.

    Args:
        instance: Instance whose ``data`` has a ``DatetimeIndex`` or a
            ``TimedeltaIndex``.
        start_date: Lower bound of the label slice.
        end_date: Upper bound of the label slice.

    Returns:
        The filtered copy, or ``None`` when no rows fall inside the range.

    Raises:
        PreprocessingException: If the index is neither a ``DatetimeIndex``
            nor a ``TimedeltaIndex``.
    """
    filtered_instance = instance.copy()

    if isinstance(instance.data.index, DatetimeIndex):
        filtered_instance.data = filtered_instance.data.loc[start_date:end_date]
    elif isinstance(instance.data.index, TimedeltaIndex):
        # todo fill for timedelta index
        # Turn relative offsets into absolute timestamps so they can be sliced.
        filtered_instance.data.index = (
            np.datetime64(filtered_instance.start_date) + filtered_instance.data.index
        )
        filtered_instance.data = filtered_instance.data.loc[start_date:end_date]
        if len(filtered_instance.data) > 0:
            # Rebase to offsets from the first retained row. Skipped for an
            # empty result because index[0] would raise IndexError.
            filtered_instance.data.index = (
                filtered_instance.data.index - filtered_instance.data.index[0]
            )
    else:
        raise PreprocessingException('Inappropriate DataFrame format')

    # Bail out before touching the first row: there is none to inspect.
    if len(filtered_instance.data) == 0:
        return None

    # change start time if needed
    # NOTE(review): for TimedeltaIndex data the index has already been rebased
    # to zero here; confirm get_instance_index(0) still yields the timestamp
    # of the first retained row in that case.
    first_index = filtered_instance.get_instance_index(0)
    if filtered_instance.start_date != first_index:
        filtered_instance.start_date = first_index

    return filtered_instance
def resample(instance: Instance, resample_factor: str, resample_method: str) -> Instance:
    """Resample the instance data and back-fill the gaps this creates.

    Args:
        instance: Instance whose ``data`` is resampled.
        resample_factor: Rule passed to ``DataFrame.resample``.
        resample_method: Key into the module-level ``methods`` mapping; it is
            stripped and upper-cased before the lookup.

    Returns:
        A copy of ``instance`` carrying the resampled, back-filled data.

    Raises:
        PreprocessingException: If the method lookup, the resampling or the
            fill fails for any reason; the original error is passed as the
            second argument and chained as ``__cause__``.
    """
    try:
        aggregate = methods[resample_method.strip().upper()]
        resampled = instance.data.resample(resample_factor).apply(aggregate)
        # DataFrame.bfill() replaces the deprecated fillna(method='bfill').
        resampled_filled = resampled.bfill()
    except Exception as exc:
        raise PreprocessingException('Unappropriate argument for resampling', exc) from exc

    return instance.copy_with_different_data(resampled_filled)
def __eliminate_peaks_using_quantiles(instance: Instance) -> Instance:
    """Replace values outside the 5%-95% quantile band with the previous value.

    The quantile bounds are computed per column over ``instance.data``. Every
    value below the lower or above the upper bound of its column is blanked
    out and then forward-filled, so the index of the data is left unchanged.
    Out-of-band values at the very start of a column stay NaN because there
    is no earlier value to carry forward.

    Args:
        instance: Instance whose ``data`` is cleaned.

    Returns:
        A copy of ``instance`` carrying the cleaned data.
    """
    low, high = 0.05, 0.95
    data = instance.data
    quantiles = data.quantile([low, high])

    # Compare every column against its own bounds in one pass; axis=1 aligns
    # the per-column bounds with the frame's columns.
    within_bounds = (
        data.ge(quantiles.loc[low], axis=1) & data.le(quantiles.loc[high], axis=1)
    )

    # where() keeps in-band values and turns the rest into NaN for ffill().
    without_peaks = data.where(within_bounds).ffill()
    return instance.copy_with_different_data(without_peaks)
def create_instance(num_of_columns, num_of_values, columns_prefix='param', index=False):
    """Build an ``Instance`` filled with generated float data.

    Args:
        num_of_columns: Number of columns; they are named
            ``<columns_prefix>1`` .. ``<columns_prefix>N``.
        num_of_values: Length of the ``range`` used for each generated row.
        columns_prefix: Prefix of the generated column names.
        index: When true, the data gets a daily ``DatetimeIndex`` starting at
            2018-11-11 with one entry per row.

    Returns:
        The constructed ``Instance``.
    """
    columns = [columns_prefix + str(i) for i in range(1, num_of_columns + 1)]
    # NOTE(review): one range is produced per column name but passed as rows,
    # so the frame only builds when num_of_values == num_of_columns. Left
    # as-is because existing callers may rely on the resulting values;
    # confirm the intended orientation.
    values = [range(0, num_of_values) for _ in columns]

    instance = Instance(
        '',
        pd.DataFrame(columns=columns, data=values, dtype=np.float64),
        'type',
        # Year-month-day, consistent with the index format used below.
        datetime.datetime.strptime('2018-11-11', '%Y-%m-%d'),
        {})

    if index:
        first_day = datetime.date(2018, 11, 11)
        # One consecutive day per row instead of a fixed three-entry list.
        days = [
            (first_day + datetime.timedelta(days=offset)).isoformat()
            for offset in range(len(instance.data))
        ]
        # The strings carry no time part, so the format must not demand one.
        instance.data.index = pd.to_datetime(days, format='%Y-%m-%d')

    return instance
def create_instance(self, instance_json: dict) -> Instance:
    """Build an ``Instance`` from its JSON-like description.

    An instance is only built when ``instance_json['data_range']`` is ``None``
    or is flagged ``used_for_clustering``.

    Args:
        instance_json: Mapping with the keys ``data_range``, ``params``,
            ``uuid``, ``type``, ``date_added`` and ``metadata``.

    Returns:
        The new ``Instance``, or ``None`` when the data range is present but
        not used for clustering.
    """
    # Todo: finish implementation, all instances need to have same number of parameters and values
    # Todo: data_range.start / data_range.end are not applied to the data yet
    data_range = instance_json['data_range']
    if data_range is not None and not data_range.used_for_clustering:
        return None

    # NOTE(review): assumes instance_json['params'] is something the DataFrame
    # constructor accepts (dict of columns, list of rows, ...) - confirm.
    params = pd.DataFrame(instance_json['params'], dtype=np.float32)
    return Instance(instance_json['uuid'], params, instance_json['type'],
                    instance_json['date_added'], instance_json['metadata'])
def filter_parameters(instance: Instance, parameters_used: List[str]) -> Instance:
    """Return a copy of ``instance`` whose data holds only the given columns.

    Args:
        instance: Instance whose ``data`` is narrowed down.
        parameters_used: Column labels to select from ``instance.data``.

    Returns:
        A copy of ``instance`` carrying just the selected columns.
    """
    return instance.copy_with_different_data(instance.data[parameters_used])
def remove_constant_parameters(instance: Instance) -> Instance:
    """Return a copy of ``instance`` without columns that never change.

    A column is kept when at least one of its values differs from the value
    in its first row. NaN compares unequal to itself, so a column containing
    NaN is treated as varying.

    Args:
        instance: Instance whose ``data`` is inspected.

    Returns:
        A copy of ``instance`` carrying only the varying columns. Data without
        any rows is passed through unchanged.
    """
    data = instance.data
    if len(data) == 0:
        # There is no first row to compare against; iloc[0] would raise.
        return instance.copy_with_different_data(data.copy())

    varies = (data != data.iloc[0]).any()
    return instance.copy_with_different_data(data.loc[:, varies])
def make_windows(instance: Instance, period) -> Instance:
    """Return a copy of ``instance`` whose data index is converted to periods.

    Args:
        instance: Instance whose ``data`` must have a ``DatetimeIndex``.
        period: Frequency handed to ``DataFrame.to_period``.

    Returns:
        A copy of ``instance`` carrying the period-indexed data.

    Raises:
        PreprocessingException: If the data index is not a ``DatetimeIndex``.
    """
    if not isinstance(instance.data.index, DatetimeIndex):
        raise PreprocessingException('Inappropriate DataFrame format')

    return instance.copy_with_different_data(instance.data.to_period(period))
def standardize_instance(instance: Instance, means: Series, stdevs: Series):
    """Return a copy of ``instance`` with the listed columns standardized.

    Each column named in ``means.index`` is replaced by
    ``(column - mean) / stdev``; the inputs are read from the original
    ``instance`` and written to the copy.

    Args:
        instance: Instance providing the source data.
        means: Per-column means; its index decides which columns are touched.
        stdevs: Per-column standard deviations, looked up by the same labels.

    Returns:
        The copy holding the standardized columns.
    """
    result = instance.copy()
    source = instance.data
    for column in means.index:
        centered = source[column] - means[column]
        result.data[column] = centered / stdevs[column]
    return result