예제 #1
0
def filter_instance_by_date(instance: Instance, start_date: str,
                            end_date: str) -> Instance:
    """Return a copy of ``instance`` limited to ``start_date``..``end_date``.

    With a ``DatetimeIndex`` the data is label-sliced directly. With a
    ``TimedeltaIndex`` the offsets are first added to the instance's
    ``start_date`` to obtain absolute timestamps, sliced the same way, and
    then rebased so that the first remaining row sits at offset zero.

    Args:
        instance: Instance whose ``data`` frame is filtered; it is copied
            via ``instance.copy()`` before any change is made.
        start_date: Lower bound of the ``.loc`` label slice.
        end_date: Upper bound of the ``.loc`` label slice.

    Returns:
        The filtered copy, or ``None`` when no row falls inside the range.

    Raises:
        PreprocessingException: If the data index is neither a
            ``DatetimeIndex`` nor a ``TimedeltaIndex``.
    """
    filtered_instance = instance.copy()

    if isinstance(instance.data.index, DatetimeIndex):
        filtered_instance.data = filtered_instance.data.loc[
            start_date:end_date]
        if len(filtered_instance.data) == 0:
            return None

    elif isinstance(instance.data.index, TimedeltaIndex):
        # Turn the relative offsets into absolute timestamps so the frame
        # can be sliced with the same date labels as the datetime case.
        filtered_instance.data.index = np.datetime64(
            filtered_instance.start_date) + filtered_instance.data.index
        filtered_instance.data = filtered_instance.data.loc[
            start_date:end_date]
        # Bail out before touching index[0]: on an empty selection that
        # lookup raises IndexError instead of yielding the intended None.
        if len(filtered_instance.data) == 0:
            return None
        # Back to relative offsets, measured from the first remaining row.
        filtered_instance.data.index = (filtered_instance.data.index -
                                        filtered_instance.data.index[0])

    else:
        raise PreprocessingException('Inappropriate DataFrame format')

    # change start time if needed
    # NOTE(review): for timedelta-indexed data the index has just been
    # rebased to start at zero; confirm that get_instance_index(0) still
    # yields the absolute start of the selection in that case.
    first_index = filtered_instance.get_instance_index(0)
    if filtered_instance.start_date != first_index:
        filtered_instance.start_date = first_index

    return filtered_instance
예제 #2
0
def resample(instance: Instance, resample_factor: str,
             resample_method: str) -> Instance:
    """Resample the instance data and back-fill the gaps this creates.

    Args:
        instance: Instance whose ``data`` frame is resampled.
        resample_factor: Rule passed to ``DataFrame.resample``.
        resample_method: Key into the module-level ``methods`` mapping;
            surrounding whitespace is stripped and the name is upper-cased
            before the lookup.

    Returns:
        The result of ``instance.copy_with_different_data`` applied to the
        resampled, back-filled frame.

    Raises:
        PreprocessingException: If the method lookup, the resampling or the
            fill step fails; the original error is attached both as an
            argument and as ``__cause__``.
    """
    try:
        aggregator = methods[resample_method.strip().upper()]
        resampled = instance.data.resample(resample_factor).apply(aggregator)
        # bfill() is the supported spelling of fillna(method='bfill'),
        # which recent pandas releases deprecate.
        resampled_filled = resampled.bfill()
    except Exception as exc:
        raise PreprocessingException('Unappropriate argument for resampling',
                                     exc) from exc

    return instance.copy_with_different_data(resampled_filled)
예제 #3
0
def __eliminate_peaks_using_quantiles(instance: Instance) -> Instance:
    """Replace per-column outliers with the previous in-range value.

    For every column that has a 5% and a 95% quantile, values outside that
    closed interval are blanked out and then forward-filled. The row count
    is preserved. Outliers at the very start of a column stay NaN because
    there is no earlier value to carry forward.

    Args:
        instance: Instance whose ``data`` frame is cleaned; it is not
            modified.

    Returns:
        The result of ``instance.copy_with_different_data`` applied to the
        cleaned frame.
    """
    low, high = 0.05, 0.95

    quantiles = instance.data.quantile([low, high])

    # Work on a copy and accumulate the masking column by column, so every
    # column's filter is kept rather than only the last one processed.
    cleaned = instance.data.copy()
    for name in quantiles.columns:
        column = cleaned[name]
        # between() is inclusive on both ends.
        in_range = column.between(quantiles.loc[low, name],
                                  quantiles.loc[high, name])
        cleaned[name] = column.where(in_range)

    cleaned = cleaned.ffill()
    return instance.copy_with_different_data(cleaned)
예제 #4
0
def create_instance(num_of_columns,
                    num_of_values,
                    columns_prefix='param',
                    index=False):
    """Build an ``Instance`` fixture with a generated float data frame.

    The frame has ``num_of_values`` rows and ``num_of_columns`` columns
    named ``<columns_prefix>1`` .. ``<columns_prefix>N``. Every row holds
    ``0, 1, ..., num_of_columns - 1``.

    Args:
        num_of_columns: Number of columns to generate.
        num_of_values: Number of rows to generate.
        columns_prefix: Prefix of the generated column names.
        index: When true, replace the default index with consecutive daily
            timestamps starting at 2018-11-11.

    Returns:
        The constructed ``Instance``.
    """
    columns = [columns_prefix + str(i) for i in range(1, num_of_columns + 1)]

    # One row per value, one cell per column. Building the rows this way
    # round keeps the square case unchanged and makes non-square shapes
    # work instead of failing on a column-count mismatch.
    values = [list(range(num_of_columns)) for _ in range(num_of_values)]

    instance = Instance(
        '', pd.DataFrame(columns=columns, data=values, dtype=np.float64),
        'type', datetime.datetime(2018, 11, 11), {})
    if index:
        # One calendar day per row, so the index always matches the frame
        # length (three rows give 2018-11-11 .. 2018-11-13).
        first_day = datetime.date(2018, 11, 11)
        days = [(first_day + datetime.timedelta(days=offset)).isoformat()
                for offset in range(len(instance.data))]
        instance.data.index = pd.to_datetime(days, format='%Y-%m-%d')

    return instance
예제 #5
0
    def create_instance(self, instance_json: dict) -> Instance:
        """Build an ``Instance`` from its dictionary description.

        Args:
            instance_json: Mapping with the keys ``'data_range'``,
                ``'params'``, ``'uuid'``, ``'type'``, ``'date_added'`` and
                ``'metadata'``. ``'data_range'`` is either ``None`` or an
                object exposing ``used_for_clustering``.

        Returns:
            The constructed ``Instance``, or ``None`` when a data range is
            present and is not marked as used for clustering.
        """
        # Todo: finish implementation, all instances need to have same number of parameters and values
        data_range = instance_json['data_range']
        if data_range is not None and not data_range.used_for_clustering:
            # Excluded from clustering: report that explicitly instead of
            # falling through to an unbound local.
            return None

        # TODO: the start/end bounds of data_range are not applied to the
        # parameters yet.
        # Assumes instance_json['params'] is something the DataFrame
        # constructor accepts with numeric content - TODO confirm.
        params = pd.DataFrame(instance_json['params'], dtype=np.float32)

        return Instance(instance_json['uuid'], params,
                        instance_json['type'],
                        instance_json['date_added'],
                        instance_json['metadata'])
예제 #6
0
def filter_parameters(instance: Instance,
                      parameters_used: List[str]) -> Instance:
    """Keep only the selected parameters of an instance.

    Args:
        instance: Instance whose ``data`` frame is narrowed down.
        parameters_used: Labels used to index ``instance.data``.

    Returns:
        The result of ``instance.copy_with_different_data`` applied to the
        selected part of the data.
    """
    return instance.copy_with_different_data(instance.data[parameters_used])
예제 #7
0
def remove_constant_parameters(instance: Instance) -> Instance:
    """Drop every column whose values all equal the column's first value.

    A column is kept when at least one entry differs from its first row.
    NaN compares unequal to everything, so a column containing NaN is
    always kept.

    Args:
        instance: Instance whose ``data`` frame is inspected; it is not
            modified.

    Returns:
        The result of ``instance.copy_with_different_data`` applied to the
        frame without constant columns. Data without any rows is passed on
        as an unchanged copy.
    """
    data = instance.data
    if len(data.index) == 0:
        # No first row to compare against; iloc[0] would raise IndexError.
        return instance.copy_with_different_data(data.copy())

    varies = (data != data.iloc[0]).any()
    return instance.copy_with_different_data(data.loc[:, varies])
예제 #8
0
def make_windows(instance: Instance, period) -> Instance:
    """Convert the instance's datetime index into a period index.

    Args:
        instance: Instance whose ``data`` frame must have a
            ``DatetimeIndex``.
        period: Frequency passed to ``DataFrame.to_period``.

    Returns:
        The result of ``instance.copy_with_different_data`` applied to the
        period-indexed frame.

    Raises:
        PreprocessingException: If the data index is not a
            ``DatetimeIndex``.
    """
    if not isinstance(instance.data.index, DatetimeIndex):
        raise PreprocessingException('Inappropriate DataFrame format')

    return instance.copy_with_different_data(instance.data.to_period(period))
예제 #9
0
def standardize_instance(instance: Instance, means: Series, stdevs: Series):
    """Centre and scale the parameters listed in ``means``.

    For every label in ``means.index`` the matching column is replaced by
    ``(column - mean) / stdev``; columns without an entry in ``means`` are
    left as they are in the copy.

    Args:
        instance: Instance providing the source columns.
        means: Per-parameter means, indexed by column label.
        stdevs: Per-parameter standard deviations; looked up with the same
            labels as ``means``.

    Returns:
        A copy of ``instance`` (via ``instance.copy()``) holding the
        rescaled columns.
    """
    result = instance.copy()
    for name in means.index:
        centered = instance.data[name] - means[name]
        result.data[name] = centered / stdevs[name]
    return result