# Example #1
# 0
class Observation:
    """Load, clean and concatenate per-sensor observation time series.

    One CSV file per suffix is read from ``path`` (named
    ``<reactor_site>-<suffix>.txt``, ';'-separated, two columns: date, value),
    indexed by parsed date, and concatenated column-wise into ``self.df``.
    Isolated wrong values (0 or MAX_VALUE) are blanked and filled; longer
    bad intervals on ``remove_on`` columns are collected for removal.
    """

    def __init__(self, path, reactor_site, suffix_list, format="%Y-%m-%dT%H:%M:%S.000Z",
                 hours_backfill=1, verbose=0, ignore_keys=None, remove_on=None):
        """Build the concatenated, cleaned observation DataFrame.

        Parameters
        ----------
        path : str
            Directory prefix where the per-suffix files live.
        reactor_site : str
            Site identifier used as the file-name prefix.
        suffix_list : list of str
            One suffix per sensor/series; also used as column names.
        format : str
            Datetime format of the 'date' column (name kept for backward
            compatibility even though it shadows the builtin).
        hours_backfill : int
            Bad-value runs no longer than this many hours are NaN-ed and
            filled; longer runs are handled per ``remove_on``.
        verbose : int
            Non-zero enables progress printing.
        ignore_keys : list of str, optional
            Keys skipped when building intervals to remove (deprecated).
            Defaults to an empty list.
        remove_on : list of str, optional
            Columns whose long bad intervals are recorded for removal
            instead of blanked. Defaults to ``[deb1[0]]``.
        """
        # BUG FIX: the original defaults (ignore_keys=[], remove_on=[deb1[0]])
        # were mutable and evaluated once at import time (the latter also
        # freezing the value of the global deb1); resolve them per call.
        if ignore_keys is None:
            ignore_keys = []
        if remove_on is None:
            remove_on = [deb1[0]]
        self.verboseprint = print if verbose else lambda *a, **k: None
        self.verboseprint("Loading in memory %i observations..." % (len(suffix_list),))
        self.hours_backfill = hours_backfill
        files_name = [reactor_site + "-" + suffix + ".txt" for suffix in suffix_list]
        list_df = [pd.read_csv(path + file_name, sep=";") for file_name in files_name]
        self.ignore_keys = ignore_keys  # TODO : remove deprecated
        self.remove_on = remove_on
        for df, tag in zip(list_df, suffix_list):
            df.columns = ["date", tag]
            # Duplicate timestamps would break the column-wise concat below.
            df.drop_duplicates(subset="date", inplace=True)
            df['date'] = pd.to_datetime(df['date'], format=format)
            df.set_index('date', inplace=True)
        self.verboseprint("Concatenation...")
        self.df = pd.concat(list_df, axis=1)
        self.bad_labels_dict = {}
        self.change_isolated_wrong_values()
        self.verboseprint("Forward Filling...")
        # ffill()/bfill() replace the deprecated fillna(method=...) form.
        self.df.ffill(inplace=True)
        self.verboseprint("Backward Filling...")
        self.df.bfill(inplace=True)

        self.compute_intervals_to_remove()
        self.compute_full_concatenated_df()
        self.compute_low_regime_intervals()

    def change_isolated_wrong_values(self):
        """Blank short runs of wrong values; record or blank the long ones.

        A value is "wrong" when it equals MAX_VALUE or 0. Consecutive wrong
        values (within 10 minutes of each other) form an interval. Intervals
        no longer than ``hours_backfill`` hours are set to NaN everywhere
        (to be filled later). Longer intervals are kept in
        ``bad_labels_dict`` for ``remove_on`` columns, NaN-ed otherwise.
        """
        self.verboseprint("Changing isolated wrong values...")
        for column in self.df:
            bad_labels = self.df.index[((self.df[column] == MAX_VALUE) | (self.df[column] == 0))]  ## >= THRESHOLD
            bad_labels = sequence_to_interval(bad_labels, timedelta(minutes=10))  # Strictly consecutive wrong values
            to_change_index = (bad_labels[:, 1] - bad_labels[:, 0]) <= timedelta(hours=self.hours_backfill)
            for begin, end in bad_labels[to_change_index]:
                # BUG FIX: .loc replaces chained indexing df[col][a:b] = ...,
                # which may silently assign to a copy (SettingWithCopy).
                self.df.loc[begin:end, column] = np.nan
            if column in self.remove_on:
                self.bad_labels_dict[column] = bad_labels[~to_change_index]
            else:
                for begin, end in bad_labels[~to_change_index]:
                    self.df.loc[begin:end, column] = np.nan

    def compute_intervals_to_remove(self):
        """Union all recorded long bad intervals, skipping ignored keys."""
        self.intervals_to_remove = Interval([])
        for key, intervals_bad_level in self.bad_labels_dict.items():
            if (key not in self.ignore_keys):
                self.intervals_to_remove.update(intervals_bad_level)

    def compute_low_regime_intervals(self):
        """Detect low-regime periods (signal < 200) on the deb1[0] column.

        Nearby low periods (within 15 days) are merged, intervals adjacent
        to removed data (10-minute margin) are absorbed conditionally, and
        sub-hour spikes are filtered out.
        """
        #time_precision = '10m'#'6H'
        low_regime_merge_time = timedelta(days=15)  # Merging time for low regime
        margin_intervals_to_remove = timedelta(minutes=10)  # Be careful, a high time_precision can make this wrong !
        filter_spike = timedelta(hours=1)  # Below that, the interval is considered as a spike !

        #subsample = self.full_concatenated_df[deb1[0]].resample(time_precision, label='right').min()
        subsample = self.full_concatenated_df[deb1[0]]
        self.low_regime_intervals = sequence_to_interval(subsample.index[(subsample < 200)],
                                                         low_regime_merge_time)
        self.low_regime_intervals = Interval(self.low_regime_intervals)
        self.low_regime_intervals.update_conditionally(
            self.intervals_to_remove.enlarge(margin_intervals_to_remove))
        self.low_regime_intervals.filter(filter_spike)

    def compute_full_concatenated_df(self):
        """Re-assemble the DataFrame from the segments kept between removed intervals."""
        self.full_concatenated_df = pd.concat(self.intervals_to_remove.split_between(self.df), axis=0)