def process(self, datasetdscr, dataset):
    """Drop sensor events whose value barely differs from the last kept one.

    For every sensor in ``datasetdscr.sensor_desc`` that is neither
    ``Nominal`` nor ``OnChange``, the sensor's events are walked in the order
    they appear in ``dataset.s_events``. An event is discarded when the
    absolute difference between its value and the value of the last kept
    event is smaller than 10% of the event's own value.

    Args:
        datasetdscr: object exposing ``sensor_desc``, a DataFrame indexed by
            sensor id with ``Nominal`` and ``OnChange`` columns.
        dataset: object exposing ``name``, ``s_events`` (a DataFrame with
            ``SID`` and ``value`` columns), ``a_events``, ``acts`` and
            ``act_map``.

    Returns:
        A new ``Data`` named after ``dataset`` whose ``s_events`` has the
        discarded rows dropped. ``a_events``, ``acts`` and ``act_map`` are
        the same objects as on ``dataset`` (not copies).
    """
    removekeys = []
    for sid, info in datasetdscr.sensor_desc.iterrows():
        if (info.Nominal | info.OnChange):
            continue

        sensor_events = dataset.s_events.loc[dataset.s_events.SID == sid]

        # NOTE(review): the previous revision also computed
        # (max - min) * 0.1 over this sensor's values, but that threshold was
        # overwritten before ever being read. Only the per-event relative
        # threshold below has ever been in effect; confirm which was intended.

        # Sentinel used as the "previous" value for the sensor's first event.
        last_value = -1000
        # Only the 'value' column is needed, so iterate it directly instead
        # of materialising a full row per event.
        for key, value in sensor_events['value'].items():
            # The threshold is relative to the current value, so for
            # value <= 0 it is never positive and the event is always kept.
            threshold = value * .1
            if abs(last_value - value) < threshold:
                removekeys.append(key)
                continue
            last_value = value

    d = Data(dataset.name)
    d.s_events = dataset.s_events.drop(removekeys)
    d.a_events = dataset.a_events
    d.s_event_list = d.s_events.values
    d.acts = dataset.acts
    d.act_map = dataset.act_map
    return d
def prepare_segment2(func, dtype, datasetdscr):
    """Segment ``dtype`` with ``func.segmentor`` and label every segment.

    ``func.activityFetcher.precompute(dtype)`` is called first. Then every
    item yielded by ``segment2(dtype, datasetdscr, segmentor)`` is stored in
    ``set_window`` and its label is obtained from
    ``func.activityFetcher.getActivity2(dtype.s_event_list, item)``.
    Progress is printed every 10000 items.

    Returns:
        A ``Data`` named ``segmentor.__str__()`` carrying ``set`` (left
        empty), ``set_window`` (list of yielded items), ``label`` (numpy
        array of labels), ``acts`` taken from ``func`` and ``s_events`` /
        ``s_event_list`` / ``a_events`` taken from ``dtype``. The temporary
        ``generator`` attribute is removed before returning.
    """
    segmentor = func.segmentor
    fetcher = func.activityFetcher
    fetcher.precompute(dtype)

    procdata = Data(segmentor.__str__())
    procdata.generator = segment2(dtype, datasetdscr, segmentor)
    procdata.set = []
    procdata.label = []
    procdata.set_window = []
    procdata.acts = func.acts
    procdata.s_events = dtype.s_events
    procdata.s_event_list = dtype.s_event_list
    procdata.a_events = dtype.a_events

    for seen, window in enumerate(procdata.generator):
        # Lightweight progress report for long segmentations.
        if seen % 10000 == 0:
            print(segmentor.shortname(), seen)
        procdata.set_window.append(window)
        procdata.label.append(
            fetcher.getActivity2(dtype.s_event_list, window))

    # The generator is exhausted; remove it from the result object.
    del procdata.generator
    procdata.label = np.array(procdata.label)
    return procdata
def makeTrainTest(self, sensor_events, activity_events):
    """Split events chronologically into a train part and a test part.

    The split instant lies 4/5 of the way between the earliest activity
    ``StartTime`` and the latest activity ``EndTime``, truncated to the
    start of that calendar day.

    Args:
        sensor_events: DataFrame with a ``time`` column.
        activity_events: DataFrame with ``StartTime`` and ``EndTime``
            columns.

    Returns:
        ``(Train, Test)``. ``Train`` holds sensor events with
        ``time < split`` and activities with ``EndTime < split``; ``Test``
        holds the rows with ``time >= split`` / ``EndTime >= split``.
    """
    first_start = min(activity_events.StartTime)
    last_end = max(activity_events.EndTime)
    dataset_split = first_start + ((last_end - first_start) * 4 / 5)
    # Snap the boundary to midnight so the split falls on a day boundary.
    dataset_split = pd.to_datetime(dataset_split.date())

    Train = Data('train')
    Test = Data('test')

    sensor_before = sensor_events.time < dataset_split
    activity_before = activity_events.EndTime < dataset_split
    Train.s_events = sensor_events[sensor_before]
    Train.a_events = activity_events[activity_before]
    Train.s_event_list = Train.s_events.values

    # The test masks are computed with >= rather than by negating the train
    # masks, matching the original comparisons exactly.
    sensor_from = sensor_events.time >= dataset_split
    activity_from = activity_events.EndTime >= dataset_split
    Test.s_events = sensor_events[sensor_from]
    Test.a_events = activity_events[activity_from]
    Test.s_event_list = Test.s_events.values

    return Train, Test
def makeFoldTrainTest(self, sensor_events, activity_events, fold):
    """Yield day-based K-fold ``(Train, Test)`` pairs.

    The distinct calendar days on which activities start are divided by
    ``KFold(n_splits=fold)``. For each fold, sensor events are selected by
    the day of their ``time`` and activities by the day of their
    ``StartTime``.

    Args:
        sensor_events: DataFrame with a ``time`` column.
        activity_events: DataFrame with a ``StartTime`` column.
        fold: number of splits passed to ``KFold``.

    Yields:
        ``(Train0, Test0)`` ``Data`` pairs named ``train_fold_<j>`` and
        ``test_fold_<j>``.
    """
    def _day_of(stamp):
        return stamp.date()

    sensor_days = sensor_events.time.apply(_day_of)
    activity_days = activity_events.StartTime.apply(_day_of)
    days = activity_days.unique()

    splitter = KFold(n_splits=fold)
    # Return value is not used; call retained from the original code.
    splitter.get_n_splits(days)

    for j, (train_index, test_index) in enumerate(splitter.split(days)):
        train_days = days[train_index]
        test_days = days[test_index]

        Train0 = Data('train_fold_' + str(j))
        Train0.s_events = sensor_events.loc[sensor_days.isin(train_days)]
        Train0.a_events = activity_events.loc[activity_days.isin(train_days)]
        Train0.s_event_list = Train0.s_events.values

        Test0 = Data('test_fold_' + str(j))
        Test0.s_events = sensor_events.loc[sensor_days.isin(test_days)]
        Test0.a_events = activity_events.loc[activity_days.isin(test_days)]
        Test0.s_event_list = Test0.s_events.values

        yield Train0, Test0
def fewDaysSplit(self, data, count):
    """Build a ``Data`` restricted to ``count`` randomly chosen days.

    Candidate days are the distinct calendar days on which activities in
    ``data.a_events`` start; ``count`` of them are drawn with
    ``random.sample``.

    Args:
        data: object exposing ``s_events`` (with a ``time`` column) and
            ``a_events`` (with a ``StartTime`` column).
        count: number of days to draw.

    Returns:
        A ``Data`` named ``'train_random_days' + str(selected days)``
        holding the sensor events whose ``time`` falls on a selected day
        and the activities whose ``StartTime`` falls on a selected day.
    """
    import random

    def _day_of(stamp):
        return stamp.date()

    sensor_events = data.s_events
    activity_events = data.a_events

    sensor_days = sensor_events.time.apply(_day_of)
    activity_days = activity_events.StartTime.apply(_day_of)
    candidate_days = list(activity_days.unique())

    selecteddays = random.sample(candidate_days, count)

    Train0 = Data('train_random_days' + str(selecteddays))
    Train0.s_events = sensor_events.loc[sensor_days.isin(selecteddays)]
    Train0.a_events = activity_events.loc[activity_days.isin(selecteddays)]
    Train0.s_event_list = Train0.s_events.values
    return Train0
def justifySet(self, acts, Train, add_other_action=False):
    """Re-encode activity labels of ``Train`` as indices into ``acts``.

    ``0`` is guaranteed to be the first entry of ``acts``: it is inserted at
    the front when ``acts`` is empty or starts with anything else. Each
    value of ``Train.a_events.Activity`` is then replaced by its position in
    ``acts``.

    Args:
        acts: list of activity labels. NOTE: mutated in place when ``0`` has
            to be inserted, and the same list object is stored on the
            result as ``acts``.
        Train: object exposing ``name``, ``s_events`` and ``a_events`` (a
            DataFrame with an ``Activity`` column). It is not modified;
            ``a_events`` is copied before being rewritten.
        add_other_action: when true, labels missing from ``acts`` map to
            ``len(acts)``; otherwise they map to ``0``.

    Returns:
        A new ``Data`` named after ``Train`` with re-encoded ``a_events``,
        the original ``s_events`` object, and ``act_map`` / ``acts`` set.
    """
    # An empty list previously raised IndexError on acts[0]; treat it like
    # any other list that does not start with 0.
    if len(acts) == 0 or acts[0] != 0:
        acts.insert(0, 0)
    act_map = {a: i for i, a in enumerate(acts)}

    # Id given to labels that are not listed in acts.
    unknown_id = len(acts) if add_other_action else 0

    ndtype = Data(Train.name)
    ndtype.s_events = Train.s_events
    ndtype.a_events = Train.a_events.copy()
    ndtype.a_events['Activity'] = Train.a_events.Activity.apply(
        lambda label: act_map.get(label, unknown_id))
    ndtype.act_map = act_map
    ndtype.acts = acts
    return ndtype