def merge_split_overlap_IntervalTree(p_acts, r_acts):
    """Merge predicted (p_acts) and real (r_acts) activity frames into one
    IntervalTree, split overlapping intervals and merge equal ones."""
    tree = IntervalTree()
    PACT = column_index(p_acts, 'Activity')
    PSTIME = column_index(p_acts, 'StartTime')
    PETIME = column_index(p_acts, 'EndTime')
    for row in p_acts.values:
        if row[PACT] == 0:  # skip the null activity
            continue
        start = row[PSTIME]
        end = row[PETIME]
        startv = start.value
        endv = end.value
        if startv == endv:  # widen zero-length events by one nanosecond
            startv = startv - 1
        d = Data('P-act')
        d.P = {'Activity': row[PACT], 'StartTime': start, 'EndTime': end}
        d.R = None
        tree[startv:endv] = d

    RACT = column_index(r_acts, 'Activity')
    RSTIME = column_index(r_acts, 'StartTime')
    RETIME = column_index(r_acts, 'EndTime')
    for row in r_acts.values:
        if row[RACT] == 0:  # skip the null activity
            continue
        start = row[RSTIME]
        end = row[RETIME]
        startv = start.value
        endv = end.value
        if startv == endv:
            startv = startv - 1
        d = Data('R-act')
        d.P = None
        d.R = {'Activity': row[RACT], 'StartTime': start, 'EndTime': end}
        tree[startv:endv] = d

    tree.split_overlaps()

    def data_reducer(x, y):
        # combine the payloads of two equal intervals; for each of P and R,
        # the event with the earlier EndTime wins
        res = Data('merge')
        res.R = x.R
        res.P = x.P
        if y.P is not None:
            if res.P is None or y.P['EndTime'] < res.P['EndTime']:
                res.P = y.P
        if y.R is not None:
            if res.R is None or y.R['EndTime'] < res.R['EndTime']:
                res.R = y.R
        return res

    tree.merge_equals(data_reducer=data_reducer)
    return tree
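# A minimal sketch of the split/merge mechanics used above, on plain set
# payloads instead of Data objects (assumes the `intervaltree` package;
# the toy intervals are made up for illustration).
from intervaltree import IntervalTree

demo = IntervalTree()
demo[0:10] = {'P'}            # predicted activity covers [0, 10)
demo[5:15] = {'R'}            # real activity covers [5, 15)
demo.split_overlaps()         # -> [0,5), [5,10), [5,10), [10,15)
demo.merge_equals(data_reducer=lambda x, y: x | y)
for iv in sorted(demo):
    print(iv.begin, iv.end, sorted(iv.data))
# 0 5 ['P']
# 5 10 ['P', 'R']
# 10 15 ['R']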
def process(self, datasetdscr, dataset):
    """Drop sensor events whose value changed less than 10% of the current
    value since the last kept event (noise filter for numeric sensors)."""
    removekeys = []
    for sid, info in datasetdscr.sensor_desc.iterrows():
        last = {'value': -1000}
        if info.Nominal | info.OnChange:
            continue  # only filter continuous-valued sensors
        xs = dataset.s_events.loc[dataset.s_events.SID == sid]
        vmin = xs.value.min()
        vmax = xs.value.max()
        invalid_changes = (vmax - vmin) * 0.1  # range-based threshold (overridden below)
        for key, event in xs.iterrows():
            # per-event threshold: 10% of the current reading
            invalid_changes = event['value'] * .1
            if abs(last['value'] - event['value']) < invalid_changes:
                removekeys.append(key)
                continue
            last = event

    d = Data(dataset.name)
    d.s_events = dataset.s_events.drop(removekeys)
    d.a_events = dataset.a_events
    d.s_event_list = d.s_events.values
    d.acts = dataset.acts
    d.act_map = dataset.act_map
    return d
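# Sketch of the 10% change filter on a toy value series: readings that moved
# less than 10% of their own value since the last kept reading are dropped
# (values are made up for illustration).
vals = [20.0, 21.0, 25.0, 25.5, 30.0]
last, kept = -1000, []
for v in vals:
    if abs(last - v) < v * 0.1:
        continue          # change too small: treated as sensor noise
    kept.append(v)
    last = v
print(kept)               # [20.0, 25.0, 30.0]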
def prepare_segment2(func, dtype, datasetdscr):
    segmentor = func.segmentor
    func.activityFetcher.precompute(dtype)
    procdata = Data(segmentor.__str__())
    procdata.generator = segment2(dtype, datasetdscr, segmentor)
    procdata.set = []
    procdata.label = []
    procdata.set_window = []
    procdata.acts = func.acts
    procdata.s_events = dtype.s_events
    procdata.s_event_list = dtype.s_event_list
    procdata.a_events = dtype.a_events

    i = 0
    for x in procdata.generator:
        if i % 10000 == 0:  # progress log every 10k windows
            print(segmentor.shortname(), i)
        i += 1
        procdata.set_window.append(x)
        procdata.label.append(
            func.activityFetcher.getActivity2(dtype.s_event_list, x))

    del procdata.generator
    procdata.label = np.array(procdata.label)
    return procdata
def makeTrainTest(self, sensor_events, activity_events):
    # split at 4/5 of the covered time span, truncated to a day boundary
    dataset_split = min(activity_events.StartTime) + \
        ((max(activity_events.EndTime) - min(activity_events.StartTime)) * 4 / 5)
    dataset_split = pd.to_datetime(dataset_split.date())

    Train = Data('train')
    Test = Data('test')
    Train.s_events = sensor_events[sensor_events.time < dataset_split]
    Train.a_events = activity_events[activity_events.EndTime < dataset_split]
    Train.s_event_list = Train.s_events.values
    Test.s_events = sensor_events[sensor_events.time >= dataset_split]
    Test.a_events = activity_events[activity_events.EndTime >= dataset_split]
    Test.s_event_list = Test.s_events.values
    return Train, Test
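# Sketch of the split-point arithmetic above: the boundary sits at 4/5 of
# the time span, truncated to midnight (timestamps are hypothetical).
import pandas as pd

start = pd.Timestamp('2020-01-01 08:00')
end = pd.Timestamp('2020-01-11 20:00')
split = start + (end - start) * 4 / 5
split = pd.to_datetime(split.date())   # truncate to the day boundary
print(split)                           # 2020-01-09 00:00:00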
def get_info(self):
    func = self.functions
    result = Data('Result')
    result.shortrunname = func.shortrunname
    result.functions = {}
    for f in func.__dict__:
        obj = func.__dict__[f]
        if isinstance(obj, MyTask):
            result.functions[f] = (obj.shortname(), obj.params)
    return result
def makeFoldTrainTest(self, sensor_events, activity_events, fold):
    from sklearn.model_selection import KFold
    sdate = sensor_events.time.apply(lambda x: x.date())
    adate = activity_events.StartTime.apply(lambda x: x.date())
    days = adate.unique()
    kf = KFold(n_splits=fold)
    kf.get_n_splits(days)
    for j, (train_index, test_index) in enumerate(kf.split(days)):
        Train0 = Data('train_fold_' + str(j))
        Train0.s_events = sensor_events.loc[sdate.isin(days[train_index])]
        Train0.a_events = activity_events.loc[adate.isin(days[train_index])]
        Train0.s_event_list = Train0.s_events.values

        Test0 = Data('test_fold_' + str(j))
        Test0.s_events = sensor_events.loc[sdate.isin(days[test_index])]
        Test0.a_events = activity_events.loc[adate.isin(days[test_index])]
        Test0.s_event_list = Test0.s_events.values
        yield Train0, Test0
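# How the day-level folding behaves (sketch with made-up dates): KFold
# partitions the unique days, not individual events, so whole days land
# in either train or test.
import numpy as np
from sklearn.model_selection import KFold

days = np.array(['2020-01-01', '2020-01-02', '2020-01-03',
                 '2020-01-04', '2020-01-05'])
for j, (tr, te) in enumerate(KFold(n_splits=5).split(days)):
    print(j, days[tr], days[te])
# fold 0 tests on day 1, fold 1 on day 2, ...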
def load():
    """Build a small synthetic dataset: a ground-truth stream (gt) and two
    event streams to compare against it (a, b), all given as
    (start, end) offsets in seconds."""
    gt = [(65, 141), (157, 187), (260, 304), (324, 326), (380, 393),
          (455, 470), (475, 485), (505, 555), (666, 807), (814, 888),
          (903, 929)]
    a = [(66, 73), (78, 126), (135, 147), (175, 186), (225, 236), (274, 318),
         (349, 354), (366, 372), (423, 436), (453, 460), (467, 473),
         (487, 493), (501, 506), (515, 525), (531, 542), (545, 563),
         (576, 580), (607, 611), (641, 646), (665, 673), (678, 898),
         (907, 933)]
    b = [(63, 136), (166, 188), (257, 310), (451, 473), (519, 546),
         (663, 916)]

    init = pd.to_datetime('1/1/2020')

    def to_events(intervals):
        # convert (start, end) second offsets into an activity-event frame;
        # list accumulation replaces the deprecated DataFrame.append
        rows = [{
            'StartTime': init + pd.to_timedelta(f'{s}s'),
            'EndTime': init + pd.to_timedelta(f'{e}s'),
            'Activity': 1,
            'Duration': pd.to_timedelta(f'{e - s}s')
        } for s, e in intervals]
        return pd.DataFrame(rows, columns=['StartTime', 'EndTime',
                                           'Activity', 'Duration'])

    dataset = Data('test')
    dataset.activities = ['null', 'Act']
    dataset.activities_map = {0: 'null', 1: 'Act'}
    dataset.activities_map_inverse = {'null': 0, 'Act': 1}
    dataset.activity_events = to_events(gt)
    return dataset, to_events(a), to_events(b)
def merge_split_overlap_IntervalTree(p_acts, r_acts):
    """Variant of merge_split_overlap_IntervalTree that takes iterables of
    event dicts instead of DataFrames."""
    tree = IntervalTree()
    for act in p_acts:
        if act['Activity'] == 0:  # skip the null activity
            continue
        start = act['StartTime'].value
        end = act['EndTime'].value
        if start == end:  # widen zero-length events by one nanosecond
            start = start - 1
        d = Data('P-act')
        d.P = act
        d.R = None
        tree[start:end] = d

    for act in r_acts:
        start = act['StartTime'].value
        end = act['EndTime'].value
        if start == end:
            start = start - 1
        d = Data('R-act')  # was mislabeled 'P-act'
        d.P = None
        d.R = act
        tree[start:end] = d

    tree.split_overlaps()

    def data_reducer(x, y):
        # build a fresh payload instead of mutating x in place
        res = Data('merge')
        res.P = x.P
        res.R = x.R
        if y.P is not None:
            if res.P is None or y.P['EndTime'] < res.P['EndTime']:
                res.P = y.P
        if y.R is not None:
            if res.R is None or y.R['EndTime'] < res.R['EndTime']:
                res.R = y.R
        return res

    tree.merge_equals(data_reducer=data_reducer)
    return tree
def create(real, pred, filename):
    evalres = [{}]
    evalres[0]['test'] = Data('test res')
    evalres[0]['test'].real_events = vs.convert2event(real)
    evalres[0]['test'].pred_events = vs.convert2event(pred)
    evalres[0]['test'].quality = {}

    dataset = Data('MyDataset')
    dataset.activities = ['None', 'Act']
    dataset.activity_events = evalres[0]['test'].real_events
    dataset.activities_map_inverse = {
        k: v for v, k in enumerate(dataset.activities)
    }
    dataset.activities_map = {v: k for v, k in enumerate(dataset.activities)}
    dataset.sensor_events = pd.DataFrame()

    runinfo = filename
    utils.saveState([runinfo, dataset, evalres], filename)
def data_reducer(x, y):
    # merge two interval payloads: for each of P and R, keep the event
    # with the earlier EndTime
    res = Data('merge')
    res.R = x.R
    res.P = x.P
    if y.P is not None:
        if res.P is None or y.P['EndTime'] < res.P['EndTime']:
            res.P = y.P
    if y.R is not None:
        if res.R is None or y.R['EndTime'] < res.R['EndTime']:
            res.R = y.R
    return res
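# Worked example of the reducer: when two equal intervals are merged, the
# P/R payload with the earlier EndTime wins (stub payloads made up for
# illustration; Data is the attribute container from general.utils).
x = Data('merge'); x.P = {'EndTime': 10}; x.R = None
y = Data('merge'); y.P = {'EndTime': 5};  y.R = {'EndTime': 7}
z = data_reducer(x, y)
print(z.P, z.R)   # {'EndTime': 5} {'EndTime': 7}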
def fewDaysSplit(self, data, count):
    import random
    sensor_events = data.s_events
    activity_events = data.a_events
    sdate = sensor_events.time.apply(lambda x: x.date())
    adate = activity_events.StartTime.apply(lambda x: x.date())
    days = adate.unique()
    selecteddays = random.sample(list(days), count)

    Train0 = Data('train_random_days' + str(selecteddays))
    Train0.s_events = sensor_events.loc[sdate.isin(selecteddays)]
    Train0.a_events = activity_events.loc[adate.isin(selecteddays)]
    Train0.s_event_list = Train0.s_events.values
    return Train0
def pipeline(self, func, data, train):
    import os
    os.system("taskset -p 0xff %d" % os.getpid())  # Linux: allow all CPUs
    func.acts = self.acts
    logger.debug('Starting .... %s' % (func.shortrunname))
    Tdata = func.preprocessor.process(self.datasetdscr, data)
    logger.debug('Preprocessing Finished %s' % (func.preprocessor.shortname()))
    Sdata = prepare_segment2(func, Tdata, self.datasetdscr)
    logger.debug('Segmentation Finished %d segments created %s' %
                 (len(Sdata.set_window), func.segmentor.shortname()))
    Sdata.set = featureExtraction(func.featureExtractor, self.datasetdscr,
                                  Sdata, True)
    logger.debug('FeatureExtraction Finished shape %s , %s' %
                 (str(Sdata.set.shape), func.featureExtractor.shortname()))

    if train:
        func.classifier.createmodel(Sdata.set[0].shape, len(self.acts))
        func.classifier.setWeight(self.weight)
        logger.debug('Classifier model created %s' % (func.classifier.shortname()))
        func.classifier.train(Sdata.set, Sdata.label)
        logger.debug('Classifier model trained %s' % (func.classifier.shortname()))

    logger.info("Evaluating....")
    result = Data('Result')
    result.shortrunname = func.shortrunname
    result.Sdata = Sdata
    result.functions = {}
    for f in func.__dict__:
        obj = func.__dict__[f]
        if isinstance(obj, MyTask):
            result.functions[f] = (obj.shortname(), obj.params)

    result.predicted = func.classifier.predict(Sdata.set)
    result.predicted_classes = func.classifier.predict_classes(Sdata.set)
    pred_events = func.combiner.combine(Sdata.s_event_list, Sdata.set_window,
                                        result.predicted)
    logger.debug('events merged %s' % (func.combiner.shortname()))
    result.pred_events = pred_events
    result.real_events = data.a_events
    result.event_cm = event_confusion_matrix(data.a_events, pred_events, self.acts)
    result.quality = CMbasedMetric(result.event_cm, 'macro', self.weight)
    logger.debug('Evaluation quality is %s' % result.quality)
    return result
def method_param_selector(callback, uniquekey):
    import itertools
    from constants import methods
    s = [
        methods.preprocessing, methods.segmentation, methods.activity_fetcher,
        methods.feature_extraction, methods.classifier
    ]
    permut = list(itertools.product(*s))
    allpool = []
    for item in permut:
        func = Data('Functions')
        func.uniquekey = uniquekey
        func.preprocessor = createFunction(item[0])
        func.segmentor = createFunction(item[1])
        func.activityFetcher = createFunction(item[2])
        func.featureExtractor = createFunction(item[3])
        func.classifier = createFunction(item[4])
        func.combiner = createFunction(methods.combiner[0])
        func.classifier_metric = createFunction(methods.classifier_metric[0])
        func.event_metric = createFunction(methods.classifier_metric[0])
        func.shortrunname = ''
        for k in func.__dict__:
            obj = func.__dict__[k]
            if isinstance(obj, MyTask):
                obj.func = func
                func.shortrunname += obj.shortname() + '_'
        optl = OptLearn(func, callback)
        allpool.append(optl)

    success, fail = run(allpool, True)
    # keep the job with the smallest optimization score q
    bestJobscore = success[0].result['optq']['q']
    bestJob = success[0]
    for job in success:
        if bestJobscore > job.result['optq']['q']:
            bestJobscore = job.result['optq']['q']
            bestJob = job
    return bestJob
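# Sketch of the grid expansion above: itertools.product enumerates every
# combination of one choice per pipeline stage (stage names are made up).
import itertools
s = [['pre1'], ['seg1', 'seg2'], ['clf1', 'clf2']]
print(list(itertools.product(*s)))
# [('pre1', 'seg1', 'clf1'), ('pre1', 'seg1', 'clf2'),
#  ('pre1', 'seg2', 'clf1'), ('pre1', 'seg2', 'clf2')]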
def justifySet(self, acts, Train, add_other_action=False):
    inp = [Train]
    out = []
    if acts[0] != 0:
        acts.insert(0, 0)  # make sure the null activity 0 is present
    act_map = {a: i for i, a in enumerate(acts)}
    for dtype in inp:
        ndtype = Data(dtype.name)
        ndtype.s_events = dtype.s_events
        ndtype.a_events = dtype.a_events.copy()
        # remap known activities to compact ids; unknown ones become the
        # "other" class (len(acts)) or the null class 0
        ndtype.a_events.Activity = dtype.a_events.Activity.apply(
            lambda x: act_map[x] if x in act_map else
            (len(acts) if add_other_action else 0))
        out.append(ndtype)
        ndtype.act_map = act_map
        ndtype.acts = acts
    return out[0]  # Train
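# Sketch of the remapping for acts=[0, 3, 7] with add_other_action=False:
# known activities get compact ids, unknown ones collapse to the null
# class 0 (the activity ids here are made up).
acts = [0, 3, 7]
act_map = {a: i for i, a in enumerate(acts)}    # {0: 0, 3: 1, 7: 2}
remap = lambda x: act_map[x] if x in act_map else 0
print([remap(x) for x in [3, 7, 5]])            # [1, 2, 0]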
import feature_extraction.Raw
# from general.libimport import *
from general.utils import Data
import metric.classical
import ml_strategy.Simple
import ml_strategy.FastFinder
import ml_strategy.SeperateGroup
import ml_strategy.WeightedGroup
import ml_strategy.WeightedGroup2
import preprocessing.SimplePreprocessing
import segmentation.Probabilistic
import segmentation.FixedEventWindow
import segmentation.FixedSlidingWindow
import segmentation.FixedTimeWindow

methods = Data('methods')
methods.segmentation = [
    {'method': lambda: segmentation.FixedEventWindow.FixedEventWindow(), 'params': [
        {'var': 'size', 'min': 10, 'max': 30, 'type': 'int', 'init': 10,
         'range': list(range(10, 26, 5))},
        {'var': 'shift', 'min': 2, 'max': 20, 'type': 'int', 'init': 10,
         'range': list(range(10, 16, 5))}
    ], 'findopt': False},
    {'method': lambda: segmentation.FixedSlidingWindow.FixedSlidingWindow(), 'params': [
        {'var': 'size', 'min': 60, 'max': 15 * 60, 'type': 'float', 'init': 120 / 4,
         'range': list(range(15, 76, 15))},
        {'var': 'shift', 'min': 10, 'max': 7 * 60, 'type': 'float', 'init': 60 / 2,
         'range': list(range(15, 45, 15))}
    ], 'findopt': False},
    {'method': lambda: segmentation.Probabilistic.Probabilistic(), 'params': [],
     'findopt': False},
    # {'method': lambda: segmentation.FixedTimeWindow.FixedTimeWindow(), 'params': [
    #     {'var': 'size', 'min': pd.Timedelta(1, unit='s').total_seconds(),
    #      'max': pd.Timedelta(30, unit='m').total_seconds(), 'type': 'float',
    #      'init': pd.Timedelta(15, unit='s').total_seconds()},
    #     {'var': 'shift', 'min': pd.Timedelta(1, unit='s').total_seconds(),
    #      'max': pd.Timedelta(30, unit='m').total_seconds(), 'type': 'float',
    #      'init': pd.Timedelta(1, unit='s').total_seconds()}
    # ], 'findopt': False},
]
def fusion(self, results, real_events, isTrain):
    intree = IntervalTree()
    logger.info("\n======================= fusion activities ========")

    # Segmentation ###########################
    for indx, tacts in enumerate(self.gacts):
        result = results[indx]
        for i in range(0, len(result.Sdata.set_window)):
            idx = result.Sdata.set_window[i]
            start = result.Sdata.s_event_list[idx[0], 1]
            end = result.Sdata.s_event_list[idx[-1], 1]
            rcls = result.Sdata.label[i]
            pcls = result.predicted_classes[i]
            fullprob = result.predicted[i]
            if end == start:
                continue
            d = Data(str(i))
            d.real = rcls
            d.pred = pcls
            d.pred_prob = fullprob
            if isTrain:
                self.train_quality[indx] = result.quality
            d.gindx = indx
            intree[start:end] = d

    intree.split_overlaps()
    segments = defaultdict(dict)
    for item in intree.items():
        # pack (begin, end) into a single integer dictionary key
        key = item.begin.value << 64 | item.end.value
        segments[key]['begin'] = item.begin
        segments[key]['end'] = item.end
        segments[key][item.data.gindx] = item.data

    # Feature Extraction ###########################
    f = np.zeros((len(segments), len(self.gacts) * len(self.acts)))
    label = np.zeros(len(segments))
    times = []
    iseg = 0
    for timeseg in segments:
        seg = segments[timeseg]
        times.append({'begin': seg['begin'], 'end': seg['end']})
        for indx in range(len(self.gacts)):
            if indx in seg:
                label[iseg] = seg[indx].real
                start = indx * len(self.acts)
                end = (indx + 1) * len(self.acts)
                if self.train_quality[indx]['f1'] < 0.1:
                    continue  # ignore groups with very poor training quality
                f[iseg, start:end] = seg[indx].pred_prob
        iseg += 1

    # Train #######################
    if isTrain:
        inputsize = (len(f[0]),)
        outputsize = len(self.acts)
        self.fusion_model = tf.keras.models.Sequential([
            tf.keras.layers.Dense(128, input_shape=inputsize),
            tf.keras.layers.Dense(512, activation=tf.nn.relu),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(outputsize, activation=tf.nn.softmax)
        ], name='fusion')
        if np.max(label) == 0:
            cw = np.ones(len(self.acts))
        else:
            cw = compute_class_weight(class_weight="balanced",
                                      classes=self.acts, y=label)
        cw = dict(enumerate(cw))  # Keras expects a class-index -> weight dict
        self.fusion_model.summary()
        self.fusion_model.compile(
            optimizer='adam',
            loss='sparse_categorical_crossentropy',
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name='acc')])
        self.fusion_model.fit(f, label, epochs=10, class_weight=cw)

    # Evaluate #######################
    result = Data('result')
    result.results = results
    result.predicted = self.fusion_model.predict(f)
    # predict_classes was removed from tf.keras; argmax is equivalent
    result.predicted_classes = np.argmax(result.predicted, axis=1)
    pred_events = []
    for i in range(len(f)):
        pred_events.append({
            'Activity': result.predicted_classes[i],
            'StartTime': times[i]['begin'],
            'EndTime': times[i]['end']
        })
    pred_events = pd.DataFrame(pred_events)
    pred_events = pred_events.sort_values(['StartTime']).reset_index(drop=True)

    result.shortrunname = "fusion model" + str(
        {r: results[r].shortrunname for r in results})
    result.times = times
    result.pred_events = pred_events
    result.real_events = real_events
    result.event_cm = event_confusion_matrix(result.real_events,
                                             result.pred_events, self.acts)
    result.quality = CMbasedMetric(result.event_cm, 'macro')
    result.functions = {r: results[r].functions for r in results}
    logger.debug('Evaluation quality is %s' % result.quality)
    return result
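# The segment dictionary key above packs two nanosecond timestamps into
# one integer: begin in the high 64 bits, end in the low 64. A quick check:
import pandas as pd
b = pd.Timestamp('2020-01-01 00:00:00').value   # nanoseconds since epoch
e = pd.Timestamp('2020-01-01 00:00:30').value
key = b << 64 | e
print(key >> 64 == b, key & ((1 << 64) - 1) == e)   # True True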
def fusion(self, results, real_events, isTrain):
    intree = IntervalTree()
    logger.info("\n======================= fusion activities ========")

    # Segmentation ###########################
    for indx, tacts in enumerate(self.gacts):
        result = results[indx]
        for i in range(0, len(result.Sdata.set_window)):
            idx = result.Sdata.set_window[i]
            start = result.Sdata.s_event_list[idx[0], 1]
            end = result.Sdata.s_event_list[idx[-1], 1]
            rcls = result.Sdata.label[i]
            pcls = result.predicted_classes[i]
            fullprob = result.predicted[i]
            if end == start:
                continue
            d = Data(str(i))
            d.real = rcls
            d.pred = pcls
            d.pred_prob = fullprob
            if isTrain:
                self.train_quality[indx] = result.quality
            d.gindx = indx
            intree[start:end] = d

    intree.split_overlaps()
    segments = defaultdict(dict)
    for item in intree.items():
        key = item.begin.value << 64 | item.end.value
        segments[key]['begin'] = item.begin
        segments[key]['end'] = item.end
        segments[key][item.data.gindx] = item.data

    # Feature Extraction ###########################
    probs = np.zeros((len(segments), len(self.acts)))
    label = np.zeros(len(segments))
    times = []
    iseg = 0
    for timeseg in segments:
        seg = segments[timeseg]
        times.append({'begin': seg['begin'], 'end': seg['end']})
        for indx in range(len(self.gacts)):
            if indx in seg:
                label[iseg] = seg[indx].real
                if self.mode == 1:
                    # quality-weighted average of the group's probabilities
                    probs[iseg, :] += np.array(seg[indx].pred_prob) \
                        * self.train_quality[indx]['f1'] / len(self.gacts)
                elif self.mode == 2:
                    # unweighted one-hot majority vote
                    p = np.zeros(len(self.acts))
                    p[np.argmax(seg[indx].pred_prob)] = 1
                    probs[iseg, :] += p
                else:
                    # one-hot vote weighted by the group's training f1
                    p = np.zeros(len(self.acts))
                    p[np.argmax(seg[indx].pred_prob)] = self.train_quality[indx]['f1']
                    probs[iseg, :] += p
        iseg += 1
    plabel = np.argmax(probs, 1)

    # Evaluate #######################
    result = Data('result')
    result.results = results
    result.predicted = probs
    result.predicted_classes = plabel
    pred_events = []
    for i in range(len(segments)):
        pred_events.append({
            'Activity': result.predicted_classes[i],
            'StartTime': times[i]['begin'],
            'EndTime': times[i]['end']
        })
    pred_events = pd.DataFrame(pred_events)
    pred_events = pred_events.sort_values(['StartTime']).reset_index(drop=True)

    result.shortrunname = "fusion model" + str(
        {r: results[r].shortrunname for r in results})
    result.times = times
    result.pred_events = pred_events
    result.real_events = real_events
    result.event_cm = event_confusion_matrix(result.real_events,
                                             result.pred_events, self.acts)
    result.quality = CMbasedMetric(result.event_cm, 'macro')
    result.functions = {r: results[r].functions for r in results}
    logger.debug('Evaluation quality is %s' % result.quality)
    return result
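# Sketch of the three voting modes for two groups over 3 classes (the
# pred_prob rows and f1 weights below are made-up illustration values).
import numpy as np
pred = [np.array([0.7, 0.2, 0.1]), np.array([0.1, 0.6, 0.3])]
f1 = [0.9, 0.4]
m1 = sum(p * q for p, q in zip(pred, f1)) / len(pred)            # mode 1
m2 = sum(np.eye(3)[np.argmax(p)] for p in pred)                  # mode 2
m3 = sum(np.eye(3)[np.argmax(p)] * q for p, q in zip(pred, f1))  # default
print(np.argmax(m1), np.argmax(m2), np.argmax(m3))               # 0 0 0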
model.evaluate(f, label)
combiner = EmptyCombiner()
# predict_classes was removed from tf.keras; argmax is equivalent
predicted = np.argmax(model.predict(f), axis=1)

pred_events = []
for i in range(len(f)):
    start = times[i][0]
    end = times[i][1]
    pclass = predicted[i]
    pred_events.append({'Activity': pclass, 'StartTime': start, 'EndTime': end})

pred_events = pd.DataFrame(pred_events)
pred_events = pred_events.sort_values(['StartTime']).reset_index(drop=True)

result = Data('result')
result.pred_events = pred_events
result.real_events = savedata.train_results[11].real_events
result.event_cm = event_confusion_matrix(result.real_events,
                                         result.pred_events, savedata.acts)
result.quality = CMbasedMetric(result.event_cm, 'macro')
print(result.quality)