def _execute(self, sources, alignment_stream, interval):
        """Merge time-aligned source streams into one stream.

        One document is pulled from every source window in lock-step and all
        of them must carry the same timestamp.  Iteration stops as soon as
        any source window is exhausted.

        :param sources: streams to merge; when ``self.names`` is non-empty
            there must be exactly one source per name
        :param alignment_stream: not used by this implementation
        :param interval: interval passed to ``window`` of every source
        :return: generator of ``StreamInstance``; the value is a list of the
            source values when ``self.names is None``, otherwise a dict
            keyed by ``self.names``
        :raises TypeError: if the number of sources differs from the number
            of names
        :raises ValueError: if two sources disagree on a timestamp
        """
        if self.names and len(self.names) != len(sources):
            raise TypeError(
                "Tool AlignedMerge expected {} streams as input, got {} instead"
                .format(len(self.names), len(sources)))

        cursors = []
        for source in sources:
            cursors.append(
                iter(source.window(interval, force_calculation=True)))

        while True:
            try:
                batch = [next(cursor) for cursor in cursors]
            except StopIteration:
                # At least one source has run dry: nothing left to merge.
                return

            stamps = [stamp for (stamp, _) in batch]
            payloads = [payload for (_, payload) in batch]

            reference = stamps[0]
            for other in stamps[1:]:
                if other != reference:
                    raise ValueError(
                        "Tool AlignedMerge expects aligned streams, "
                        "but received conflicting timestamps {} and {}".
                        format(reference, other))

            if self.names is None:
                merged = payloads
            else:
                # Pair each name with the value found at the same position
                merged = {
                    name: payloads[position]
                    for position, name in enumerate(self.names)
                }
            yield StreamInstance(reference, merged)
 def _execute(self, source, interval):
     """Split the last document of ``source`` into meta-tagged instances.

     The last document of the window is taken; if ``self.element`` is set,
     only that entry of the document is split.  A dict-like entry yields one
     instance per item, tagged with the key (or with the value when
     ``self.use_value_instead_of_key`` is true).  Anything else is iterated
     as a sequence and every value is tagged with itself.

     :param source: stream whose window is inspected
     :param interval: interval passed to ``source.window``
     :return: generator of ``StreamMetaInstance``; empty when the window has
         no last document or ``self.element`` is missing from it
     """
     latest = source.window(interval, force_calculation=True).last()
     if not latest:
         return
     timestamp, data = latest

     if self.element is None:
         container = data
     elif self.element in data:
         container = data[self.element]
     else:
         return

     try:  # first attempt: treat the container as a mapping
         for key, value in container.items():
             meta = value if self.use_value_instead_of_key else key
             yield StreamMetaInstance(
                 StreamInstance(timestamp=timestamp, value=value), meta)
     except AttributeError:  # no usable .items(): fall back to a sequence
         for value in container:
             yield StreamMetaInstance(
                 StreamInstance(timestamp=timestamp, value=value), value)
 def _execute(self, sources, alignment_stream, interval):
     """Filter documents by whether ``data[self.key]`` is in ``self.values``.

     Without ``self.complement`` a document passes when it has ``self.key``
     and the stored value is one of ``self.values``.  With
     ``self.complement`` exactly the remaining documents pass, including
     those that do not have the key at all (previously these raised
     ``KeyError``).

     :param sources: only ``sources[0]`` is read
     :param alignment_stream: not used by this implementation
     :param interval: interval passed to ``sources[0].window``
     :return: generator of ``StreamInstance`` with the unchanged document
     """
     for time, data in sources[0].window(interval, force_calculation=True):
         matches = self.key in data and data[self.key] in self.values
         # Keep matching documents normally, non-matching ones in
         # complement mode.
         if matches != bool(self.complement):
             yield StreamInstance(time, data)
Example #4
0
 def _execute(self, sources, alignment_stream, interval):
     """Average a list of dicts key by key.

     Every document of ``sources[0]`` is expected to be a sized iterable of
     dicts.  Each value is weighted by ``1 / len(data)``, so a key missing
     from some of the dicts contributes zero for those entries.

     An empty list yields a single empty dict.  Previously the code fell
     through after that yield and divided by zero.

     :param sources: only ``sources[0]`` is read
     :param alignment_stream: not used by this implementation
     :param interval: interval passed to ``sources[0].window``
     :return: generator of ``StreamInstance`` holding the dict of means
     """
     for time, data in sources[0].window(interval, force_calculation=True):
         dict_mean = dict()
         if len(data) == 0:
             yield StreamInstance(time, dict_mean)
             continue
         inv_len_data = 1 / float(len(data))
         for item in data:
             for key in item.keys():
                 contribution = item[key] * inv_len_data
                 if key in dict_mean:
                     dict_mean[key] = dict_mean[key] + contribution
                 else:
                     dict_mean[key] = contribution
         yield StreamInstance(time, dict_mean)
    def _execute(self, sources, alignment_stream, interval):
        """Apply the most recent stored model to every document.

        The last document of ``sources[0]`` between ``MIN_DATE`` and
        ``interval.end`` is deserialised into pipeline steps; the resulting
        pipeline is then applied to each document of ``sources[1]`` inside
        ``interval``.

        :param sources: ``sources[0]`` holds serialised models,
            ``sources[1]`` the documents to classify
        :param alignment_stream: not used by this implementation
        :param interval: interval of documents to classify
        :return: generator of ``StreamInstance`` whose value maps each label
            encoder class to the first row of ``predict_proba``; empty when
            no model document is found
        """
        model_interval = TimeInterval(MIN_DATE, interval.end)
        model_window = sources[0].window(model_interval,
                                         force_calculation=True)
        model_doc = model_window.last()
        if model_doc is None:
            logging.debug("No model found in {} for time interval {}".format(
                sources[0].stream_id, model_interval))
            return

        # Fresh, unfitted objects that the deserialiser fills with the
        # stored parameters.
        templates = {
            'vectorisation': DictVectorizer(sparse=False),
            'fill_missing': FillZeros(),
            'classifier': LinearDiscriminantAnalysis(),
            'label_encoder': LabelEncoder()
        }
        steps = deserialise_json_pipeline(templates, model_doc.value)

        stage_names = ('vectorisation', 'fill_missing', 'classifier')
        clf = Pipeline([(name, steps[name]) for name in stage_names])
        locations = steps['label_encoder'].classes_

        for tt, dd in sources[1].window(interval, force_calculation=True):
            probabilities = clf.predict_proba(dd)[0]
            scores = {}
            for ii, pp in enumerate(probabilities):
                scores[locations[ii]] = pp
            yield StreamInstance(tt, scores)
    def _execute(self, sources, alignment_stream, interval):
        """Aggregate the numeric values of each window with ``self.func``.

        Every document of ``sources[0]`` is an iterable of rows.  A row with
        a ``value`` attribute contributes its numeric entries grouped by
        key; a bare ``int``/``float`` row is collected under the key
        ``None``.  If only bare numbers were seen, ``self.func`` is applied
        to them directly; otherwise it is applied per key and ``None``
        results are dropped.

        Fixes: the per-key loop iterated the keys of ``vals`` instead of its
        items, and a mapping in ``row.value`` was iterated by key only.

        :param sources: only ``sources[0]`` is read
        :param alignment_stream: not used by this implementation
        :param interval: interval passed to ``sources[0].window``
        :return: generator of ``StreamInstance`` with the aggregated result
        """
        for tt, rows in sources[0].window(interval, force_calculation=True):
            vals = defaultdict(list)

            for row in rows:
                try:
                    payload = row.value
                except AttributeError:
                    # Row has no ``.value``: aggregate it directly if numeric
                    if isinstance(row, (int, float)):
                        vals[None].append(row)
                    continue

                # A mapping has to be walked through items(); iterating it
                # directly would give keys only.  Anything else is assumed
                # to already be an iterable of (key, value) pairs.
                if hasattr(payload, 'items'):
                    pairs = payload.items()
                else:
                    pairs = payload
                for kk, vv in pairs:
                    if isinstance(vv, (int, float)):
                        vals[kk].append(vv)

            if len(vals) == 1 and None in vals:
                result = self.func(iter(vals[None]))
            else:
                result = {}
                for kk, vv in vals.items():
                    x = self.func(vv)
                    if x is not None:
                        result[kk] = x
            if result is not None:
                yield StreamInstance(tt, result)
Example #7
0
 def _execute(self, source, interval):
     """Split ``self.element`` of the last document into keyed instances.

     :param source: stream whose window is inspected
     :param interval: interval passed to ``source.window``
     :return: generator of ``StreamMetaInstance``, one per item of
         ``data[self.element]`` tagged with the item's key; empty when the
         window has no last document or the element is missing
     """
     last_doc = source.window(interval, force_calculation=True).last()
     if not last_doc:
         # Nothing in the window: unpacking the result would fail
         return
     timestamp, data = last_doc
     if self.element in data:
         for key, value in data[self.element].items():
             yield StreamMetaInstance(
                 StreamInstance(timestamp=timestamp, value=value), key)
Example #8
0
 def _execute(self, sources, alignment_stream, interval):
     """Yield ``data[self.index]`` for every document of ``sources[0]``.

     Documents that cannot be indexed with ``self.index`` are skipped and a
     debug message is logged.

     The ``try`` covers only the lookup.  The earlier bare ``except`` also
     wrapped the ``yield`` and therefore intercepted ``GeneratorExit`` when
     the generator was closed.

     :param sources: only ``sources[0]`` is read
     :param alignment_stream: not used by this implementation
     :param interval: interval passed to ``sources[0].window``
     :return: generator of ``StreamInstance`` with the selected part
     """
     for time, data in sources[0].window(interval, force_calculation=True):
         try:
             selected = data[self.index]
         except Exception:
             logging.debug('The array could not be sliced with {}'.format(
                 self.index))
             continue
         yield StreamInstance(time, selected)
    def _execute(self, sources, alignment_stream, interval):
        """Pair up experiment start/end triggers into experiment documents.

        All documents of ``sources[0]`` from ``MIN_DATE`` to
        ``interval.end`` with ``tier == "Experiment"`` are grouped by
        ``_id``.  A trigger value of 1 records the timestamp as ``start``,
        any other value as ``end``.  Each experiment whose interval lies in
        the scanned interval is yielded at its end time.

        Fixes: ``u.keys()[0]`` only works on Python 2, and an experiment
        with just one of the two triggers raised ``KeyError``; such
        experiments are now skipped.

        :param sources: only ``sources[0]`` is read
        :param alignment_stream: not used by this implementation
        :param interval: only ``interval.end`` is used
        :return: generator of ``StreamInstance(end, experiment_document)``
        :raises ValueError: if the same trigger kind occurs twice for one
            experiment
        """
        max_interval = TimeInterval(MIN_DATE, interval.end)
        exp_list = {}
        for timestamp, value in sources[0].window(max_interval, force_calculation=True):
            if value['tier'] != "Experiment":
                continue
            d = deepcopy(value)
            mongo_id = d.pop('_id')
            trigger = d.pop('trigger')
            edge = 'start' if trigger == 1 else 'end'

            if mongo_id in exp_list:
                if edge in exp_list[mongo_id]:
                    raise ValueError("Duplicate {} triggers found for timestamp {}".format(trigger, timestamp))
                exp_list[mongo_id][edge] = timestamp
            else:
                # First trigger seen for this experiment: keep its document
                d[edge] = timestamp
                exp_list[mongo_id] = d

        for doc in exp_list.values():
            if 'start' not in doc or 'end' not in doc:
                # Only one of the two triggers was seen so far
                continue
            if TimeInterval(doc['start'], doc['end']) in max_interval:
                yield StreamInstance(doc['end'], doc)
    def test_index_of_by_stream(self):
        """Check ``index_of_by_stream`` against the directly keyed stream.

        A selector stream holding the single value "location" is combined
        with the ``rss`` node.  The first ten instances written to
        ``rss_kitchen`` must equal the first ten instances of the ``rss``
        stream whose key contains ``('location', 'kitchen')``.
        """
        # The workflow is named after this test function
        workflow = basic_workflow(sys._getframe().f_code.co_name)

        selector_tool = channels.get_tool(
            name="index_of_by_stream",
            parameters=dict(index="kitchen")
        )

        # Create a stream with the single value "location" in it
        workflow.create_node(stream_name="selector_meta_data", channel=A, plate_ids=None)
        selector_instance = StreamInstance(timestamp=utcnow(), value="location")
        A.write_to_stream(stream_id=StreamId(name="selector_meta_data"),
                          data=selector_instance)

        nodes = workflow.nodes
        workflow.create_factor(
            tool=selector_tool,
            sources=[nodes["selector_meta_data"], nodes["rss"]],
            sink=nodes["rss_kitchen"]
        )

        start = scripted_experiments[0].start
        time_interval = TimeInterval(start, start + 2 * minute)
        workflow.execute(time_interval)

        key = h1 + (('location', 'kitchen'),) + wA

        selected = nodes['rss_kitchen'].streams[h1 + wA].window(time_interval).head(10)
        expected = nodes['rss'].streams[key].window(time_interval).head(10)
        assert all(a == b for a, b in zip(selected, expected))
Example #11
0
    def _execute(self, sources, alignment_stream, interval):
        """Aggregate the values of all sources at each distinct timestamp.

        For every timestamp that occurs in any source, the values of the
        sources that have an instance at exactly that timestamp are
        collected and passed to ``self.func``.

        :param sources: streams to aggregate over
        :param alignment_stream: not used by this implementation
        :param interval: interval passed to ``window`` of every source
        :return: generator of ``StreamInstance(timestamp, self.func(values))``
        """
        # All data is loaded and sorted per stream up front (inefficient!)
        by_stream = {
            source.stream_id: sorted(
                source.window(interval, force_calculation=True),
                key=lambda instance: instance.timestamp)
            for source in sources
        }

        # Every timestamp present in any stream (also inefficient!)
        all_stamps = sorted(set(
            instance.timestamp
            for instances in by_stream.values()
            for instance in instances))

        # Per stream: timestamp of the last instance the scan stopped at
        resume_from = dict.fromkeys(by_stream, MIN_DATE)

        for ts in all_stamps:
            collected = []
            for stream_id, instances in by_stream.items():
                for instance in instances:
                    stamp = instance.timestamp
                    if stamp < resume_from[stream_id] or stamp < ts:
                        continue
                    if stamp == ts:
                        collected.append(instance.value)
                    # Stop at the first instance at or after ``ts``
                    resume_from[stream_id] = stamp
                    break
            yield StreamInstance(ts, self.func(collected))
Example #12
0
    def _execute(self, sources, alignment_stream, interval):
        """Aggregate numeric entries per key and yield one instance per key.

        Every document of ``sources[0]`` is an iterable of dict-like rows.
        Numeric values are grouped by key, then ``self.func(key, values)``
        is called for each key.  Keys for which the call raises ``KeyError``
        or returns ``None`` produce no output.

        ``items()`` replaces the Python-2-only ``iteritems()`` so the tool
        runs on both Python 2 and Python 3.

        :param sources: only ``sources[0]`` is read
        :param alignment_stream: not used by this implementation
        :param interval: interval passed to ``sources[0].window``
        :return: generator of ``StreamInstance(tt, {key: result})``
        """
        for tt, rows in sources[0].window(interval, force_calculation=True):
            values = defaultdict(list)

            for row in rows:
                for kk, vv in row.items():
                    # TODO: Need to store other things like the uid for output
                    if isinstance(vv, (int, float)):
                        values[kk].append(vv)

            for kk, vv in values.items():
                try:
                    result = self.func(kk, vv)
                except KeyError:
                    continue
                if result is not None:
                    yield StreamInstance(tt, {kk: result})
Example #13
0
 def _execute(self, sources, alignment_stream, interval):
     """Restrict every document to the keys listed in ``self.keys``.

     :param sources: only ``sources[0]`` is read
     :param alignment_stream: not used by this implementation
     :param interval: interval passed to ``sources[0].window``
     :return: generator of ``StreamInstance`` holding the reduced dict
     """
     for time, data in sources[0].window(interval, force_calculation=True):
         selected = {
             name: entry
             for name, entry in data.items()
             if name in self.keys
         }
         yield StreamInstance(time, selected)
Example #14
0
 def _execute(self, sources, alignment_stream, interval):
     """Detect windows containing at least three taps.

     Each document of ``sources[0]`` is an indexable sequence of
     ``(timestamp, magnitude)`` entries.  Entries below the magnitude
     threshold are ignored.  A window is rejected when a remaining entry
     lies outside the span from 6 to 2 seconds before the document time, or
     sits at either end of the sequence.  A remaining entry that is
     strictly larger than both neighbours counts as a tap.

     Fix: the end-of-sequence test compared ``i`` with ``len(data)``, which
     is never reached, so the last entry fell through to ``data[i+1]`` and
     could raise ``IndexError``.

     :param sources: only ``sources[0]`` is read
     :param alignment_stream: not used by this implementation
     :param interval: interval passed to ``sources[0].window``
     :return: generator of ``StreamInstance`` with ``tap_list`` and the
         full window under ``all_10_sec``; a summary is also printed
     """
     threshold = 1.8
     for time, data in sources[0].window(interval, force_calculation=True):
         t6 = time - timedelta(seconds=6)
         t2 = time - timedelta(seconds=2)
         last_index = len(data) - 1
         tap_list = []
         ok = True
         for i, (t, magnitude) in enumerate(data):
             if magnitude < threshold:
                 continue
             if (t < t6) or (t > t2):
                 ok = False
                 break  # too high magnitude outside of the 4 sec window
             if (i == 0) or (i == last_index):
                 ok = False
                 break  # no taps counted at the ends of the window
             if (magnitude > data[i - 1][1]) and (magnitude > data[i + 1][1]):
                 tap_list.append(data[i])
         if ok and (len(tap_list) >= 3):
             wearable = [w for (s, w) in sources[0].stream_id.meta_data if s == 'wearable'][0]
             res = '\n'.join(['{0} {1:.2} {2:%Y-%m-%d %H:%M:%S.%f}'.format(wearable, tap.value, tap.timestamp) for tap in tap_list]) + '\n'
             print(res)
             yield StreamInstance(time, dict(tap_list=tap_list, all_10_sec=data))
 def _execute(self, sources, interval):
     """Compute a histogram of every document of ``sources[0]``.

     In categorical mode the occurrences of each item are counted and the
     ``(item, count)`` pairs are passed through ``safe_key``.  Otherwise
     the document is binned with ``np.histogram`` using ``self.breaks``
     (or evenly spaced breaks built from ``first_break``, ``break_width``
     and ``n_breaks``), with an extra open-ended bin on either side.

     :param sources: only ``sources[0]`` is read
     :param interval: interval passed to ``sources[0].window``
     :return: generator of ``StreamInstance`` holding a dict of counts
         (categorical) or a list of bin counts
     """
     if self.categorical:
         for t, d in sources[0].window(interval, force_calculation=True):
             counted = Counter(d).items()
             yield StreamInstance(t, dict(map(safe_key, counted)))
         return

     if self.breaks is None:
         inner = [
             self.first_break + i * self.break_width
             for i in range(self.n_breaks)
         ]
     else:
         inner = self.breaks
     # Catch-all bins below the first and above the last break
     edges = [-float('inf')] + inner + [float('inf')]
     for t, d in sources[0].window(interval, force_calculation=True):
         histogram = np.histogram(d, edges)
         yield StreamInstance(t, histogram[0].tolist())
 def _execute(self, sources, alignment_stream, interval):
     """Compute percentiles of every document of ``sources[0]``.

     ``self.percentiles`` is used when given; otherwise ``n_segments + 1``
     evenly spaced percentiles from 0 to 100 are generated.

     :param sources: only ``sources[0]`` is read
     :param alignment_stream: not used by this implementation
     :param interval: interval passed to ``sources[0].window``
     :return: generator of ``StreamInstance`` holding the percentile list
     """
     percentiles = self.percentiles
     if percentiles is None:
         percentiles = [
             i*100.0/self.n_segments
             for i in range(self.n_segments+1)
         ]
     for t, d in sources[0].window(interval, force_calculation=True):
         computed = np.percentile(d, percentiles)
         yield StreamInstance(t, computed.tolist())
Example #17
0
    def _execute(self, sources, alignment_stream, interval):
        """Multiply the values of two sources pairwise.

        :param sources: ``sources[0]`` is iterated directly; ``sources[1]``
            supplies the second operand
        :param alignment_stream: not used by this implementation
        :param interval: not used by this implementation
        :return: generator of ``StreamInstance(t, data1 * data2)``
        """
        # NOTE(review): ``next`` is applied to ``sources[1]`` and then again
        # to its result, and ``interval`` is never used - confirm this is
        # the intended access pattern.
        second = next(sources[1])

        # TODO: should the loop below be: for (t, data1) in sources[0].execute(interval)?
        for (t, left) in sources[0]:
            (_, right) = next(second)
            # TODO: type checking key/value pairs?
            yield StreamInstance(t, left * right)
Example #18
0
def reformat(doc):
    """Turn a raw document into a house-tagged stream instance.

    ``datetime`` and, when present, ``house_id`` are removed from ``doc``
    itself (the argument is modified in place).  A missing ``house_id``
    defaults to ``'1'``.

    :param doc: dict with at least a ``datetime`` entry
    :return: ``StreamMetaInstance`` with meta data ``('house', house_id)``
    """
    timestamp = doc.pop('datetime')
    house_id = doc.pop('house_id', '1')
    instance = StreamInstance(timestamp, doc)
    return StreamMetaInstance(stream_instance=instance,
                              meta_data=('house', house_id))
 def _execute(self, sources, alignment_stream, interval):
     """Sum a list of dicts key by key, starting from ``self.start_dict``.

     Every document of ``sources[0]`` is an iterable of dicts.  Keys that
     are already in the running sum are added up.  A new key is logged when
     ``self.log_new_keys`` is set and only inserted when
     ``self.insert_new_keys`` is set.

     Fixes: each window now starts from a copy of ``self.start_dict``
     (before, the shared dict was mutated and reused for every window); an
     empty document yields once instead of twice; ``logging.warning``
     replaces the deprecated ``logging.warn``.

     :param sources: only ``sources[0]`` is read
     :param alignment_stream: not used by this implementation
     :param interval: interval passed to ``sources[0].window``
     :return: generator of ``StreamInstance`` holding the dict of sums
     """
     for time, data in sources[0].window(interval, force_calculation=True):
         if self.start_dict is None:
             dict_sum = dict()
         else:
             # Shallow copy: entries are rebound below, never mutated
             dict_sum = dict(self.start_dict)
         for item in data:
             for key in item.keys():
                 if key in dict_sum:
                     dict_sum[key] = dict_sum[key] + item[key]
                     continue
                 if self.log_new_keys:
                     logging.warning('Novel key in ListDictSum: {}'.format(key))
                 if self.insert_new_keys:
                     dict_sum[key] = item[key]
         yield StreamInstance(time, dict_sum)
Example #20
0
    def _execute(self, sources, alignment_stream, interval):
        """Collect, for every sliding window, the data values inside it.

        ``sources[0]`` supplies ``(time, window)`` pairs and ``sources[1]``
        the data.  Both are consumed in a single forward pass: ``window``
        holds the documents currently inside the window, ``future`` those
        already read that lie beyond the current upper bound.  A document
        belongs to a window when ``lower < timestamp <= upper``.

        Fix: documents moved from ``future`` into the window were tested
        with ``>= lower``, which let in a document lying exactly on the
        lower bound although the other two membership tests exclude it.

        :param sources: ``sources[0]`` sliding windows, ``sources[1]`` data
        :param alignment_stream: not used by this implementation
        :param interval: interval passed to ``window`` of both sources
        :return: generator of ``StreamInstance(time, [values])``; windows
            without data produce nothing
        """
        sliding_window = sources[0].window(interval, force_calculation=True)
        data = iter(sources[1].window(interval, force_calculation=True))

        window = []
        future = []

        for time, rel_window in sliding_window:
            lower = rel_window.start
            upper = rel_window.end

            # Prune the old data points from the window: drop everything
            # before the first document that is still inside.
            num_to_remove = 0
            for win_time, win_data in window:
                if lower < win_time <= upper:
                    break
                num_to_remove += 1
            window = window[num_to_remove:]

            # Add those stolen from the future.  Documents beyond ``upper``
            # stay queued; documents at or before ``lower`` are discarded.
            num_to_remove = 0
            for doc in future:
                fut_time, fut_data = doc
                if fut_time > upper:
                    break
                num_to_remove += 1
                if fut_time > lower:
                    window.append(doc)
            future = future[num_to_remove:]

            # Take data from the execute until one document overshoots
            while True:
                try:
                    doc = next(data)
                except StopIteration:
                    break
                tt, dd = doc

                if lower < tt <= upper:
                    window.append(doc)
                elif tt > upper:
                    future.append(doc)
                    break

            value = [stream_instance.value for stream_instance in window]
            if len(value) > 0:
                yield StreamInstance(time, value)
            # TODO: Should we yield anything (e.g. an empty value) for
            # windows without data?
Example #21
0
    def _execute(self, sources, alignment_stream, interval):
        """Multiply two aligned streams instance by instance.

        :param sources: ``sources[0]`` and ``sources[1]`` are paired up in
            order
        :param alignment_stream: not used by this implementation
        :param interval: interval passed to ``window`` of both sources
        :return: generator of ``StreamInstance`` with the product of values
        :raises ValueError: if a pair of instances differs in timestamp
        """
        left_window = sources[0].window(interval, force_calculation=True)
        right_window = sources[1].window(interval, force_calculation=True)

        for pair in zip(left_window, right_window):
            left, right = pair
            if left.timestamp != right.timestamp:
                message = "{} tool expects aligned timestamps".format(
                    self.name)
                raise ValueError(message)
            product = left.value * right.value
            yield StreamInstance(left.timestamp, product)
 def _execute(self, sources, alignment_stream, interval):
     """Yield the key holding the largest value of every document.

     The first key reaching the maximum wins; an empty document yields
     ``None``.

     :param sources: only ``sources[0]`` is read
     :param alignment_stream: not used by this implementation
     :param interval: interval passed to ``sources[0].window``
     :return: generator of ``StreamInstance(time, argmax_key)``
     """
     for time, data in sources[0].window(interval, force_calculation=True):
         best_key, best_value = None, None
         for candidate in data.keys():
             if best_value is None or data[candidate] > best_value:
                 best_key, best_value = candidate, data[candidate]
         yield StreamInstance(time, best_key)
def reformat(doc):
    """Turn a raw document into a house-tagged stream instance.

    Works on a deep copy, so ``doc`` itself is left untouched.  The
    ``datetime`` entry becomes the timestamp.  ``hid`` is removed and used
    as the house id unless it is absent or ``None``, in which case the
    house id is ``'1'``.

    :param doc: dict with at least a ``datetime`` entry
    :return: ``StreamMetaInstance`` with meta data ``('house', house_id)``
    """
    record = deepcopy(doc)
    timestamp = record.pop('datetime')
    if record.get('hid') is None:
        house_id = '1'
    else:
        house_id = record.pop('hid')
    instance = StreamInstance(timestamp, record)
    return StreamMetaInstance(stream_instance=instance,
                              meta_data=('house', house_id))
 def _execute(self, sources, alignment_stream, interval):
     """Emit clock ticks that lie on the stride grid anchored at ``self.first``.

     Ticks are the times ``self.first + k * self._stride``.  Those with
     ``start < t <= interval.end`` are yielded, where ``start`` is the
     later of ``interval.start`` and ``self.first``.

     Fix: the clamped start is kept in a local variable instead of being
     written back to ``interval.start``, which modified the caller's
     interval.

     :param sources: not used by this implementation
     :param alignment_stream: not used by this implementation
     :param interval: interval to generate ticks for
     :return: generator of ``StreamInstance(t, t)``
     """
     start = interval.start
     if start < self.first:
         start = self.first
     # Whole strides between the anchor and the start
     n_strides = int((start - self.first).total_seconds() // self._stride.total_seconds())
     t = self.first + n_strides * self._stride
     while t <= interval.end:
         if t > start:
             yield StreamInstance(t, t)
         t += self._stride
Example #25
0
    def _execute(self, sources, alignment_stream, interval):
        """Apply ``self.func`` to the data inside every sliding window.

        ``sources[0]`` supplies ``(time, window)`` pairs and ``sources[1]``
        the data.  Both are consumed in a single forward pass: ``window``
        holds the documents currently inside the window, ``future`` those
        already read that lie beyond the current upper bound.  A document
        belongs to a window when ``lower <= timestamp <= upper``.

        Fix: the hand-over from ``future`` was inverted.  Documents inside
        the window stayed queued and were never added, while documents
        outside it were appended to the window.  Now documents beyond
        ``upper`` stay queued, the rest leave the queue and are added only
        when they are at or after ``lower``.

        :param sources: ``sources[0]`` sliding windows, ``sources[1]`` data
        :param alignment_stream: not used by this implementation
        :param interval: interval passed to ``window`` of both sources
        :return: generator of ``StreamInstance(time, self.func(iterator))``
            where the iterator runs over the documents in the window
        """
        sliding_window = sources[0].window(interval, force_calculation=True)
        data = iter(sources[1].window(interval, force_calculation=True))

        window = []
        future = []

        for time, rel_window in sliding_window:
            lower = rel_window.start
            upper = rel_window.end

            # Prune the old data points from the window: drop everything
            # before the first document that is still inside.
            num_to_remove = 0
            for win_time, win_data in window:
                if lower <= win_time <= upper:
                    break
                num_to_remove += 1
            window = window[num_to_remove:]

            # Add those stolen from the future
            num_to_remove = 0
            for doc in future:
                fut_time, fut_data = doc
                if fut_time > upper:
                    # Still ahead of this window: must remain queued
                    break
                num_to_remove += 1
                if fut_time >= lower:
                    window.append(doc)
            future = future[num_to_remove:]

            # Take data from the execute until one document overshoots
            while True:
                try:
                    doc = next(data)
                except StopIteration:
                    break
                tt, dd = doc

                if lower <= tt <= upper:
                    window.append(doc)
                elif tt > upper:
                    future.append(doc)
                    break

            yield StreamInstance(time, self.func(iter(window)))
Example #26
0
 def _execute(self, sources, alignment_stream, interval):
     """Yield the rows of ``self.data`` whose time lies in ``interval``.

     The ``dt`` column of each row is converted with ``unix2datetime`` and
     used as the timestamp.

     :param sources: not used by this implementation
     :param alignment_stream: not used by this implementation
     :param interval: rows are kept when their time is ``in`` this interval
     :return: generator of ``StreamInstance`` holding the camera,
         experiment, person and wearable ids of the row
     """
     columns = ("camera_id", "exper_id", "person_id", "wearable_id")
     for _, row in self.data.iterrows():
         dt = unix2datetime(row["dt"])
         if dt not in interval:
             continue
         yield StreamInstance(dt, {name: row[name] for name in columns})
Example #27
0
 def _execute(self, sources, alignment_stream, interval):
     """Flatten all documents of the interval into a single DataFrame.

     Each instance becomes one record with its timestamp, its
     ``localisation-experiment`` value as ``fold``, the first item of its
     ``Location`` annotation (``None`` if there is none) and one entry per
     item of its ``rssi`` dict.

     :param sources: only ``sources[0]`` is read
     :param alignment_stream: not used by this implementation
     :param interval: interval passed to ``sources[0].window``
     :return: generator yielding one ``StreamInstance(interval.end, df)``
     """
     def flatten(x):
         # Fixed columns first, then the rssi entries as keyword items
         value = x.value
         head = dict(timestamp=x.timestamp,
                     fold=value['localisation-experiment'],
                     location=next(iter(value['annotations']['Location']),
                                   None))
         return dict(head, **(value['rssi']))

     data = list(sources[0].window(interval, force_calculation=True))
     df = pd.DataFrame(map(flatten, data))
     yield StreamInstance(interval.end, df)
 def _execute(self, sources, alignment_stream, interval):
     """List the selected experiments together with their intervals.

     For every instance an interval is built from its ``start`` and ``end``
     values and turned into an experiment id.  Only ids contained in
     ``self.experiment_ids`` are kept.

     :param sources: only ``sources[0]`` is read
     :param alignment_stream: not used by this implementation
     :param interval: interval passed to ``sources[0].window``
     :return: generator yielding one ``StreamInstance(interval.end, list)``
         of ``(experiment_id, experiment_interval)`` tuples
     """
     mappings = []
     for instance in sources[0].window(interval, force_calculation=True):
         span = TimeInterval(instance.value['start'],
                             instance.value['end'])
         identifier = construct_experiment_id(span)
         if identifier not in self.experiment_ids:
             continue
         mappings.append((identifier, span))
     yield StreamInstance(interval.end, mappings)
    def _execute(self, sources, alignment_stream, interval):
        """Train a localisation classifier and yield it in serialised form.

        The classifier type is taken from the ``localisation_model`` meta
        data of the source stream ("lda" or "svm").  Only instances with
        exactly one ``Location`` annotation are used for training, and
        neither that annotation nor the experiment may be ``'MIX'``.  The
        result contains the serialised pipeline, the label encoder, the
        output of ``predefined_train_test_split``, the experiment ids
        parsed from the stream name and this tool's public attributes.

        Fix: ``experiment_interval`` is built as a list; the original
        ``map(...)`` stores a lazy map object under Python 3.

        :param sources: only ``sources[0]`` is read
        :param alignment_stream: not used by this implementation
        :param interval: interval passed to ``sources[0].window``
        :return: generator yielding one ``StreamInstance(interval.end,
            serialised_model)``; empty when the window has no data
        :raises NotImplementedError: for an unknown classifier name
        """
        source = sources[0]
        data = list(source.window(interval, force_calculation=True))
        classifier_name = dict(source.stream_id.meta_data)['localisation_model']

        if classifier_name == "lda":
            classifier = LinearDiscriminantAnalysis()
        elif classifier_name == "svm":
            classifier = OneVsRestClassifier(LinearSVC())
        else:
            raise NotImplementedError("Unknown classifier type {}".format(classifier_name))

        if not data:
            return

        yy_key = 'annotations'
        xx_key = 'rssi'
        ex_key = 'localisation-experiment'

        # TODO: change data to go from ['anno']['Location'] to just ['anno']
        # Keep only unambiguous training examples
        keep_inds = []
        for di, (tt, dd) in enumerate(data):
            exp = dd[ex_key]
            loc = list(dd[yy_key]['Location'])

            if len(loc) == 1 and loc[0] != 'MIX' and exp != 'MIX':
                keep_inds.append(di)

        folds = [data[ii].value[ex_key] for ii in keep_inds]
        train_x = [data[ii].value[xx_key] for ii in keep_inds]
        train_y = [list(data[ii].value[yy_key]['Location'])[0] for ii in keep_inds]
        # TODO: update ['anno']['Location'] keys format changed

        label_encoder = LabelEncoder()
        train_y_trans = label_encoder.fit_transform(train_y)

        param_dict = {
            'vectorisation': DictVectorizer(sparse=False),
            'fill_missing': FillZeros(self.nan_value),
            'classifier': classifier,
        }

        clf = Pipeline([(kk, param_dict[kk]) for kk in ('vectorisation', 'fill_missing', 'classifier')])
        clf.fit(train_x, train_y_trans)

        clf_serialised = serialise_pipeline(clf)
        clf_serialised['label_encoder'] = serialise_dict(label_encoder.__dict__)
        clf_serialised['performance'] = predefined_train_test_split(train_x, train_y_trans, folds, clf, label_encoder)

        # The last two '_'-separated parts of the stream name are used as
        # the experiment ids
        experiment_ids = source.stream_id.name.split('_')[-2:]
        clf_serialised['experiment_ids_str'] = '_'.join(experiment_ids)
        clf_serialised['experiment_interval'] = [
            reconstruct_interval(experiment_id)
            for experiment_id in experiment_ids
        ]
        clf_serialised['tool_parameters'] = dict((x, self.__dict__[x]) for x in self.__dict__ if not x.startswith("_"))

        yield StreamInstance(interval.end, clf_serialised)
 def _execute(self, sources, alignment_stream, interval):
     """Flatten all annotation documents of the interval into a DataFrame.

     Each instance becomes one record with an experiment id built from its
     ``start``/``end`` values, those two values, the annotator and one
     entry per item of its ``notes`` dict.  An ``id`` column numbering the
     rows from 1 is added.

     :param sources: only ``sources[0]`` is read
     :param alignment_stream: not used by this implementation
     :param interval: interval passed to ``sources[0].window``
     :return: generator yielding one ``StreamInstance(interval.end, df)``
     """
     def flatten(x):
         # Fixed columns first, then the notes entries as keyword items
         value = x.value
         head = dict(
             experiment_id=construct_experiment_id(
                 TimeInterval(value['start'], value['end'])),
             start=value['start'],
             end=value['end'],
             annotator=value['annotator']
         )
         return dict(head, **(value['notes']))

     data = list(sources[0].window(interval, force_calculation=True))
     df = pd.DataFrame(map(flatten, data))
     df['id'] = range(1, len(df) + 1)
     yield StreamInstance(interval.end, df)