Example #1
def deleter(functionNode):
    """
        delete values, events, and annotations of a time outside of given annotation(s)
    """
    logger = functionNode.get_logger()
    logger.info("==>>>> deleter " + functionNode.get_browse_path())
    progressNode = functionNode.get_child("control").get_child("progress")

    elements = functionNode.get_child("elements").get_leaves()
    whiteList = functionNode.get_child(
        "whiteListAnnotationSelection").get_leaves()
    tags = functionNode.get_child("tags").get_value()
    whiteList = mh.filter_annotations(whiteList, tags)

    progressNode.set_value(0)
    total = len(elements)

    for idx, element in enumerate(elements):
        progressNode.set_value(float(idx) / float(total))
        logger.debug(
            f"work on {element.get_browse_path()} {element.get_type()} ")

        if element.get_type() == "annotation":
            if element.get_child("type").get_value() == "time":
                if not is_inside(element, whiteList):
                    element.delete()
        elif element.get_type() == "timeseries":
            data = element.get_time_series()  # get full data
            mask = mh.annotations_to_class_vector(
                whiteList, data["__time"],
                ignoreTags=[])  # label inside the whitelist, NaN outside
            mask = numpy.isfinite(mask)
            values = data["values"][mask]
            times = data["__time"][mask]
            element.set_time_series(values, times)
        elif element.get_type() == "eventseries":
            data = element.get_event_series()
            mask = mh.annotations_to_class_vector(
                whiteList, data["__time"],
                ignoreTags=[])  # label inside the whitelist, NaN outside
            mask = numpy.isfinite(mask)
            values = data["values"][mask]
            times = data["__time"][mask]
            element.set_event_series(values, times)
        elif element.get_type() == "variable":
            element.set_value(None)
        else:
            logger.error(
                f"cannot process {element.get_name()} type {element.get_type()}"
            )

    return True
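The core idiom in this example is turning time annotations into a boolean sample mask: mh.annotations_to_class_vector returns a label for every sample inside a whitelisted annotation and NaN outside, so numpy.isfinite yields the keep-mask. Below is a minimal, self-contained sketch of that idea, assuming annotations expose plain startTime/endTime values (a hypothetical stand-in; the real mh helper also handles tags and model nodes):

import numpy

def annotations_to_mask(annotations, times):
    # stand-in for mh.annotations_to_class_vector followed by numpy.isfinite:
    # 1.0 inside any annotation interval, NaN outside, reduced to a bool mask
    vector = numpy.full(len(times), numpy.nan)
    for anno in annotations:
        inside = (times >= anno["startTime"]) & (times <= anno["endTime"])
        vector[inside] = 1.0
    return numpy.isfinite(vector)

times = numpy.array([0.0, 1.0, 2.0, 3.0, 4.0])
values = numpy.array([10.0, 11.0, 12.0, 13.0, 14.0])
whiteList = [{"startTime": 1.0, "endTime": 3.0}]

mask = annotations_to_mask(whiteList, times)
print(values[mask], times[mask])  # [11. 12. 13.] [1. 2. 3.]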
Example #2
def score_all(functionNode):
    """
        score all thresholds again by using the stream implementation
        #works only on context of the class object
    """
    logger = functionNode.get_logger()
    logger.debug("score_all")
    progressNode = functionNode.get_child("control").get_child("progress")
    progressNode.set_value(0)
    model = functionNode.get_model()  # for the model API
    annos = functionNode.get_child("annotations").get_leaves()
    annos = [
        anno for anno in annos if anno.get_child("type").get_value() == "time"
    ]  # keep only the time annotations
    variableIds = functionNode.get_child(
        "variables").get_leaves_ids()  # the variableids to work on
    try:
        overWrite = functionNode.get_child("overWrite").get_value()
    except Exception:
        overWrite = True  # default if the overWrite child does not exist

    obj = functionNode.get_parent().get_object()
    obj.reset()  # read the new thresholds into the object; this also affects parallel streaming processes

    # for each id (variable) that has threshold(s):
    #   take the values and times of that variable,
    #   find out the annotations we need, create the stream data blob, and send it over
    progressStep = 1.0 / max(len(obj.get_thresholds()), 1)  # avoid division by zero if there are no thresholds
    total = None

    # thresholds is a dict of {id: {tag: {"min": 0, "max": 1}, tag2: {...}}, id2: {...}}
    for id, thresholdsInfo in obj.get_thresholds().items():
        if id not in variableIds:
            continue  # skip this one, it is not selected
        progressNode.set_value(progressNode.get_value() + progressStep)
        var = model.get_node(id)
        data = var.get_time_series()
        times = data["__time"]
        # now produce the interesting states
        blob = {
            "type": "timeseries",
            "data": {
                "__time": times,
                id: data["values"],
                "__states": {}
            }
        }
        # iterate over the states where the variable has special thresholds
        for state in thresholdsInfo.keys():
            myAnnos = mh.filter_annotations(annos, state)
            stateMask = mh.annotations_to_class_vector(myAnnos, data["__time"])
            stateMask = numpy.isfinite(stateMask)
            blob["data"]["__states"][state] = stateMask

        # the data and state blob is now prepared; score it by feeding it into the stream scorer
        blob = obj.feed(blob)
        # now the blob contains more entries, e.g. the score variable id and the corresponding scores; that is what we want
        for blobId, values in blob["data"].items():
            if blobId not in ["__time", id, "__states"]:
                # this is the score; overwrite the whole thing
                scoreNode = model.get_node(blobId)
                if scoreNode.get_name() == "_total_score":
                    continue  # this is the combined result of several variables going into the stream scoring, not relevant here

                scoreNode.set_time_series(values=values, times=times)
                # xxx: is "set" ok here, or do we need "insert" to make sure nothing has changed in the meantime?
                model.notify_observers(scoreNode.get_parent().get_id(),
                                       "children")  # notify the observers of the parent node

                # build the total score:
                # merge in the new times, resample the total score, resample the local score, then merge them
                # the merge function will use the new values wherever there is one (empty fields are NaN)
                # for the total score we need the resampling to avoid a mixing of results, e.g. when
                # two sensors have different results during a given interval but at different times: a plain merge
                # would give a True, False, True, False mixture
                # so we build the merge vector: first resample, then merge

                values[numpy.isfinite(values)] = -1  # set -1 for all out-of-limit samples
                if total is None:
                    total = TimeSeries(values=values, times=times)
                else:
                    local = TimeSeries(values=values, times=times)
                    total.merge(local)  # the merge resamples the incoming data to the existing time series; NaN entries are replaced by new values
    # finally, write the total
    # if overWrite is True, we replace; otherwise we merge with the existing, previous result
    totalScoreNode = functionNode.get_parent().get_child("output").get_child(
        "_total_score")
    if total is not None:  # total stays None if no selected variable had thresholds
        if overWrite:
            totalScoreNode.set_time_series(values=total.get_values(),
                                           times=total.get_times())
        else:
            totalScoreNode.merge_time_series(values=total.get_values(),
                                             times=total.get_times())

    return True
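The total score is assembled by merging per-variable score series onto a common time axis, letting finite values from each new series fill the gaps of the running total. Here is a rough sketch of that merge semantic on plain arrays, assuming a simple union of time axes (the project's TimeSeries.merge resamples to the existing axis instead, as the comment in the code notes):

import numpy

def merge_score_series(timesA, valuesA, timesB, valuesB):
    # align both series on the union of their time axes; finite values
    # from the second series overwrite the corresponding entries of the first
    times = numpy.union1d(timesA, timesB)
    merged = numpy.full(len(times), numpy.nan)
    merged[numpy.searchsorted(times, timesA)] = valuesA
    idx = numpy.searchsorted(times, timesB)
    finite = numpy.isfinite(valuesB)
    merged[idx[finite]] = valuesB[finite]
    return times, merged

t, v = merge_score_series(
    numpy.array([0.0, 1.0, 2.0]), numpy.array([-1.0, numpy.nan, -1.0]),
    numpy.array([1.0, 3.0]), numpy.array([-1.0, -1.0]))
print(t, v)  # [0. 1. 2. 3.] [-1. -1. -1. -1.]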
Example #3
def logistic_regression(functionNode):
    logger = functionNode.get_logger()
    m = functionNode.get_model()
    logger.info("==>>>> in logisticregression_ " +
                functionNode.get_browse_path())

    # now get the input and outputs
    inputNodes = [
        node for node in functionNode.get_child("input").get_leaves()
        if node.get_type() == "timeseries"
    ]
    for node in inputNodes:
        logger.debug("input node" + node.get_browse_path())

    progressNode = functionNode.get_child("control").get_child("progress")
    progressNode.set_value(0)
    outputNode = functionNode.get_child("output").get_leaves()[0]
    logger.debug("outputnode " + outputNode.get_browse_path())

    annotations = functionNode.get_child("annotations").get_leaves()

    if annotations:
        for anno in annotations:
            logger.debug("annotation " + anno.get_browse_path())
    """
        now prepare the data for processing:

        1.1) define the sample times "times" from the data or generated
        1.2) use the map to match the tag labels to values 
        1.3) resample and select data based on annotations, region and the "times"
        1.4) train the model, score on region or full data 
    """

    # two ways to define the sampling:
    # if resamplePeriod is given, we take that interval for the sampling; if not,
    # we assume all data shares the same timing: we take the times from the first variable and do not resample the data
    period = functionNode.get_child("resamplePeriod").get_value()
    times = inputNodes[0].get_time_series()["__time"]
    if period:
        times = numpy.arange(times[0], times[-1], period)

    # get the annotation map
    autoCreateCategoryMapChild = functionNode.get_child(
        "autoCreateCategoryMap")
    if autoCreateCategoryMapChild:
        if autoCreateCategoryMapChild.get_value():
            # must generate the tags map now
            tags = set()
            for anno in annotations:
                if anno.get_child("tags"):
                    tags.update(anno.get_child("tags").get_value())
            tagsDict = {tag: str(idx) for idx, tag in enumerate(tags)}
            functionNode.get_child("categoryMap").set_value(tagsDict)

    tagsMap = functionNode.get_child(
        "categoryMap").get_value()  # pick the category mapping from the model

    labels = mh.annotations_to_class_vector(annotations,
                                            times,
                                            regionTag="region",
                                            tagsMap=tagsMap)
    inputMask = numpy.isfinite(labels)

    trainingData = []
    # now grab the values from the columns
    for node in inputNodes:
        values = node.get_time_series(resampleTimes=times)["values"]
        trainingData.append(list(values[inputMask]))
    table = numpy.stack(trainingData, axis=0)

    progressNode.set_value(0.3)
    #
    # now fit the model
    #
    model = LogisticRegression()
    model.fit(table.T, labels[inputMask])
    progressNode.set_value(0.6)
    #
    # scoring
    #
    # we score on the whole time range or on the selection given by a region annotation
    # do we have a region?
    regions = mh.filter_annotations(annotations, "region")
    if regions:
        regionMask = numpy.isfinite(
            mh.annotations_to_class_vector(regions, times, ignoreTags=[]))
        scoreTimes = times[regionMask]
    else:
        scoreTimes = times

    # now grab the values from the columns
    scoreMask = numpy.full(len(scoreTimes), True)  # the mask must match the (possibly region-filtered) score times
    for node in inputNodes:
        data = node.get_time_series(resampleTimes=scoreTimes)["values"]
        scoreMask = scoreMask & numpy.isfinite(data)

    # now keep only the samples where all inputs are finite
    scoreTimes = scoreTimes[scoreMask]

    scoreData = []
    for node in inputNodes:
        data = node.get_time_series(resampleTimes=scoreTimes)["values"]
        scoreData.append(data)

    scoreTable = numpy.stack(scoreData, axis=0)
    score = model.predict(scoreTable.T)

    # write back the output
    m.time_series_set(outputNode.get_id(), times=scoreTimes, values=score)
    progressNode.set_value(1)

    return True
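Stripped of the model-node plumbing, the train/score flow above reduces to: stack the input variables into a table, mask out samples without a class label (NaN in the class vector), fit, then predict over the desired time range. A compact sketch with synthetic data (the sensor names and label rule here are illustrative, not from the project):

import numpy
from sklearn.linear_model import LogisticRegression

times = numpy.arange(0.0, 10.0, 1.0)
sensorA = numpy.sin(times)
sensorB = numpy.cos(times)

labels = numpy.where(times < 5.0, 0.0, 1.0)  # stand-in for the annotation class vector
labels[3] = numpy.nan                        # a sample outside of any annotation

inputMask = numpy.isfinite(labels)               # train only on labelled samples
table = numpy.stack([sensorA, sensorB], axis=0)  # rows = variables, columns = samples

model = LogisticRegression()
model.fit(table[:, inputMask].T, labels[inputMask])  # sklearn expects samples as rows
score = model.predict(table.T)                       # score over the full time range
print(score)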