Beispiel #1
0
def DeriveChange(data_frame, params=None):
    """
    Derive the change of a measure, or describe the parameters this transform takes.

    :param data_frame: frame holding the column named by ``measure_rules["target_measure"]``
    :type data_frame: pandas DataFrame
    :param params: transform configuration with ``func_params`` and ``measure_rules``
        entries; when None, a template describing the expected entries is returned
    :type params: dict or None
    :return: the parameter template when ``params`` is None, otherwise a one-column
        DataFrame named ``measure_rules["output_name"]``
    :rtype: dict or pandas DataFrame
    """
    logger.debug(
        "initialized DeriveChange. Use get_params() to see parameter values")
    if params is None:
        # Template entries are (description, example value, flag) tuples.
        # NOTE(review): the trailing flag presumably marks required entries - confirm.
        return {
            "func_params": {
                "window_len": ("length of averaging window", 1, False),
                "angle_change":
                ("if the change is between angles, we return the signed smaller angle between the two headings",
                 False, False),
            },
            "measure_rules": {
                "target_measure":
                ("name of the target measure", "measure_name", True),
                "output_name":
                ("name of returned measure", "output_name", True),
            },
        }

    measure_rules = params["measure_rules"]
    logger.debug("transforming data to %s", measure_rules["output_name"])
    func_params = params["func_params"]
    # Both settings are flagged False in the template, so neither may be
    # assumed present; fall back to the template's example values.
    window_len = func_params.get("window_len", 1)
    angle_change = func_params.get("angle_change", False)
    target_array = data_frame[measure_rules["target_measure"]].values
    diffed_data = diff_data(target_array, window_len, angle_change)
    # The last index label is dropped; presumably diff_data returns one fewer
    # sample than it receives - verify against diff_data.
    return pd.DataFrame(data=diffed_data,
                        columns=[measure_rules["output_name"]],
                        index=data_frame.index[:-1])
Beispiel #2
0
def ButterLowpass(data_frame, params=None):
    """
    Apply a Butterworth lowpass filter to every column of a DataFrame.

    :param data_frame: data to filter, one measure per column
    :type data_frame: pandas DataFrame
    :param params: dict with "order" and "nyquist" and optionally "filter_name";
        when None, a template describing these entries is returned. A missing
        "filter_name" is filled in on the passed dict with "_buttered".
    :type params: dict or None
    :return: the parameter template when ``params`` is None, otherwise a frame
        of the same shape and index whose column names carry the filter suffix
    :rtype: dict or pandas DataFrame
    """
    logger.debug("Calculating ButterLowpass.")
    if params is None:
        return {
            "order": ("order of the filter", 2, True),
            "nyquist": ("Wn parameter from scipy.signal.butter", 0.05, True),
            "filter_name":
            ("name to append to all filtered parameters", "_buttered", False),
        }
    # Written back onto the caller's dict, exactly as before.
    params.setdefault("filter_name", "_buttered")

    logger.debug("transforming_data")
    buttered_data = np.zeros(data_frame.shape)
    output_names = []
    for col_ind, df_col in enumerate(data_frame):
        buttered_data[:, col_ind] = butter_data(data_frame[df_col].values,
                                                params["order"],
                                                params["nyquist"])
        output_names.append(df_col + params["filter_name"])

    return pd.DataFrame(data=buttered_data,
                        columns=output_names,
                        index=data_frame.index)
Beispiel #3
0
def compare_threshold(data_array,
                      comparison_operator,
                      comparision_val,
                      absolute_compare=False):
    """
    Compare an array against a value with a binary comparison operator.

    NaN entries of the input are replaced by ``np.nan_to_num`` before comparing.

    :param data_array: input data
    :type data_array: numpy array
    :param comparison_operator: one of "==", "!=", ">=", "<=", ">", "<"
    :type comparison_operator: str
    :param comparision_val: the value to be compared against
    :type comparision_val: float
    :param absolute_compare: compare the absolute value of the data instead of the raw value
    :type absolute_compare: Boolean
    :return: elementwise result of the comparison (True where it holds)
    :rtype: boolean numpy array
    :raises KeyError: if ``comparison_operator`` is not one of the supported strings
    """
    # %s rather than %d: %d truncates fractional thresholds in the log and
    # raises outright for NaN or infinite thresholds.
    logger.debug("comparing: %s %s", comparison_operator, comparision_val)
    if absolute_compare:
        data_array = np.abs(data_array)
    comparisons = {
        "==": np.equal,
        "!=": np.not_equal,
        ">=": np.greater_equal,
        "<=": np.less_equal,
        ">": np.greater,
        "<": np.less,
    }
    cur_comp = comparisons[comparison_operator]
    return cur_comp(np.nan_to_num(data_array), comparision_val)
Beispiel #4
0
 def select_transform(transform_type, transform_name):
     """
     Look up a transform function by its category and name.

     :param transform_type: "filter_data", "derive_param" or "detect_event"
     :type transform_type: str
     :param transform_name: name of the transform within that category
     :type transform_name: str
     :return: the matching transform callable
     :raises KeyError: if the category or the name is unknown
     """
     logger.debug("Selecting transform function")
     registry = {
         "filter_data": {
             "ButterLowpass": ButterLowpass,
             "WindowAverage": WindowAverage,
             "dummy": dummy_function,
         },
         "derive_param": {
             "DeriveSlope": DeriveSlope,
             "DeriveChange": DeriveChange,
             "DeriveCumsum": DeriveCumsum,
             "DeriveDistance": DeriveDistance,
             "DeriveHeading": DeriveHeading,
             "DeriveWindowSum": DeriveWindowSum,
             "DeriveScaled": DeriveScaled,
             "DeriveInBox": DeriveInBox,
             "DeriveThreshold": DeriveThreshold,
             "DeriveLogicalCombination": DeriveLogicalCombination,
         },
         "detect_event": {
             "DetectThreshold": DetectThreshold,
         },
     }
     transforms_of_type = registry[transform_type]
     return transforms_of_type[transform_name]
Beispiel #5
0
 def __init__(self, template, dstreams):
     """
     Build a BStream from a template and the dstreams it is made of.

     :param template: mapping providing at least "template_id" and "stream_token"
     :param dstreams: stored unchanged on ``self.dstreams``
     """
     logger.debug("init BStream")
     super().__init__()
     self.dstreams = dstreams
     self["template_id"] = template["template_id"]
     # NOTE(review): presumably copies the remaining template keys onto self;
     # the helper is not visible here - confirm what it skips or overwrites.
     self._load_from_dict(template)
     # Set after the load so the token is always stored as a string.
     self["stream_token"] = str(template["stream_token"])
Beispiel #6
0
    def process_data(self, dstream_list, token):
        """
        Run the processing pipeline over a batch of dstreams and post the results.

        A bstream is built from the batch; filters, derived-parameter rules and
        event rules are applied in that order; the parsed events and the
        measures are then posted to the server.

        :param dstream_list: list of dstreams with raw data; the first entry is
            used as the template for the bstream
        :type dstream_list: list of dicts
        :param token: stream token (not read by this method)
        :type token: string
        """
        logger.debug("process_data_async")
        started = time.time()

        # The first dstream doubles as the template (presumably the most
        # recent versioned one - confirm with the caller).
        template = dstream_list[0]
        bstream = self._list_to_bstream(template, dstream_list)

        bstream.apply_filters()
        bstream.apply_dparam_rules()
        bstream.find_events()

        self._post_parsed_events(bstream)
        self._post_dataframe(bstream["stream_token"], bstream["measures"])

        # Timing goes through the logger instead of a stray print to stdout.
        logger.debug("processed %d timestamps in %.3f s",
                     len(bstream["timestamp"]), time.time() - started)
Beispiel #7
0
 def __init__(self):
     """
     Set up an empty DStream.

     The stream token and the template id each receive a fresh ``uuid1``
     string; every other expected key starts as None, 0 or an empty container.
     """
     # Built on every call so each instance gets its own containers and ids.
     initial_state = (
         ("stream_name", None),
         ("user_description", None),
         ("version", 0),
         ("stream_token", str(uuid.uuid1())),
         ("source_key", None),
         ("template_id", str(uuid.uuid1())),
         ("storage_rules", {}),
         ("ingest_rules", {}),
         ("engine_rules", {}),
         ("timestamp", None),
         ("measures", {}),
         ("fields", {}),
         ("user_ids", {}),
         ("tags", {}),
         ("foreign_keys", []),
         ("filters", []),
         ("dparam_rules", []),
         ("event_rules", {}),
         ("data_rules", {}),
     )
     for key, value in initial_state:
         self[key] = value
     logger.debug("DStream initialize")
Beispiel #8
0
def DeriveLogicalCombination(data_frame, params=None):
    """
    Combine two measures elementwise with AND or OR, or describe the parameters.

    :param data_frame: frame holding the two measures to combine
    :type data_frame: pandas DataFrame
    :param params: transform configuration with ``func_params["combiner"]`` and the
        ``measure_rules`` entries "first_measure", "second_measure" and "output_name";
        when None, a template describing these entries is returned
    :type params: dict or None
    :return: the parameter template when ``params`` is None, otherwise a one-column
        DataFrame named ``measure_rules["output_name"]`` on the input's index
    :rtype: dict or pandas DataFrame
    """
    logger.debug("Starting DeriveLogicalCombination")
    if params is None:
        # Return the template here: falling through would use the description
        # tuples below as column names and fail on the frame lookup.
        return {
            "func_params": {
                "combiner":
                ("Either AND or OR string specifying which operator to use",
                 "AND", True),
            },
            "measure_rules": {
                "first_measure":
                ("name of first measure to be combined", "measure_name", True),
                "second_measure":
                ("name of second measure to be combined", "measure_name",
                 True),
                "output_name":
                ("name of returned measure", "output_name", True),
            },
        }

    measure_rules = params["measure_rules"]
    logger.debug("Combining %s and %s data to %s",
                 measure_rules["first_measure"],
                 measure_rules["second_measure"],
                 measure_rules["output_name"])
    first_array = data_frame[measure_rules["first_measure"]].values
    second_array = data_frame[measure_rules["second_measure"]].values
    combined = logical_combine(first_array, second_array,
                               params["func_params"]["combiner"])
    return pd.DataFrame(data=combined,
                        columns=[measure_rules["output_name"]],
                        index=data_frame.index)
Beispiel #9
0
    def partition_data(self, list_of_partitions, logical_comparison="AND"):
        """
        Select the rows of the measure DataFrame that satisfy a set of partitions.

        :param list_of_partitions: partition parameter tuples; the first three
            items of each are passed on to ``partition_rows()``
        :type list_of_partitions: list of tuples
        :param logical_comparison: "AND" keeps rows matching every partition,
            "OR" keeps rows matching at least one
        :type logical_comparison: str
        :return: the rows of ``self["measures"]`` selected by the combined mask
        :raises ValueError: if ``logical_comparison`` is neither "AND" nor "OR"
        """
        logger.debug("building parition rows")
        # Each mode pairs its combiner with the constructor of its neutral
        # starting mask: all True for AND, all False for OR.
        modes = {
            "AND": (np.logical_and, np.ones),
            "OR": (np.logical_or, np.zeros),
        }
        if logical_comparison not in modes:
            raise ValueError("{} is not a supported logical comparison".format(
                logical_comparison))
        combine, make_start = modes[logical_comparison]

        # Validated once above, so the loop needs no second check.
        selected = make_start((self["measures"].shape[0], ), dtype=bool)
        for partition in list_of_partitions:
            new_inds = self.partition_rows(partition[0], partition[1],
                                           partition[2])
            selected = combine(selected, new_inds)

        return self["measures"][selected]
Beispiel #10
0
def DeriveThreshold(data_frame, params=None):
    """
    Compare a measure against a threshold, or describe the parameters this transform takes.

    :param data_frame: frame holding the column named by ``measure_rules["target_measure"]``
    :type data_frame: pandas DataFrame
    :param params: transform configuration; ``func_params`` supplies "threshold_value",
        "comparison_operator" and optionally "absolute_compare"; when None, a template
        describing the expected entries is returned
    :type params: dict or None
    :return: the parameter template when ``params`` is None, otherwise a one-column
        DataFrame named ``measure_rules["output_name"]`` on the input's index
    :rtype: dict or pandas DataFrame
    """
    logger.debug("Starting DeriveThreshold")
    if params is None:
        # Return the template here: falling through would use the description
        # tuples below as column names and fail on the frame lookup.
        return {
            "func_params": {
                "threshold_value": ("value to compare against", 0, True),
                "comparison_operator":
                ("one of == != >= <= > <", "==", True),
                "absolute_compare":
                ("whether to compare against absolute value instead of raw value",
                 False, False),
            },
            "measure_rules": {
                "target_measure":
                ("name of the target measure", "measure_name", True),
                "output_name":
                ("name of returned measure", "output_name", True),
            },
        }

    measure_rules = params["measure_rules"]
    logger.debug("transforming data to %s", measure_rules["output_name"])
    func_params = params["func_params"]
    target_array = data_frame[measure_rules["target_measure"]].values
    threshold_bool = compare_threshold(
        target_array,
        func_params["comparison_operator"],
        func_params["threshold_value"],
        # Flagged False in the template, so it may be absent.
        func_params.get("absolute_compare", False))
    return pd.DataFrame(data=threshold_bool,
                        columns=[measure_rules["output_name"]],
                        index=data_frame.index)
Beispiel #11
0
def DeriveWindowSum(data_frame, params=None):
    """
    Sum a measure over a window, or describe the parameters this transform takes.

    :param data_frame: frame holding the column named by ``measure_rules["target_measure"]``
    :type data_frame: pandas DataFrame
    :param params: transform configuration with ``func_params`` and ``measure_rules``
        entries; when None, a template describing the expected entries is returned
    :type params: dict or None
    :return: the parameter template when ``params`` is None, otherwise a one-column
        DataFrame named ``measure_rules["output_name"]`` on the input's index
    :rtype: dict or pandas DataFrame
    """
    logger.debug("Starting DeriveWindowSum.")

    if params is None:
        return {
            "func_params": {
                "window_len": ("window size for summing", 2, True),
            },
            "measure_rules": {
                "target_measure":
                ("name of the target measure", "measure_name", True),
                "output_name":
                ("name of returned measure", "output_name", True),
            },
        }

    measure_rules = params["measure_rules"]
    logger.debug("transforming data to %s", measure_rules["output_name"])
    # A missing window length falls back to 1, as before.
    window_len = params["func_params"].get("window_len", 1)
    target_array = data_frame[measure_rules["target_measure"]].values
    summed_data = window_sum(target_array, window_len)
    return pd.DataFrame(data=summed_data,
                        columns=[measure_rules["output_name"]],
                        index=data_frame.index)
Beispiel #12
0
def DeriveInBox(data_frame, params=None):
    """
    Flag which positions fall inside a box, or describe the parameters this transform takes.

    :param data_frame: frame whose ``measure_rules["spatial_measure"]`` column holds
        one position per row
    :type data_frame: pandas DataFrame
    :param params: transform configuration; ``func_params`` supplies "upper_left_corner"
        and "lower_right_corner"; when None, a template describing the expected
        entries is returned
    :type params: dict or None
    :return: the parameter template when ``params`` is None, otherwise a one-column
        DataFrame named ``measure_rules["output_name"]`` on the input's index
    :rtype: dict or pandas DataFrame
    """
    logger.debug("Starting DeriveInBox.")

    if params is None:
        return {
            "func_params": {
                "upper_left_corner":
                ("location of upper left corner", (0, 1), True),
                "lower_right_corner":
                ("location of lower right corner", (1, 0), True),
            },
            "measure_rules": {
                "spatial_measure":
                ("name of geo-spatial measure", "measure_name", True),
                # NOTE(review): sibling transforms use "output_name" as the
                # example value here - confirm whether "measure_name" is intended.
                "output_name":
                ("name of returned measure", "measure_name", True),
            },
        }

    measure_rules = params["measure_rules"]
    logger.debug("transforming data to %s", measure_rules["output_name"])
    func_params = params["func_params"]
    # Expand the column of per-row positions into a 2-D array, one row per sample.
    position_array = pd.DataFrame(
        data_frame[measure_rules["spatial_measure"]].tolist()).values
    box_bool = in_box(position_array, func_params["upper_left_corner"],
                      func_params["lower_right_corner"])
    return pd.DataFrame(data=box_bool,
                        columns=[measure_rules["output_name"]],
                        index=data_frame.index)
Beispiel #13
0
def DeriveSlope(data_frame, params=None):
    """
    Derive the slope between two measures, or describe the parameters this transform takes.

    :param data_frame: frame holding the rise (y) and run (x) measures
    :type data_frame: pandas DataFrame
    :param params: transform configuration with ``func_params`` and ``measure_rules``
        entries; when None, a template describing the expected entries is returned
    :type params: dict or None
    :return: the parameter template when ``params`` is None, otherwise a one-column
        DataFrame named ``measure_rules["output_name"]`` on the input's index
    :rtype: dict or pandas DataFrame
    """
    logger.debug("Starting DeriveSlope.")

    if params is None:
        return {
            "func_params": {
                "window_len": ("length of averaging window", 1, False),
            },
            "measure_rules": {
                "rise_measure":
                ("measure y values (or rise in rise/run calculation of slope)",
                 "measure_name", True),
                "run_measure":
                ("measure containing x values (or run in rise/run calculation of slope)",
                 "measure_name", True),
                "output_name":
                ("name of returned measure", "output_name", True),
            },
        }

    measure_rules = params["measure_rules"]
    logger.debug("transforming data to %s", measure_rules["output_name"])
    # A missing window length falls back to 1, as before.
    window_len = params["func_params"].get("window_len", 1)
    xrun = data_frame[measure_rules["run_measure"]].values
    yrise = data_frame[measure_rules["rise_measure"]].values
    # Trim both series to the shorter length before computing the slope.
    smaller_len = min(xrun.shape[0], yrise.shape[0])
    sloped = sloper(yrise[:smaller_len], xrun[:smaller_len], window_len)
    return pd.DataFrame(data=sloped,
                        columns=[measure_rules["output_name"]],
                        index=data_frame.index)
Beispiel #14
0
def bearing(position_array, window_len=1, units="deg"):
    """
    Calculate the bearing between consecutive lat lon points.

    :param position_array: input vector of lat lon points (column 0 is read as
        latitude, column 1 as longitude, both in degrees)
    :type position_array: N x 2 numpy array
    :param window_len: length of window for averaging; values above 1 pass the
        result through ``window_data``. Defaults to 1, matching the sibling
        distance functions.
    :type window_len: int
    :param units: "deg" returns degrees wrapped into [-180, 180); any other
        value leaves the result in radians
    :type units: str
    :return: the angle between consecutive latlon points
    :rtype: (N - 1) x 1 numpy array
    """
    logger.debug("finding bearing of vector")
    lat1 = position_array[:-1, 0]
    lat2 = position_array[1:, 0]
    lon1 = position_array[:-1, 1]
    lon2 = position_array[1:, 1]
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
    # NOTE(review): dlon is lon1 - lon2, so a step toward increasing longitude
    # gives a negative bearing - confirm this sign convention is intended.
    dlon = lon1 - lon2
    first_val = np.sin(dlon) * np.cos(lat2)
    second_val = (np.cos(lat1) * np.sin(lat2) -
                  np.sin(lat1) * np.cos(lat2) * np.cos(dlon))
    cur_bear = np.arctan2(first_val, second_val)
    if units == "deg":
        cur_bear = (np.rad2deg(cur_bear) + 180.0) % 360 - 180
    if window_len > 1:
        cur_bear = window_data(cur_bear, window_len)
    return cur_bear
Beispiel #15
0
def great_circle(position_array, window_len=1, units="mi"):
    """
    Calculate the great circle distance between consecutive samples in a lat lon vector.

    :param position_array: input vector of lat lon points (column 0 is read as
        latitude, column 1 as longitude, both in degrees)
    :type position_array: N x 2 numpy array
    :param window_len: length of window for averaging output; values above 1
        pass the result through ``window_data``
    :type window_len: int
    :param units: String for output units. Currently 'mi' and 'km' supported
    :type units: str
    :return: distances between consecutive points
    :rtype: (N-1) x 1 numpy array
    :raises ValueError: if ``units`` is not 'mi' or 'km'
    """
    logger.debug("calculating great circle distance")
    # These constants are Earth radii (the haversine angle is multiplied by a radius).
    earth_radii = {"mi": 3959, "km": 6371}
    if units not in earth_radii:
        # Previously an unknown unit surfaced later as an unbound local variable.
        raise ValueError(
            "{} is not a supported unit; use 'mi' or 'km'".format(units))

    lat1 = position_array[:-1, 0]
    lat2 = position_array[1:, 0]
    lon1 = position_array[:-1, 1]
    lon2 = position_array[1:, 1]
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
    dlat = lat1 - lat2
    dlon = lon1 - lon2
    inner_val = (np.sin(dlat / 2.0)**2 +
                 np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2.0)**2)
    # Rounding can push the root marginally above 1 for near-antipodal points,
    # which would turn arcsin into NaN; cap it at 1.
    outer_val = 2 * np.arcsin(np.minimum(1.0, np.sqrt(inner_val)))

    great_dist = outer_val * earth_radii[units]
    if window_len > 1:
        great_dist = window_data(great_dist, window_len)
    return great_dist
Beispiel #16
0
 def _aggregate_uids(self):
     """
     Replace each user-id entry with the list of its values across all dstreams.

     Only keys already present in ``self["user_ids"]`` are gathered.
     """
     logger.debug("aggregating uids")
     per_stream_ids = [stream["user_ids"] for stream in self.dstreams]
     gathered = {}
     for uid_key in self["user_ids"]:
         gathered[uid_key] = [ids[uid_key] for ids in per_stream_ids]
     self["user_ids"] = gathered
Beispiel #17
0
 def consume(self):
     """
     Start the consumer and log the value and offset of every message it yields.

     The start call is wrapped by the 'Consumer.consume : self.consumer.start'
     timer; messages that are None are skipped.
     """
     tk['Consumer.consume : self.consumer.start'].start()
     self.consumer.start()  # auto-start
     tk['Consumer.consume : self.consumer.start'].stop()
     for msg in self.consumer:
         if msg is None:
             continue
         logger.debug(f"{msg.value!s}: {msg.offset}")
Beispiel #18
0
 def find_events(self):
     """
     Apply every event rule and store each result under ``self["events"]``.

     ``self["events"]`` is reset to an empty dict first; each rule's fields
     are handed to ``apply_transform`` and the result is stored under the
     rule's name.
     """
     logger.debug("finding events")
     self["events"] = {}
     # Positional order expected by apply_transform.
     rule_fields = ("partition_list", "measure_list", "transform_type",
                    "transform_name", "param_dict", "logical_comparison")
     for event_name, event_rule in self["event_rules"].items():
         transform_args = [event_rule[field] for field in rule_fields]
         found = self.apply_transform(*transform_args)
         self["events"][event_name] = found
Beispiel #19
0
    def aggregate(self):
        """
        Run every aggregation step in order and return this object.

        Order: user ids, timestamps, fields, tags, then the measure DataFrame.

        :return: self, so calls can be chained
        """
        logger.debug("aggregating everything")
        steps = (
            self._aggregate_uids,
            self._aggregate_ts,
            self._aggregate_fields,
            self._aggregate_tags,
            self._measure_df,
        )
        for run_step in steps:
            run_step()

        return self
Beispiel #20
0
 def get(self, option, section=None, default=None):
     """
     Read a configuration value, falling back to ``default`` when it is missing.

     :param option: option name; when ``section`` is None it is first resolved
         into an (option, section) pair by ``__get_option_name``
     :param section: section to read from, or None to derive it from ``option``
     :param default: value returned when the section or option cannot be read
     :return: the configured value, or ``default``
     """
     try:
         if section is None:
             option, section = self.__get_option_name(option)
         value = self._cfg.get(section, option)
     except (NoSectionError, NoOptionError, ConfigParserGeneralError) as err:
         # The old message had no placeholder, so "%"-formatting it raised
         # TypeError and the default was never returned.
         logger.info(
             "Configuration parameter didn't exist, returning the default value. %s",
             err)
         return default
     logger.debug("Read configuration parameter: (section=%s) %s=%s", section,
                  option, value)
     return value
Beispiel #21
0
 def load_from_json(self, json_file):
     """
     Copy the entries of an existing json dict into this DStream.

     The 'stream_token' and 'template_id' entries are never copied.

     :param json_file: the json dict containing data to be loaded into our DStream
     :type json_file: dict
     """
     protected_keys = ('stream_token', 'template_id')
     for key, value in json_file.items():
         if key in protected_keys:
             continue
         self[key] = value
         logger.debug("added key %s" % (key))
Beispiel #22
0
 def apply_dparam_rules(self):
     """
     Run ``apply_transform`` once for every derived-parameter rule.

     ``self["derived_measures"]`` is reset to an empty dict first; the return
     value of each ``apply_transform`` call is not used here.
     """
     logger.debug("deriving parameters")
     self["derived_measures"] = {}
     # Positional order expected by apply_transform.
     rule_fields = ("partition_list", "measure_list", "transform_type",
                    "transform_name", "param_dict", "logical_comparison")
     for dparam_rule in self["dparam_rules"]:
         transform_args = [dparam_rule[field] for field in rule_fields]
         self.apply_transform(*transform_args)
Beispiel #23
0
 def apply_filters(self):
     """
     Run ``apply_transform`` once for every filter rule.

     ``self["filter_measures"]`` is reset to an empty dict first; the return
     value of each ``apply_transform`` call is not used here.
     """
     logger.debug("applying filters")
     self["filter_measures"] = {}
     # Positional order expected by apply_transform.
     rule_fields = ("partition_list", "measure_list", "transform_type",
                    "transform_name", "param_dict", "logical_comparison")
     for filter_rule in self["filters"]:
         transform_args = [filter_rule[field] for field in rule_fields]
         self.apply_transform(*transform_args)
Beispiel #24
0
def cumsum(data_array, offset=0):
    """
    Return the running total of a vector, shifted by a constant.

    :param data_array: data to be summed
    :type data_array: numpy array
    :param offset: value added to every running total
    :type offset: float
    :return: the cumulative sum of the data_array plus ``offset``
    :rtype: numpy array
    """
    logger.debug("cumsum")
    running_total = np.cumsum(data_array)
    return running_total + offset
Beispiel #25
0
 def produce(self, dmsg):
     """
     Produce one message, keyed by the running message count.

     The encoded string form of ``self.count`` is used as the partition key
     and the count is incremented after the send. The whole call and the
     inner send are each wrapped by their own timer.

     :param dmsg: Message to produce
     """
     overall_timer = 'Producer.produce'
     send_timer = 'Producer.produce : self.producer.produce'

     tk[overall_timer].start()
     partition_key = str(self.count).encode()
     tk[send_timer].start()
     self.producer.produce(dmsg, partition_key=partition_key)
     tk[send_timer].stop()
     logger.debug("Just produced a message")
     self.count += 1
     tk[overall_timer].stop()
Beispiel #26
0
    def _post_events(event_data):
        """
        Send a POST request carrying one event to the API's new_event endpoint.

        Host and port are read from ``config``; the event is sent as JSON.

        :param event_data: event data (individual event)
        :type event_data: dict
        :return: the HTTP status code under the key 'request_status'
        :rtype: dict
        """
        host, port = config['server_host'], config['server_port']
        endpoint = f'http://{host}:{port}/new_event'
        logger.debug(event_data)
        response = requests.post(endpoint, json=event_data)

        return dict(request_status=response.status_code)
Beispiel #27
0
    def _measure_df(self):
        """
        Turn the per-dstream measures into one DataFrame stored on ``self["measures"]``.

        For every measure already named in ``self["measures"]`` the 'val' entries
        of all dstreams are gathered into a list. Timestamp, user-id, tags and
        fields columns are then added and the dict is converted with
        ``pd.DataFrame.from_dict``.
        """
        logger.debug("aggregating into DataFrame")
        per_stream = [stream["measures"] for stream in self.dstreams]
        columns = {}
        for measure_name in self["measures"]:
            columns[measure_name] = [
                entry[measure_name]['val'] for entry in per_stream
            ]
        self["measures"] = columns

        self["measures"]["timestamp"] = self["timestamp"]
        for user_id, value in self["user_ids"].items():
            self["measures"][user_id] = value
        for passthrough in ("tags", "fields"):
            self["measures"][passthrough] = self[passthrough]

        self["measures"] = pd.DataFrame.from_dict(self["measures"])
Beispiel #28
0
def euclidean_dist(position_array, window_len=1):
    """
    Compute the straight-line distance between each pair of consecutive positions.

    :param position_array: input vector of positions
    :type position_array: N x 2 numpy array
    :param window_len: length of window for averaging output; values above 1
        pass the result through ``window_data``
    :type window_len: int
    :return: distances between consecutive points
    :rtype: (N-1) x 1 numpy array
    """
    logger.debug("calculating euclidean distance")
    steps = np.diff(position_array, axis=0)
    squared_lengths = np.sum(steps**2, axis=1)
    distances = np.sqrt(squared_lengths)
    if window_len <= 1:
        return distances
    return window_data(distances, window_len)
Beispiel #29
0
def logical_combine(array1, array2, combiner):
    """
    Build the elementwise AND or OR of two vectors.

    :param array1: First array for combination
    :type array1: (n,1) numpy array
    :param array2: Second array for combination
    :type array2: (n,1) numpy array
    :param combiner: "AND" or "OR" specifying which method to combine
    :type combiner: str
    :return: Boolean array combining the two inputs
    :rtype: (n,1) boolean numpy array
    :raises KeyError: if ``combiner`` is neither "AND" nor "OR"
    """
    logger.debug(f"Combining with{combiner}")
    operators = dict(AND=np.logical_and, OR=np.logical_or)
    chosen = operators[combiner]
    return chosen(array1, array2)
Beispiel #30
0
    def _post_dataframe(stream_token, dataframe):
        """
        Send a stream token and its dataframe to the API's data_storage endpoint.

        Host and port are read from ``config``; the (stream_token, dataframe)
        pair is pickled and sent as the request body.

        :param stream_token: token sent alongside the dataframe
        :param dataframe: the data to store
        :return: the HTTP status code under the key 'request_status'
        :rtype: dict
        """
        host, port = config['server_host'], config['server_port']
        endpoint = f'http://{host}:{port}/data_storage'
        logger.debug(dataframe)
        # NOTE(review): the body is a pickle, so the receiver has to unpickle
        # it - only safe when both ends are trusted services.
        payload = pickle.dumps((stream_token, dataframe))
        response = requests.post(endpoint, data=payload)

        return {'request_status': response.status_code}