Example #1
0
    def get_dataset(self, time_range, limit, provenance_metadata, pad_forward, deployments, request_id=None):
        """
        Gather data for this stream from the SAN and Cassandra stores and
        compile the pieces into a single dataset.

        :param time_range: requested time window
        :param limit: optional cap on the number of points to fetch; the cap
            is split between stores proportionally to their point counts
        :param provenance_metadata: accumulator that receives any messages
            produced while locating the data
        :param pad_forward: when truthy, prepend a lookback dataset
        :param deployments: deployments passed through to the lookback query
        :param request_id: optional request identifier forwarded to fetchers
        :return: the compiled dataset from compile_datasets
        """
        cass_locations, san_locations, messages = get_location_metadata(self.stream_key, time_range)
        provenance_metadata.add_messages(messages)

        collected = []

        # Fraction of the available points held by each backing store
        # (both stay 0 when there is no data at all).
        total = float(san_locations.total + cass_locations.total)
        san_fraction = cass_fraction = 0
        if total != 0:
            san_fraction = san_locations.total / total
            cass_fraction = cass_locations.total / total

        if pad_forward:
            # pad forward on some datasets
            collected.append(self.get_lookback_dataset(self.stream_key, time_range, deployments, request_id))

        if san_locations.total > 0:
            # Clamp the query window to the span the SAN actually covers.
            san_window = TimeRange(max(time_range.start, san_locations.start_time),
                                   min(time_range.stop, san_locations.end_time))
            if limit:
                collected.append(fetch_nsan_data(self.stream_key, san_window,
                                                 num_points=int(limit * san_fraction),
                                                 location_metadata=san_locations))
            else:
                collected.append(fetch_full_san_data(self.stream_key, san_window,
                                                     location_metadata=san_locations))

        if cass_locations.total > 0:
            start = max(time_range.start, cass_locations.start_time)
            stop = min(time_range.stop, cass_locations.end_time)
            # issues arise when sending cassandra a query with the exact time range.
            # Data points at the start and end will be left out of the results.  This is an issue for full data
            # queries, to compensate for this we add .1 seconds to the given start and end time
            cass_window = TimeRange(start - .1, stop + .1)
            if limit:
                collected.append(fetch_nth_data(self.stream_key, cass_window,
                                                num_points=int(limit * cass_fraction),
                                                location_metadata=cass_locations, request_id=request_id))
            else:
                collected.append(get_full_cass_dataset(self.stream_key, cass_window,
                                                       location_metadata=cass_locations, request_id=request_id))
        return compile_datasets(collected)
Example #2
0
def main(args):
    """
    Copy a Cassandra dataset from one stream key to another.

    Parses the three command-line arguments (old stream-key values, new
    stream-key values, and a time-stamp range), builds the corresponding
    StreamKeys and TimeRange, then reads the full dataset for the old key
    and inserts it under the new key. Any parse/validation failure is
    reported by the helper that detected it and aborts the copy.

    :param args: sequence of exactly three argument strings
    """
    if len(args) != 3:
        usage("Expecting 3 arguments, got " + str(len(args)))
        return

    old_sk_vals, new_sk_vals, time_stamps = args

    # Break each stream_key argument into refdes, method and stream id.
    old_refdes, old_method, old_stream_id = split_sk_vals("old", old_sk_vals)
    new_refdes, new_method, new_stream_id = split_sk_vals("new", new_sk_vals)
    if old_refdes is None or new_refdes is None:
        return

    # Break each refdes into subsite, node and sensor.
    old_subsite, old_node, old_sensor = split_refdes("old", old_refdes)
    new_subsite, new_node, new_sensor = split_refdes("new", new_refdes)
    if old_subsite is None or new_subsite is None:
        return

    # Build the source and destination StreamKeys.
    old_stream_key = get_stream_key("old", old_subsite, old_node, old_sensor,
                                    old_method, old_stream_id)
    new_stream_key = get_stream_key("new", new_subsite, new_node, new_sensor,
                                    new_method, new_stream_id)
    if old_stream_key is None or new_stream_key is None:
        return

    # Convert the time-stamp argument into a TimeRange.
    time_range = get_time_range(time_stamps)
    if time_range is None:
        return

    initialize_worker()
    dataset = get_full_cass_dataset(old_stream_key, time_range, keep_exclusions=True)
    insert_dataset(new_stream_key, dataset)
Example #3
0
    def get_dataset(self,
                    time_range,
                    limit,
                    provenance_metadata,
                    pad_forward,
                    deployments,
                    request_id=None):
        """
        Fetch this stream's data from the SAN and Cassandra stores and
        return the compiled combination.

        :param time_range: requested time window
        :param limit: optional cap on points fetched, apportioned between
            the two stores by their share of the total point count
        :param provenance_metadata: accumulator for location messages
        :param pad_forward: when truthy, prepend a lookback dataset
        :param deployments: deployments forwarded to the lookback query
        :param request_id: optional request identifier for the fetchers
        :return: the dataset produced by compile_datasets
        """
        cass_locations, san_locations, messages = get_location_metadata(
            self.stream_key, time_range)
        provenance_metadata.add_messages(messages)

        parts = []

        # Work out each store's share of the available points; both shares
        # remain 0 when neither store has any data.
        total = float(san_locations.total + cass_locations.total)
        san_share = cass_share = 0
        if total != 0:
            san_share = san_locations.total / total
            cass_share = cass_locations.total / total

        if pad_forward:
            # pad forward on some datasets
            parts.append(self.get_lookback_dataset(
                self.stream_key, time_range, deployments, request_id))

        if san_locations.total > 0:
            # Restrict the window to the span the SAN actually covers.
            clamped_start = max(time_range.start, san_locations.start_time)
            clamped_stop = min(time_range.stop, san_locations.end_time)
            san_window = TimeRange(clamped_start, clamped_stop)
            if limit:
                parts.append(fetch_nsan_data(
                    self.stream_key, san_window,
                    num_points=int(limit * san_share),
                    location_metadata=san_locations))
            else:
                parts.append(fetch_full_san_data(
                    self.stream_key, san_window,
                    location_metadata=san_locations))

        if cass_locations.total > 0:
            clamped_start = max(time_range.start, cass_locations.start_time)
            clamped_stop = min(time_range.stop, cass_locations.end_time)
            # issues arise when sending cassandra a query with the exact time range.
            # Data points at the start and end will be left out of the results.  This is an issue for full data
            # queries, to compensate for this we add .1 seconds to the given start and end time
            cass_window = TimeRange(clamped_start - .1, clamped_stop + .1)
            if limit:
                parts.append(fetch_nth_data(
                    self.stream_key, cass_window,
                    num_points=int(limit * cass_share),
                    location_metadata=cass_locations,
                    request_id=request_id))
            else:
                parts.append(get_full_cass_dataset(
                    self.stream_key, cass_window,
                    location_metadata=cass_locations,
                    request_id=request_id))
        return compile_datasets(parts)
Example #4
0
    def get_dataset(self, time_range, limit, provenance_metadata, pad_dataset, request_id=None):
        """
        Collect this stream's data from the SAN and Cassandra stores,
        optionally padded just outside the requested range, and compile the
        pieces into one dataset.

        :param time_range: requested time window
        :param limit: optional cap on points fetched, split between stores
            proportionally to their point counts
        :param provenance_metadata: accumulator for location messages
        :param pad_dataset: when truthy, also fetch boundary points outside
            the requested range (within deployment bounds) for interpolation
        :param request_id: optional request identifier for the fetchers
        :return: the dataset produced by compile_datasets
        """
        cass_locations, san_locations, messages = get_location_metadata(self.stream_key, time_range)
        provenance_metadata.add_messages(messages)

        pieces = []

        # Share of available points held by each store (0 when no data).
        total = float(san_locations.total + cass_locations.total)
        san_share = cass_share = 0
        if total != 0:
            san_share = san_locations.total / total
            cass_share = cass_locations.total / total

        # If this is a supporting stream (ie. not the primary requested stream),
        # get extra data points on both sides immediately outside of the requested
        # time range for higher quality interpolation of supporting stream data
        # into the primary data set at the request time boundaries. The extra
        # data points must be within the time range of the deployments.
        if pad_dataset and app.config['LOOKBACK_QUERY_LIMIT'] > 0:
            # Start of the first and stop of the last deployment inside the
            # requested range bound the padding on each side.
            deployment_time_range = self.get_deployment_time_range(time_range)
            pad_start = deployment_time_range.get("start", None)
            pad_stop = deployment_time_range.get("stop", None)
            if pad_start:
                pieces.append(self.get_lookback_dataset(self.stream_key, time_range,
                                                        pad_start, request_id))
            if pad_stop:
                pieces.append(self.get_lookforward_dataset(self.stream_key, time_range,
                                                           pad_stop, request_id))

        if san_locations.total > 0:
            # Restrict the window to the span the SAN actually covers.
            san_window = TimeRange(max(time_range.start, san_locations.start_time),
                                   min(time_range.stop, san_locations.end_time))
            if limit:
                pieces.append(fetch_nsan_data(self.stream_key, san_window,
                                              num_points=int(limit * san_share),
                                              location_metadata=san_locations))
            else:
                pieces.append(fetch_full_san_data(self.stream_key, san_window,
                                                  location_metadata=san_locations))

        if cass_locations.total > 0:
            start = max(time_range.start, cass_locations.start_time)
            stop = min(time_range.stop, cass_locations.end_time)
            # issues arise when sending cassandra a query with the exact time range.
            # Data points at the start and end will be left out of the results.  This is an issue for full data
            # queries, to compensate for this we add .1 seconds to the given start and end time
            cass_window = TimeRange(start - .1, stop + .1)
            if limit:
                pieces.append(fetch_nth_data(self.stream_key, cass_window,
                                             num_points=int(limit * cass_share),
                                             location_metadata=cass_locations, request_id=request_id))
            else:
                pieces.append(get_full_cass_dataset(self.stream_key, cass_window,
                                                    location_metadata=cass_locations, request_id=request_id))
        return compile_datasets(pieces)