def get_dataset(self, time_range, limit, provenance_metadata, pad_forward, deployments, request_id=None):
    """
    Assemble a combined dataset for this stream from the SAN and Cassandra stores.

    :param time_range: TimeRange bounding the query
    :param limit: optional cap on the total number of points; points are split
                  between stores proportionally to how much data each holds
    :param provenance_metadata: collector that accumulates provenance messages
    :param pad_forward: when True, prepend a lookback dataset
    :param deployments: deployments considered for the lookback query
    :param request_id: optional identifier used to trace this request
    :return: result of compile_datasets() over all fetched pieces
    """
    cass_locations, san_locations, messages = get_location_metadata(self.stream_key, time_range)
    provenance_metadata.add_messages(messages)

    pieces = []
    grand_total = float(san_locations.total + cass_locations.total)
    # Fraction of the requested points each store should contribute.
    san_fraction = san_locations.total / grand_total if grand_total else 0
    cass_fraction = cass_locations.total / grand_total if grand_total else 0

    if pad_forward:
        # Include data immediately preceding the requested range.
        pieces.append(self.get_lookback_dataset(self.stream_key, time_range, deployments, request_id))

    if san_locations.total > 0:
        # Clamp the query window to where SAN data actually exists.
        san_window = TimeRange(max(time_range.start, san_locations.start_time),
                               min(time_range.stop, san_locations.end_time))
        if limit:
            pieces.append(fetch_nsan_data(self.stream_key, san_window,
                                          num_points=int(limit * san_fraction),
                                          location_metadata=san_locations))
        else:
            pieces.append(fetch_full_san_data(self.stream_key, san_window,
                                              location_metadata=san_locations))

    if cass_locations.total > 0:
        window_start = max(time_range.start, cass_locations.start_time)
        window_stop = min(time_range.stop, cass_locations.end_time)
        # Cassandra omits records landing exactly on the query boundaries, which
        # would truncate full-data queries; widen the window by .1 s on each side.
        cass_window = TimeRange(window_start - .1, window_stop + .1)
        if limit:
            pieces.append(fetch_nth_data(self.stream_key, cass_window,
                                         num_points=int(limit * cass_fraction),
                                         location_metadata=cass_locations,
                                         request_id=request_id))
        else:
            pieces.append(get_full_cass_dataset(self.stream_key, cass_window,
                                                location_metadata=cass_locations,
                                                request_id=request_id))

    return compile_datasets(pieces)
def main(args):
    """
    Copy a dataset from one stream key to another over a given time range.

    Expects exactly three positional arguments: the old stream-key values, the
    new stream-key values, and the time stamps. Each parsing helper reports its
    own errors, so this function simply bails out when a helper returns None.
    """
    if len(args) != 3:
        usage("Expecting 3 arguments, got " + str(len(args)))
        return

    old_sk_vals, new_sk_vals, time_stamps = args

    # Break each stream-key string into refdes, method and stream id.
    old_refdes, old_method, old_stream_id = split_sk_vals("old", old_sk_vals)
    new_refdes, new_method, new_stream_id = split_sk_vals("new", new_sk_vals)
    if old_refdes is None or new_refdes is None:
        return

    # Break each refdes into subsite, node and sensor.
    old_subsite, old_node, old_sensor = split_refdes("old", old_refdes)
    new_subsite, new_node, new_sensor = split_refdes("new", new_refdes)
    if old_subsite is None or new_subsite is None:
        return

    # Build the source and destination StreamKeys.
    old_stream_key = get_stream_key("old", old_subsite, old_node, old_sensor, old_method, old_stream_id)
    new_stream_key = get_stream_key("new", new_subsite, new_node, new_sensor, new_method, new_stream_id)
    if old_stream_key is None or new_stream_key is None:
        return

    # Convert the time stamp range into a TimeRange.
    time_range = get_time_range(time_stamps)
    if time_range is None:
        return

    initialize_worker()
    dataset = get_full_cass_dataset(old_stream_key, time_range, keep_exclusions=True)
    insert_dataset(new_stream_key, dataset)
def get_dataset(self, time_range, limit, provenance_metadata, pad_forward, deployments, request_id=None):
    """
    Build the dataset for this stream by merging SAN and Cassandra results.

    :param time_range: TimeRange the query must stay within
    :param limit: optional point budget, shared between stores in proportion
                  to the amount of data each one holds
    :param provenance_metadata: sink for provenance messages from metadata lookup
    :param pad_forward: when True, a lookback dataset is fetched first
    :param deployments: deployments used by the lookback fetch
    :param request_id: optional request-tracing identifier
    :return: compiled dataset from compile_datasets()
    """
    cass_locations, san_locations, messages = get_location_metadata(
        self.stream_key, time_range)
    provenance_metadata.add_messages(messages)

    collected = []
    combined_total = float(san_locations.total + cass_locations.total)
    san_share = 0
    cass_share = 0
    if combined_total != 0:
        san_share = san_locations.total / combined_total
        cass_share = cass_locations.total / combined_total

    if pad_forward:
        # Pad with data from just before the requested window.
        collected.append(
            self.get_lookback_dataset(self.stream_key, time_range, deployments,
                                      request_id))

    if san_locations.total > 0:
        # Narrow the window to the overlap with available SAN data.
        san_range = TimeRange(max(time_range.start, san_locations.start_time),
                              min(time_range.stop, san_locations.end_time))
        if limit:
            san_points = int(limit * san_share)
            collected.append(
                fetch_nsan_data(self.stream_key, san_range,
                                num_points=san_points,
                                location_metadata=san_locations))
        else:
            collected.append(
                fetch_full_san_data(self.stream_key, san_range,
                                    location_metadata=san_locations))

    if cass_locations.total > 0:
        # A query at the exact range drops boundary rows in Cassandra, so the
        # window is widened by .1 seconds on both ends before querying.
        cass_range = TimeRange(
            max(time_range.start, cass_locations.start_time) - .1,
            min(time_range.stop, cass_locations.end_time) + .1)
        if limit:
            cass_points = int(limit * cass_share)
            collected.append(
                fetch_nth_data(self.stream_key, cass_range,
                               num_points=cass_points,
                               location_metadata=cass_locations,
                               request_id=request_id))
        else:
            collected.append(
                get_full_cass_dataset(self.stream_key, cass_range,
                                      location_metadata=cass_locations,
                                      request_id=request_id))

    return compile_datasets(collected)
def get_dataset(self, time_range, limit, provenance_metadata, pad_dataset, request_id=None):
    """
    Fetch and merge this stream's data from the SAN and Cassandra stores.

    :param time_range: TimeRange the query must stay within
    :param limit: optional point budget, divided between stores in proportion
                  to how much data each one holds
    :param provenance_metadata: sink for provenance messages
    :param pad_dataset: when True this is a supporting (non-primary) stream and
                        extra boundary data is fetched for interpolation
    :param request_id: optional request-tracing identifier
    :return: compiled dataset from compile_datasets()
    """
    cass_locations, san_locations, messages = get_location_metadata(self.stream_key, time_range)
    provenance_metadata.add_messages(messages)

    results = []
    overall = float(san_locations.total + cass_locations.total)
    san_ratio = cass_ratio = 0
    if overall != 0:
        san_ratio = san_locations.total / overall
        cass_ratio = cass_locations.total / overall

    # Supporting streams (i.e. not the primary requested stream) get extra data
    # points immediately outside the requested window so interpolation of their
    # values into the primary dataset is higher quality at the boundaries. The
    # extra points must fall within the deployments' time range, so look up the
    # start of the first and stop of the last deployment in the window first.
    if pad_dataset and app.config['LOOKBACK_QUERY_LIMIT'] > 0:
        deployment_window = self.get_deployment_time_range(time_range)
        if deployment_window.get("start"):
            results.append(self.get_lookback_dataset(
                self.stream_key, time_range, deployment_window["start"], request_id))
        if deployment_window.get("stop"):
            results.append(self.get_lookforward_dataset(
                self.stream_key, time_range, deployment_window["stop"], request_id))

    if san_locations.total > 0:
        # Restrict the window to the overlap with available SAN data.
        san_range = TimeRange(max(time_range.start, san_locations.start_time),
                              min(time_range.stop, san_locations.end_time))
        if limit:
            results.append(fetch_nsan_data(self.stream_key, san_range,
                                           num_points=int(limit * san_ratio),
                                           location_metadata=san_locations))
        else:
            results.append(fetch_full_san_data(self.stream_key, san_range,
                                               location_metadata=san_locations))

    if cass_locations.total > 0:
        begin = max(time_range.start, cass_locations.start_time)
        end = min(time_range.stop, cass_locations.end_time)
        # Cassandra leaves out rows sitting exactly on the query boundaries,
        # which hurts full-data queries; pad the window by .1 s on each side.
        cass_range = TimeRange(begin - .1, end + .1)
        if limit:
            results.append(fetch_nth_data(self.stream_key, cass_range,
                                          num_points=int(limit * cass_ratio),
                                          location_metadata=cass_locations,
                                          request_id=request_id))
        else:
            results.append(get_full_cass_dataset(self.stream_key, cass_range,
                                                 location_metadata=cass_locations,
                                                 request_id=request_id))

    return compile_datasets(results)