def _get_dps_results(self) -> List[DatapointsList]:
    def custom_sort_key(x):
        if x.timestamp:
            return x.timestamp[0]
        return 0

    dps_lists = [DatapointsList([], cognite_client=self.client._cognite_client)] * len(self.query_ids)
    for q_id, dps_objects in self.query_id_to_datapoints_objects.items():
        ts_id_to_dps_objects = defaultdict(lambda: [])
        for dps_object in dps_objects:
            ts_id_to_dps_objects[dps_object.id].append(dps_object)

        dps_list = DatapointsList([], cognite_client=self.client._cognite_client)
        for ts_id, dps_objects in ts_id_to_dps_objects.items():
            dps = Datapoints()
            for dps_object in sorted(dps_objects, key=custom_sort_key):
                dps._extend(dps_object)
            if self.query_id_to_include_outside_points[q_id]:
                dps = self._remove_duplicates(dps)
            query_limit = self.query_id_to_limit[q_id]
            if query_limit and len(dps) > query_limit:
                dps = dps[:query_limit]
            dps_list.append(dps)
        dps_list = self._sort_dps_list_by_task_order(dps_list, self.query_id_to_tasks[q_id])
        dps_lists[self.query_ids.index(q_id)] = dps_list
    return dps_lists
def test_get_range():
    src_latest = Datapoints(timestamp=[20000000])
    dst_latest = Datapoints(timestamp=[10000000])
    start, end = datapoints._get_time_range(src_latest, dst_latest)
    assert (start, end) == (dst_latest[0].timestamp + 1, src_latest[0].timestamp + 1)

    start, end = datapoints._get_time_range(Datapoints(), Datapoints())
    assert (start, end) == (0, 0)
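
# A minimal sketch of the _get_time_range helper exercised by the test above, inferred purely
# from its assertions; the real implementation in the replicator's datapoints module may differ.
# Retrieval is end-exclusive, hence the +1 on both boundaries. Tuple is assumed imported from typing.
def _get_time_range_sketch(latest_src_dp: Datapoints, latest_dst_dp: Datapoints) -> Tuple[int, int]:
    # Stop just after the newest source point; nothing to copy if the source is empty.
    end = latest_src_dp[0].timestamp + 1 if latest_src_dp else 0
    # Start just after the newest destination point so only newer data is copied.
    start = latest_dst_dp[0].timestamp + 1 if latest_dst_dp else 0
    return start, end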
def __init__(self, client, start, end, ts_item, aggregates, granularity, include_outside_points, limit):
    self.start = cognite.client.utils._time.timestamp_to_ms(start)
    self.end = cognite.client.utils._time.timestamp_to_ms(end)
    self.aggregates = ts_item.get("aggregates") or aggregates
    self.ts_item = {k: v for k, v in ts_item.items() if k in ["id", "externalId"]}
    self.granularity = granularity
    self.include_outside_points = include_outside_points
    self.limit = limit or float("inf")

    self.client = client
    self.request_limit = client._DPS_LIMIT_AGG if self.aggregates else client._DPS_LIMIT
    self.results = []
    self.point_before = Datapoints()
    self.point_after = Datapoints()
def _get_datapoints(
    self,
    start: int,
    end: int,
    ts_item: Dict[str, Any],
    aggregates: List[str],
    granularity: str,
    include_outside_points: bool,
    limit: int,
) -> Datapoints:
    is_aggregated = aggregates or "aggregates" in ts_item
    payload = {
        "items": [ts_item],
        "start": start,
        "end": end,
        "aggregates": aggregates,
        "granularity": granularity,
        "includeOutsidePoints": include_outside_points,
        "limit": limit or (self.client._DPS_LIMIT_AGG if is_aggregated else self.client._DPS_LIMIT),
    }
    res = self.client._post(self.client._RESOURCE_PATH + "/list", json=payload).json()["items"][0]
    aggs = ts_item.get("aggregates", aggregates)
    expected_fields = [a for a in aggs] if aggs is not None else ["value"]
    dps = Datapoints._load(res, expected_fields, cognite_client=self.client._cognite_client)
    return dps
def retrieve_latest(
    self,
    id: Union[int, List[int]] = None,
    external_id: Union[str, List[str]] = None,
    before: Union[int, str, datetime] = None,
) -> Union[Datapoints, DatapointsList]:
    """`Get the latest datapoint for one or more time series <https://docs.cognite.com/api/v1/#operation/getLatest>`_

    Args:
        id (Union[int, List[int]]): Id or list of ids.
        external_id (Union[str, List[str]]): External id or list of external ids.
        before (Union[int, str, datetime]): Get latest datapoint before this time.

    Returns:
        Union[Datapoints, DatapointsList]: A Datapoints object containing the requested data, or a list of such objects.

    Examples:

        Getting the latest datapoint in a time series. This method returns a Datapoints object, so the datapoint will
        be the first element::

            >>> from cognite.client import CogniteClient
            >>> c = CogniteClient()
            >>> res = c.datapoints.retrieve_latest(id=1)[0]

        You can also get the first datapoint before a specific time::

            >>> from cognite.client import CogniteClient
            >>> c = CogniteClient()
            >>> res = c.datapoints.retrieve_latest(id=1, before="2d-ago")[0]

        If you need the latest datapoint for multiple time series, simply give a list of ids. Note that we are using
        external ids here, but either will work::

            >>> from cognite.client import CogniteClient
            >>> c = CogniteClient()
            >>> res = c.datapoints.retrieve_latest(external_id=["abc", "def"])
            >>> latest_abc = res[0][0]
            >>> latest_def = res[1][0]
    """
    before = cognite.client.utils._time.timestamp_to_ms(before) if before else None
    all_ids = self._process_ids(id, external_id, wrap_ids=True)
    is_single_id = self._is_single_identifier(id, external_id)
    if before:
        for id in all_ids:
            id.update({"before": before})

    tasks = [
        {"url_path": self._RESOURCE_PATH + "/latest", "json": {"items": chunk}}
        for chunk in utils._auxiliary.split_into_chunks(all_ids, self._RETRIEVE_LATEST_LIMIT)
    ]
    tasks_summary = utils._concurrency.execute_tasks_concurrently(
        self._post, tasks, max_workers=self._config.max_workers
    )
    if tasks_summary.exceptions:
        raise tasks_summary.exceptions[0]
    res = tasks_summary.joined_results(lambda res: res.json()["items"])
    if is_single_id:
        return Datapoints._load(res[0], cognite_client=self._cognite_client)
    return DatapointsList._load(res, cognite_client=self._cognite_client)
def _remove_duplicates(dps_object: Datapoints) -> Datapoints:
    frequencies = defaultdict(lambda: [0, []])
    for i, timestamp in enumerate(dps_object.timestamp):
        frequencies[timestamp][0] += 1
        frequencies[timestamp][1].append(i)

    indices_to_remove = []
    for timestamp, freq in frequencies.items():
        if freq[0] > 1:
            indices_to_remove += freq[1][1:]

    dps_object_without_duplicates = Datapoints(id=dps_object.id, external_id=dps_object.external_id)
    for attr, values in dps_object._get_non_empty_data_fields():
        filtered_values = [elem for i, elem in enumerate(values) if i not in indices_to_remove]
        setattr(dps_object_without_duplicates, attr, filtered_values)

    return dps_object_without_duplicates
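
# Illustrative (hypothetical) use of _remove_duplicates: when outside points are requested,
# adjacent request windows can return the same timestamp twice; only the first occurrence of
# each timestamp is kept and every data field is filtered consistently. If the helper lives on
# the fetcher class as a staticmethod, call it through that class instead.
def _example_remove_duplicates() -> None:
    dps_with_dupes = Datapoints(id=1, timestamp=[1000, 2000, 2000, 3000], value=[1.0, 2.0, 2.0, 3.0])
    deduped = _remove_duplicates(dps_with_dupes)
    assert deduped.timestamp == [1000, 2000, 3000]
    assert deduped.value == [1.0, 2.0, 3.0]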
def retrieve_latest(
    self,
    id: Union[int, List[int]] = None,
    external_id: Union[str, List[str]] = None,
    before: Union[int, str, datetime] = None,
) -> Union[Datapoints, DatapointsList]:
    """Get the latest datapoint for one or more time series

    Args:
        id (Union[int, List[int]]): Id or list of ids.
        external_id (Union[str, List[str]]): External id or list of external ids.
        before (Union[int, str, datetime]): Get latest datapoint before this time.

    Returns:
        Union[Datapoints, DatapointsList]: A Datapoints object containing the requested data, or a list of such objects.

    Examples:

        Getting the latest datapoint in a time series. This method returns a Datapoints object, so the datapoint will
        be the first element::

            >>> from cognite.client import CogniteClient
            >>> c = CogniteClient()
            >>> res = c.datapoints.retrieve_latest(id=1)[0]

        You can also get the first datapoint before a specific time::

            >>> from cognite.client import CogniteClient
            >>> c = CogniteClient()
            >>> res = c.datapoints.retrieve_latest(id=1, before="2d-ago")[0]

        If you need the latest datapoint for multiple time series, simply give a list of ids. Note that we are using
        external ids here, but either will work::

            >>> from cognite.client import CogniteClient
            >>> c = CogniteClient()
            >>> res = c.datapoints.retrieve_latest(external_id=["abc", "def"])
            >>> latest_abc = res[0][0]
            >>> latest_def = res[1][0]
    """
    before = cognite.client.utils._time.timestamp_to_ms(before) if before else None
    all_ids = self._process_ids(id, external_id, wrap_ids=True)
    is_single_id = self._is_single_identifier(id, external_id)
    if before:
        for id in all_ids:
            id.update({"before": before})
    res = self._post(url_path=self._RESOURCE_PATH + "/latest", json={"items": all_ids}).json()["items"]
    if is_single_id:
        return Datapoints._load(res[0], cognite_client=self._cognite_client)
    return DatapointsList._load(res, cognite_client=self._cognite_client)
def _get_datapoints_with_paging(
    self,
    start: int,
    end: int,
    ts_item: Dict[str, Any],
    aggregates: List[str],
    granularity: str,
    include_outside_points: bool,
    limit: int,
) -> Datapoints:
    is_aggregated = aggregates or "aggregates" in ts_item
    per_request_limit = self.client._DPS_LIMIT_AGG if is_aggregated else self.client._DPS_LIMIT
    limit_next_request = per_request_limit
    next_start = start
    datapoints = Datapoints()
    all_datapoints = Datapoints()
    while (
        (len(all_datapoints) == 0 or len(datapoints) == per_request_limit)
        and end > next_start
        and len(all_datapoints) < (limit or float("inf"))
    ):
        datapoints = self._get_datapoints(
            next_start, end, ts_item, aggregates, granularity, include_outside_points, limit_next_request
        )
        if len(datapoints) == 0:
            break

        if limit:
            remaining_datapoints = limit - len(datapoints)
            if remaining_datapoints < per_request_limit:
                limit_next_request = remaining_datapoints

        latest_timestamp = int(datapoints.timestamp[-1])
        next_start = latest_timestamp + (
            cognite.client.utils._time.granularity_to_ms(granularity) if granularity else 1
        )
        all_datapoints._extend(datapoints)
    return all_datapoints
def retrieve(
    self, expression: str, start: Union[int, str, datetime], end: Union[int, str, datetime], limit: int = None
) -> Datapoints:
    """Calculate the result of a function on time series.

    Args:
        expression (str): Function to be calculated.
        start (Union[int, str, datetime]): Inclusive start.
        end (Union[int, str, datetime]): Exclusive end.
        limit (int): Number of datapoints to retrieve.

    Returns:
        Datapoints: A Datapoints object containing the calculated data.

    Examples:

        >>> from cognite.client.experimental import CogniteClient
        >>> c = CogniteClient()
        >>> dps = c.datapoints.synthetic.retrieve(expression="TS{id:123} + TS{externalId:'abc'}", start="2w-ago", end="now")
    """
    if limit is None or limit == -1:
        limit = float("inf")

    query = {
        "expression": expression,
        "start": cognite.client.utils._time.timestamp_to_ms(start),
        "end": cognite.client.utils._time.timestamp_to_ms(end),
    }
    datapoints = Datapoints()
    while True:
        query["limit"] = min(limit, self._DPS_LIMIT)
        resp = self._post(url_path=self._SYNTHETIC_RESOURCE_PATH + "/query", json={"items": [query]})
        data = resp.json()["items"][0]
        datapoints._extend(Datapoints._load(data, expected_fields=["value"]))
        limit -= len(data["datapoints"])
        if len(data["datapoints"]) < self._DPS_LIMIT or limit <= 0:
            break
        query["start"] = data["datapoints"][-1]["timestamp"] + 1
    return datapoints
def store_partial_result(self, raw_data, start, end):
    expected_fields = self.aggregates or ["value"]

    if self.include_outside_points and raw_data["datapoints"]:
        # assumes first query has full start/end range
        copy_data = copy.copy(raw_data)  # shallow copy
        if raw_data["datapoints"][0]["timestamp"] < start:
            if not self.point_before:
                copy_data["datapoints"] = raw_data["datapoints"][:1]
                self.point_before = Datapoints._load(
                    copy_data, expected_fields, cognite_client=self.client._cognite_client
                )
            raw_data["datapoints"] = raw_data["datapoints"][1:]
        if raw_data["datapoints"] and raw_data["datapoints"][-1]["timestamp"] >= end:
            if not self.point_after:
                copy_data["datapoints"] = raw_data["datapoints"][-1:]
                self.point_after = Datapoints._load(
                    copy_data, expected_fields, cognite_client=self.client._cognite_client
                )
            raw_data["datapoints"] = raw_data["datapoints"][:-1]

    self.results.append(Datapoints._load(raw_data, expected_fields, cognite_client=self.client._cognite_client))
    last_timestamp = raw_data["datapoints"] and raw_data["datapoints"][-1]["timestamp"]
    return len(raw_data["datapoints"]), last_timestamp
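
# Hypothetical walk-through of store_partial_result for a task constructed with
# include_outside_points=True (names and values below are illustrative only):
# with start=2000 and end=4000, the point at 1000 is peeled off into point_before, the point at
# 4000 into point_after, and only the two in-range points land in self.results.
#
#   raw_data = {"id": 1, "externalId": "abc", "isString": False,
#               "datapoints": [{"timestamp": 1000, "value": 1.0},
#                              {"timestamp": 2000, "value": 2.0},
#                              {"timestamp": 3000, "value": 3.0},
#                              {"timestamp": 4000, "value": 4.0}]}
#   n, last = task.store_partial_result(raw_data, start=2000, end=4000)  # n == 2, last == 3000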
def replicate_datapoints(
    client_src: CogniteClient,
    client_dst: CogniteClient,
    ts_external_id: str,
    limit: Optional[int] = None,
    partition_size: int = 100000,
    mock_run: bool = False,
    job_id: int = 1,
    src_datapoint_transform: Optional[Callable[[Datapoint], Datapoint]] = None,
    timerange_transform: Optional[Callable[[Tuple[int, int]], Tuple[int, int]]] = None,
    start: Union[int, str] = None,
    end: Union[int, str] = None,
    value_manipulation_lambda_fnc: str = None,
) -> Tuple[bool, int]:
    """
    Copies data points from the source tenant into the destination project, for the given time series.

    If data points already exist in the destination for the time series, only the newer data points in the source are
    copied over.

    Args:
        client_src: The client corresponding to the source project.
        client_dst: The client corresponding to the destination project.
        ts_external_id: The external id of the time series to replicate datapoints for
        limit: The maximum number of data points to copy
        partition_size: The maximum number of datapoints to retrieve per request
        mock_run: If true, only retrieves data points from source and does not insert into destination
        job_id: The batch number being processed
        src_datapoint_transform: Function to apply to all source datapoints before inserting into destination
        timerange_transform: Function to set the time range boundaries (start, end) arbitrarily.
        start: Timestamp to start replication onwards from; if not specified starts at most recent datapoint
        end: If specified, limits replication to datapoints earlier than the end time
        value_manipulation_lambda_fnc: A basic lambda function can be provided to manipulate datapoints as a string.

    Returns:
        A tuple of the success status (True if no failures) and the number of datapoints successfully replicated
    """
    try:
        latest_dst_dp = client_dst.datapoints.retrieve_latest(external_id=ts_external_id)
        latest_src_dp = client_src.datapoints.retrieve_latest(external_id=ts_external_id)
    except CogniteAPIError as exc:
        logging.error(f"Job {job_id}: Failed for external id {ts_external_id}. {exc}")
        return False, 0

    if not latest_src_dp:
        return True, 0

    if src_datapoint_transform:
        latest_src_dp = Datapoints(timestamp=[src_datapoint_transform(latest_src_dp[0]).timestamp])

    _start, _end = _get_time_range(latest_src_dp, latest_dst_dp)

    start = _start if start is None else timestamp_to_ms(start)
    end = _end if end is None else timestamp_to_ms(end)

    if timerange_transform:
        start, end = timerange_transform(start, end)

    # Api Restrictions
    start = max(start, 31536000000)  # 1971

    logging.debug(f"Job {job_id}: Ext_id: {ts_external_id} Retrieving datapoints between {start} and {end}")
    datapoints_count = 0
    while start < end:
        num_to_fetch = partition_size if limit is None else min(partition_size, limit - datapoints_count)
        if num_to_fetch == 0:
            break

        try:
            datapoints = client_src.datapoints.retrieve(
                external_id=ts_external_id, start=start, end=end, limit=num_to_fetch
            )
            if not datapoints:
                break

            if src_datapoint_transform:
                transformed_values = []
                transformed_timestamps = []
                for src_datapoint in datapoints:
                    transformed_datapoint = src_datapoint_transform(src_datapoint)
                    transformed_timestamps.append(transformed_datapoint.timestamp)
                    transformed_values.append(transformed_datapoint.value)
                datapoints = Datapoints(timestamp=transformed_timestamps, value=transformed_values)

            if value_manipulation_lambda_fnc:
                transformed_values = []
                transformed_timestamps = []
                lambda_fnc = evaluate_lambda_function(value_manipulation_lambda_fnc)
                if lambda_fnc:
                    for src_datapoint in datapoints:
                        try:
                            transformed_timestamps.append(src_datapoint.timestamp)
                            transformed_values.append(lambda_fnc(src_datapoint.value))
                        except Exception as e:
                            logging.error(
                                f"Could not manipulate the datapoint (value={src_datapoint.value},"
                                + f" timestamp={src_datapoint.timestamp}). Error: {e}"
                            )
                    datapoints = Datapoints(timestamp=transformed_timestamps, value=transformed_values)

            if not mock_run:
                client_dst.datapoints.insert(datapoints, external_id=ts_external_id)
        except CogniteAPIError as exc:
            logging.error(f"Job {job_id}: Failed for external id {ts_external_id}. {exc}")
            return False, datapoints_count
        else:
            datapoints_count += len(datapoints)
            start = datapoints[-1].timestamp + 1

    logging.debug(f"Job {job_id}: Ext_id: {ts_external_id} Number of datapoints: {datapoints_count}")

    return True, datapoints_count
def retrieve_dataframe(
    self,
    start: Union[int, str, datetime],
    end: Union[int, str, datetime],
    aggregates: List[str],
    granularity: str,
    id: Union[int, List[int], Dict[str, Union[int, List[str]]], List[Dict[str, Union[int, List[str]]]]] = None,
    external_id: Union[
        str, List[str], Dict[str, Union[int, List[str]]], List[Dict[str, Union[int, List[str]]]]
    ] = None,
    limit: int = None,
    include_aggregate_name=True,
    complete: str = None,
) -> "pandas.DataFrame":
    """Get a pandas dataframe describing the requested data.

    Note that you cannot specify the same ids/external_ids multiple times.

    Args:
        start (Union[int, str, datetime]): Inclusive start.
        end (Union[int, str, datetime]): Exclusive end.
        aggregates (List[str]): List of aggregate functions to apply.
        granularity (str): The granularity to fetch aggregates at. e.g. '1s', '2h', '10d'.
        id (Union[int, List[int], Dict[str, Any], List[Dict[str, Any]]]): Id or list of ids. Can also be object specifying aggregates. See example below.
        external_id (Union[str, List[str], Dict[str, Any], List[Dict[str, Any]]]): External id or list of external ids. Can also be object specifying aggregates. See example below.
        limit (int): Maximum number of datapoints to return for each time series.
        include_aggregate_name (bool): Include 'aggregate' in the column name. Defaults to True and should only be set to False when only a single aggregate is requested per id/externalId.
        complete (str): Post-processing of the dataframe.
            Pass 'fill' to insert missing entries into the index, and complete data where possible (supports interpolation, stepInterpolation, count, sum, totalVariation).
            Pass 'fill,dropna' to additionally drop rows in which any aggregate for any time series has missing values (typically rows at the start and end for interpolation aggregates).
            This option guarantees that all returned dataframes have the exact same shape and no missing values anywhere, and is only supported for aggregates sum, count, totalVariation, interpolation and stepInterpolation.

    Returns:
        pandas.DataFrame: The requested dataframe

    Examples:

        Get a pandas dataframe::

            >>> from cognite.client import CogniteClient
            >>> c = CogniteClient()
            >>> df = c.datapoints.retrieve_dataframe(id=[1,2,3], start="2w-ago", end="now",
            ...     aggregates=["average","sum"], granularity="1h")

        Get a pandas dataframe with the index regularly spaced at 1 minute intervals, missing values completed and
        without the aggregate name in the columns::

            >>> from cognite.client import CogniteClient
            >>> c = CogniteClient()
            >>> df = c.datapoints.retrieve_dataframe(id=[1,2,3], start="2w-ago", end="now",
            ...     aggregates=["interpolation"], granularity="1m", include_aggregate_name=False, complete="fill,dropna")
    """
    pd = utils._auxiliary.local_import("pandas")

    if id is not None:
        id_dpl = self.retrieve(
            id=id, start=start, end=end, aggregates=aggregates, granularity=granularity, limit=limit
        )
        id_df = id_dpl.to_pandas(column_names="id")
    else:
        id_df = pd.DataFrame()
        id_dpl = DatapointsList([])

    if external_id is not None:
        external_id_dpl = self.retrieve(
            external_id=external_id,
            start=start,
            end=end,
            aggregates=aggregates,
            granularity=granularity,
            limit=limit,
        )
        external_id_df = external_id_dpl.to_pandas()
    else:
        external_id_df = pd.DataFrame()
        external_id_dpl = DatapointsList([])

    df = pd.concat([id_df, external_id_df], axis="columns")

    complete = [s.strip() for s in (complete or "").split(",")]
    if set(complete) - {"fill", "dropna", ""}:
        raise ValueError("complete should be 'fill', 'fill,dropna' or Falsy")

    if "fill" in complete and df.shape[0] > 1:
        ag_used_by_id = {
            dp.id: [attr for attr, _ in dp._get_non_empty_data_fields(get_empty_lists=True)]
            for dpl in [id_dpl, external_id_dpl]
            for dp in (dpl.data if isinstance(dpl, DatapointsList) else [dpl])
        }
        ts_meta = self._cognite_client.time_series.retrieve_multiple(
            ids=[id for id, aggs_used in ag_used_by_id.items() if "interpolation" in aggs_used]
        )
        is_step_dict = {
            str(field): bool(ts.is_step) for ts in ts_meta for field in [ts.id, ts.external_id] if field
        }
        df = self._dataframe_fill(df, granularity, is_step_dict)

        if "dropna" in complete:
            self._dataframe_safe_dropna(df, set([ag for id, ags in ag_used_by_id.items() for ag in ags]))

    if not include_aggregate_name:
        Datapoints._strip_aggregate_names(df)

    return df
def retrieve(
    self,
    expression: Union[str, "sympy.Expr"],
    start: Union[int, str, datetime],
    end: Union[int, str, datetime],
    limit: int = None,
    variables: Dict[str, Union[str, TimeSeries]] = None,
    aggregate: str = None,
    granularity: str = None,
) -> Datapoints:
    """Calculate the result of a function on time series.

    Args:
        expression (Union[str, sympy.Expr]): Function to be calculated. Supports both strings and sympy expressions. Strings can have either the API `ts{}` syntax, or contain variable names to be replaced using the `variables` parameter.
        start (Union[int, str, datetime]): Inclusive start.
        end (Union[int, str, datetime]): Exclusive end.
        limit (int): Number of datapoints to retrieve.
        variables (Dict[str, Union[str, TimeSeries]]): An optional map of symbol replacements.
        aggregate (str): Use this aggregate when replacing entries from `variables`; does not affect time series given in the `ts{}` syntax.
        granularity (str): Use this granularity with the aggregate.

    Returns:
        Datapoints: A Datapoints object containing the calculated data.

    Examples:

        Request a synthetic time series query with direct syntax:

            >>> from cognite.client.experimental import CogniteClient
            >>> c = CogniteClient()
            >>> dps = c.datapoints.synthetic.retrieve(expression="TS{id:123} + TS{externalId:'abc'}", start="2w-ago", end="now")

        Use variables to re-use an expression:

            >>> from cognite.client.experimental import CogniteClient
            >>> c = CogniteClient()
            >>> vars = {"A": "my_ts_external_id", "B": c.time_series.retrieve(id=1)}
            >>> dps = c.datapoints.synthetic.retrieve(expression="A+B", start="2w-ago", end="now", variables=vars)

        Use sympy to build complex expressions:

            >>> from cognite.client.experimental import CogniteClient
            >>> c = CogniteClient()
            >>> from sympy import symbols, cos, pi
            >>> a = symbols('a')
            >>> dps = c.datapoints.synthetic.retrieve(pi * cos(a), start="2w-ago", end="now", variables={"a": "my_ts_external_id"}, aggregate='interpolation', granularity='1m')
    """
    if limit is None or limit == -1:
        limit = float("inf")

    expression, short_expression = SyntheticDatapointsAPI._build_expression(
        expression, variables, aggregate, granularity
    )
    query = {
        "expression": expression,
        "start": cognite.client.utils._time.timestamp_to_ms(start),
        "end": cognite.client.utils._time.timestamp_to_ms(end),
    }
    datapoints = Datapoints(value=[], error=[])
    datapoints.external_id = short_expression  # for dataframe readability
    while True:
        query["limit"] = min(limit, self._DPS_LIMIT)
        resp = self._post(url_path=self._SYNTHETIC_RESOURCE_PATH + "/query", json={"items": [query]})
        data = resp.json()["items"][0]
        datapoints._extend(Datapoints._load(data, expected_fields=["value", "error"]))
        limit -= len(data["datapoints"])
        if len(data["datapoints"]) < self._DPS_LIMIT or limit <= 0:
            break
        query["start"] = data["datapoints"][-1]["timestamp"] + 1
    return datapoints
def mock_cogcli_datapoints_retrieve_single():
    with monkeypatch_cognite_client() as cogmock:
        cogmock.datapoints.retrieve.return_value = Datapoints(
            id=1, external_id="1", value=[1, 2, 3], timestamp=[1000, 2000, 3000]
        )
        yield
def mock_cogcli_datapoints_query():
    with monkeypatch_cognite_client() as cogmock:
        cogmock.datapoints.query.return_value = [
            DatapointsList([Datapoints(id=1, external_id="1", value=[1, 2, 3], timestamp=[1000, 2000, 3000])])
        ]
        yield
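
# Hypothetical test built on the fixture above (assuming it is registered with @pytest.fixture):
# monkeypatch_cognite_client makes CogniteClient() return the configured mock, so the canned
# DatapointsList comes back without any network calls. The test name and query argument are
# illustrative only.
def test_datapoints_query_returns_canned_response(mock_cogcli_datapoints_query):
    client = CogniteClient()
    res = client.datapoints.query([])
    assert res[0][0].value == [1, 2, 3]
    assert res[0][0].timestamp == [1000, 2000, 3000]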