Ejemplo n.º 1
0
    def _get_dps_results(self) -> List[DatapointsList]:
        def custom_sort_key(x):
            if x.timestamp:
                return x.timestamp[0]
            return 0

        dps_lists = [DatapointsList([], cognite_client=self.client._cognite_client)] * len(self.query_ids)
        for q_id, dps_objects in self.query_id_to_datapoints_objects.items():
            ts_id_to_dps_objects = defaultdict(lambda: [])
            for dps_object in dps_objects:
                ts_id_to_dps_objects[dps_object.id].append(dps_object)

            dps_list = DatapointsList([], cognite_client=self.client._cognite_client)
            for ts_id, dps_objects in ts_id_to_dps_objects.items():
                dps = Datapoints()
                for dps_object in sorted(dps_objects, key=custom_sort_key):
                    dps._extend(dps_object)
                if self.query_id_to_include_outside_points[q_id]:
                    dps = self._remove_duplicates(dps)
                query_limit = self.query_id_to_limit[q_id]
                if query_limit and len(dps) > query_limit:
                    dps = dps[:query_limit]
                dps_list.append(dps)
            dps_list = self._sort_dps_list_by_task_order(dps_list, self.query_id_to_tasks[q_id])
            dps_lists[self.query_ids.index(q_id)] = dps_list
        return dps_lists
Ejemplo n.º 2
0
    def retrieve_latest(
        self,
        id: Union[int, List[int]] = None,
        external_id: Union[str, List[str]] = None,
        before: Union[int, str, datetime] = None,
    ) -> Union[Datapoints, DatapointsList]:
        """`Get the latest datapoint for one or more time series <https://docs.cognite.com/api/v1/#operation/getLatest>`_

        Args:
            id (Union[int, List[int]]: Id or list of ids.
            external_id (Union[str, List[str]): External id or list of external ids.
            before: Union[int, str, datetime]: Get latest datapoint before this time.

        Returns:
            Union[Datapoints, DatapointsList]: A Datapoints object containing the requested data, or a list of such objects.

        Examples:

            Getting the latest datapoint in a time series. This method returns a Datapoints object, so the datapoint will
            be the first element::

                >>> from cognite.client import CogniteClient
                >>> c = CogniteClient()
                >>> res = c.datapoints.retrieve_latest(id=1)[0]

            You can also get the first datapoint before a specific time::

                >>> from cognite.client import CogniteClient
                >>> c = CogniteClient()
                >>> res = c.datapoints.retrieve_latest(id=1, before="2d-ago")[0]

            If you need the latest datapoint for multiple time series simply give a list of ids. Note that we are
            using external ids here, but either will work::

                >>> from cognite.client import CogniteClient
                >>> c = CogniteClient()
                >>> res = c.datapoints.retrieve_latest(external_id=["abc", "def"])
                >>> latest_abc = res[0][0]
                >>> latest_def = res[1][0]
        """
        before = cognite.client.utils._time.timestamp_to_ms(before) if before else None
        all_ids = self._process_ids(id, external_id, wrap_ids=True)
        is_single_id = self._is_single_identifier(id, external_id)
        if before:
            for id in all_ids:
                id.update({"before": before})

        tasks = [
            {"url_path": self._RESOURCE_PATH + "/latest", "json": {"items": chunk}}
            for chunk in utils._auxiliary.split_into_chunks(all_ids, self._RETRIEVE_LATEST_LIMIT)
        ]
        tasks_summary = utils._concurrency.execute_tasks_concurrently(
            self._post, tasks, max_workers=self._config.max_workers
        )
        if tasks_summary.exceptions:
            raise tasks_summary.exceptions[0]
        res = tasks_summary.joined_results(lambda res: res.json()["items"])
        if is_single_id:
            return Datapoints._load(res[0], cognite_client=self._cognite_client)
        return DatapointsList._load(res, cognite_client=self._cognite_client)
Ejemplo n.º 3
0
    def _sort_dps_list_by_task_order(self, dps_list: DatapointsList, queries: List[_DPTask]):
        order = {}
        for i, q in enumerate(queries):
            identifier = utils._auxiliary.unwrap_identifer(q.ts_item)
            order[identifier] = i

        def custom_sort_order(item):
            if item.id in order:
                return order[item.id]
            return order[item.external_id]

        return DatapointsList(sorted(dps_list, key=custom_sort_order), cognite_client=self.client._cognite_client)
Ejemplo n.º 4
0
    def retrieve_latest(
        self,
        id: Union[int, List[int]] = None,
        external_id: Union[str, List[str]] = None,
        before: Union[int, str, datetime] = None,
    ) -> Union[Datapoints, DatapointsList]:
        """Get the latest datapoint for one or more time series

        Args:
            id (Union[int, List[int]]: Id or list of ids.
            external_id (Union[str, List[str]): External id or list of external ids.
            before: Union[int, str, datetime]: Get latest datapoint before this time.

        Returns:
            Union[Datapoints, DatapointsList]: A Datapoints object containing the requested data, or a list of such objects.

        Examples:

            Getting the latest datapoint in a time series. This method returns a Datapoints object, so the datapoint will
            be the first element::

                >>> from cognite.client import CogniteClient
                >>> c = CogniteClient()
                >>> res = c.datapoints.retrieve_latest(id=1)[0]

            You can also get the first datapoint before a specific time::

                >>> from cognite.client import CogniteClient
                >>> c = CogniteClient()
                >>> res = c.datapoints.retrieve_latest(id=1, before="2d-ago")[0]

            If you need the latest datapoint for multiple time series simply give a list of ids. Note that we are
            using external ids here, but either will work::

                >>> from cognite.client import CogniteClient
                >>> c = CogniteClient()
                >>> res = c.datapoints.retrieve_latest(external_id=["abc", "def"])
                >>> latest_abc = res[0][0]
                >>> latest_def = res[1][0]
        """
        before = cognite.client.utils._time.timestamp_to_ms(before) if before else None
        all_ids = self._process_ids(id, external_id, wrap_ids=True)
        is_single_id = self._is_single_identifier(id, external_id)
        if before:
            for id in all_ids:
                id.update({"before": before})

        res = self._post(url_path=self._RESOURCE_PATH + "/latest", json={"items": all_ids}).json()["items"]
        if is_single_id:
            return Datapoints._load(res[0], cognite_client=self._cognite_client)
        return DatapointsList._load(res, cognite_client=self._cognite_client)
Ejemplo n.º 5
0
    def retrieve_dataframe(
        self,
        start: Union[int, str, datetime],
        end: Union[int, str, datetime],
        aggregates: List[str],
        granularity: str,
        id: Union[int, List[int], Dict[str, Union[int, List[str]]], List[Dict[str, Union[int, List[str]]]]] = None,
        external_id: Union[
            str, List[str], Dict[str, Union[int, List[str]]], List[Dict[str, Union[int, List[str]]]]
        ] = None,
        limit: int = None,
        include_aggregate_name=True,
        complete: str = None,
    ) -> "pandas.DataFrame":
        """Get a pandas dataframe describing the requested data.

        Note that you cannot specify the same ids/external_ids multiple times.

        Args:
            start (Union[int, str, datetime]): Inclusive start.
            end (Union[int, str, datetime]): Exclusive end.
            aggregates (List[str]): List of aggregate functions to apply.
            granularity (str): The granularity to fetch aggregates at. e.g. '1s', '2h', '10d'.
            id (Union[int, List[int], Dict[str, Any], List[Dict[str, Any]]]: Id or list of ids. Can also be object
                specifying aggregates. See example below.
            external_id (Union[str, List[str], Dict[str, Any], List[Dict[str, Any]]]): External id or list of external
                ids. Can also be object specifying aggregates. See example below.
            limit (int): Maximum number of datapoints to return for each time series.
            include_aggregate_name (bool): Include 'aggregate' in the column name. Defaults to True and should only be set to False when only a single aggregate is requested per id/externalId.
            complete (str): Post-processing of the dataframe.

                Pass 'fill' to insert missing entries into the index, and complete data where possible (supports interpolation, stepInterpolation, count, sum, totalVariation).

                Pass 'fill,dropna' to additionally drop rows in which any aggregate for any time series has missing values (typically rows at the start and end for interpolation aggregates).
                This option guarantees that all returned dataframes have the exact same shape and no missing values anywhere, and is only supported for aggregates sum, count, totalVariance, interpolation and stepInterpolation.

        Returns:
            pandas.DataFrame: The requested dataframe

        Examples:

            Get a pandas dataframe::

                >>> from cognite.client import CogniteClient
                >>> c = CogniteClient()
                >>> df = c.datapoints.retrieve_dataframe(id=[1,2,3], start="2w-ago", end="now",
                ...         aggregates=["average","sum"], granularity="1h")

            Get a pandas dataframe with the index regularly spaced at 1 minute intervals, missing values completed and without the aggregate name in the columns::

                >>> from cognite.client import CogniteClient
                >>> c = CogniteClient()
                >>> df = c.datapoints.retrieve_dataframe(id=[1,2,3], start="2w-ago", end="now",
                ...         aggregates=["interpolation"], granularity="1m", include_aggregate_name=False, complete="fill,dropna")
        """
        pd = utils._auxiliary.local_import("pandas")

        if id is not None:
            id_dpl = self.retrieve(
                id=id, start=start, end=end, aggregates=aggregates, granularity=granularity, limit=limit
            )
            id_df = id_dpl.to_pandas(column_names="id")
        else:
            id_df = pd.DataFrame()
            id_dpl = DatapointsList([])

        if external_id is not None:
            external_id_dpl = self.retrieve(
                external_id=external_id,
                start=start,
                end=end,
                aggregates=aggregates,
                granularity=granularity,
                limit=limit,
            )
            external_id_df = external_id_dpl.to_pandas()
        else:
            external_id_df = pd.DataFrame()
            external_id_dpl = DatapointsList([])

        df = pd.concat([id_df, external_id_df], axis="columns")

        complete = [s.strip() for s in (complete or "").split(",")]
        if set(complete) - {"fill", "dropna", ""}:
            raise ValueError("complete should be 'fill', 'fill,dropna' or Falsy")

        if "fill" in complete and df.shape[0] > 1:
            ag_used_by_id = {
                dp.id: [attr for attr, _ in dp._get_non_empty_data_fields(get_empty_lists=True)]
                for dpl in [id_dpl, external_id_dpl]
                for dp in (dpl.data if isinstance(dpl, DatapointsList) else [dpl])
            }
            ts_meta = self._cognite_client.time_series.retrieve_multiple(
                ids=[id for id, aggs_used in ag_used_by_id.items() if "interpolation" in aggs_used]
            )
            is_step_dict = {
                str(field): bool(ts.is_step) for ts in ts_meta for field in [ts.id, ts.external_id] if field
            }
            df = self._dataframe_fill(df, granularity, is_step_dict)

            if "dropna" in complete:
                self._dataframe_safe_dropna(df, set([ag for id, ags in ag_used_by_id.items() for ag in ags]))

        if not include_aggregate_name:
            Datapoints._strip_aggregate_names(df)

        return df
Ejemplo n.º 6
0
def mock_cogcli_datapoints_query():
    with monkeypatch_cognite_client() as cogmock:
        cogmock.datapoints.query.return_value = [
            DatapointsList([Datapoints(id=1, external_id="1", value=[1, 2, 3], timestamp=[1000, 2000, 3000])])
        ]
        yield
Ejemplo n.º 7
0
 def _get_dps_results(self, task_lists: List[List[_DPTask]]) -> List[DatapointsList]:
     return [
         DatapointsList([t.result() for t in tl if not t.missing], cognite_client=self.client._cognite_client)
         for tl in task_lists
     ]