Ejemplo n.º 1
0
    def set_computed_values(cls, values: dict) -> dict:
        """
        Fill in dataId, lastCachedAt and sortValue on ``values`` when absent.

        Each field is read through ``cls.get_by_name_or_alias`` and is only
        computed when that lookup returns something falsy. ``values`` is
        updated in place and is also the return value.
        """
        lookup = cls.get_by_name_or_alias
        node_id = lookup(values, "data_id")
        source_id = lookup(values, "source_id")

        if not node_id:
            # The id is the md5 hex digest of the source id followed by the path.
            digest = md5(source_id.encode())
            digest.update(lookup(values, "path").encode())
            node_id = digest.hexdigest()
            values["dataId"] = node_id

        cached_at = lookup(values, "last_cached_at")
        if not cached_at and fs.data_exists(node_id):
            # Data is already on disk: take the timestamp from the file itself.
            cache_file = fs.data_path(node_id)
            values["lastCachedAt"] = fs.get_file_last_modified(
                cache_file, format="unix_milliseconds")

        if not lookup(values, "sort_value"):
            # One past the largest sort value in this source, never below 1.
            highest = 0
            for sibling in SOURCES[source_id].nodes.values():
                highest = max(highest, sibling.sort_value)
            values["sortValue"] = highest + 1

        return values
Ejemplo n.º 2
0
 async def get_data(self, ignore_cache=False) -> pd.DataFrame:
     """
     Load the data for this node, also adding it to the cache.

     A cached copy is returned when one exists, unless ``ignore_cache`` is
     true. Otherwise the data is requested from ``self.source._get_data`` for
     ``self.path``, stored with ``fs.save_data``, and ``self.last_cached_at``
     is refreshed from the stored file before the data is returned.
     """
     if fs.data_exists(self.data_id) and not ignore_cache:
         return fs.read_data(self.data_id)

     # Inspect the returned value rather than the function: this also awaits
     # async loaders that sit behind a plain wrapper (a non-async decorator or
     # a callable object), which iscoroutinefunction() does not recognise.
     data = self.source._get_data(self.path)
     if inspect.isawaitable(data):
         data = await data

     fs.save_data(self.data_id, data)
     self.last_cached_at = fs.get_file_last_modified(
         fs.data_path(self.data_id), format="unix_milliseconds")
     return data
Ejemplo n.º 3
0
 async def build_profile_report(self) -> None:
     """
     Build a pandas profile report. This is done in a separate process because it can be quite slow.

     Nothing is built when a report for ``self.data_id`` already exists.

     Raises:
         HTTPException: with status 500 when any step fails or the report is
             still missing after the builder has run. An HTTPException raised
             by one of the steps is passed through unchanged.
     """
     try:
         if fs.profile_report_exists(self.data_id):
             return

         # NOTE(review): presumably this call makes sure the file at
         # fs.data_path(self.data_id) exists before the builder reads it.
         await self.get_data()
         await execute_profile_report_builder(
             data_path=fs.data_path(self.data_id),
             output_path=fs.profile_report_path(self.data_id),
             title=f"{self.source.name} - {self.path}",
         )
         # The builder finishing is not treated as proof of success; the
         # report file has to exist afterwards.
         if not fs.profile_report_exists(self.data_id):
             raise Exception(
                 "The profile report failed to build for some reason")
     except HTTPException:
         # Keep the status code and detail chosen further down the stack
         # instead of flattening them into a generic 500.
         raise
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e)) from e