def _read_feather(self,
                  kind: str,
                  run_id: str,
                  raise_on_failed_deserialize: bool = False) -> list:
    """Load and deserialize every feather file stored for one run.

    Globs ``{self.path}/{kind}/{run_id}.feather/*.feather`` on ``self.fs``.
    Each file stem is converted to a snake_case name, looked up in a
    ``class_to_filename(cls) -> cls`` mapping built from ``list_schemas()``,
    and the table is converted to objects by ``self._handle_table_nautilus``.

    Parameters
    ----------
    kind : str
        Path component directly under ``self.path`` that holds the run
        directories.
    run_id : str
        Run identifier; files are read from ``{run_id}.feather/``.
    raise_on_failed_deserialize : bool, default False
        If True, re-raise any exception hit while converting a table.
        Otherwise the failure is printed and that file is skipped.

    Returns
    -------
    list
        The objects from all successfully converted files, sorted by their
        ``ts_init`` attribute.
    """
    class_mapping: Dict[str, type] = {
        class_to_filename(cls): cls
        for cls in list_schemas()
    }
    data = {}
    # Iterate the glob result directly; copying it into a list first adds nothing.
    for path in self.fs.glob(
            f"{self.path}/{kind}/{run_id}.feather/*.feather"):
        cls_name = camel_to_snake_case(pathlib.Path(path).stem).replace(
            "__", "_")
        df = read_feather(path=path, fs=self.fs)
        if df is None:
            print(f"No data for {cls_name}")
            continue
        try:
            # The class lookup stays inside the ``try`` so that a file whose
            # name has no matching schema is reported (or re-raised) exactly
            # like any other conversion failure.
            objs = self._handle_table_nautilus(table=df,
                                               cls=class_mapping[cls_name],
                                               mappings={})
            data[cls_name] = objs
        except Exception as ex:
            if raise_on_failed_deserialize:
                raise
            print(f"Failed to deserialize {cls_name}: {ex}")
    # Flatten in a single pass: ``sum(lists, [])`` rebuilds the accumulated
    # list for every entry, which is quadratic in the total object count.
    flattened = [obj for objs in data.values() for obj in objs]
    return sorted(flattened, key=lambda x: x.ts_init)
 def list_partitions(self, cls_type: type):
     """Return the partition levels of the parquet dataset for ``cls_type``.

     The dataset is opened at ``self.path / "<name>.parquet"`` on ``self.fs``,
     where ``<name>`` is ``class_to_filename(cls_type)``.

     Returns a dict mapping each partition level's ``name`` to its ``keys``.
     Raises ``AssertionError`` if ``cls_type`` is not a class.
     """
     assert isinstance(cls_type,
                       type), "`cls_type` should be type, i.e. TradeTick"
     filename = f"{class_to_filename(cls_type)}.parquet"
     dataset = pq.ParquetDataset(self.path / filename, filesystem=self.fs)
     return {
         level.name: level.keys
         for level in dataset.partitions.levels
     }
# Example #3
# 0
def test_class_to_filename(s, expected):
    """Check that ``class_to_filename(s)`` yields ``expected``."""
    actual = class_to_filename(s)
    assert actual == expected