Example #1
0
    def load_feed(cls,
                  root,
                  start_time: datetime,
                  end_time: datetime,
                  product: str,
                  fmt: str = "parquet") -> dd:
        """Load a feed of real data for one product within a time window.

        One file is read per hour, from ``start_time.hour`` up to and
        including ``end_time.hour``. Each file is expected at
        ``root + "<YYYY-MM-DD>/<HH>." + fmt`` where the date is taken from
        ``start_time``. Every file is passed through
        ``DataSplitter.get_product`` and ``DataLoader().format_dd`` (which,
        per the original description, formats the timestamp, price and size
        columns) and then trimmed to rows whose ``'time'`` value lies
        strictly between ``start_time`` and ``end_time``.

        Args:
            root: Prefix that is concatenated directly with the relative
                file name; it must already end with a path separator.
            start_time: Exclusive lower bound of the window.
            end_time: Exclusive upper bound of the window. Assumed to be on
                the same day as ``start_time``.
            product: Product identifier forwarded to
                ``DataSplitter.get_product``.
            fmt: File extension. ``"parquet"`` is read with
                ``pd.read_parquet``; anything else with ``pd.read_csv``.

        Returns:
            The concatenation of all per-file frames, or an empty
            ``pd.DataFrame`` when no hourly file falls in the window.
        """
        # Assume data is on the same day and just hours apart for now
        hour_delta = end_time.hour - start_time.hour

        # TODO: introduce wrapping over days
        # TODO: split this function up!
        # TODO: BUG: struggles to load small blobs of data
        day_prefix = start_time.date().isoformat()
        files_to_load = []
        for offset in range(hour_delta + 1):
            filename = f"{day_prefix}/{start_time.hour + offset:02d}.{fmt}"
            cls.logger.debug(filename)
            files_to_load.append(filename)

        # Parquet gets its dedicated reader; every other format is read as CSV.
        read_file = pd.read_parquet if fmt == "parquet" else pd.read_csv

        frames = []
        for filename in files_to_load:
            # Plain concatenation on purpose: callers pass ``root`` with its
            # trailing separator already included.
            file_df = read_file(root + filename)
            file_df = DataSplitter.get_product(product, file_df)
            file_df = DataLoader().format_dd(file_df)
            times = file_df['time']
            # Both bounds are exclusive.
            file_df = file_df[(start_time < times) & (times < end_time)]
            frames.append(file_df)

        # pd.concat raises on an empty list, so keep returning an empty frame
        # when there was nothing to load.
        if not frames:
            return pd.DataFrame()

        # DataFrame.append was removed in pandas 2.0; a single concat also
        # avoids re-copying the accumulated frame on every iteration.
        return pd.concat(frames)
Example #2
0
    def load_split_data(real_root, start_time, end_time, product):
        """Load the feed for ``product`` and split it by event type.

        The feed is loaded with ``DataLoader().load_feed`` using its default
        file format, passed through ``DataSplitter.get_product`` and then
        handed to the three ``DataSplitter`` extractors.

        Returns:
            A 3-tuple ``(orders, trades, cancellations)`` holding the results
            of ``DataSplitter.get_orders``, ``DataSplitter.get_trades`` and
            ``DataSplitter.get_cancellations`` respectively.
        """
        loader = DataLoader()
        product_feed = DataSplitter.get_product(
            product,
            loader.load_feed(real_root, start_time, end_time, product),
        )

        # Tuple elements are evaluated left to right, so the extractors run
        # in the order orders -> trades -> cancellations.
        return (
            DataSplitter.get_orders(product_feed),
            DataSplitter.get_trades(product_feed),
            DataSplitter.get_cancellations(product_feed),
        )