Example #1
0
 def __iter__(self):
     """Yield concatenated groups of ``self.n_batch`` source items.

     Items from ``self.source_datapipe`` are gathered in arrival order. Each
     time exactly ``self.n_batch`` of them have been collected, they are
     merged with ``df_wrapper.concat`` and the result is yielded. Whatever
     is left over once the source is exhausted is merged and yielded as a
     final, smaller group; nothing is yielded for an empty remainder.
     """
     pending = []
     for frame in self.source_datapipe:
         pending.append(frame)
         if len(pending) != self.n_batch:
             continue
         yield df_wrapper.concat(pending)
         # Rebind instead of clearing so the list handed to concat is never
         # mutated after the fact.
         pending = []
     # Flush the trailing, partially filled group.
     if pending:
         yield df_wrapper.concat(pending)
Example #2
0
 def __iter__(self):
     """Yield re-batched output after shuffling all elements of the source.

     Every item of ``self.source_datapipe`` is split into its elements via
     ``df_wrapper.get_item(item, i)`` for ``i`` in
     ``range(df_wrapper.get_len(item))``. The whole source is consumed and
     held in memory before anything is yielded. The collected elements are
     shuffled in place with ``random.shuffle`` and then regrouped: every
     run of ``batch_size`` elements is merged with ``df_wrapper.concat`` and
     yielded, followed by one smaller group for any remainder.

     ``batch_size`` is the length reported for the first source item.
     """
     batch_size = None
     elements = []
     for frame in self.source_datapipe:
         if batch_size is None:
             # The first incoming item fixes the size of every output group.
             batch_size = df_wrapper.get_len(frame)
         elements.extend(
             df_wrapper.get_item(frame, idx)
             for idx in range(df_wrapper.get_len(frame))
         )
     random.shuffle(elements)

     pending = []
     for element in elements:
         pending.append(element)
         if len(pending) != batch_size:
             continue
         yield df_wrapper.concat(pending)
         pending = []
     # Flush the trailing, partially filled group.
     if pending:
         yield df_wrapper.concat(pending)
Example #3
0
    def __iter__(self):
        """Yield re-batched output containing only elements accepted by ``filter_fn``.

        For each item ``df`` of ``self.source_datapipe`` and each position
        ``i`` in ``range(len(df.index))``, the slice ``df[i:i + 1]`` is
        recorded together with the result of ``self.filter_fn(df.iloc[i])``.
        The whole source is consumed (and ``filter_fn`` is called for every
        position) before anything is yielded.

        Slices whose recorded result is truthy are then regrouped: every
        run of ``batch_size`` accepted slices is merged with
        ``df_wrapper.concat`` and yielded, followed by one smaller group for
        any remainder. ``batch_size`` is ``len(df.index)`` of the first
        source item.

        NOTE(review): the ``.index`` / ``.iloc`` usage suggests the source
        items are pandas DataFrames -- confirm against the caller.
        """
        batch_size = None
        decisions = []  # (single-position slice, filter_fn result) pairs
        for frame in self.source_datapipe:
            n_positions = len(frame.index)
            if batch_size is None:
                # The first incoming item fixes the size of every output group.
                batch_size = n_positions
            for pos in range(n_positions):
                decisions.append(
                    (frame[pos:pos + 1], self.filter_fn(frame.iloc[pos]))
                )

        pending = []
        for piece, keep in decisions:
            if not keep:
                continue
            pending.append(piece)
            if len(pending) == batch_size:
                yield df_wrapper.concat(pending)
                pending = []
        # Flush the trailing, partially filled group.
        if pending:
            yield df_wrapper.concat(pending)
Example #4
0
    def _returnIfTrue(self, data):
        """Apply ``filter_fn`` to ``data`` and return what passes the filter.

        ``self.filter_fn`` is called as
        ``filter_fn(data, *self.args, **self.kwargs)``. Its result is handled
        in one of two ways:

        * If ``df_wrapper.is_column`` accepts the result, it is treated as a
          per-element mask: for every truthy entry produced by
          ``df_wrapper.iterate``, ``df_wrapper.get_item(data, idx)`` is
          collected, and the collected pieces are merged with
          ``df_wrapper.concat``. ``None`` is returned when no entry is truthy.
        * Otherwise the result must be a ``bool``: ``data`` is returned for
          ``True`` and ``None`` for ``False``.

        Raises:
            ValueError: if the result is neither a column nor a ``bool``.
        """
        condition = self.filter_fn(data, *self.args, **self.kwargs)

        if not df_wrapper.is_column(condition):
            # Scalar path: only a genuine bool is accepted as a verdict.
            if not isinstance(condition, bool):
                raise ValueError("Boolean output is required for `filter_fn` of FilterIterDataPipe, got", type(condition))
            return data if condition else None

        # We are operating on DataFrames filter here
        kept = [
            df_wrapper.get_item(data, idx)
            for idx, mask in enumerate(df_wrapper.iterate(condition))
            if mask
        ]
        return df_wrapper.concat(kept) if kept else None