예제 #1
0
 def __iter__(self):
     """Yield DataFrames rebuilt from the source rows in shuffled order.

     Every row of every DataFrame produced by ``self.source_datapipe`` is
     pulled out individually (via ``df_wrapper.get_item``) and held in
     memory, the full row list is shuffled in place with
     ``random.shuffle``, and the rows are then re-grouped and concatenated
     with ``df_wrapper.concat``.

     The output batch size is the row count of the *first* DataFrame read
     from the source; a final, smaller batch is emitted for any leftover
     rows. Nothing is yielded when the source produces no rows.
     """
     batch_size = None
     rows = []
     for frame in self.source_datapipe:
         n_rows = df_wrapper.get_len(frame)
         if batch_size is None:
             # Only the first DataFrame determines the output batch size.
             batch_size = n_rows
         rows.extend(df_wrapper.get_item(frame, idx) for idx in range(n_rows))

     random.shuffle(rows)

     pending = []
     for row in rows:
         pending.append(row)
         if len(pending) == batch_size:
             yield df_wrapper.concat(pending)
             pending = []
     # Flush whatever did not fill a complete batch.
     if pending:
         yield df_wrapper.concat(pending)
예제 #2
0
    def _returnIfTrue(self, data):
        """Apply ``self.filter_fn`` to ``data`` and return whatever passes.

        ``filter_fn`` is invoked as
        ``self.filter_fn(data, *self.args, **self.kwargs)`` and its result is
        interpreted in one of two ways:

        * If ``df_wrapper.is_column`` reports the result is a column, it is
          used as a per-row mask: each row of ``data`` whose mask entry is
          truthy is fetched with ``df_wrapper.get_item`` and the kept rows
          are joined with ``df_wrapper.concat``.
        * Otherwise the result must be a plain ``bool``; ``data`` itself is
          returned when it is ``True``.

        Returns:
            The concatenated kept rows (column mask case) or ``data``
            (boolean case) when something passes the filter; ``None`` when
            nothing does.

        Raises:
            ValueError: If ``filter_fn`` returns a value that is neither a
                column nor a ``bool``.
        """
        condition = self.filter_fn(data, *self.args, **self.kwargs)

        if df_wrapper.is_column(condition):
            # DataFrame filtering: keep only the rows whose mask entry is truthy.
            kept_rows = [
                df_wrapper.get_item(data, idx)
                for idx, mask in enumerate(df_wrapper.iterate(condition))
                if mask
            ]
            return df_wrapper.concat(kept_rows) if kept_rows else None

        if not isinstance(condition, bool):
            # Build one message string; passing the type as a second positional
            # argument would make str(exc) render as a tuple repr.
            raise ValueError(
                "Boolean output is required for `filter_fn` of FilterIterDataPipe, "
                f"got {type(condition)}"
            )
        return data if condition else None