Example #1
0
    def _compute_post_memory_usage(self, X: DataFrame):
        """Record and log the approximate memory footprint of the processed data.

        Stores the summed result of ``get_approximate_df_mem_usage`` (called
        with ``sample_ratio=0.2``) in ``self.post_memory_usage`` and the
        per-row average in ``self.post_memory_usage_per_row``. The footprint
        is then expressed as a fraction of currently available memory plus
        the post- and pre-processing usage, logged, and a second message is
        logged when that fraction exceeds 0.15.

        Args:
            X: The data frame whose memory usage is measured.
        """
        num_rows = len(X)
        approx_usage = get_approximate_df_mem_usage(X, sample_ratio=0.2)
        self.post_memory_usage = approx_usage.sum()
        self.post_memory_usage_per_row = self.post_memory_usage / num_rows

        # Presumably the data's own footprint is added back so the ratio is
        # taken against the memory that was free before the data existed --
        # TODO confirm intent.
        mem_free = psutil.virtual_memory().available
        mem_baseline = mem_free + self.post_memory_usage + self.pre_memory_usage
        post_memory_usage_percent = self.post_memory_usage / mem_baseline

        # Level 20 matches INFO on the stdlib logging scale (assuming
        # self._log follows that scale).
        self._log(
            20,
            f'\tTrain Data (Processed) Memory Usage: {round(self.post_memory_usage / 1e6, 2)} MB ({round(post_memory_usage_percent * 100, 1)}% of available memory)',
        )

        # Level 30 matches WARNING on the same scale.
        if post_memory_usage_percent > 0.15:
            self._log(
                30,
                f'\tWarning: Data size post feature transformation consumes {round(post_memory_usage_percent * 100, 1)}% of available memory. Consider increasing memory or subsampling the data to avoid instability.',
            )
Example #2
0
 def _estimate_memory_usage(self, X, **kwargs):
     """Return ten times the approximate memory usage of ``X``.

     Args:
         X: Data passed to ``get_approximate_df_mem_usage``.
         **kwargs: Accepted but not used by this implementation.

     Returns:
         The summed approximate memory usage of ``X`` multiplied by 10.
     """
     data_mem_usage = get_approximate_df_mem_usage(X).sum()
     return 10 * data_mem_usage
Example #3
0
 def _estimate_data_memory_usage(self):
     """Return the combined approximate memory usage of ``self.X`` and ``self.y``.

     ``self.y`` is converted with ``to_frame()`` before being measured, so
     both inputs go through ``get_approximate_df_mem_usage``.

     Returns:
         The sum of the two approximate memory usage totals.
     """
     total_mem = get_approximate_df_mem_usage(self.X).sum()
     total_mem = total_mem + get_approximate_df_mem_usage(self.y.to_frame()).sum()
     return total_mem
Example #4
0
 def _estimate_memory_usage(self, X, **kwargs):
     """Return a rough memory requirement derived from the size of ``X``.

     The estimate is seven times the approximate data size plus a quarter
     of the data size for each class.

     Args:
         X: Data passed to ``get_approximate_df_mem_usage``.
         **kwargs: Accepted but not used by this implementation.

     Returns:
         The estimated memory requirement.
     """
     # self.num_classes could be None after initialization if it's a
     # regression problem, so any falsy value counts as a single class.
     class_count = self.num_classes or 1
     data_mem_usage = get_approximate_df_mem_usage(X).sum()

     # TODO: Extremely crude approximation, can be vastly improved
     base_cost = data_mem_usage * 7
     per_class_cost = data_mem_usage / 4 * class_count
     return base_cost + per_class_cost