Exemple #1
0
 def test_human_readable_byte_size(self):
     # Verifies the human readable rendering produced by bytes_to_string
     # for a value in the gigabyte range and for zero.

     # 1393500160 bytes is about 1.394 GB; the expected rendering is "1.3GB".
     n_bytes = 1393500160
     rendered = bytes_to_string(n_bytes)
     failure_msg = (
         "this number of bytes should result in 1.3GB and not in %s"
         % bytes_to_string(n_bytes)
     )
     self.assertEqual(rendered, "1.3GB", failure_msg)

     # Zero bytes must still be rendered with the unit suffix.
     rendered_zero = bytes_to_string(0)
     self.assertEqual(rendered_zero, "0B", '0 bytes should result in "0B"')
Exemple #2
0
    def _init_in_memory_chunks(self, size):
        """Provide the ``self._in_memory_chunks`` buffer for ``size`` rows.

        An existing buffer is kept when its element count matches ``size``.
        Otherwise a float32, C-ordered array of shape
        ``(size, self.data_producer.dimension())`` is allocated: in RAM when
        the required memory fits into the currently available memory, else
        (unless ``self.oom_strategy == 'raise'``) as a memory map backed by a
        temporary file.

        Parameters
        ----------
        size : int
            Length of the first axis of the buffer.

        Raises
        ------
        MemoryError
            If the required memory exceeds the available memory and
            ``self.oom_strategy`` is ``'raise'``.
        """
        # Local import: only needed to close the descriptor returned by mkstemp.
        import os

        # check if we need to allocate memory.
        # NOTE(review): ndarray.size is the total number of elements, while the
        # buffer is allocated with shape (size, dimension); this comparison
        # therefore only matches when both numbers coincide -- confirm intent.
        if hasattr(self, '_in_memory_chunks') and self._in_memory_chunks.size == size:
            self.logger.info("re-use in memory data.")
            return
        elif self._check_resume_iteration() and not self._in_memory_chunks_set:
            # Resuming without data that was kept in memory: the buffer is
            # simply re-created below; no warning is emitted on purpose.
            pass

        available_mem = psutil.virtual_memory().available
        required_mem = self._calculate_required_memory(size)

        if required_mem <= available_mem:
            self._in_memory_chunks = np.empty(shape=(size, self.data_producer.dimension()),
                                              order='C', dtype=np.float32)
            return

        if self.oom_strategy == 'raise':
            message = ('K-means failed to load all the data (%s required, %s available) into memory. '
                       'Consider using a larger stride or set the oom_strategy to \'memmap\' which works '
                       'with a memmapped temporary file.'
                       % (bytes_to_string(required_mem), bytes_to_string(available_mem)))
            self.logger.warning(message)
            raise MemoryError(message)

        self.logger.warning('K-means failed to load all the data (%s required, %s available) into memory '
                            'and now uses a memmapped temporary file which is comparably slow. '
                            'Consider using a larger stride.'
                            % (bytes_to_string(required_mem), bytes_to_string(available_mem)))
        # mkstemp returns an *open* OS-level descriptor together with the path.
        # np.memmap opens the file again by path, so close the descriptor here
        # instead of leaking it.
        fd, tmp_path = tempfile.mkstemp()
        os.close(fd)
        self._in_memory_chunks = np.memmap(tmp_path, mode="w+",
                                           shape=(size, self.data_producer.dimension()), order='C',
                                           dtype=np.float32)
Exemple #3
0
 def _init_in_memory_chunks(self, size):
     """Allocate the ``self._in_memory_chunks`` buffer for ``size`` rows.

     The buffer is a float32, C-ordered array of shape
     ``(size, self.data_producer.dimension())``. It is allocated in RAM when
     the required memory fits into the currently available memory; otherwise
     (unless ``self.oom_strategy == 'raise'``) it is a memory map backed by a
     temporary file.

     Parameters
     ----------
     size : int
         Length of the first axis of the buffer.

     Raises
     ------
     MemoryError
         If the required memory exceeds the available memory and
         ``self.oom_strategy`` is ``'raise'``.
     """
     # Local import: only needed to close the descriptor returned by mkstemp.
     import os

     available_mem = psutil.virtual_memory().available
     required_mem = self._calculate_required_memory(size)

     if required_mem <= available_mem:
         self._in_memory_chunks = np.empty(
             shape=(size, self.data_producer.dimension()),
             order='C',
             dtype=np.float32)
         return

     if self.oom_strategy == 'raise':
         message = (
             'K-means failed to load all the data (%s required, %s available) into memory. '
             'Consider using a larger stride or set the oom_strategy to \'memmap\' which works '
             'with a memmapped temporary file.' %
             (bytes_to_string(required_mem),
              bytes_to_string(available_mem)))
         self.logger.warning(message)
         raise MemoryError(message)

     self.logger.warning(
         'K-means failed to load all the data (%s required, %s available) into memory '
         'and now uses a memmapped temporary file which is comparably slow. '
         'Consider using a larger stride.' %
         (bytes_to_string(required_mem),
          bytes_to_string(available_mem)))
     # mkstemp hands back an *open* OS-level descriptor plus the path.
     # np.memmap re-opens the file by path, so the descriptor is closed here
     # rather than being leaked.
     fd, tmp_path = tempfile.mkstemp()
     os.close(fd)
     self._in_memory_chunks = np.memmap(
         tmp_path,
         mode="w+",
         shape=(size, self.data_producer.dimension()),
         order='C',
         dtype=np.float32)