def test_human_readable_byte_size(self):
    n_bytes = 1393500160  # about 1.394 GB
    self.assertEqual(bytes_to_string(n_bytes), "1.3GB",
                     "this number of bytes should result in 1.3GB and not in %s"
                     % bytes_to_string(n_bytes))
    self.assertEqual(bytes_to_string(0), "0B", "0 bytes should result in \"0B\"")
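# bytes_to_string is exercised above but not defined in this excerpt. A minimal
# sketch that satisfies both assertions, assuming binary (1024-based) units and
# one decimal place for non-byte values, could look like this (the name mirrors
# the test; the rounding behaviour is an assumption, not necessarily the
# library's real implementation):
def bytes_to_string(n_bytes):
    """Return a human-readable size string, e.g. 1393500160 -> '1.3GB', 0 -> '0B'."""
    for unit in ('B', 'KB', 'MB', 'GB', 'TB', 'PB'):
        if n_bytes < 1024:
            # plain byte counts are printed without a decimal point ("0B")
            return ('%d%s' if unit == 'B' else '%.1f%s') % (n_bytes, unit)
        n_bytes /= 1024.0
    return '%.1fEB' % n_bytes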
def _init_in_memory_chunks(self, size):
    # check if we need to allocate memory.
    if hasattr(self, '_in_memory_chunks') and self._in_memory_chunks.size == size:
        assert hasattr(self, '_in_memory_chunks')
        self.logger.info("re-use in memory data.")
        return
    elif self._check_resume_iteration() and not self._in_memory_chunks_set:
        pass
        #self.logger.warning('Resuming kmeans iteration without the setting "keep_data=True", will re-create'
        #                    ' the linear in-memory data. This is inefficient! Consider setting keep_data=True,'
        #                    ' when you intend to resume.')
    available_mem = psutil.virtual_memory().available
    required_mem = self._calculate_required_memory(size)
    if required_mem <= available_mem:
        self._in_memory_chunks = np.empty(shape=(size, self.data_producer.dimension()),
                                          order='C', dtype=np.float32)
    else:
        if self.oom_strategy == 'raise':
            self.logger.warning('K-means failed to load all the data (%s required, %s available) into memory. '
                                'Consider using a larger stride or set the oom_strategy to \'memmap\' which works '
                                'with a memmapped temporary file.'
                                % (bytes_to_string(required_mem), bytes_to_string(available_mem)))
            raise MemoryError()
        else:
            self.logger.warning('K-means failed to load all the data (%s required, %s available) into memory '
                                'and now uses a memmapped temporary file which is comparably slow. '
                                'Consider using a larger stride.'
                                % (bytes_to_string(required_mem), bytes_to_string(available_mem)))
            self._in_memory_chunks = np.memmap(tempfile.mkstemp()[1], mode="w+",
                                               shape=(size, self.data_producer.dimension()),
                                               order='C', dtype=np.float32)
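# _calculate_required_memory is called above but not shown in this excerpt. A
# plausible sketch (an assumption, not the actual implementation) sizes the
# C-ordered float32 buffer that the method allocates:
def _calculate_required_memory(self, size):
    import numpy as np
    # size frames times the producer's dimension, 4 bytes per float32 element
    return size * self.data_producer.dimension() * np.dtype(np.float32).itemsize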
def _init_in_memory_chunks(self, size):
    available_mem = psutil.virtual_memory().available
    required_mem = self._calculate_required_memory(size)
    if required_mem <= available_mem:
        self._in_memory_chunks = np.empty(
            shape=(size, self.data_producer.dimension()), order='C', dtype=np.float32)
    else:
        if self.oom_strategy == 'raise':
            self.logger.warning(
                'K-means failed to load all the data (%s required, %s available) into memory. '
                'Consider using a larger stride or set the oom_strategy to \'memmap\' which works '
                'with a memmapped temporary file.'
                % (bytes_to_string(required_mem), bytes_to_string(available_mem)))
            raise MemoryError()
        else:
            self.logger.warning(
                'K-means failed to load all the data (%s required, %s available) into memory '
                'and now uses a memmapped temporary file which is comparably slow. '
                'Consider using a larger stride.'
                % (bytes_to_string(required_mem), bytes_to_string(available_mem)))
            self._in_memory_chunks = np.memmap(
                tempfile.mkstemp()[1], mode="w+",
                shape=(size, self.data_producer.dimension()), order='C', dtype=np.float32)
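# Both variants of _init_in_memory_chunks follow the same pattern: compare the
# required buffer size against psutil's report of available RAM and fall back
# to a disk-backed np.memmap when the data does not fit. A self-contained
# sketch of that pattern (the function name and the fixed float32 dtype are
# illustrative assumptions, not part of the original class):
import tempfile

import numpy as np
import psutil

def allocate_chunk_buffer(n_frames, dimension, oom_strategy='memmap'):
    """Allocate an (n_frames, dimension) float32 buffer in RAM if it fits,
    otherwise memory-map it to a temporary file (or raise, depending on
    oom_strategy)."""
    required_mem = n_frames * dimension * np.dtype(np.float32).itemsize
    available_mem = psutil.virtual_memory().available
    if required_mem <= available_mem:
        return np.empty((n_frames, dimension), order='C', dtype=np.float32)
    if oom_strategy == 'raise':
        raise MemoryError('need %d bytes, only %d available' % (required_mem, available_mem))
    # mkstemp returns (fd, path); the path is handed to np.memmap, which
    # creates a writable, file-backed array of the requested shape.
    return np.memmap(tempfile.mkstemp()[1], mode='w+',
                     shape=(n_frames, dimension), order='C', dtype=np.float32)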