def __next__(self):
    if self._current_index < self.number_of_steps:
        number_of_threads_in_current_step = min(
            self.number_of_threads,
            self.number_of_steps - self._current_index)
        with concurrent.futures.ThreadPoolExecutor() as executor:
            threads = [
                executor.submit(
                    MultiThreadTimestampIterator.get_data_from_file,
                    self.data, self.current_dataset + i)
                for i in range(number_of_threads_in_current_step)
            ]
            # Gather the per-file results in submission order
            data_from_multiple_files = tuple(thread.result() for thread in threads)
        stacked_data_from_multiple_files = np.hstack(data_from_multiple_files)
        selection = self.__get_selection(number_of_threads_in_current_step)
        data_chunk = DataChunk(data=stacked_data_from_multiple_files,
                               selection=selection)
        self._current_index += number_of_threads_in_current_step
        self.current_dataset += number_of_threads_in_current_step
        del stacked_data_from_multiple_files
        return data_chunk
    raise StopIteration
def __next__(self): """Return the next data chunk or raise a StopIteration exception if all chunks have been retrieved.""" next_chunk = [] # Determine the range of channels to be read start_index = self.current_fileindex stop_index = start_index + 1 #self.buffer_size if stop_index > self.data.get_number_of_files(): stop_index = self.data.get_number_of_files() # Read the data from all current channels for i in range(start_index, stop_index): next_chunk.append(self.data.read_channel(i)) # Read a single HTK file with the data of one electrode next_chunk_size = len(next_chunk) # If didn't read any channels then return None, None if next_chunk_size == 0: raise StopIteration # If we had data, then determine the chunk location and convert the data to numpy, and return else: self.current_fileindex = stop_index next_chunk = np.asarray(next_chunk) if self.time_axis_first: next_chunk = np.swapaxes(next_chunk, 0, 1) else: next_chunk_location = np.s_[start_index:stop_index, ...] if self.__has_bands: next_chunk_location = np.s_[:, start_index:stop_index, :] else: next_chunk_location = np.s_[:, start_index:stop_index] next_chunk = next_chunk[:,:,0] #print(next_chunk.shape, next_chunk_location) return DataChunk(next_chunk, next_chunk_location)
def __next__(self):
    if self._current_index < self.number_of_steps:
        data_from_file = self.__get_timestamps()
        selection = self._get_selection()
        data_chunk = DataChunk(data=data_from_file, selection=selection)
        self._current_index += 1
        self.current_dataset += 1
        del data_from_file
        return data_chunk
    raise StopIteration
def __next__(self): """ Return in each iteration the data from a single file """ if self.__curr_index < len(channel_files): newfp = np.memmap(channel_files[self.__curr_index], dtype='float64', mode='r', shape=(self.num_steps, )) curr_data = newfp[:] i = self.__curr_index self.__curr_index += 1 del newfp return DataChunk(data=curr_data, selection=np.s_[:, i]) else: raise StopIteration
def __next__(self):
    if self._current_index < self.number_of_steps:
        data_from_file = self.__get_data_from_file()
        selection = self._get_selection()
        data_chunk = DataChunk(data=data_from_file, selection=selection)
        self._current_index += 1
        self.current_file += 1
        if self.current_file >= self.number_of_files_in_single_dataset:
            self.current_dataset += 1
            self.current_file = 0
        del data_from_file
        return data_chunk
    raise StopIteration
def __next__(self): """ Return in each iteration a fully occupied data chunk of self.chunk_shape values at a random location within the matrix. Chunks are non-overlapping. REMEMBER: h5py does not support all fancy indexing that numpy does so we need to make sure our selection can be handled by the backend. """ if self.__chunks_created < self.num_chunks: data = np.random.rand(np.prod(self.chunk_shape)).reshape( self.chunk_shape) xmin = np.random.randint(0, int( self.shape[0] / self.chunk_shape[0]), 1)[0] * self.chunk_shape[0] xmax = xmin + self.chunk_shape[0] ymin = np.random.randint(0, int( self.shape[1] / self.chunk_shape[1]), 1)[0] * self.chunk_shape[1] ymax = ymin + self.chunk_shape[1] self.__chunks_created += 1 return DataChunk(data=data, selection=np.s_[xmin:xmax, ymin:ymax]) else: raise StopIteration
def __next__(self): """ Return in each iteration a fully occupied data chunk of self.chunk_shape values at selected location within the data """ if self.__chunks_created < self.chunk_count: chunk_i = self.__chunks_created chunk_index = self.chunk_index_array[chunk_i] tmin, xmin, ymin, zmin = chunk_index * self.chunk_shape tmax, xmax, ymax, zmax = chunk_index * self.chunk_shape + self.chunk_shape tmax, xmax, ymax, zmax = np.clip([tmax, xmax, ymax, zmax], np.zeros(len(self.shape), dtype=int), self.shape) selection = np.s_[tmin:tmax, xmin:xmax, ymin:ymax, zmin:zmax] data = self.data[selection] self.__chunks_created += 1 return DataChunk(data=data, selection=selection) else: raise StopIteration
def __next__(self):
    if self._current_index < self.number_of_steps:
        number_of_threads_in_current_step = min(
            self.number_of_threads,
            self.number_of_files_in_single_dataset - self.current_file)
        with concurrent.futures.ThreadPoolExecutor() as executor:
            threads = [
                executor.submit(MultiThreadDataIterator.get_data_from_file,
                                self.data, self.current_dataset,
                                self.current_file + i)
                for i in range(number_of_threads_in_current_step)
            ]
            # Gather the per-file results in submission order
            data_from_multiple_files = tuple(thread.result() for thread in threads)
        stacked_data_from_multiple_files = np.hstack(data_from_multiple_files)
        selection = self.get_selection(
            number_of_threads=number_of_threads_in_current_step,
            current_dataset=self.current_dataset,
            dataset_file_length=self.dataset_file_length,
            current_file=self.current_file,
            number_of_rows=self.number_of_rows)
        data_chunk = DataChunk(data=stacked_data_from_multiple_files,
                               selection=selection)
        self._current_index += number_of_threads_in_current_step
        self.current_file += number_of_threads_in_current_step
        if self.current_file >= self.number_of_files_in_single_dataset:
            self.current_dataset += 1
            self.current_file = 0
        del stacked_data_from_multiple_files
        return data_chunk
    raise StopIteration
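# Self-contained sketch of the fan-out pattern used by both multi-threaded
# iterators above: submit one file-read per thread, gather the results in
# submission order, and np.hstack them into a single chunk. The load_column
# function is a stand-in for get_data_from_file.
import concurrent.futures
import numpy as np

def load_column(i):
    # placeholder for a per-file read; returns one column of data
    return np.full((5, 1), i, dtype='float64')

with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = [executor.submit(load_column, i) for i in range(4)]
    columns = tuple(f.result() for f in futures)

stacked = np.hstack(columns)
print(stacked.shape)  # (5, 4): columns stay in submission order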
def test_datachunk_astype(self):
    obj = DataChunk(data=np.arange(3), selection=np.s_[0:3])
    newtype = np.dtype('int16')
    obj_astype = obj.astype(newtype)
    self.assertNotEqual(id(obj), id(obj_astype))
    self.assertEqual(obj_astype.dtype, np.dtype(newtype))
def test_datachunk_deepcopy(self):
    obj = DataChunk(data=np.arange(3), selection=np.s_[0:3])
    obj_copy = deepcopy(obj)
    self.assertNotEqual(id(obj), id(obj_copy))
    self.assertNotEqual(id(obj.data), id(obj_copy.data))
    self.assertNotEqual(id(obj.selection), id(obj_copy.selection))
def test_astype(self):
    temp1 = DataChunk(np.arange(10).reshape(5, 2))
    temp2 = temp1.astype('float32')
    self.assertEqual(temp2.dtype, np.dtype('float32'))
def test_dtype(self):
    temp = DataChunk(np.arange(10).astype('int'))
    temp_dtype = temp.dtype
    self.assertEqual(temp_dtype, np.dtype('int'))
def test_len_operator_with_data(self):
    temp = DataChunk(np.arange(10).reshape(5, 2))
    self.assertEqual(len(temp), 5)
def test_len_operator_no_data(self):
    temp = DataChunk()
    self.assertEqual(len(temp), 0)
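# A short usage sketch of the DataChunk behaviors exercised by the tests
# above, assuming the hdmf API (from hdmf.data_utils import DataChunk).
import numpy as np
from hdmf.data_utils import DataChunk

chunk = DataChunk(data=np.arange(10).reshape(5, 2), selection=np.s_[0:5, 0:2])
print(len(chunk))                     # 5: length of the first axis
print(chunk.dtype)                    # dtype of the wrapped array
print(chunk.astype('float32').dtype)  # astype returns a new DataChunk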