def __iter__(self):
    """Yield batches of (input, output) from the data loader, with the inputs normalized.

    Each batch is moved to ``self.device`` and normalized inside a side CUDA
    stream; the batch prepared during the previous iteration is yielded
    before the current stream is made to wait on that side stream.
    Nothing is yielded when the data loader produces no batches.

    :return: batch of (input, output).
    :rtype: (torch.Tensor, torch.Tensor)
    """
    stream = cuda.Stream(self.device)

    # Batch prepared by the previous iteration that has not been yielded yet.
    # ``None`` means nothing has been pre-loaded so far (first iteration, or
    # an empty data loader).
    ready = None

    for next_input, next_target in self.data_loader:
        with cuda.stream(stream):
            # Pre-load a batch of input and targets to the GPU, and normalize
            # the input.
            next_input = next_input.to(self.device, non_blocking=True)
            next_target = next_target.to(self.device, non_blocking=True)
            next_input = next_input.float()
            next_input = next_input.sub_(self.data_mean).div_(self.data_std)

        if ready is not None:
            # Yield the batch that was pre-loaded on the previous iteration.
            yield ready

        # Make the current stream wait for the work queued on the side stream
        # before the freshly prepared batch becomes the one to hand out.
        cuda.current_stream().wait_stream(stream)
        ready = (next_input, next_target)

    # Hand out the last prepared batch; skipped when the loader was empty
    # (previously this raised UnboundLocalError).
    if ready is not None:
        yield ready
def Infer2Img(im, pred, meta, sz=1.0, info=''):  # BGR
    """Run ``pred`` on ``im`` and return an annotated, resized image plus the kept instances.

    The raw ``'instances'`` result of ``pred(im)`` is moved to the CPU and
    filtered through ``Sift_CLS`` (mode ``'include'`` with the selection
    ``[1, 2, 'door_unknown']``). When the filter changes the number of
    instances, the predicted classes before and after are printed. The kept
    instances are drawn with a ``Visualizer`` on the channel-reversed image,
    the elapsed time in milliseconds and ``info`` are written near the bottom
    edge, and the result is resized to the size returned by ``NewSZ(im, sz)``.

    NOTE(review): the trailing ``# BGR`` marker suggests ``im`` is a BGR
    image - confirm against callers.

    :param im: image array indexed as ``[row, col, channel]``.
    :param pred: callable returning a mapping that has an ``'instances'`` entry.
    :param meta: metadata handed to ``Sift_CLS`` and ``Visualizer``.
    :param sz: size specification forwarded to ``NewSZ``.
    :param info: text drawn in the bottom-left corner.
    :return: ``(resized annotated image, filtered instances)``.
    """
    started = time()
    cuda.current_stream().synchronize()

    raw = pred(im)['instances'].to('cpu')
    kept = Sift_CLS(raw, meta, [1, 2, 'door_unknown'], 'include')
    if len(raw) != len(kept):
        print(raw.pred_classes, '->', kept.pred_classes)

    drawer = Visualizer(
        im[:, :, ::-1],
        metadata=meta,
        scale=1.0,
        instance_mode=ColorMode.SEGMENTATION,
    )
    # Resolve the output size from the input image before `canvas` is built.
    sz = NewSZ(im, sz)

    canvas = drawer.draw_instance_predictions(kept).get_image()[:, :, ::-1]
    h, w = canvas.shape[:2]
    canvas = cv2.UMat(canvas)  # for OpenCV 4.2+

    cuda.current_stream().synchronize()
    elapsed_ms = (time() - started) * 1000

    text_colour = (0, 255, 255)
    baseline = h - 8
    cv2.putText(canvas, '%.1fms' % elapsed_ms, (w - 75, baseline), 4, 0.5, text_colour, 1)
    cv2.putText(canvas, info, (5, baseline), 4, 0.5, text_colour, 1)
    return cv2.resize(canvas.get(), sz), kept
def cuda(
        self,
        device: Optional[Union[torch_device, str, int]] = None
) -> 'CudaFloatingPointTile':
    """Return this tile unchanged, provided its stream matches the current one.

    Args:
        device: device whose current stream is compared with the tile's
            stored stream.

    Returns:
        This same tile instance.

    Raises:
        CudaError: if the tile's stream differs from the current stream of
            ``device``.
    """
    own_stream = self.stream
    requested_stream = current_stream(device)
    if own_stream != requested_stream:
        raise CudaError("Cannot switch streams of existing Cuda tiles")

    return self
def cuda(
        self,
        device: Optional[Union[torch_device, str, int]] = None
) -> 'CudaAnalogTile':
    """Return this tile unchanged, provided its stream matches the current one.

    Args:
        device: device whose current stream is compared with the tile's
            stored stream.

    Returns:
        This same tile instance.

    Raises:
        CudaError: if the tile's stream differs from the current stream of
            ``device``.
    """
    own_stream = self.stream
    requested_stream = current_stream(device)
    if own_stream != requested_stream:
        raise CudaError('Cannot switch CUDA devices of existing Cuda tiles')

    return self
def cuda(
        self,
        device: Optional[Union[torch_device, str, int]] = None
) -> 'CudaInferenceTile':
    """Return this tile unchanged, provided its stream matches the current one.

    Args:
        device: device whose current stream is compared with the tile's
            stored stream.

    Returns:
        This same tile instance.

    Raises:
        ValueError: if the tile's stream differs from the current stream of
            ``device``.
    """
    own_stream = self.stream
    requested_stream = current_stream(device)
    if own_stream != requested_stream:
        raise ValueError(
            "Cannot switch CUDA devices of existing Cuda tiles")

    return self
def __init__(self, source_tile: FloatingPointTile):
    """Build a CUDA tile from an existing floating point tile.

    The source tile's ``rpu_config`` is deep-copied, the parent class is
    initialized with the source tile's sizes, bias and transposition flags,
    and ``source_tile.tile`` is wrapped in ``tiles.CudaFloatingPointTile``.
    The current CUDA stream and device are stored on the instance.

    Args:
        source_tile: tile whose configuration and underlying tile are used.

    Raises:
        CudaError: if ``cuda.is_compiled()`` reports no CUDA support.
    """
    if not cuda.is_compiled():
        raise CudaError('aihwkit has not been compiled with CUDA support')

    # Give the new tile its own instance of the rpu config.
    config_copy = deepcopy(source_tile.rpu_config)

    parent_args = (
        source_tile.out_size,
        source_tile.in_size,
        config_copy,
        source_tile.bias,
        source_tile.in_trans,
        source_tile.out_trans,
    )
    super().__init__(*parent_args)

    # Replace the tile created by the parent initializer with the CUDA one.
    self.tile = tiles.CudaFloatingPointTile(source_tile.tile)

    # Remember the CUDA stream and device that were current at creation.
    self.stream = current_stream()
    self.device = torch_device(current_device())
def _async_copy_stream(inputs, device_ids): nr_devs = len(device_ids) assert type(inputs) in (tuple, list) assert len(inputs) == nr_devs outputs = [] streams = [_get_stream(d) for d in device_ids] for i, dev, stream in zip(inputs, device_ids, streams): with cuda.device(dev): main_stream = cuda.current_stream() with cuda.stream(stream): outputs.append(async_copy_to(i, dev, main_stream=main_stream)) main_stream.wait_stream(stream) return outputs
def __init__(
        self,
        out_size: int,
        in_size: int,
        resistive_device: Optional[BaseResistiveDevice] = None,
        bias: bool = False,
        in_trans: bool = False,
        out_trans: bool = False
):
    """Create an analog tile and replace its underlying tile with a CUDA one.

    All arguments are forwarded unchanged to the parent initializer; the
    tile it creates is then wrapped in ``tiles.CudaAnalogTile``. The current
    CUDA stream and device are stored on the instance.

    Args:
        out_size: forwarded to the parent initializer.
        in_size: forwarded to the parent initializer.
        resistive_device: forwarded to the parent initializer.
        bias: forwarded to the parent initializer.
        in_trans: forwarded to the parent initializer.
        out_trans: forwarded to the parent initializer.

    Raises:
        RuntimeError: if ``cuda.is_compiled()`` reports no CUDA support.
    """
    cuda_available = cuda.is_compiled()
    if not cuda_available:
        raise RuntimeError(
            'aihwkit has not been compiled with CUDA support')

    super().__init__(out_size, in_size, resistive_device,
                     bias, in_trans, out_trans)

    # Wrap the tile set up by the parent initializer in its CUDA counterpart.
    parent_tile = self.tile
    self.tile = tiles.CudaAnalogTile(parent_tile)

    # Remember the CUDA stream and device that were current at creation.
    self.stream = current_stream()
    self.device = torch_device(current_device())
def __init__(self, source_tile: AnalogTile):
    """Build a CUDA tile from an existing analog tile.

    The source tile's ``resistive_device`` is deep-copied, the parent class
    is initialized with the source tile's sizes, bias and transposition
    flags, and ``source_tile.tile`` is wrapped in ``tiles.CudaAnalogTile``.
    The current CUDA stream and device are stored on the instance.

    Args:
        source_tile: tile whose configuration and underlying tile are used.

    Raises:
        RuntimeError: if ``cuda.is_compiled()`` reports no CUDA support.
    """
    if not cuda.is_compiled():
        raise RuntimeError(
            'aihwkit has not been compiled with CUDA support')

    # Give the new tile its own instance of the resistive device.
    device_copy = deepcopy(source_tile.resistive_device)

    parent_args = (
        source_tile.out_size,
        source_tile.in_size,
        device_copy,
        source_tile.bias,
        source_tile.in_trans,
        source_tile.out_trans,
    )
    super().__init__(*parent_args)

    # Replace the tile created by the parent initializer with the CUDA one.
    self.tile = tiles.CudaAnalogTile(source_tile.tile)

    # Remember the CUDA stream and device that were current at creation.
    self.stream = current_stream()
    self.device = torch_device(current_device())
def _async_copy_stream(inputs, device_ids): # nr_devs = len(device_ids) assert type(inputs) in (tuple, list) # assert len(inputs) == nr_devs outputs = [[] for i in range(len(inputs))] streams = [_get_stream(d) for d in device_ids] for i, dev, stream in zip(inputs, device_ids, streams): with cuda.device(dev): main_stream = cuda.current_stream() with cuda.stream(stream): for obj in i: outputs[dev].append( async_copy_to(obj, dev, main_stream=main_stream)) main_stream.wait_stream(stream) return outputs