Beispiel #1
0
    def __iter__(self):
        """Yield batches of (input, output) from the data loader, with the inputs normalized.

        The transfer and normalization of each batch are enqueued on a dedicated CUDA stream
        before the previously prepared batch is yielded. An empty data loader yields nothing.

        :return: batch of (input, output).
        :rtype: (torch.Tensor, torch.Tensor)
        """
        stream = cuda.Stream(self.device)
        # Batch that has been pre-loaded and synchronized but not yet yielded (None until the
        # first batch has been prepared, and forever if the data loader is empty).
        ready_batch = None
        for next_input, next_target in self.data_loader:
            with cuda.stream(stream):
                # Pre-load a batch of input and targets to the GPU, and normalize the input:
                next_input = next_input.to(self.device, non_blocking=True)
                next_target = next_target.to(self.device, non_blocking=True)
                next_input = next_input.float()
                next_input = next_input.sub_(self.data_mean).div_(self.data_std)
            if ready_batch is not None:
                yield ready_batch  # Yield the pre-loaded batch of input and targets.
            # On the first entry nothing has been pre-loaded yet, so nothing is yielded and the
            # loop simply goes on to pre-load a second batch.
            consumer_stream = cuda.current_stream()
            consumer_stream.wait_stream(stream)
            # The tensors were produced on the side stream but will be used on the consumer
            # stream: tell the caching allocator so their memory is not reused too early.
            next_input.record_stream(consumer_stream)
            next_target.record_stream(consumer_stream)
            ready_batch = (next_input, next_target)
        if ready_batch is not None:
            yield ready_batch
Beispiel #2
0
def Infer2Img(im, pred, meta, sz=1.0, info=''): # BGR
    """Run ``pred`` on an image, draw the kept instances and stamp timing text on it.

    The image channels are reversed before being handed to ``Visualizer`` and
    reversed again on the drawn result (the ``# BGR`` note on the signature
    suggests the input is BGR -- confirm with the caller).

    Args:
        im: image array indexed as ``[row, col, channel]``.
        pred: callable returning a mapping with an ``'instances'`` entry.
        meta: metadata object passed to ``Sift_CLS`` and ``Visualizer``.
        sz: size argument forwarded to ``NewSZ`` together with ``im``; the
            value returned by ``NewSZ`` is used as the ``cv2.resize`` target.
        info: text written near the bottom-left corner of the image.

    Returns:
        Tuple ``(resized annotated image, instances returned by Sift_CLS)``.
    """
    started = time()
    cuda.current_stream().synchronize()

    raw_instances = pred(im)['instances'].to('cpu')
    kept = Sift_CLS(raw_instances, meta, [1, 2, 'door_unknown'], 'include')
    if len(raw_instances) != len(kept):
        # Report when Sift_CLS changed the number of instances.
        print(raw_instances.pred_classes, '->', kept.pred_classes)

    painter = Visualizer(im[:, :, ::-1], metadata=meta, scale=1.0,
                         instance_mode=ColorMode.SEGMENTATION)
    sz = NewSZ(im, sz)  # computed from the input image, before it is redrawn
    drawn = painter.draw_instance_predictions(kept).get_image()[:, :, ::-1]

    h, w = drawn.shape[:2]
    canvas = cv2.UMat(drawn)  # for OpenCV 4.2+
    cuda.current_stream().synchronize()
    elapsed_ms = (time() - started) * 1000

    text_color = (0, 255, 255)
    cv2.putText(canvas, '%.1fms' % elapsed_ms, (w - 75, h - 8), 4, 0.5, text_color, 1)
    cv2.putText(canvas, info, (5, h - 8), 4, 0.5, text_color, 1)
    return cv2.resize(canvas.get(), sz), kept
Beispiel #3
0
    def cuda(
            self,
            device: Optional[Union[torch_device, str, int]] = None
    ) -> 'CudaFloatingPointTile':
        """Return this tile after checking that its stream is the current one.

        Args:
            device: forwarded to ``current_stream`` to select the stream that
                is compared against ``self.stream``.

        Returns:
            This same instance.

        Raises:
            CudaError: if ``self.stream`` differs from the current stream for
                ``device``.
        """
        own_stream = self.stream
        requested_stream = current_stream(device)
        if own_stream != requested_stream:
            raise CudaError("Cannot switch streams of existing Cuda tiles")
        return self
Beispiel #4
0
    def cuda(
            self,
            device: Optional[Union[torch_device, str, int]] = None
    ) -> 'CudaAnalogTile':
        """Return this tile, refusing the call if its stream is not the current one.

        Args:
            device: forwarded to ``current_stream``; the resulting stream is
                compared against the stream stored on the tile.

        Returns:
            This same instance.

        Raises:
            CudaError: if the stored stream and the current stream for
                ``device`` differ.
        """
        tile_stream = self.stream
        target_stream = current_stream(device)
        if tile_stream != target_stream:
            raise CudaError('Cannot switch CUDA devices of existing Cuda tiles')
        return self
Beispiel #5
0
    def cuda(
        self,
        device: Optional[Union[torch_device, str, int]] = None
    ) -> 'CudaInferenceTile':
        """Return this tile once its stream is confirmed to be the current one.

        Args:
            device: forwarded to ``current_stream`` to obtain the stream that
                ``self.stream`` is checked against.

        Returns:
            This same instance.

        Raises:
            ValueError: if ``self.stream`` is not equal to the current stream
                for ``device``.
        """
        stored_stream = self.stream
        wanted_stream = current_stream(device)
        if stored_stream != wanted_stream:
            message = "Cannot switch CUDA devices of existing Cuda tiles"
            raise ValueError(message)
        return self
Beispiel #6
0
    def __init__(self, source_tile: FloatingPointTile):
        """Build a CUDA tile from an existing ``FloatingPointTile``.

        Args:
            source_tile: tile whose sizes, flags, rpu config (deep-copied) and
                underlying ``tile`` object are used to build this instance.

        Raises:
            CudaError: if ``cuda.is_compiled()`` reports no CUDA support.
        """
        if not cuda.is_compiled():
            raise CudaError('aihwkit has not been compiled with CUDA support')

        # Work on a private copy so the source tile's rpu config is not shared.
        rpu_config_copy = deepcopy(source_tile.rpu_config)

        # Initialise the parent with the source's parameters, then swap in a
        # CUDA tile built from the source's underlying tile.
        super().__init__(
            source_tile.out_size,
            source_tile.in_size,
            rpu_config_copy,
            source_tile.bias,
            source_tile.in_trans,
            source_tile.out_trans,
        )
        self.tile = tiles.CudaFloatingPointTile(source_tile.tile)

        # Remember the stream and device that were current at construction.
        self.stream = current_stream()
        self.device = torch_device(current_device())
Beispiel #7
0
def _async_copy_stream(inputs, device_ids):
    nr_devs = len(device_ids)
    assert type(inputs) in (tuple, list)
    assert len(inputs) == nr_devs

    outputs = []
    streams = [_get_stream(d) for d in device_ids]
    for i, dev, stream in zip(inputs, device_ids, streams):
        with cuda.device(dev):
            main_stream = cuda.current_stream()
            with cuda.stream(stream):
                outputs.append(async_copy_to(i, dev, main_stream=main_stream))
            main_stream.wait_stream(stream)

    return outputs
def _async_copy_stream(inputs, device_ids):
    nr_devs = len(device_ids)
    assert type(inputs) in (tuple, list)
    assert len(inputs) == nr_devs

    outputs = []
    streams = [_get_stream(d) for d in device_ids]
    for i, dev, stream in zip(inputs, device_ids, streams):
        with cuda.device(dev):
            main_stream = cuda.current_stream()
            with cuda.stream(stream):
                outputs.append(async_copy_to(i, dev, main_stream=main_stream))
            main_stream.wait_stream(stream)

    return outputs
Beispiel #9
0
    def __init__(self,
                 out_size: int,
                 in_size: int,
                 resistive_device: Optional[BaseResistiveDevice] = None,
                 bias: bool = False,
                 in_trans: bool = False,
                 out_trans: bool = False):
        """Create the tile through the parent class and replace it with a CUDA tile.

        All arguments are forwarded unchanged to the parent constructor.

        Raises:
            RuntimeError: if ``cuda.is_compiled()`` reports no CUDA support.
        """
        cuda_compiled = cuda.is_compiled()
        if not cuda_compiled:
            raise RuntimeError('aihwkit has not been compiled with CUDA support')

        super().__init__(
            out_size, in_size, resistive_device, bias, in_trans, out_trans
        )

        # Wrap the tile created by the parent in its CUDA counterpart.
        parent_tile = self.tile
        self.tile = tiles.CudaAnalogTile(parent_tile)

        # Remember the stream and device that were current at construction.
        self.stream = current_stream()
        self.device = torch_device(current_device())
Beispiel #10
0
    def __init__(self, source_tile: AnalogTile):
        """Build a CUDA tile from an existing ``AnalogTile``.

        Args:
            source_tile: tile whose sizes, flags, resistive device
                (deep-copied) and underlying ``tile`` object are used to
                build this instance.

        Raises:
            RuntimeError: if ``cuda.is_compiled()`` reports no CUDA support.
        """
        if not cuda.is_compiled():
            raise RuntimeError('aihwkit has not been compiled with CUDA support')

        # Work on a private copy so the resistive device is not shared.
        device_copy = deepcopy(source_tile.resistive_device)

        # Initialise the parent with the source's parameters, then swap in a
        # CUDA tile built from the source's underlying tile.
        super().__init__(
            source_tile.out_size,
            source_tile.in_size,
            device_copy,
            source_tile.bias,
            source_tile.in_trans,
            source_tile.out_trans,
        )
        self.tile = tiles.CudaAnalogTile(source_tile.tile)

        # Remember the stream and device that were current at construction.
        self.stream = current_stream()
        self.device = torch_device(current_device())
Beispiel #11
0
def _async_copy_stream(inputs, device_ids):
    # nr_devs = len(device_ids)
    assert type(inputs) in (tuple, list)
    # assert len(inputs) == nr_devs

    outputs = [[] for i in range(len(inputs))]
    streams = [_get_stream(d) for d in device_ids]
    for i, dev, stream in zip(inputs, device_ids, streams):
        with cuda.device(dev):
            main_stream = cuda.current_stream()
            with cuda.stream(stream):
                for obj in i:
                    outputs[dev].append(
                        async_copy_to(obj, dev, main_stream=main_stream))
            main_stream.wait_stream(stream)

    return outputs