def binary_search_helper(dimension, logger, model_name="EDSR", device="cuda"):
    """
    Processes a random image and measures its processing time

    Parameters
    ----------
    dimension : int
        random image dimension.
    logger : logger
        keep logs.
    model_name : str, optional
        SR model name ("EDSR" or "RRDB"). The default is "EDSR".
    device : str, optional
        GPU or CPU. The default is 'cuda'.

    Returns
    -------
    total_time : float
        SR model processing time.

    """
    print('Before loading model: ')
    subprocess.run("gpustat", shell=True)
    print()
    total_time = 0
    try:
        model = None
        if model_name == "EDSR":
            model = md.load_edsr(device=device)
            print('After loading model: ')
            subprocess.run("gpustat", shell=True)
            print()
        elif model_name == "RRDB":
            model = md.load_rrdb(device=device)
        else:
            raise Exception("Unknown model...")
        model.eval()
        input_image = ut.random_image(dimension)
        if model_name == "RRDB":
            input_image = input_image[:, 2:, :, :]
        input_image = input_image.to(device)
        with torch.no_grad():
            start = time.time()
            print('Before processing: ')
            subprocess.run("gpustat", shell=True)
            output_image = model(input_image)
            print('After processing: ')
            subprocess.run("gpustat", shell=True)
            end = time.time()
            total_time = end - start
            ut.clear_cuda(input_image, output_image)
        model.cpu()
        del model
        print('After model shifting and deleting: ')
        subprocess.run("gpustat", shell=True)
    except RuntimeError as err:
        logger.error("Runtime error for dimension: {}x{}: {}".format(
            dimension, dimension, err))
        sys.exit(1)
    return total_time
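
The helper above is meant to run in its own process: the search routines further down launch binarysearch_helper.py through subprocess and parse its stdout for the timing. A minimal sketch of such a driver script, assuming the argument order "dimension model_name" used by those routines; the utils import name is an assumption.

# Hypothetical layout of binarysearch_helper.py (sketch only, not the original file);
# binary_search_helper from above is assumed to live in this same module.
import sys

import utils as ut  # assumed name of the module providing get_logger()

if __name__ == "__main__":
    dimension = int(sys.argv[1])
    model_name = sys.argv[2] if len(sys.argv) > 2 else "EDSR"
    logger = ut.get_logger()
    total_time = binary_search_helper(dimension, logger, model_name=model_name)
    # the parent process reads the timing back from stdout
    print(total_time)
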
Example n. 2
def trt_forward_chop_iterative(
    x,
    trt_engine_path=None,
    shave=10,
    min_size=1024,
    device="cuda",
    print_result=True,
    scale=4,
    use_fp16=False,
):
    """
    Forward chopping in an iterative way

    Parameters
    ----------
    x : tensor
        input image.
    trt_engine_path : str, optional
        path of the TRT engine. The default is None.
    shave : int, optional
        patch shave value. The default is 10.
    min_size : int, optional
        total patch size (dimension x dimension). The default is 1024.
    device : str, optional
        GPU or CPU. The default is 'cuda'.
    print_result : bool, optional
        print result or not. The default is True.
    scale : int, optional
        SR scale factor (hr = scale * lr). The default is 4.
    use_fp16 : bool, optional
        use FP16 precision or not. The default is False.

    Returns
    -------
    output : tensor
        output image.
    total_time : float
        total execution time.
    total_crop_time : float
        total cropping time.
    total_shift_time : float
        total GPU to CPU shifting time.
    total_clear_time : float
        total GPU clearing time.

    """
    dim = int(math.sqrt(min_size))  # getting patch dimension
    b, c, h, w = x.size()  # current image batch, channel, height, width
    device = device
    patch_count = 0
    output = torch.tensor(np.zeros((b, c, h * 4, w * 4))).numpy()
    total_time = 0
    total_crop_time = 0
    total_shift_time = 0
    total_clear_time = 0
    extra = x.clone().detach()
    with open(trt_engine_path, "rb") as f:
        runtime = trt.Runtime(trt.Logger(trt.Logger.WARNING))
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()
    new_i_s = 0
    stream = cuda.Stream()
    for i in range(0, h, dim - 2 * shave):
        new_j_s = 0
        new_j_e = 0
        for j in range(0, w, dim - 2 * shave):
            patch_count += 1
            h_s, h_e = i, min(h, i + dim)  # patch height start and end
            w_s, w_e = j, min(w, j + dim)  # patch width start and end
            lr = x[:, :, h_s:h_e, w_s:w_e]
            ba, ch, ht, wt = lr.shape
            print('\nx: {}\n'.format(x))
            print('\nlr: {}\n'.format(lr))
            input_lr = torch.tensor(lr).int()
            output_folder = "output_images"
            file_name = "data/test7.jpg".split("/")[-1].split(".")[0]
            ut.save_image(input_lr[0].int(),
                          output_folder,
                          ht,
                          wt,
                          4,
                          output_file_name=file_name + f"input_{i}_{j}_x4")
            lr = lr.numpy()
            print(f"shape of lr:{lr.shape}")

            # EDSR processing
            start = time.time()
            # torch.cuda.synchronize()
            USE_FP16 = use_fp16
            target_dtype = np.float16 if USE_FP16 else np.float32
            ba, ch, ht, wt = lr.shape

            lr = np.ascontiguousarray(lr, dtype=target_dtype)

            # need to set input and output precisions to FP16 to fully enable it
            p_output = np.empty([b, c, ht * scale, wt * scale],
                                dtype=target_dtype)

            # allocate device memory
            d_input = cuda.mem_alloc(1 * lr.nbytes)
            d_output = cuda.mem_alloc(1 * p_output.nbytes)

            bindings = [int(d_input), int(d_output)]

            sr = predict(context, lr, d_input, stream, bindings, p_output,
                         d_output)

            output_sr = torch.tensor(sr).int()
            output_folder = "output_images"
            file_name = "data/test7.jpg".split("/")[-1].split(".")[0]
            ut.save_image(output_sr[0],
                          output_folder,
                          ht,
                          wt,
                          4,
                          output_file_name=file_name + f"{i}_{j}_x4")

            # torch.cuda.synchronize()
            end = time.time()
            processing_time = end - start
            total_time += processing_time

            # new cropped patch's dimension (h and w)
            n_h_s, n_h_e, n_w_s, n_w_e = 0, 0, 0, 0

            n_h_s = 0 if h_s == 0 else (shave * 4)
            n_h_e = ((h_e - h_s) * 4) if h_e == h else (((h_e - h_s) - shave) *
                                                        4)
            new_i_e = new_i_s + n_h_e - n_h_s

            n_w_s = 0 if w_s == 0 else (shave * 4)
            n_w_e = ((w_e - w_s) * 4) if w_e == w else (((w_e - w_s) - shave) *
                                                        4)
            new_j_e = new_j_e + n_w_e - n_w_s

            # cropping the SR patch
            crop_start = time.time()
            sr_small = sr[:, :, n_h_s:n_h_e, n_w_s:n_w_e]
            crop_end = time.time()
            crop_time = crop_end - crop_start
            total_crop_time += crop_time
            output[:, :, new_i_s:new_i_e, new_j_s:new_j_e] = sr_small
            del sr_small
            clear_start = time.time()
            if device == "cuda":
                ut.clear_cuda(None, None)
            clear_end = time.time()
            clear_time = clear_end - clear_start
            total_clear_time += clear_time
            if w_e == w:
                break
            new_j_s = new_j_e

        new_i_s = new_i_e

        if h_e == h:
            break
    return output, total_time, total_crop_time, total_shift_time, total_clear_time
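
A minimal usage sketch for the function above; the engine path and the input tensor are placeholders, only the call signature comes from the definition.

# Hypothetical call; a real LR image tensor and an existing .trt engine would be used.
lr_image = ut.random_image(256)  # 4D tensor (b, c, h, w); channels must match the engine
sr, t_total, t_crop, t_shift, t_clear = trt_forward_chop_iterative(
    lr_image,
    trt_engine_path="inference_models/edsr_fp16_256.trt",  # assumed path pattern
    shave=10,
    min_size=1024,
    scale=4,
    use_fp16=True,
)
print("TRT processing time: ", t_total)
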
Example n. 3
def trt_forward_chop_iterative_v2(
    x,
    trt_engine_path=None,
    shave=10,
    min_size=1024,
    device="cuda",
    print_result=True,
    scale=4,
    use_fp16=False,
):
    """
    

    Parameters
    ----------
    x : 4d array
        input image.
    trt_engine_path : str, optional
        path of the trt engine. The default is None.
    shave : int, optional
        shave value. The default is 10.
    min_size : int, optional
        total patch size (dimension x dimension). The default is 1024.
    device : str, optional
        device cuda or cpu. The default is "cuda".
    print_result : bool, optional
        print result or not. The default is True.
    scale : int, optional
        hr = scale * lr. The default is 4.
    use_fp16 : bool, optional
        choose precision. The default is False.

    Raises
    ------
    Exception
        if no patch is generated (shave size too big for the given patch dimension).

    Returns
    -------
    output : ndarray
        output SR image.

    """
    patch_count = 0
    row_count = 0
    column_count = 0

    dim = int(math.sqrt(min_size))  # getting patch dimension
    b, c, img_height, img_width = x.size(
    )  # current image batch, channel, height, width

    device = device
    output = torch.tensor(np.zeros(
        (b, c, img_height * 4, img_width * 4))).numpy()

    with open(trt_engine_path, "rb") as f:
        runtime = trt.Runtime(trt.Logger(trt.Logger.WARNING))
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

    new_i_s = 0  # new patch height start
    for patch_height_start in range(0, img_height, dim - 2 * shave):
        row_count += 1
        right_most = False
        bottom_most = False
        left_increased = 0
        top_increased = 0
        new_j_s = 0
        new_j_e = 0
        for patch_width_start in range(0, img_width, dim - 2 * shave):
            column_count += 1
            patch_count += 1
            patch_height_end = min(img_height, patch_height_start + dim)
            patch_width_end = min(img_width, patch_width_start + dim)

            if img_height < patch_height_start + dim:
                bottom_most = True
                old_patch_height_start = patch_height_start
                patch_height_start = max(img_height - dim, 0)
                top_increased = old_patch_height_start - patch_height_start

            if img_width < patch_width_start + dim:
                right_most = True
                old_patch_width_start = patch_width_start
                patch_width_start = max(img_width - dim, 0)
                left_increased = old_patch_width_start - patch_width_start

            left_crop, top_crop, right_crop, bottom_crop = (
                0,
                0,
                shave * scale,
                shave * scale,
            )

            if patch_width_start != 0:
                if right_most == True:
                    left_crop = (shave + left_increased) * scale
                else:
                    left_crop = shave * scale

            if patch_height_start != 0:
                if bottom_most == True:
                    top_crop = (shave + top_increased) * scale
                else:
                    top_crop = shave * scale

            if patch_width_end == img_width:
                right_crop = 0

            if patch_height_end == img_height:
                bottom_crop = 0

            # =============================================================================
            #             print('Patch no: {}, Row: {}, Column: {}\n'.format(patch_count, row_count, column_count))
            #             print('{}x{}:{}x{}'.format(patch_height_start, patch_height_end, patch_width_start, patch_width_end ))
            #             print('SR Patch size: {}x{}'.format(dim*scale, dim*scale))
            # =============================================================================

            h_s, h_e, w_s, w_e = (
                0 + top_crop,
                dim * scale - bottom_crop,
                0 + left_crop,
                dim * scale - right_crop,
            )
            # =============================================================================
            #             print('hs, he, ws, we', h_s, h_e, w_s, w_e)
            # =============================================================================
            if dim >= img_height and dim >= img_width:
                h_s, h_e, w_s, w_e = 0, img_height * scale, 0, img_width * scale
            elif dim < img_height and dim >= img_width:
                w_s, w_e = 0, img_width * scale
            elif dim >= img_height and dim < img_width:
                h_s, h_e = 0, img_height * scale

            lr = x[:, :, patch_height_start:patch_height_end,
                   patch_width_start:patch_width_end]
            # =============================================================================
            #             print('x.shape: ',x.shape)
            #             print('lr.shape', lr.shape)
            # =============================================================================
            ba, ch, ht, wt = lr.shape

            lr = lr.numpy()

            # EDSR processing
            start = time.time()
            # torch.cuda.synchronize()
            USE_FP16 = use_fp16
            target_dtype = np.float16 if USE_FP16 else np.float32
            ba, ch, ht, wt = lr.shape

            lr = np.ascontiguousarray(lr, dtype=target_dtype)

            # need to set input and output precisions to FP16 to fully enable it
            p_output = np.empty([b, c, ht * scale, wt * scale],
                                dtype=target_dtype)

            # allocate device memory
            #subprocess.run("gpustat", shell=True)
            d_input = cuda.mem_alloc(1 * lr.nbytes)
            d_output = cuda.mem_alloc(1 * p_output.nbytes)
            #subprocess.run("gpustat", shell=True)
            bindings = [int(d_input), int(d_output)]
            stream = cuda.Stream()
            sr = predict(context, lr, d_input, stream, bindings, p_output,
                         d_output)
            #subprocess.run("gpustat", shell=True)
            new_i_e = new_i_s + h_e - h_s
            new_j_e = new_j_s + w_e - w_s
            patch_crop_positions = [h_s, h_e, w_s, w_e]
            SR_positions = [new_i_s, new_i_e, new_j_s, new_j_e]

            # torch.cuda.synchronize()
            end = time.time()
            processing_time = end - start

            sr_small = sr[:, :, h_s:h_e, w_s:w_e]
            output[:, :, new_i_s:new_i_e, new_j_s:new_j_e] = sr_small
            del sr_small
            #subprocess.run("gpustat", shell=True)
            clear_start = time.time()
            if device == "cuda":
                ut.clear_cuda(None, None)

            new_j_s = new_j_e
            if patch_width_end == img_width:
                break
        new_i_s = new_i_e
        column_count = 0
        if patch_height_end == img_height:
            break

    if patch_count == 0:
        raise Exception("Shave size too big for given patch dimension")
    #subprocess.run("gpustat", shell=True)
    return output
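
A small numeric illustration of the border re-anchoring above (values only, assuming dim=32, shave=10, scale=4 and an image 50 pixels wide):

# img_width = 50, dim = 32, shave = 10, scale = 4, stride = dim - 2*shave = 12
# j = 24: 24 + 32 > 50, so the patch is slid back to start at 50 - 32 = 18
#         left_increased = 24 - 18 = 6
#         left_crop  = (shave + left_increased) * scale = (10 + 6) * 4 = 64
#         right_crop = 0 (the patch ends exactly at the image border)
# The SR patch is cropped to columns 64..128 and written into the last 64 output
# columns, so the three column patches tile 50 * 4 = 200 pixels without overlap.
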
Example n. 4
def forward_chop_iterative(x,
                           model=None,
                           shave=10,
                           min_size=1024,
                           device="cuda",
                           print_result=True):
    """
    Forward chopping in an iterative way

    Parameters
    ----------
    x : tensor
        input image.
    model : nn.Module, optional
        SR model. The default is None.
    shave : int, optional
        patch shave value. The default is 10.
    min_size : int, optional
        total patch size (dimension x dimension). The default is 1024.
    device : str, optional
        GPU or CPU. The default is 'cuda'.
    print_result : bool, optional
        print result or not. The default is True.

    Returns
    -------
    output : tensor
        output image.
    total_time : float
        total execution time.
    total_crop_time : float
        total cropping time.
    total_shift_time : float
        total GPU to CPU shifting time.
    total_clear_time : float
        total GPU clearing time.

    """
    dim = int(math.sqrt(min_size))  # getting patch dimension
    b, c, h, w = x.size()  # current image batch, channel, height, width
    device = device
    patch_count = 0
    output = torch.tensor(np.zeros((b, c, h * 4, w * 4)))
    total_time = 0
    total_crop_time = 0
    total_shift_time = 0
    total_clear_time = 0
    # =============================================================================
    #     if device == "cuda":
    #         torch.cuda.synchronize()
    #         x = x.to(device)
    # =============================================================================

    new_i_s = 0
    for i in range(0, h, dim - 2 * shave):
        new_j_s = 0
        new_j_e = 0
        for j in range(0, w, dim - 2 * shave):
            patch_count += 1
            h_s, h_e = i, min(h, i + dim)  # patch height start and end
            w_s, w_e = j, min(w, j + dim)  # patch width start and end
            # subprocess.run("gpustat", shell=True)
            # =============================================================================
            #             print(
            #                 "Patch no: {} : {}-{}x{}-{}\n".format(patch_count, h_s, h_e, w_s, w_e)
            #             )
            # =============================================================================

            lr = x[:, :, h_s:h_e, w_s:w_e]
            # =============================================================================
            #             print(lr.shape)
            # =============================================================================
            if device == "cuda":
                torch.cuda.synchronize()
                lr = lr.to(device)
            with torch.no_grad():
                # EDSR processing
                start = time.time()
                torch.cuda.synchronize()
                sr = model(lr)
                torch.cuda.synchronize()
                end = time.time()
                processing_time = end - start
                total_time += processing_time
            # =============================================================================
            #             print('Processing time: ', processing_time)
            # =============================================================================

            shift_start = time.time()
            torch.cuda.synchronize()
            sr = sr.cpu()
            torch.cuda.synchronize()
            shift_end = time.time()
            shift_time = shift_end - shift_start

            # new cropped patch's dimension (h and w)
            n_h_s, n_h_e, n_w_s, n_w_e = 0, 0, 0, 0

            n_h_s = 0 if h_s == 0 else (shave * 4)
            n_h_e = ((h_e - h_s) * 4) if h_e == h else (((h_e - h_s) - shave) *
                                                        4)
            new_i_e = new_i_s + n_h_e - n_h_s

            n_w_s = 0 if w_s == 0 else (shave * 4)
            n_w_e = ((w_e - w_s) * 4) if w_e == w else (((w_e - w_s) - shave) *
                                                        4)
            new_j_e = new_j_e + n_w_e - n_w_s

            # cropping the SR patch
            crop_start = time.time()
            sr_small = sr[:, :, n_h_s:n_h_e, n_w_s:n_w_e]
            crop_end = time.time()
            crop_time = crop_end - crop_start
            total_crop_time += crop_time

            # =============================================================================
            #             shift_start = time.time()
            #             if device == "cuda":
            #                 torch.cuda.synchronize()
            #                 sr_small = sr_small.to("cpu")
            #                 torch.cuda.synchronize()
            #             shift_end = time.time()
            #             shift_time = shift_end - shift_start
            # =============================================================================
            total_shift_time += shift_time
            output[:, :, new_i_s:new_i_e, new_j_s:new_j_e] = sr_small
            del sr_small
            clear_start = time.time()
            if device == "cuda":
                ut.clear_cuda(lr, sr)
            clear_end = time.time()
            clear_time = clear_end - clear_start
            total_clear_time += clear_time
            if w_e == w:
                break
            new_j_s = new_j_e

        new_i_s = new_i_e

        if h_e == h:
            break
    # =============================================================================
    #     if print_result == True:
    #         print("Patch dimension: {}x{}".format(dim, dim))
    #         print("Total pacthes: ", patch_count)
    #         print("Total EDSR Processing time: ", total_time)
    #         print("Total crop time: ", total_crop_time)
    #         print("Total shift time: ", total_shift_time)
    #         print("Total clear time: ", total_clear_time)
    # =============================================================================
    return output, total_time, total_crop_time, total_shift_time, total_clear_time
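
A worked example of the shave/crop arithmetic above (the scale is fixed at 4 in this code):

# h = w = 100, dim = 32, shave = 10, stride = dim - 2*shave = 12
# i = 0 : patch rows  0..32,  n_h_s = 0,  n_h_e = (32 - 10) * 4 = 88       -> output rows 0..88
# i = 12: patch rows 12..44,  n_h_s = 40, n_h_e = (44 - 12 - 10) * 4 = 88  -> 48 rows, 88..136
# ...
# i = 72: patch rows 72..100, n_h_s = 40, n_h_e = (100 - 72) * 4 = 112     -> 72 rows, 328..400
# Each SR patch contributes only its non-shaved interior, so consecutive patches
# tile the 400-row output without double-writing.
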
def forward_chop_iterative(x, model=None, shave=10, min_size=1024):
    dim = round(math.sqrt(min_size))
    b, c, h, w = x.size()
    device = "cuda"
    count = 0
    output = torch.tensor(np.zeros((b, c, h * 4, w * 4)))
    total_time = 0
    new_i_s = 0
    x = x.to(device)
    for i in tqdm(range(0, h, dim - 2 * shave)):
        new_j_s = 0
        new_j_e = 0
        # =============================================================================
        #             subprocess.run("gpustat", shell=True)
        # =============================================================================
        for j in range(0, w, dim - 2 * shave):
            # =============================================================================
            #                 print(i,j)
            #                 subprocess.run("gpustat", shell=True)
            # =============================================================================
            count += 1
            h_s = i
            h_e = min(h, i + dim)
            w_s = j
            w_e = min(w, j + dim)
            lr = x[:, :, h_s:h_e, w_s:w_e]
            # =============================================================================
            #                 print('h: {}x{} w: {}x{}'.format(h_s, h_e, w_s, w_e))
            #                 print('current dim: {}x{}'.format(h_e-h_s,w_e-w_s))
            # =============================================================================
            with torch.no_grad():
                # lr = lr.to(device)
                # =============================================================================
                #                     subprocess.run("gpustat", shell=True)
                # =============================================================================
                start = time.time()
                sr = model(lr)
                end = time.time()
                processing_time = end - start
                total_time += processing_time
            # =============================================================================
            #                     subprocess.run("gpustat", shell=True)
            # =============================================================================
            # sr = sr.detach().numpy()
            n_h = (h_e - h_s) * 4
            n_w = (w_e - w_s) * 4

            # =============================================================================
            #                 print('next_dimension: {}x{}'.format(n_h, n_w))
            # =============================================================================
            n_h_s, n_h_e, n_w_s, n_w_e = 0, 0, 0, 0

            if h_s == 0:
                n_h_s = 0
            else:
                n_h_s = shave * 4
            if h_e == h:
                n_h_e = (h_e - h_s) * 4

            else:
                n_h_e = ((h_e - h_s) - shave) * 4

            new_i_e = new_i_s + n_h_e - n_h_s

            if w_s == 0:
                n_w_s = 0
            else:
                n_w_s = shave * 4
            if w_e == w:
                n_w_e = (w_e - w_s) * 4
            else:
                n_w_e = ((w_e - w_s) - shave) * 4
            new_j_e = new_j_e + n_w_e - n_w_s
            sr_small = sr[:, :, n_h_s:n_h_e, n_w_s:n_w_e]
            sr_small = sr_small.to("cpu")
            output[:, :, new_i_s:new_i_e, new_j_s:new_j_e] = sr_small
            del sr_small
            # =============================================================================
            #                 print('new -> h: {}x{} w: {}x{}'.format(n_h_s, n_h_e, n_w_s, n_w_e))
            #                 print('h-> {}:{}, w-> {}:{}'.format(new_i_s, new_i_e, new_j_s, new_j_e))
            #                 print()
            # =============================================================================
            if w_e == w:
                break
            new_j_s = new_j_e
            ut.clear_cuda(lr, sr)
        # =============================================================================
        #                 subprocess.run("gpustat", shell=True)
        #                 print('-----------------------------------------------------')
        # =============================================================================
        # =============================================================================
        #             print('After:')
        #             subprocess.run("gpustat", shell=True)
        #             print()
        # =============================================================================
        new_i_s = new_i_e
        if h_e == h:
            break
    # =============================================================================
    #         print(count)
    #         print(output.shape)
    # =============================================================================
    print("Patch dimension: {}x{}".format(dim, dim))
    print("Total pacthes: ", count)
    print("Total EDSR Processing time: ", total_time)
    return output
Example n. 6
def batch_forward_chop(
    patch_list,
    batch_size,
    channel,
    img_height,
    img_width,
    dim,
    shave,
    scale,
    model,
    device="cuda",
    print_timer=True,
):
    """
    Create SR image from batches of patches

    Parameters
    ----------
    patch_list : list
        list of patches.
    batch_size : int
        batch size.
    channel : int
        input image channel.
    img_height : int
        input image height.
    img_width : int
        input image width.
    dim : int
        patch dimension.
    shave : int
        shave value for patch.
    scale : int
        scale for LR to SR.
    model : nn.Module
        SR model.
    device : str, optional
        GPU or CPU. The default is 'cuda'.
    print_timer : bool, optional
        Print result or not. The default is True.

    Raises
    ------
    Exception
        if the batch size is greater than the total number of patches, or a CUDA
        runtime error occurs while processing a batch.

    Returns
    -------
    3D matrix, tuple
        output_image, tuple of timings.

    """
    logger = ut.get_logger()
    total_patches = len(patch_list)
    if batch_size > total_patches:
        raise Exception("Batch size greater than total number of patches")
    output_image = torch.tensor(
        np.zeros((channel, img_height * scale, img_width * scale)))

    cpu_to_gpu_time = 0
    gpu_to_cpu_time = 0
    batch_creating_time = 0
    total_EDSR_time = 0
    cuda_clear_time = 0
    merging_time = 0
    for start in range(1, total_patches + 1, batch_size):
        info = ""
        try:
            batch_creating_timer = ut.timer()
            batch = []
            end = start + batch_size
            if start + batch_size > total_patches:
                end = total_patches + 1
            for p in range(start, end):
                batch.append(patch_list[p][4])
            batch_creating_time += batch_creating_timer.toc()

            torch.cuda.synchronize()
            cpu_to_gpu_timer = ut.timer()
            batch = torch.stack(batch).to(device)
            torch.cuda.synchronize()
            cpu_to_gpu_time += cpu_to_gpu_timer.toc()
            info = (info + "C2G Starts: " + str(cpu_to_gpu_timer.t0) +
                    "C2G total: " + str(cpu_to_gpu_time))
            # =============================================================================
            #             print(batch.shape)
            #             subprocess.run("gpustat", shell=True)
            # =============================================================================
            with torch.no_grad():
                # =============================================================================
                #                 print(start, end)
                #                 print(sys.getsizeof(batch))
                # =============================================================================
                torch.cuda.synchronize()
                start_time = time.time()
                sr_batch = model(batch)
                torch.cuda.synchronize()
                end_time = time.time()
                processing_time = end_time - start_time
                total_EDSR_time += processing_time
                info = (info + "\tModel Starts: " + str(start_time) +
                        "Model total: " + str(total_EDSR_time))

            torch.cuda.synchronize()
            gpu_to_cpu_timer = ut.timer()
            sr_batch = sr_batch.to("cpu")
            torch.cuda.synchronize()
            gpu_to_cpu_time += gpu_to_cpu_timer.toc()
            info = (info + "\tGPU 2 CPU Starts: " + str(gpu_to_cpu_timer.t0) +
                    "G2C total: " + str(gpu_to_cpu_time))
            _, _, patch_height, patch_width = sr_batch.size()
            logger.info(info)
            batch_id = 0
            merging_timer = ut.timer()
            for p in range(start, end):
                output_image[:, patch_list[p][3][0]:patch_list[p][3][
                    1], patch_list[p][3][2]:patch_list[p][3][3], ] = sr_batch[
                        batch_id][:, patch_list[p][2][0]:patch_list[p][2][1],
                                  patch_list[p][2][2]:patch_list[p][2][3], ]
                batch_id += 1

            merging_time += merging_timer.toc()
            cuda_clear_timer = ut.timer()
            ut.clear_cuda(batch, None)
            cuda_clear_time += cuda_clear_timer.toc()
        except RuntimeError as err:
            ut.clear_cuda(batch, None)
            raise Exception(err)
    model = model.to("cpu")

    if print_timer:
        print("Total upsampling time: {}\n".format(total_EDSR_time))
        print("Total CPU to GPU shifting time: {}\n".format(cpu_to_gpu_time))
        print("Total GPU to CPU shifting time: {}\n".format(gpu_to_cpu_time))
        print("Total batch creation time: {}\n".format(batch_creating_time))
        print("Total merging time: {}\n".format(merging_time))
        print("Total CUDA clear time: {}\n".format(cuda_clear_time))
        print("Total time: {}\n".format(total_EDSR_time + cpu_to_gpu_time +
                                        gpu_to_cpu_time + batch_creating_time +
                                        cuda_clear_time + merging_time))
    return output_image, (
        total_EDSR_time,
        cpu_to_gpu_time,
        gpu_to_cpu_time,
        batch_creating_time,
        cuda_clear_time,
        merging_time,
    )
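
The merging loop above only reveals which fields of each patch entry it uses; a sketch of the assumed layout (entries appear to be keyed from 1, since the loop runs over range(1, total_patches + 1)):

# Assumed patch_list entry layout, inferred from the merging loop above:
# patch_list[p][2] -> (crop_top, crop_bottom, crop_left, crop_right) inside the SR patch
# patch_list[p][3] -> (row_start, row_end, col_start, col_end) in the output image
# patch_list[p][4] -> the LR patch tensor of shape (channel, dim, dim)
# Fields 0 and 1 are not touched by this function.
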
def maximum_acceptable_dimension(device,
                                 logger,
                                 model,
                                 max_unacceptable_dimension,
                                 model_name="EDSR"):
    """
    Get maximum acceptable dimension

    Parameters
    ----------
    device : str
        device type.
    logger : logger
        keep logs.
    model : torch.nn.model
        SR model.
    max_unacceptable_dimension : int
        maximum unacceptable dimension which is a power of 2.
    model_name : str, optional
        SR model name. The default is "EDSR".

    Returns
    -------
    last : int
        acceptable dimension.

    """
    print("\nGetting maximum acceptable dimension...\n")
    result2 = {}
    dimension = max_unacceptable_dimension
    maxm = math.inf
    minm = -math.inf
    last = 0
    last_used_memory = 0
    iteration = 0
    while True:
        # Printing iteration status
        iteration += 1
        _, used_memory, _ = ut.get_gpu_details(device,
                                               None,
                                               logger,
                                               print_details=False)
        leaked_memory = (used_memory - last_used_memory
                         if used_memory > last_used_memory else 0)
        print(
            "Patch Dimension: {:04}x{:04} | Used Memory: {:09.3f} | Leaked Memory: {:09.3f} | Iteration: {}"
            .format(dimension, dimension, used_memory, leaked_memory,
                    iteration))
        last_used_memory = used_memory

        # Clearing cuda cache:
        ut.clear_cuda(None, None)

        # Binary Search
        if last == dimension:
            break
        process_output = subprocess.run(
            ["python3", "binarysearch_helper.py",
             str(dimension), model_name],
            stdout=subprocess.PIPE,
            text=True,
        )
        if process_output.returncode == 0:
            out = process_output.stdout.split("\n")
            total_time = out[0]
            last = dimension
            if dimension in result2.keys():
                result2[dimension].append(total_time)
            else:
                result2[dimension] = [total_time]
            minm = copy.copy(dimension)
            if maxm == math.inf:
                dimension *= 2
            else:
                dimension = dimension + (maxm - minm) // 2
            ut.clear_cuda(None, None)
        else:
            ut.get_gpu_details(
                device,
                "Runtime error for dimension: {}x{}".format(
                    dimension, dimension),
                logger,
            )
            maxm = copy.copy(dimension)
            if dimension in result2.keys():
                result2[dimension].append(math.inf)
            else:
                result2[dimension] = [math.inf]
            if minm == -math.inf:
                dimension = dimension // 2
            else:
                dimension = minm + (maxm - minm) // 2
            ut.clear_cuda(None, None)
    return last
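
A worked example of the bound updates above (hypothetical pass/fail outcomes):

# max_unacceptable_dimension = 512
# 512 fails    -> maxm = 512, minm = -inf, dimension = 512 // 2             = 256
# 256 succeeds -> minm = 256, last = 256,  dimension = 256 + (512-256) // 2 = 384
# 384 succeeds -> minm = 384, last = 384,  dimension = 384 + (512-384) // 2 = 448
# 448 fails    -> maxm = 448,              dimension = 384 + (448-384) // 2 = 416
# The interval keeps narrowing until last == dimension, and last (the largest
# dimension that ran without a CUDA out-of-memory error) is returned.
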
def build_onnx_trt(model_name, patch_dim, use_precision, verbose):
    if patch_dim is None:
        config = toml.load("../config.toml")
        patch_dim = int(config["max_dim"])
    else:
        patch_dim = int(patch_dim)

    # pytorch to onnx model
    if verbose:
        print("Building ONNX model from the PyTorch model...")
    onnx_model_name = model_name.lower() + "_" + str(use_precision)+ "_" + \
    str(patch_dim) + ".onnx"
    # =============================================================================
    #     omb.build_onnx_model(model_name, patch_dim, onnx_model_name)
    # =============================================================================
    # =============================================================================
    #     command1 = "python3 onnx_model_builder.py " + str(model_name) + " " + \
    #         str(patch_dim) + " " + str(onnx_model_name)
    # =============================================================================
    command1 = [
        "python3", "onnx_model_builder.py",
        str(model_name),
        str(patch_dim),
        str(onnx_model_name)
    ]
    #subprocess.run(command1, shell=True)

    while True:
        process_output = subprocess.run(
            command1,
            stdout=subprocess.PIPE,
            text=True,
        )
        if process_output.returncode != 0:
            ut.clear_cuda(None, None)
            patch_dim -= 1
            print('Memory out. Decreasing patch size. New patch_size = {}'.
                  format(patch_dim))
            onnx_model_name = model_name.lower() + "_" + str(use_precision)+ "_" + \
                    str(patch_dim) + ".onnx"
            command1 = [
                "python3", "onnx_model_builder.py",
                str(model_name),
                str(patch_dim),
                str(onnx_model_name)
            ]

        else:
            ut.clear_cuda(None, None)
            # for linear search
            config = toml.load("../config.toml")
            config["max_dim"] = patch_dim
            f = open("../config.toml", "w")
            toml.dump(config, f)
            break

    # onnx to trt
    if verbose:
        print("Building TRT engine from the ONNX model...")
    trt_model = "inference_models/" + model_name.lower() + "_" + str(use_precision) + \
        "_" + str(patch_dim) + ".trt"
    if use_precision == "fp32":
        command2 = "python3 onnx_trt_util.py " + "inference_models/"+onnx_model_name + " " + \
            str(trt_model) + " 0"
    elif use_precision == "fp16":
        command2 = "python3 onnx_trt_util.py " + "inference_models/"+onnx_model_name + " " + \
            str(trt_model) + " 1"
    subprocess.run(command2, shell=True)
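
A hypothetical invocation of the builder above; with patch_dim=None the patch dimension is read from the max_dim entry in ../config.toml (written by the binary search below).

# Sketch only: builds <model>_<precision>_<dim>.onnx and the matching .trt engine.
build_onnx_trt("EDSR", None, "fp16", verbose=True)
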
def maximum_unacceptable_dimension_2n(device,
                                      logger,
                                      start_dim=2,
                                      model_name="EDSR"):
    """
    Get the maximum unacceptable dimension which is a power of 2

    Parameters
    ----------
    device : str
        device type.
    logger : logger
        keep logs.
    start_dim : int, optional
        starting dimension. The default is 2.
    model_name : str, optional
        SR model name. The default is "EDSR".

    Returns
    -------
    last_dimension : int
        unacceptable dimension.

    """
    print(
        "\nGetting maximum unacceptable dimension which is a power of two...\n"
    )
    result1 = {}
    last_dimension = 0
    dimension = start_dim
    last_used_memory = 0
    iteration = 0
    while True:
        # Printing loop status
        iteration += 1
        _, used_memory, _ = ut.get_gpu_details(device,
                                               None,
                                               logger,
                                               print_details=False)
        leaked_memory = (used_memory - last_used_memory
                         if used_memory > last_used_memory else 0)
        print(
            "Patch Dimension: {:04}x{:04} | Used Memory: {:09.3f} | Leaked Memory: {:09.3f} | Iteration: {}"
            .format(dimension, dimension, used_memory, leaked_memory,
                    iteration))
        last_used_memory = used_memory

        # Calling SR model for different dimension
        process_output = subprocess.run(
            ["python3", "binarysearch_helper.py",
             str(dimension), model_name],
            stdout=subprocess.PIPE,
            text=True,
        )
        if process_output.returncode == 0:
            out = process_output.stdout.split("\n")
            total_time = out[0]
            if dimension in result1.keys():
                result1[dimension].append(total_time)
            else:
                result1[dimension] = [total_time]
            dimension *= 2
        else:
            ut.get_gpu_details(
                device,
                "Runtime error for dimension: {}x{}".format(
                    dimension, dimension),
                logger,
            )
            if dimension in result1.keys():
                result1[dimension].append(math.inf)
            else:
                result1[dimension] = [math.inf]

            last_dimension = dimension

            ut.clear_cuda(None, None)
            break
    return last_dimension
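
A worked example of the doubling search above (hypothetical GPU limits):

# start_dim = 2: dimensions 2, 4, 8, ..., 512 all run without error, so the
# dimension keeps doubling; 1024 raises a runtime error in the child process,
# so last_dimension = 1024 is returned and later used as the upper bound for
# maximum_acceptable_dimension.
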
def do_binary_search(model_name, start_dim):
    """
    Binary search function...

    Returns
    -------
    None.

    """
    # Prints the header banner
    banner = pyfiglet.figlet_format("Binary Search: " + model_name)
    print(banner)

    # Getting logger
    logger = ut.get_logger()

    # Check valid model or not
    if model_name not in ["EDSR", "RRDB"]:
        logger.exception("{} model is unkknown".format(model_name))
        raise Exception("Unknown model...")

    # Device type cpu or cuda
    device = ut.get_device_type()

    if device == "cpu" and model_name not in ["EDSR"]:
        logger.exception("{} model cannot be run in CPU".format(model_name))
        raise Exception("{} model cannot be run in CPU".format(model_name))

    # Device information
    _, device_name = ut.get_device_details()

    if device == "cuda":
        logger.info("Device: {}, Device Name: {}".format(device, device_name))
        ut.get_gpu_details(
            device,
            "Before binary search: {}".format(model_name),
            logger,
            print_details=True,
        )
    else:
        logger.info("Device: {}, Device Name: {}".format(device, device_name))

    # Clearing cuda cache
    ut.clear_cuda(None, None)

    # Getting the highest unacceptable dimension which is a power of 2
    max_unacceptable_dimension = maximum_unacceptable_dimension_2n(
        device, logger, start_dim=start_dim, model_name=model_name)
    print("\nMaximum unacceptable dimension: {}\n".format(
        max_unacceptable_dimension))

    # Clearing cuda cache
    ut.clear_cuda(None, None)

    # Getting the maximum acceptable dimension
    max_dim = maximum_acceptable_dimension(device,
                                           logger,
                                           None,
                                           max_unacceptable_dimension,
                                           model_name=model_name)
    print("\nMaximum acceptable dimension: {}\n".format(max_dim))

    # Clearing cuda cache
    ut.clear_cuda(None, None)

    # For batch processing
    config = toml.load("../batch_processing.toml")
    config["end_patch_dimension"] = max_dim
    f = open("../batch_processing.toml", "w")
    toml.dump(config, f)

    # for linear search
    config = toml.load("../config.toml")
    config["max_dim"] = max_dim
    f = open("../config.toml", "w")
    toml.dump(config, f)
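
A hypothetical entry point for the whole search; it assumes ../config.toml and ../batch_processing.toml already exist, since both files are rewritten at the end.

if __name__ == "__main__":
    do_binary_search("EDSR", 2)
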
Example n. 11
def do_linear_search(test=False, test_dim=32):
    """
    Linear search function...

    Returns
    -------
    None.

    """
    logger = ut.get_logger()

    device = "cuda"
    model_name = "EDSR"
    config = toml.load("../config.toml")
    run = config["run"]
    scale = int(config["scale"]) if config["scale"] else 4
    # device information
    _, device_name = ut.get_device_details()
    total, _, _ = ut.get_gpu_details(
        device, "\nDevice info:", logger, print_details=False
    )
    log_message = (
        "\nDevice: "
        + device
        + "\tDevice name: "
        + device_name
        + "\tTotal memory: "
        + str(total)
    )
    logger.info(log_message)

    ut.clear_cuda(None, None)

    state = "Before loading model: "
    total, used, _ = ut.get_gpu_details(device, state, logger, print_details=True)

    model = md.load_edsr(device=device)

    state = "After loading model: "
    total, used, _ = ut.get_gpu_details(device, state, logger, print_details=True)

    # =============================================================================
    #     file = open("temp_max_dim.txt", "r")
    #     line = file.read()
    #     max_dim = int(line.split(":")[1])
    # =============================================================================
    config = toml.load("../config.toml")
    max_dim = int(config["max_dim"])
    if test == False:
        detailed_result, memory_used, memory_free = result_from_dimension_range(
            device, logger, config, model, 1, max_dim
        )
    else:
        detailed_result, memory_used, memory_free = result_from_dimension_range(
            device, logger, config, model, test_dim, test_dim
        )
    if test == False:
        # get mean
        # get std
        mean_time, std_time = ut.get_mean_std(detailed_result)
        mean_memory_used, std_memory_used = ut.get_mean_std(memory_used)
        mean_memory_free, std_memory_free = ut.get_mean_std(memory_free)

        # make folder for saving results
        plt_title = "Model: {} | GPU: {} | Memory: {} MB".format(
            model_name, device_name, total
        )
        date = "_".join(str(time.ctime()).split())
        date = "_".join(date.split(":"))
        foldername = date
        os.mkdir("results/" + foldername)
        # plot data
        ut.plot_data(
            foldername,
            "dimension_vs_meantime",
            mean_time,
            "Dimensionn of Patch(nxn)",
            "Mean Processing Time: LR -> SR, Scale: {} ( {} runs )".format(scale, run),
            mode="mean time",
            title=plt_title,
        )
        ut.plot_data(
            foldername,
            "dimension_vs_stdtime",
            std_time,
            "Dimension n of Patch(nxn)",
            "Std of Processing Time: LR -> SR, Scale: {} ( {} runs )".format(
                scale, run
            ),
            mode="std time",
            title=plt_title,
        )
        ut.plot_data(
            foldername,
            "dimension_vs_meanmemoryused",
            mean_memory_used,
            "Dimension n of Patch(nxn)",
            "Mean Memory used: LR -> SR, Scale: {} ( {} runs )".format(scale, run),
            mode="mean memory used",
            title=plt_title,
        )
        ut.plot_data(
            foldername,
            "dimension_vs_stdmemoryused",
            std_memory_used,
            "Dimension n of Patch(nxn)",
            "Std Memory Used: LR -> SR, Scale: {} ( {} runs )".format(scale, run),
            mode="std memory used",
            title=plt_title,
        )
        ut.plot_data(
            foldername,
            "dimension_vs_meanmemoryfree",
            mean_memory_free,
            "Dimension n of Patch(nxn)",
            "Mean Memory Free: LR -> SR, Scale: {} ( {} runs )".format(scale, run),
            mode="mean memory free",
            title=plt_title,
        )
        ut.plot_data(
            foldername,
            "dimension_vs_stdmemoryfree",
            std_memory_free,
            "Dimension n of Patch(nxn)",
            "Std Memory Free: LR -> SR, Scale: {} ( {} runs )".format(scale, run),
            mode="std memory free",
            title=plt_title,
        )
        # save data
        ut.save_csv(
            foldername,
            "total_stat",
            device,
            device_name,
            total,
            mean_time,
            std_time,
            mean_memory_used,
            std_memory_used,
            mean_memory_free,
            std_memory_free,
        )
Example n. 12
def result_from_dimension_range(device, logger, config, model, first, last):
    """
    Get detailed results for every dimension from the first to the last acceptable dimension

    Parameters
    ----------
    device : str
        device type.
    logger : logger
        keep logs.
    config : dict
        configuration; the number of runs is read from config["run"].
    model : torch.nn.model
        SR model.
    first : int
        starting dimension.
    last : int
        last acceptable dimension.

    Returns
    -------
    result3 : dictionary
        time for every dimension.
    memory_used : dictionary
        memory used per dimension.
    memory_free : dictionary
        memory free per dimension.

    """
    run = config["run"]
    print("\nPreparing detailed data... ")
    result3 = {}
    memory_used = {}
    memory_free = {}
    for i in range(run):
        print("\nRun: ", i + 1)
        print()
        for dim in tqdm(range(first, last + 1)):
            dimension = dim
            input_image = ut.random_image(dimension)
            input_image = input_image.to(device)
            with torch.no_grad():
                try:
                    print("\n")
                    print(input_image.shape)
                    print(input_image[0, 0, 0, 0:5])
                    start = time.time()
                    output_image = model(input_image)
                    end = time.time()
                    total_time = end - start
                    print("Processing time: ", total_time)
                    print("\n")
                    if dimension in result3.keys():
                        result3[dimension].append(total_time)
                        _, used, free = ut.get_gpu_details(
                            device, "", None, print_details=False
                        )
                        memory_used[dimension].append(used)
                        memory_free[dimension].append(free)
                    else:
                        result3[dimension] = [total_time]
                        _, used, free = ut.get_gpu_details(
                            device, "", None, print_details=False
                        )
                        memory_used[dimension] = [used]
                        memory_free[dimension] = [free]
                    ut.clear_cuda(input_image, output_image)
                except RuntimeError as err:
                    logger.exception("\nDimension NOT OK!")

                    state = "\nGPU usage after dimension exception...\n"
                    ut.get_gpu_details(device, state, logger, print_details=True)

                    output_image = None
                    ut.clear_cuda(input_image, output_image)

                    state = f"\nGPU usage after clearing the image {dimension}x{dimension}...\n"
                    ut.get_gpu_details(device, state, logger, print_details=True)
                    break
        ut.clear_cuda(None, None)
        subprocess.run("gpustat", shell=True)
    return result3, memory_used, memory_free
def forward_chop_iterative(x,
                           model=None,
                           shave=10,
                           min_size=1024,
                           device="cuda",
                           print_result=True):
    dim = int(math.sqrt(min_size))  # getting patch dimension
    b, c, h, w = x.size()  # current image batch, channel, height, width
    device = device
    patch_count = 0
    output = torch.tensor(np.zeros((b, c, h * 4, w * 4)))
    total_time = 0
    total_crop_time = 0
    total_shift_time = 0
    total_clear_time = 0
    if device == "cuda":
        x = x.to(device)

    new_i_s = 0
    for i in range(0, h, dim - 2 * shave):
        new_j_s = 0
        new_j_e = 0
        for j in range(0, w, dim - 2 * shave):
            patch_count += 1
            h_s, h_e = i, min(h, i + dim)  # patch height start and end
            w_s, w_e = j, min(w, j + dim)  # patch width start and end
            lr = x[:, :, h_s:h_e, w_s:w_e]

            with torch.no_grad():
                # EDSR processing
                start = time.time()
                sr = model(lr)
                end = time.time()
                processing_time = end - start
                total_time += processing_time

            # new cropped patch's dimension (h and w)
            n_h_s, n_h_e, n_w_s, n_w_e = 0, 0, 0, 0

            n_h_s = 0 if h_s == 0 else (shave * 4)
            n_h_e = ((h_e - h_s) * 4) if h_e == h else (((h_e - h_s) - shave) *
                                                        4)
            new_i_e = new_i_s + n_h_e - n_h_s

            n_w_s = 0 if w_s == 0 else (shave * 4)
            n_w_e = ((w_e - w_s) * 4) if w_e == w else (((w_e - w_s) - shave) *
                                                        4)
            new_j_e = new_j_e + n_w_e - n_w_s

            # cropping the SR patch
            crop_start = time.time()
            sr_small = sr[:, :, n_h_s:n_h_e, n_w_s:n_w_e]
            crop_end = time.time()
            crop_time = crop_end - crop_start
            total_crop_time += crop_time
            # =============================================================================
            #                 print('Crop time: ', crop_time)
            # =============================================================================

            shift_start = time.time()
            if device == "cuda":
                sr_small = sr_small.to("cpu")
            shift_end = time.time()
            shift_time = shift_end - shift_start
            total_shift_time += shift_time
            # =============================================================================
            #                 print('Shift time: ', shift_time)
            # =============================================================================
            output[:, :, new_i_s:new_i_e, new_j_s:new_j_e] = sr_small
            del sr_small

            if w_e == w:
                break
            new_j_s = new_j_e
            clear_start = time.time()
            if device == "cuda":
                ut.clear_cuda(lr, sr)
            clear_end = time.time()
            clear_time = clear_end - clear_start
            total_clear_time += clear_time
        new_i_s = new_i_e
        if h_e == h:
            break
    if print_result == True:
        print("Patch dimension: {}x{}".format(dim, dim))
        print("Total pacthes: ", patch_count)
        print("Total EDSR Processing time: ", total_time)
        print("Total crop time: ", total_crop_time)
        print("Total shift time: ", total_shift_time)
        print("Total clear time: ", total_clear_time)
    return output, total_time, total_crop_time, total_shift_time, total_clear_time