Example #1
def telemetry(sid, data):
    if data:
        # The current steering angle of the car
        steering_angle = data["steering_angle"]
        # The current throttle of the car
        throttle = data["throttle"]
        # The current speed of the car
        speed = data["speed"]
        # The current image from the center camera of the car
        imgString = data["image"]
        image = Image.open(BytesIO(base64.b64decode(imgString)))
        image_array = preprocess_img(np.asarray(image))
        steering_angle = float(
            model.predict(image_array[None, :, :, :], batch_size=1))
        min_speed = 8
        max_speed = 10
        if float(speed) < min_speed:
            throttle = 1.0
        elif float(speed) > max_speed:
            throttle = -1.0
        else:
            throttle = 0.1

        print(steering_angle, throttle)
        send_control(steering_angle, throttle)

        # save frame
        if args.image_folder != '':
            timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
            image_filename = os.path.join(args.image_folder, timestamp)
            image.save('{}.jpg'.format(image_filename))
    else:
        # NOTE: DON'T EDIT THIS.
        sio.emit('manual', data={}, skip_sid=True)
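The preprocess_img helper used above is not part of this snippet. A minimal sketch of what such a step often looks like in a behavioral-cloning driving setup; the crop bounds, target size, and normalization here are assumptions, not the project's actual implementation:

import cv2
import numpy as np

def preprocess_img(img):
    # Assumed pipeline: crop away sky and car hood, resize to the network's
    # input resolution, and scale pixel values to [-0.5, 0.5].
    img = img[60:-25, :, :]
    img = cv2.resize(img, (200, 66))
    return img.astype(np.float32) / 255.0 - 0.5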
Example #2
    def run(self, img):
        '''YOLOv5 TensorRT inference.

        :param img: input image as a numpy array (BGR, HxWxC)
        :return:
            dst_list: [(x1, y1, x2, y2, label, conf), ...]
        '''
        dst_list = []
        # pre process
        resize_img = preprocess_img(img)
        resize_img = torch.from_numpy(resize_img).to(self.device)
        # inference
        output = self.model(resize_img)
        # post process
        pred = self.post_process(output)
        for i, det in enumerate(pred):  # detections per image
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(resize_img.shape[2:], det[:, :4],
                                          img.shape).round()
                for *xyxy, conf, cls in reversed(det):
                    if float('%.2f' % conf) > self.conf_thresh:
                        x1, y1, x2, y2 = int(xyxy[0]), int(xyxy[1]), int(
                            xyxy[2]), int(xyxy[3])
                        # label is self.names[int(cls)], score is conf
                        dst_list.append((x1, y1, x2, y2, self.names[int(cls)],
                                         float('%.2f' % conf)))

        return dst_list
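A hypothetical usage of the wrapper above; the detector class name and its constructor arguments are not shown in the snippet and are assumed here:

import cv2

detector = Yolov5TRT(engine_path="yolov5s.trt", conf_thresh=0.25)  # hypothetical class/args
img = cv2.imread("test.jpg")
for x1, y1, x2, y2, label, conf in detector.run(img):
    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.putText(img, "{} {:.2f}".format(label, conf), (x1, y1 - 5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
cv2.imwrite("test_out.jpg", img)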
Example #3
 def run(self, img):
     resize_img = preprocess_img(img, self.scale_factor)
     output = self.trt.run(resize_img)
     full_mask = self.post_process(output, img.size)
     # full_mask = full_mask > self.conf_thresh
     mask_image = self.mask_to_image(full_mask)
     return mask_image
Example #4
def test_ten_point(render_ground_truth=False, render_reconstruction=False):
    """ A simple test to check if we can recover the 3D positions of 10 known 3D points
    and camera parameters given two images of the points where the correspondences
    are known to be correct. """

    points, colors = get_points() # get some known 3D points, each with a color
    camera_params, focal_x, focal_y, rows, cols = get_cameras() # get some known cameras

    # project the 3d points into each camera
    cam_1_points2d = project(points, camera_params[np.asarray([0 for _ in points])],
                             focal_x, focal_y)
    cam_2_points2d = project(points, camera_params[np.asarray([1 for _ in points])],
                             focal_x, focal_y)
    # draw the projected points in the camera images
    cam_1_img = utils.draw_points2d(cam_1_points2d, colors, rows, cols, show=False)
    cam_2_img = utils.draw_points2d(cam_2_points2d, colors, rows, cols, show=False)

    # find correspondences between the two images
    kp1, kp2, n_kp1, n_kp2 = matcher.find_matching_points_mock(utils.preprocess_img(cam_1_img),
                                                               utils.preprocess_img(cam_2_img))

    assert len(kp1) == len(n_kp1) == len(kp2) == len(n_kp2) == len(points)

    # keep track of which correspondence maps to which color
    kp_to_color = {i: cam_1_img[kp[1], kp[0]] for i, kp in enumerate(kp1)}

    if render_ground_truth: # show the ground truth geometry
        render_pts_and_cams(points, colors, camera_params[:, 3:], camera_params[:, :3],
                            focal_x, use_spheres=True)

    # run the solver with the correspondences to generate a reconstruction
    camera_kps = np.stack([n_kp1, n_kp2], axis=0)
    camera_params, points_3d, camera_indices, point_indices, points_2d, focal_length = \
        solver.get_solver_params(camera_kps)
    recon_camera_params, recon_3d_points, recon_focal_length, _ = solver.run_solver(
        camera_params, points_3d, camera_indices, point_indices, points_2d, focal_length,
        toss_outliers=False)

    recon_colors = [kp_to_color[i] for i in range(len(points_3d))]
    if render_reconstruction:
        render_pts_and_cams(recon_3d_points, recon_colors, recon_camera_params[:, 3:], 
                            recon_camera_params[:, :3],
                            recon_focal_length, use_spheres=True)

    check_image_match(recon_3d_points, recon_camera_params, recon_focal_length, recon_colors,
                      points, camera_params, focal_x, colors, rows, cols)
Example #5
def worker(q):
    '''
    q: a multiprocessing.Queue object. Each item in the queue is a tuple
    of (url, image_bytes).

    A worker continuously takes images from the work queue, preprocesses each
    image, sends it to the prediction cluster, and writes results to the
    database in batches.
    '''

    mydb = MySQLdb.connect(host=DB_HOST,
                           port=DB_PORT,
                           user=DB_USER,
                           passwd=DB_PASSWD,
                           db=DB_NAME)
    mycursor = mydb.cursor()

    count = 0
    val = []
    sql = "INSERT INTO images (url, process_date, flag) VALUES (%s, %s, %s)"

    while True:
        # block=True: wait for an item instead of failing immediately when the
        # queue is empty; timeout=5: queue.Empty is raised after 5 seconds
        try:
            url, image_bytes = q.get(block=True, timeout=5)
        except Exception:  # queue.Empty on timeout
            break
        if url == 'start':
            print('start processing at', time())
            continue
        if url == 'done':
            break

        input_img = preprocess_img(image_bytes)  # avoid shadowing the built-in input()
        payload = {"instances": [{'input_image': input_img.tolist()}]}
        r = requests.post(MODEL_SERVER, json=payload)
        flag = decode_response(r)
        process_date = datetime.datetime.today().strftime('%Y-%m-%d')
        val.append((url, process_date, flag))
        count += 1

        # save and commit to database after COMMIT_SIZE records are accumulated
        if count == COMMIT_SIZE:
            mycursor.executemany(sql, val)
            mydb.commit()
            val = []
            count = 0

    # save and commit the remaining records before closing connection
    if count > 0:
        mycursor.executemany(sql, val)
    mydb.commit()
    mycursor.close()
    mydb.close()
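The worker expects 'start' and 'done' sentinel items on the queue. A minimal sketch of how a pool of such workers might be driven; the producer function crawl_images is hypothetical:

from multiprocessing import Process, Queue

q = Queue()
workers = [Process(target=worker, args=(q,)) for _ in range(4)]
for w in workers:
    w.start()

q.put(('start', None))                    # sentinel marking the start of a run
for url, image_bytes in crawl_images():   # hypothetical producer
    q.put((url, image_bytes))
for _ in workers:
    q.put(('done', None))                 # one 'done' sentinel per worker
for w in workers:
    w.join()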
Example #6
def inference_image(model, logger, img=None, compare=True, record=True, dpi=500):
    # Load the default inference image lazily instead of evaluating
    # Image.open(inf_img_src) at function-definition time.
    if img is None:
        img = np.array(Image.open(inf_img_src).convert('RGB'))

    if compare:
        assert img.shape[1] == IMG_DIM * 2
        img, mask = split_img(img, IMG_DIM)

    orig_img = img.copy()
    img = preprocess_img(img)
    img = img.to(device)

    # Inference: 
    y_pred = model(img)
    y_pred = torch.argmax(y_pred, dim=1)
    y_pred = y_pred[0].cpu().detach().numpy()

    plt.figure(figsize=(IMG_DIM/dpi, IMG_DIM/dpi), dpi=dpi)
    plt.figimage(y_pred)
    plt.axis('off')
    buf = io.BytesIO()
    plt.savefig(buf, format='jpg', dpi=dpi)
    buf.seek(0)

    y_pred_out = Image.open(buf).resize((IMG_DIM, IMG_DIM), Image.LANCZOS).convert("RGB")
    y_pred_out = cv2.cvtColor(np.array(y_pred_out), cv2.COLOR_RGB2BGR)

    # compare 
    if compare:

        # Get GT
        cluster_model = get_clustering_model(logger)
        mask = cv2.resize(mask, (IMG_DIM, IMG_DIM), interpolation=cv2.INTER_AREA)
        class_map = cluster_model.predict(mask.reshape(-1, 3)).reshape(IMG_DIM, IMG_DIM)
        
        # IoU (binary: any non-background class counts as foreground)
        intersection = np.logical_and(class_map, y_pred)
        union = np.logical_or(class_map, y_pred)
        iou_score = np.sum(intersection) / np.sum(union)

        # Visualize
        class_map_out = cv2.putText(mask, 'GT, IoU: {0}'.format(round(iou_score, 3)), (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA) 
        y_pred_out = cv2.putText(y_pred_out, 'Prediction', (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA) 
        orig_img = cv2.putText(orig_img, 'Image', (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA)
        y_pred_out = np.concatenate((orig_img, y_pred_out, class_map_out), axis=1)

    # Record
    if record:
        cv2.imwrite(inf_out_img_src, y_pred_out)
        logger.info("(3) Inference Finished. Output image: {0}".format(inf_out_img_src))
    
    cv2.imshow("Output", y_pred_out)
    cv2.waitKey(0)  # imshow only renders once the GUI event loop runs

    return y_pred_out
Example #7
def transform_img(img_path, out_dir, img_size):
    img_np = preprocess_img(get_img(img_path, size=img_size))
    img_np = np.expand_dims(img_np, 0)

    # generator
    gen = get_module(img_np.shape, ctx)
    gen.load_params(args.checkpoint)

    data = mx.nd.array(img_np)
    gen.forward(mx.io.DataBatch([data], [0]), is_train=False)

    save_file = os.path.basename(os.path.normpath(img_path))
    save_output(gen, os.path.join(out_dir, save_file))
Example #8
def ENAS(train_data):
    clf = ak.ImageClassifier(verbose=True)
    fold = FLAGS.enas_fold
    kf = KFold(n_splits=fold, shuffle=True, random_state=100)
    for _, test_index in kf.split(train_data):
        debug_data = train_data.iloc[test_index]
        break
    print('train size', debug_data.shape[0])
    x_train = preprocess_img(debug_data['img'])

    category = debug_data['class_id'].unique()
    print('class size ', category.shape[0])
    category_dict = dict((category[i], i) for i in range(category.shape[0]))
    y_train = debug_data['class_id'].apply(lambda id: category_dict[id]).values
    clf.fit(x_train, y_train, time_limit=FLAGS.enas_time)
Example #9
    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        img = cv2.imread(img_path)

        # mirror img with a 50% chance
        if self.mirror:
            if random.random() > 0.5:
                img = img[:, ::-1, :]
        # resize
        img = cv2.resize(img, (self.size, self.size))

        # normalize
        img = preprocess_img(img)

        return torch.tensor(img.astype(np.float32))
Example #10
def get_model_compatible_input(gray_frame, face):
    img_arr = utils.align_face(gray_frame, face, desiredLeftEye)
    img_arr = utils.preprocess_img(img_arr, resize=False)

    landmarks = shape_predictor(
        gray_frame,
        face,
    )
    roi1, roi2 = utils.extract_roi1_roi2(gray_frame, landmarks)
    roi1 = np.expand_dims(roi1, 0)
    roi2 = np.expand_dims(roi2, 0)
    roi1 = roi1 / 255.
    roi2 = roi2 / 255.

    return [img_arr, roi1, roi2]
Example #11
def yield_from_dir(in_dir):
    files = get_imagenames(in_dir)
    for fn, fpath in enumerate(files):
        if not args.gray:
            # Open the image as an HxWxC BGR array
            img = cv2.imread(fpath)
            # convert to RGB and from HxWxC to CxHxW
            img = (cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).transpose(2, 0, 1)
        else:
            # grayscale image (HxW, C=1)
            img = cv2.imread(fpath, cv2.IMREAD_GRAYSCALE)

        img, expanded_h, expanded_w = preprocess_img(img,
                                                     expand_if_needed=False,
                                                     expand_axis0=False)
        yield fpath, img
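For reference, a minimal consumption sketch of this generator (a fuller batching loop that feeds the frames to a model appears in Example #23 below):

for fpath, img in yield_from_dir(args.read_path):
    # each item is (file path, image as returned by preprocess_img)
    print("loaded", fpath, img.shape)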
Example #12
    def __init__(self, config):
        """
        Initializes the model
        :param config: A model configuration object of type Config
        """
        self.config = config
        self.input_real, self.input_z = model_inputs(self.config.real_dim,
                                                     self.config.z_dim)

        G_model = generator(self.input_z)
        logits_real = discriminator(preprocess_img(self.input_real))
        logits_fake = discriminator(G_model, reuse=True)
        self.D_loss, self.G_loss = wgangp_loss(logits_real, logits_fake,
                                               self.config.batch_size,
                                               self.input_real, G_model)
        self.D_opt, self.G_opt = model_opt(self.D_loss, self.G_loss,
                                           self.config.lr, self.config.beta1)
Example #13
def haar_detector(frame):
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    face_frame = np.zeros(gray_frame.shape, dtype="uint8")

    offset = 15
    x_pos, y_pos = 10, 40

    faces = cascade_detector.detectMultiScale(gray_frame, 1.32, 5)
    for idx, face in enumerate(faces):
        if hist_eq:
            gray_frame = cv2.equalizeHist(gray_frame)

        img_arr = utils.align_face(gray_frame, utils.bb_to_rect(face),
                                   desiredLeftEye)
        face_frame = cv2.resize(img_arr, (48, 48),
                                interpolation=cv2.INTER_CUBIC)
        img_arr = utils.preprocess_img(img_arr, resize=False)

        predicted_proba = model.predict(img_arr)
        predicted_label = np.argmax(predicted_proba[0])

        x, y, w, h = face
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        text = f"Person {idx+1}: {label2text[predicted_label]}"
        utils.draw_text_with_backgroud(frame, text, x + 5, y, font_scale=0.4)

        text = f"Person {idx+1} :  "
        y_pos = y_pos + 2 * offset
        utils.draw_text_with_backgroud(frame,
                                       text,
                                       x_pos,
                                       y_pos,
                                       font_scale=0.3,
                                       box_coords_2=(2, -2))
        for k, v in label2text.items():
            text = f"{v}: {round(predicted_proba[0][k]*100, 3)}%"
            y_pos = y_pos + offset
            utils.draw_text_with_backgroud(frame,
                                           text,
                                           x_pos,
                                           y_pos,
                                           font_scale=0.3,
                                           box_coords_2=(2, -2))
    return frame, face_frame
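The snippet only defines the per-frame function; a minimal driver loop for a webcam stream (the loop itself is an assumption, not part of the original code):

import cv2

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    annotated, face_crop = haar_detector(frame)
    cv2.imshow("emotions", annotated)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()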
Example #14
def dlib_detector(frame_orig):
    frame = frame_orig.copy()
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    offset = 15
    x_pos, y_pos = 10, 40

    faces = hog_detector(gray_frame)
    for idx, face in enumerate(faces):
        if hist_eq:
            gray_frame = cv2.equalizeHist(gray_frame)

        img_arr = utils.align_face(gray_frame, face, desiredLeftEye)
        img_arr = utils.preprocess_img(img_arr, resize=False)

        predicted_proba = model.predict(img_arr)
        predicted_label = np.argmax(predicted_proba[0])

        x, y, w, h = rect_to_bb(face)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        text = f"Person {idx+1}: {label2text[predicted_label]}"
        utils.draw_text_with_backgroud(frame, text, x + 5, y, font_scale=0.4)

        text = f"Person {idx+1} :  "
        y_pos = y_pos + 2 * offset
        utils.draw_text_with_backgroud(frame,
                                       text,
                                       x_pos,
                                       y_pos,
                                       font_scale=0.3,
                                       box_coords_2=(2, -2))
        for k, v in label2text.items():
            text = f"{v}: {round(predicted_proba[0][k]*100, 3)}%"
            y_pos = y_pos + offset
            utils.draw_text_with_backgroud(frame,
                                           text,
                                           x_pos,
                                           y_pos,
                                           font_scale=0.3,
                                           box_coords_2=(2, -2))

    return frame
Example #15
def main():
    args = parse_args()
    raw_img = cv2.imread(args.input, 1)
    raw_img = cv2.resize(raw_img, (224, 224), interpolation=cv2.INTER_LINEAR)

    raw_img = np.float32(raw_img) / 255
    image, norm_image = preprocess_img(raw_img)
    model = models.__dict__[args.arch](pretrained=True).eval()
    model = model.cuda()

    gc = GradCAM(model, target_layer=args.target_layer)

    heatmap = gc(norm_image.cuda(), class_idx=args.cls_idx).cpu().data
    cam = show_cam(image, heatmap, args.output)

    if args.ins_del:
        blur = lambda x: gaussian_blur2d(x, kernel_size=(51, 51), sigma=(50., 50.))
        insertion = CausalMetric(model, 'ins', 224 * 2, substrate_fn=blur)
        deletion = CausalMetric(model, 'del', 224 * 2, substrate_fn=torch.zeros_like)
        out_video_path = './VIDEO'
        check_path_exist(out_video_path)

        ins_path = os.path.join(out_video_path, "ins")
        del_path = os.path.join(out_video_path, "del")
        check_path_exist(ins_path)
        check_path_exist(del_path)

        norm_image = norm_image.cpu()
        heatmap = heatmap.cpu().numpy()

        ins_score = insertion.evaluate(norm_image, mask=heatmap, cls_idx=None, save_to=ins_path)
        del_score = deletion.evaluate(norm_image, mask=heatmap, cls_idx=None, save_to=del_path)
        print("\nDeletion - {:.5f}\nInsertion - {:.5f}".format(auc(del_score), auc(ins_score)))

        # generate video
        video_ins = os.path.join(ins_path, args.input.split('/')[-1].split('.')[0] + '.avi')
        video_del = os.path.join(del_path, args.input.split('/')[-1].split('.')[0] + '.avi')
        cmd_str_ins = 'ffmpeg -f image2 -i {}/%06d.jpg -b 5000k -r 30 -c:v mpeg4 {} -y'.format(ins_path, video_ins)
        cmd_str_del = 'ffmpeg -f image2 -i {}/%06d.jpg -b 5000k -r 30 -c:v mpeg4 {} -y'.format(del_path, video_del)
        os.system(cmd_str_ins)
        os.system(cmd_str_del)
Example #16
    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        img = cv2.imread(img_path)

        # center crop
        h, w = img.shape[:2]
        min_side = min(h, w)
        top, bot = (h - min_side)//2, h - (h - min_side)//2
        left, right = (w - min_side) // 2, w - (w - min_side) // 2
        img = img[top:bot, left:right, :]

        # mirror img with a 50% chance
        if self.mirror:
            if random.random() > 0.5:
                img = img[:, ::-1, :]

        # resize
        img = cv2.resize(img, (self.size, self.size))

        # normalize
        img = preprocess_img(img)

        return torch.tensor(img.astype(np.float32))
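A minimal sketch of the Dataset class this method would belong to; the class name, constructor, __len__, and data directory are assumptions inferred from the attributes used above (img_paths, size, mirror):

import glob
from torch.utils.data import Dataset, DataLoader

class ImageFolderDataset(Dataset):
    def __init__(self, img_dir, size=256, mirror=True):
        self.img_paths = sorted(glob.glob(img_dir + "/*.jpg"))
        self.size = size
        self.mirror = mirror

    def __len__(self):
        return len(self.img_paths)

    # __getitem__ as defined in the example above

loader = DataLoader(ImageFolderDataset("data/train"), batch_size=16, shuffle=True)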
Example #17
 def DNN_DataSet(self, df):
     """
     Preprocess the 'img' column of df into model-ready input.
     """
     return preprocess_img(df['img'])
Example #18
def classify(image):
    model = get_model("efficientnet-b0")
    img = preprocess_img(image)
    return predict(model, img)
Example #19
def dnn_detector(frame):
    frame_height = frame.shape[0]
    frame_width = frame.shape[1]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), [104, 117, 123],
                                 False, False)

    net.setInput(blob)
    detections = net.forward()
    bboxes = []
    idx = 0
    offset = 15
    x_pos, y_pos = 10, 40

    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    face_frame = np.zeros(gray_frame.shape, dtype="uint8")

    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > conf_threshold:
            idx += 1
            x1 = int(detections[0, 0, i, 3] * frame_width)
            y1 = int(detections[0, 0, i, 4] * frame_height)
            x2 = int(detections[0, 0, i, 5] * frame_width)
            y2 = int(detections[0, 0, i, 6] * frame_height)
            bboxes.append([x1, y1, x2, y2])

            face = [x1, y1, x2 - x1, y2 - y1]

            if hist_eq:
                gray_frame = cv2.equalizeHist(gray_frame)

            img_arr = utils.align_face(gray_frame, utils.bb_to_rect(face),
                                       desiredLeftEye)
            face_frame = cv2.resize(img_arr, (48, 48),
                                    interpolation=cv2.INTER_CUBIC)
            img_arr = utils.preprocess_img(img_arr, resize=False)

            predicted_proba = model.predict(img_arr)
            predicted_label = np.argmax(predicted_proba[0])

            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
            text = f"Person {idx}: {label2text[predicted_label]}"
            utils.draw_text_with_backgroud(frame,
                                           text,
                                           x1 + 5,
                                           y1,
                                           font_scale=0.4)

            text = f"Person {idx} :  "
            y_pos = y_pos + 2 * offset
            utils.draw_text_with_backgroud(frame,
                                           text,
                                           x_pos,
                                           y_pos,
                                           font_scale=0.3,
                                           box_coords_2=(2, -2))
            for k, v in label2text.items():
                text = f"{v}: {round(predicted_proba[0][k]*100, 3)}%"
                y_pos = y_pos + offset
                utils.draw_text_with_backgroud(frame,
                                               text,
                                               x_pos,
                                               y_pos,
                                               font_scale=0.3,
                                               box_coords_2=(2, -2))
    return frame, face_frame
Example #20
def main(args):
    for arg in vars(args):
        print(arg, getattr(args, arg))

    model_name = args.model_name
    img_path = args.img_path
    img_label_path = 'imagenet.json'
    true_class = args.true_label
    adversarial_label = args.adv_label
    demo_epoch = args.epoch
    demo_eps = args.eps
    demo_lr = args.lr
    label_num = args.label_num
    lambda_up, lambda_down, lambda_label_loss = args.lambda_up, args.lambda_down, args.lambda_label_loss

    # load model
    sess, graph, img_size, images_v, logits = load_pretrain_model(model_name)
    probs = tf.nn.softmax(logits)
    print("sucessfully load model")

    if args.write_summary:
        unique_path_name = "up{}down{}ce{}epoch{}lr{}".format(
            args.lambda_up, args.lambda_down, args.lambda_label_loss,
            args.epoch, args.lr)
        final_summary_path = os.path.join(args.summary_path, unique_path_name)
        if not os.path.exists(final_summary_path):
            os.makedirs(final_summary_path)
        summary_writer = tf.summary.FileWriter(final_summary_path, graph)

    global_step = tf.Variable(0, name="global_step", trainable=False)
    step_init = tf.variables_initializer([global_step])

    y_hat = tf.placeholder(tf.int32, ())
    label_logits = tf.gather_nd(logits, [[0, y_hat]])

    img = PIL.Image.open(img_path)
    img = preprocess_img(img, img_size)
    batch_img = np.expand_dims(img, 0)
    imagenet_label = load_imagenet_label(img_label_path)

    # -------------------
    # Step 1: classify the image with original model
    p = sess.run(probs, feed_dict={images_v: batch_img})[0]
    predict_label = np.argmax(p)
    #classify(img, p, imagenet_label, correct_class=true_class, is_cluster=True)

    # -------------------
    # Step 2: Construct adversarial examples
    image_pl = tf.placeholder(tf.float32, (1, img_size, img_size, 3))
    assign_op = tf.assign(images_v, image_pl)
    learning_rate = tf.placeholder(tf.float32, ())
    var_eps = tf.placeholder(tf.float32, ())
    labels = tf.one_hot(y_hat, label_num)
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                   labels=labels)[0]

    projected = tf.clip_by_value(
        (tf.clip_by_value(images_v, image_pl - var_eps, image_pl + var_eps)),
        0, 1)
    with tf.control_dependencies([projected]):
        project_step = tf.assign(images_v, projected)

    # initialization step
    _ = sess.run([assign_op, step_init], feed_dict={image_pl: batch_img})

    # construct targeted attack
    # feed_dict_optim = {image_pl:batch_img,
    #                    y_hat:adversarial_label,
    #                    learning_rate:demo_lr}
    #
    # feed_dict_proj = {image_pl:batch_img,
    #                   var_eps:demo_eps}
    # optim_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, var_list=[images_v])
    # model_train(sess=sess,
    #             optim_step=optim_step,
    #             project_step=project_step,
    #             loss=loss,
    #             feed_dict_optim=feed_dict_optim,
    #             feed_dict_project=feed_dict_proj,
    #             epoch=10)
    #
    # adv_img = np.squeeze(images_v.eval(),0)
    # adv_prob = sess.run(probs,feed_dict={images_v:np.expand_dims(adv_img,0)})
    # classify(adv_img, adv_prob[0],imagenet_label,correct_class=281,target_class=adversarial_label)
    #
    # # show the saliency map
    # feed_dict_gradient = {y_hat:true_class}
    # _ = show_gradient_map(graph=graph,
    #                   sess=sess,
    #                   y=label_logits,
    #                   x=images_v,
    #                   img=img,
    #                   is_integrated=False,
    #                   is_smooth=False,
    #                   feed_dict=feed_dict_gradient)
    #---------------
    # use gradient descent to control the saliency map

    # original gradient intensity
    map3D, map_grey = show_gradient_map(graph=graph,
                                        sess=sess,
                                        y=label_logits,
                                        x=images_v,
                                        img=img,
                                        is_integrated=False,
                                        is_smooth=True,
                                        feed_dict={y_hat: true_class},
                                        is_cluster=args.is_cluster)

    center_more, radius_more = (100, 110), 10
    center_less, radius_less = (100, 70), 10
    gradient_more = calculate_region_importance(map_grey, center_more,
                                                radius_more)
    gradient_less = calculate_region_importance(map_grey, center_less,
                                                radius_less)
    print(
        "region 1 gradient intensity %.3f, region 2 gradient intensity %.3f" %
        (gradient_more, gradient_less))

    # construct new loss function
    grad_map = tf.gradients(label_logits, images_v)[0]
    to_down_gradient = calculate_img_region_importance(grad_map, center_more,
                                                       radius_more)
    to_up_gradient = calculate_img_region_importance(grad_map, center_less,
                                                     radius_less)
    grad_loss = -lambda_up * to_up_gradient + lambda_down * to_down_gradient
    final_loss = grad_loss + lambda_label_loss * loss
    if args.write_summary:
        up_gradient_summary = tf.summary.scalar("up_gradient", to_up_gradient)
        down_gradient_summary = tf.summary.scalar("down_gradient",
                                                  to_down_gradient)
        loss_summary = tf.summary.scalar("loss", loss)
        train_summary_op = tf.summary.merge_all()
    change_grad_optim_step = tf.train.GradientDescentOptimizer(
        learning_rate=demo_lr).minimize(final_loss,
                                        var_list=[images_v],
                                        global_step=global_step)
    for i in range(demo_epoch):
        if args.write_summary:
            _, _loss, step, summary_str = sess.run([
                change_grad_optim_step, final_loss, global_step,
                train_summary_op
            ],
                                                   feed_dict={
                                                       image_pl: batch_img,
                                                       y_hat: true_class,
                                                       learning_rate: demo_lr
                                                   })
            summary_writer.add_summary(summary_str, global_step=step)
        else:
            _, _loss, step = sess.run(
                [change_grad_optim_step, final_loss, global_step],
                feed_dict={
                    image_pl: batch_img,
                    y_hat: true_class,
                    learning_rate: demo_lr
                })

        sess.run([project_step],
                 feed_dict={
                     image_pl: batch_img,
                     var_eps: demo_eps
                 })
        print("%d loss = %g" % (i, _loss))
        if i % args.image_interval == 0:
            adv_img = np.squeeze(images_v.eval(), 0)
            # check the prediction result
            p_adv = sess.run(probs, feed_dict={images_v: batch_img})[0]
            predict_label_adv = np.argmax(p_adv)
            #classify(adv_img, p_adv, imagenet_label, correct_class=true_class,is_cluster=args.is_cluster)

            # check the gradient map
            map3D_adv, map_grey_adv = show_gradient_map(
                graph=graph,
                sess=sess,
                y=label_logits,
                x=images_v,
                img=adv_img,
                is_integrated=False,
                is_smooth=False,
                feed_dict={y_hat: true_class},
                is_cluster=args.is_cluster)

            adv_gradient_more = calculate_region_importance(
                map_grey_adv, center_more, radius_more)
            adv_gradient_less = calculate_region_importance(
                map_grey_adv, center_less, radius_less)

            if args.write_summary:
                map_grey_adv = tf.expand_dims(tf.expand_dims(map_grey_adv, 0),
                                              3)
                adv_map_sum = tf.summary.image(
                    'adv_map' + str(i), tf.convert_to_tensor(map_grey_adv))
                adv_str = sess.run(adv_map_sum)
                summary_writer.add_summary(adv_str)
            print(
                "Adversarial Case: predict label: %d, big region  gradient intensity: %.3f, small region gradient intensity: %.3f"
                % (predict_label_adv, adv_gradient_more, adv_gradient_less))
            print(
                "Normal Case: predict label: %d, big region gradient intensity: %.3f, small region gradient intensity: %.3f"
                % (predict_label, gradient_more, gradient_less))

    # write original map
    map_grey = tf.expand_dims(tf.expand_dims(map_grey, 0), 3)
    orig_map_sum = tf.summary.image('orig_map', tf.convert_to_tensor(map_grey))
    orig_str = sess.run(orig_map_sum)
    summary_writer.add_summary(orig_str)
Example #21
parser.add_argument('path',
                    type=str,
                    nargs=1,
                    help='path to pdf file',
                    metavar='--p')
args = parser.parse_args()
pdf_path = args.path[0]
doc = fitz.open(pdf_path)
clf = joblib.load('SVMcls.pkl')
for i, page in enumerate(doc):
    print('Converting page no. {} to image'.format(i + 1))
    zoom = 2  # zoom factor
    mat = fitz.Matrix(zoom, zoom)
    pixmap = page.getPixmap(matrix=mat)
    page_im = pix2np(pixmap)
    prep_im = preprocess_img(page_im)
    rects = get_bounding_rects(prep_im)
    bboxes = []
    print('Detected {} candidate segments.'.format(len(rects)))
    for rect in rects:
        x, y, w, h = rect
        crop = prep_im[y:y + h, x:x + w]
        hist = np.reshape(get_hist(crop), (1, -1))
        pred = clf.predict(hist)
        if pred == 1:
            bboxes.append((x, y, w, h))
    print('Found {} handsigns at page {}'.format(len(bboxes), i + 1))
    if bboxes:
        for bbox in bboxes:
            x, y, w, h = bbox
            cv2.rectangle(page_im, (x, y), (x + w, y + h), (0, 255, 0), 2)
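The annotated page image is drawn in memory but never written out in this snippet. A small hedged follow-up that could sit at the end of the per-page loop; the output filename is illustrative:

    # save the page with detected handsigns highlighted
    out_name = 'page_{}_annotated.png'.format(i + 1)
    cv2.imwrite(out_name, page_im)
    print('Saved annotated page to', out_name)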
Example #22
args = parse_arguments('')
model_name = args.model_name
img_path = args.img_path
img_label_path = 'imagenet.json'
true_class = args.true_label
adversarial_label = args.adv_label
label_num = args.label_num
lambda_up, lambda_down, lambda_label_loss = args.lambda_up, args.lambda_down, args.lambda_label_loss
sess, graph, img_size, images_pl, logits = load_pretrain_model(model_name,
                                                               is_explain=True)
y_label = tf.placeholder(dtype=tf.int32, shape=())
img_label = load_imagenet_label(img_label_path)

img = PIL.Image.open(img_path)
img = preprocess_img(img, img_size)
#new_img = np.load('big_vgg16_30_0.0001_1000_0.001_0.03_3000.npy') # 258
new_img = np.load('vgg16_60_70_35_45_30_0.0001_800_0.0_0.0_9000.npy')  # 208

batch_img = np.expand_dims(img, 0)
new_batch_img = np.expand_dims(new_img, 0)

true_class = 208
label_logits = logits[0, true_class]
gradient_saliency = saliency.GradientSaliency(graph, sess, label_logits,
                                              images_pl)  # 1951/1874

attributions = OrderedDict()
with DeepExplain(session=sess) as de:
    ori_attributions = {
        # Gradient-based
Example #23
        if frames_q:
            imgs = image2pipe.utils.yield_from_queue(frames_q)
        else:
            # Get ordered list of filenames
            print("\tOpen sequence in folder: ", args.read_path)
            imgs = yield_from_dir(args.read_path)

        seq_list = []
        seq_outnames = []

        for fn_or_fpath, img in imgs:
            if type(fn_or_fpath) is int:
                fpath = "%06d.png" % fn_or_fpath
                # from HxWxC to CxHxW, RGB image
                img = img.transpose(2, 0, 1)
                img, expanded_h, expanded_w = preprocess_img(
                    img, expand_if_needed=False, expand_axis0=False)
            else:
                fpath = fn_or_fpath

            print("Load img:", fpath, img.shape)

            seq_list.append(img)
            seq_outnames.append(os.path.basename(fpath))
            seq = np.stack(seq_list, axis=0)
            # return seq, expanded_h, expanded_w

            if len(seq_list) == NUM_IN_FR_EXT:
                print("Infer batch ...")

                seq = torch.from_numpy(seq).to(device)
                seq_time = time.time()
Example #24
    mod.symbol.save(model_save_path + '.json')


if __name__ == "__main__":
    parser = build_parser()
    args = parser.parse_args()
    check_opts(args)

    # init
    ctx = mx.gpu(args.gpu) if args.gpu >= 0 else mx.cpu()
    ctx = mx.cpu()  # NOTE: overrides the GPU selection above and forces CPU
    vgg_params = mx.nd.load(args.vgg_path)

    # init style
    print('load style image', args.style_image)
    style_np = preprocess_img(get_img(args.style_image))
    style_np = np.expand_dims(style_np, 0)
    dshape = style_np.shape

    style_exec = get_style_excutor(vgg_params, dshape, ctx)
    style_exec.data[:] = mx.nd.array(style_np)
    style_exec.executor.forward()
    style_array = [
        mx.nd.repeat(arr.copyto(ctx), axis=0, repeats=args.batch_size)
        for arr in style_exec.outputs
    ]
    del style_exec
    #
    TRAIN_SHAPE = (256, 256)
    dshape = (args.batch_size, 3, *TRAIN_SHAPE)
Example #25
def main(args):
    for arg in vars(args):
        print(arg, getattr(args, arg))

    model_name = args.model_name
    img_path = args.img_path
    img_label_path = 'imagenet.json'
    true_class = args.true_label
    adversarial_label = args.adv_label
    label_num = args.label_num
    lambda_up, lambda_down, lambda_label_loss = args.lambda_up, args.lambda_down, args.lambda_label_loss

    # model_name = 'inception_v3'
    # img_path = './picture/dog_cat.jpg'
    # img_label_path = 'imagenet.json'
    # true_class = 208
    sess, graph, img_size, images_pl, logits = load_pretrain_model(
        model_name, is_explain=True)
    y_label = tf.placeholder(dtype=tf.int32, shape=())
    label_logits = logits[0, y_label]

    if len(args.imp) > 0:
        img = np.load(args.imp)
        init_epoch = int(args.imp[:-4].split('_')[-1])
        loss_list = list(np.load('loss_' + args.imp))
    else:
        img = PIL.Image.open(img_path)
        img = preprocess_img(img, img_size)
        init_epoch = 0
        loss_list = []

    old_img = np.array(img)
    batch_img = np.expand_dims(img, 0)

    #new_img = np.load('vgg16_30_0.0004_1000_0.001_0.03_4000.npy')
    #new_batch_img = np.concatenate((np.expand_dims(new_img,0),batch_img),axis=0)
    #new_batch_img = np.expand_dims(new_img,0)
    #all_img = np.concatenate((batch_img,new_batch_img))
    imagenet_label = load_imagenet_label(img_label_path)
    prob = tf.nn.softmax(logits)
    _prob = sess.run(prob, feed_dict={images_pl: batch_img})[0]
    #classify(img,_prob,imagenet_label,1,1)

    ####
    #deep explain
    # from deepexplain.tensorflow import DeepExplain
    # label_logits = logits[0,208]
    # with DeepExplain(session=sess) as de:
    #     attributions = {
    #         # Gradient-based
    #         # NOTE: reduce_max is used to select the output unit for the class predicted by the classifier
    #         # For an example of how to use the ground-truth labels instead, see mnist_cnn_keras notebook
    #         'Saliency maps': de.explain('saliency', label_logits, images_pl, batch_img),
    #         'Gradient * Input': de.explain('grad*input', label_logits, images_pl, batch_img),
    #         # 'Integrated Gradients': de.explain('intgrad', label_logits, images_pl, new_batch_img),
    #         'Epsilon-LRP': de.explain('elrp', label_logits, images_pl, batch_img),
    #         'DeepLIFT (Rescale)': de.explain('deeplift', label_logits, images_pl, batch_img),
    #         # Perturbation-based (comment out to evaluate, but this will take a while!)
    #         #'Occlusion [15x15]':    de.explain('occlusion', label_logits, images_pl, batch_img, window_shape=(15,15,3), step=4)
    #     }    ####
    #     new_attributions = {
    #         # Gradient-based
    #         # NOTE: reduce_max is used to select the output unit for the class predicted by the classifier
    #         # For an example of how to use the ground-truth labels instead, see mnist_cnn_keras notebook
    #         'Saliency maps': de.explain('saliency', label_logits, images_pl, new_batch_img),
    #         'Gradient * Input': de.explain('grad*input', label_logits, images_pl, new_batch_img),
    #         # 'Integrated Gradients': de.explain('intgrad', label_logits, images_pl, new_batch_img),
    #         'Epsilon-LRP': de.explain('elrp', label_logits, images_pl, new_batch_img),
    #         'DeepLIFT (Rescale)': de.explain('deeplift', label_logits, images_pl, new_batch_img),
    #         # Perturbation-based (comment out to evaluate, but this will take a while!)
    #         #'Occlusion [15x15]':    de.explain('occlusion', label_logits, images_pl, batch_img, window_shape=(15,15,3), step=4)
    #     }    ####
    #     attributions['Saliency maps'] = np.concatenate((attributions['Saliency maps'],new_attributions['Saliency maps']),axis=0)
    #     attributions['Gradient * Input'] = np.concatenate((attributions['Gradient * Input'],new_attributions['Gradient * Input']),axis=0)
    #     attributions['Epsilon-LRP'] = np.concatenate((attributions['Epsilon-LRP'],new_attributions['Epsilon-LRP']),axis=0)
    #     attributions['DeepLIFT (Rescale)'] = np.concatenate((attributions['DeepLIFT (Rescale)'],new_attributions['DeepLIFT (Rescale)']),axis=0)
    #
    # n_cols = int(len(attributions)) + 1
    # n_rows = 2
    # fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(3 * n_cols, 3 * n_rows))
    #
    # for i, xi in enumerate(all_img):
    #     # xi = (xi - np.min(xi))
    #     # xi /= np.max(xi)
    #     ax = axes.flatten()[i * n_cols]
    #     ax.imshow(xi)
    #     ax.set_title('Original')
    #     ax.axis('off')
    #     for j, a in enumerate(attributions):
    #         axj = axes.flatten()[i * n_cols + j + 1]
    #         plot(attributions[a][i], xi=xi, axis=axj, dilation=.5, percentile=99, alpha=.2).set_title(a)
    ######
    label_logits = logits[0, 208]
    with DeepExplain(session=sess) as de:
        dlift = de.explain('deeplift', label_logits, images_pl, batch_img)

    grad_map_tensor = tf.gradients(label_logits, images_pl)[0]
    grad_map = sess.run(grad_map_tensor,
                        feed_dict={
                            images_pl: np.expand_dims(img, 0),
                            y_label: true_class
                        })

    gradient_saliency = saliency.GradientSaliency(graph, sess, label_logits,
                                                  images_pl)  # 1951/1874
    vanilla_mask_3d = gradient_saliency.GetMask(
        img, feed_dict={y_label: true_class})  # better
    vanilla_mask_grayscale = saliency.VisualizeImageGrayscale(vanilla_mask_3d)

    # smoothgrad_mask_3d = gradient_saliency.GetSmoothedMask(img, feed_dict={y_label:true_class}) # much clear, 2204/2192
    # smoothgrad_mask_grayscale = saliency.VisualizeImageGrayscale(smoothgrad_mask_3d)

    #
    # new_img = np.load('vgg16_60_70_35_45_30_0.0001_800_0.0_0.0_9000.npy')
    # new_grad_map = sess.run(grad_map_tensor,feed_dict={images_pl:np.expand_dims(new_img,0),y_label:true_class})
    # new_vanilla_mask_3d = gradient_saliency.GetMask(new_img, feed_dict={y_label:true_class}) # better
    # new_vanilla_mask_grayscale = saliency.VisualizeImageGrayscale(new_vanilla_mask_3d)
    # new_smoothgrad_mask_3d = gradient_saliency.GetSmoothedMask(new_img, feed_dict={y_label:true_class}) # much clear, 2204/2192
    # new_smoothgrad_mask_grayscale = saliency.VisualizeImageGrayscale(new_smoothgrad_mask_3d)

    #to_dec_center = (60,70)
    to_dec_center = (100, 65)
    #to_dec_radius = (35,45)
    to_dec_radius = (80, 60)
    to_inc_center = (120, 170)
    to_inc_radius = (40, 30)
    _map = vanilla_mask_grayscale
    print(calculate_region_importance(_map, to_dec_center, to_dec_radius))
    print(calculate_region_importance(_map, to_inc_center, to_inc_radius))

    # construct to_inc_region and to_dec_region
    to_dec_region = calculate_img_region_importance(grad_map_tensor,
                                                    to_dec_center,
                                                    to_dec_radius)
    to_inc_region = calculate_img_region_importance(grad_map_tensor,
                                                    to_inc_center,
                                                    to_inc_radius)

    # try NES (Natural evolutionary strategies)
    N = args.N
    sigma = args.sigma
    epsilon = round(args.eps, 2)
    epoch = args.epoch
    eta = args.lr
    #loss = to_dec_region/to_inc_region
    #old_loss = sess.run(loss,feed_dict={images_pl: np.expand_dims(img, 0), y_label: true_class})
    old_loss = calculate_deeplift_loss(dlift, to_dec_center, to_dec_radius,
                                       to_inc_center, to_inc_radius)
    num_list = '_'.join([
        'big', model_name,
        str(N),
        str(eta),
        str(epoch),
        str(sigma),
        str(epsilon)
    ])
    print(num_list)
    for i in range(epoch):
        delta = np.random.randn(int(N / 2), img_size * img_size * 3)
        delta = np.concatenate((delta, -delta), axis=0)
        grad_sum = 0
        f_value_list = []
        for idelta in delta:
            img_plus = np.clip(
                img + sigma * idelta.reshape(img_size, img_size, 3), 0, 1)
            #f_value = sess.run(loss,feed_dict={images_pl:np.expand_dims(img_plus,0),y_label:true_class})
            with DeepExplain(session=sess) as de:
                dlift = de.explain('deeplift', label_logits, images_pl,
                                   np.expand_dims(img_plus, 0))
            f_value = calculate_deeplift_loss(dlift, to_dec_center,
                                              to_dec_radius, to_inc_center,
                                              to_inc_radius)
            f_value_list.append(f_value)
            grad_sum += f_value * idelta.reshape(img_size, img_size, 3)
        grad_sum = grad_sum / (N * sigma)
        new_img = np.clip(
            np.clip(img - eta * grad_sum, old_img - epsilon,
                    old_img + epsilon), 0, 1)
        #new_loss, new_logits = sess.run([loss, logits],
        #                                feed_dict={images_pl: np.expand_dims(new_img, 0), y_label: true_class})
        with DeepExplain(session=sess) as de:
            dlift = de.explain('deeplift', label_logits, images_pl,
                               np.expand_dims(new_img, 0))
        new_loss = calculate_deeplift_loss(dlift, to_dec_center, to_dec_radius,
                                           to_inc_center, to_inc_radius)

        loss_list.append(new_loss)
        print("epoch:{} new:{}, old:{}, {}".format(i, new_loss, old_loss,
                                                   np.argmax(_prob)))
        sys.stdout.flush()
        img = np.array(new_img)
        if i % args.image_interval == 0:
            temp_name = num_list + '_' + str(i + init_epoch)
            np.save(temp_name, new_img)
            np.save('loss_' + temp_name, loss_list)
    np.save(num_list + '_' + str(epoch + init_epoch), new_img)
    np.save('loss_' + num_list + '_' + str(epoch + init_epoch), loss_list)
Example #26
mask_imgs = np.array(mask_imgs)
mask_dilated_imgs = np.array(mask_dilated_imgs)
naive_imgs = np.array(naive_imgs)

# particular case -

style_img = style_imgs[file_index]
naive_img_o = naive_imgs[file_index]
mask_img = mask_imgs[file_index]
mask_dilated_img = mask_dilated_imgs[file_index]

mask_img = mask_dilated_img / 255.0  # note: the dilated mask replaces mask_img loaded above
mask_img = np.expand_dims(mask_img, axis=0)
mask_img = tf.cast(mask_img, tf.float32)

naive_img = K.variable(utils.preprocess_img(naive_img_o))
style_img = K.variable(utils.preprocess_img(style_img))
img_rows, img_cols = naive_img.shape[1], naive_img.shape[2]

fusion_img = K.placeholder((1, img_rows, img_cols, 3))

# combine the 3 images into a single Keras tensor
input_tensor = K.concatenate([naive_img, style_img, fusion_img], axis=0)

# build the vgg16 network with our 3 images as input
# the model will be loaded with pre-trained ImageNet weights
model = VGG16(input_tensor=input_tensor, weights='imagenet', include_top=False)
print('Model loaded.')
# get the symbolic outputs of each "key" layer (we gave them unique names).
outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
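outputs_dict exposes each layer's feature maps for the three stacked images (index 0: naive_img, 1: style_img, 2: fusion_img). A minimal sketch of how a style term is typically built from one such layer via Gram matrices; the layer choice and loss form follow the standard neural-style formulation and are assumptions here:

def gram_matrix(x):
    # x: (rows, cols, channels) feature map of a single image at one layer
    features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    return K.dot(features, K.transpose(features))

layer_features = outputs_dict['block4_conv2']   # shape (3, r, c, channels)
style_features = layer_features[1, :, :, :]     # style image features
fusion_features = layer_features[2, :, :, :]    # fusion (generated) image features
style_loss = K.sum(K.square(gram_matrix(style_features) -
                            gram_matrix(fusion_features)))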
Example #27
def run_gan(dataset, discriminator, generator, num_epoch=10):
    """Helper function for training GANs"""
    tf.reset_default_graph()

    # number of images for each batch
    batch_size = 128
    # noise dimension
    noise_dim = 96

    # shape of train images
    img_shape = list(dataset[0].shape)
    height = img_shape[0]
    width = img_shape[1]
    channels = img_shape[2]

    # check image shape
    assert height == 32, 'Error: image height should be 32'
    assert width == 32, 'Error: image width should be 32'

    # placeholder for images from the training dataset
    placeholder_size = [None] + img_shape
    x = tf.placeholder(tf.float32, placeholder_size)
    # random noise fed into our generator
    z = sample_noise(batch_size, noise_dim)
    # generated images
    G_sample = generator(z, channels)

    with tf.variable_scope('') as scope:
        img_preproc = preprocess_img(x)
        logits_real = discriminator(img_preproc)
        # Re-use discriminator weights on new inputs
        scope.reuse_variables()
        logits_fake = discriminator(G_sample)

    # get solvers
    D_solver, G_solver = get_solvers()

    # get discriminator and generator loss
    D_loss, G_loss = gan_loss(logits_real, logits_fake)

    # Get the list of variables for the discriminator and generator
    D_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                               'discriminator')
    G_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'generator')

    # setup training steps
    D_train_step = D_solver.minimize(D_loss, var_list=D_vars)
    G_train_step = G_solver.minimize(G_loss, var_list=G_vars)

    with get_session() as sess:
        sess.run(tf.global_variables_initializer())
        train_gan(sess,
                  G_train_step,
                  G_loss,
                  G_sample,
                  D_train_step,
                  D_loss,
                  x,
                  dataset,
                  batch_size=batch_size,
                  num_epoch=num_epoch)
Example #28
import utils
import cv2
import os
import json
import constant

if __name__ == "__main__":  
    imgurl = 'input\\13.png'
    img = cv2.imread(imgurl)
    img = utils.preprocess_img(img)

    # get bounding box of all rectangles
    bounding_boxes = utils.box_extraction(img)
    print(f'[INFO] bounding_boxes: {len(bounding_boxes)}')

    # get invoice data
    data = utils.get_invoice_data(bounding_boxes, img)

    # write output to json
    base = os.path.basename(imgurl)
    output = constant.OUTPUT_PATH + os.path.splitext(base)[0] + '.json'
    with open(output, 'w+') as fp:
        json.dump(data, fp, indent=4)
    print(f'[INFO] exported to {output}')
Example #29
def main():
    args = parse_args()
    raw_img = cv2.imread(args.input, 1)
    raw_img = cv2.resize(raw_img, (224, 224), interpolation=cv2.INTER_LINEAR)

    raw_img = np.float32(raw_img) / 255
    image, norm_image = preprocess_img(raw_img)
    model = models.__dict__[args.arch](pretrained=True).eval()
    model = model.cuda()

    rise = RISE(model, input_size=(224, 224), batch_size=40)
    rise.generate_masks()
    gd = GradCAM(model, target_layer=args.target_layer)
    gc = GroupCAM(model, target_layer=args.target_layer)

    rise_heatmap = rise(norm_image.cuda(), class_idx=args.cls_idx).cpu().data
    gd_heatmap = gd(norm_image.cuda(), class_idx=args.cls_idx).cpu().data
    gc_heatmap = gc(norm_image.cuda(), class_idx=args.cls_idx).cpu().data

    if args.output is not None:
        rise_cam = show_cam(image, rise_heatmap, "rise_base.png")
        gd_cam = show_cam(image, gd_heatmap, "gd_base.png")
        gc_cam = show_cam(image, gc_heatmap, "gc_base.png")

    if args.ins_del:
        blur = lambda x: gaussian_blur2d(
            x, kernel_size=(51, 51), sigma=(50., 50.))
        insertion = CausalMetric(model, 'ins', 224 * 2, substrate_fn=blur)
        deletion = CausalMetric(model,
                                'del',
                                224 * 2,
                                substrate_fn=torch.zeros_like)

        norm_image = norm_image.cpu()
        gd_heatmap = gd_heatmap.cpu().numpy()
        gc_heatmap = gc_heatmap.cpu().numpy()
        rise_heatmap = rise_heatmap.cpu().numpy()

        gc_ins_score = insertion.evaluate(norm_image,
                                          mask=gc_heatmap,
                                          cls_idx=None)
        gd_ins_score = insertion.evaluate(norm_image,
                                          mask=gd_heatmap,
                                          cls_idx=None)
        rise_ins_score = insertion.evaluate(norm_image,
                                            mask=rise_heatmap,
                                            cls_idx=None)

        gc_del_score = deletion.evaluate(norm_image,
                                         mask=gc_heatmap,
                                         cls_idx=None)
        gd_del_score = deletion.evaluate(norm_image,
                                         mask=gd_heatmap,
                                         cls_idx=None)
        rise_del_score = deletion.evaluate(norm_image,
                                           mask=rise_heatmap,
                                           cls_idx=None)

        legend = ["RISE", "Grad-CAM", "Group-CAM"]
        ins_scores = [
            auc(rise_ins_score),
            auc(gd_ins_score),
            auc(gc_ins_score)
        ]
        del_scores = [
            auc(rise_del_score),
            auc(gd_del_score),
            auc(gc_del_score)
        ]
        ins_scores = [round(i * 100, 2) for i in ins_scores]
        del_scores = [round(i * 100, 2) for i in del_scores]
        ins_legend = [i + ": " + str(j) for i, j in zip(legend, ins_scores)]
        del_legend = [i + ": " + str(j) for i, j in zip(legend, del_scores)]

        n_steps = len(gd_ins_score)

        x = np.arange(n_steps) / n_steps
        plt.figure(figsize=(12, 5))

        # set the axis limits on each subplot (setting them before
        # plt.subplot() would apply them to a throwaway axes)
        plt.subplot(121)
        plt.xlim(-0.1, 1.1)
        plt.ylim(0, 1.05)
        plt.plot(x, rise_ins_score)
        plt.plot(x, gd_ins_score)
        plt.plot(x, gc_ins_score)
        plt.xticks(fontsize=15)
        plt.yticks(fontsize=15)
        plt.legend(ins_legend, loc='best', fontsize=15)
        plt.title("Insertion Curve", fontsize=15)

        plt.subplot(122)
        plt.xlim(-0.1, 1.1)
        plt.ylim(0, 1.05)
        plt.plot(x, rise_del_score)
        plt.plot(x, gd_del_score)
        plt.plot(x, gc_del_score)
        plt.xticks(fontsize=15)
        plt.yticks(fontsize=15)
        plt.legend(del_legend, loc='best', fontsize=15)
        plt.title("Deletion Curve", fontsize=15)
        plt.show()
Example #30
 def screen(self):
     return (utils.preprocess_img(self._previous_screen, self._screen))