def telemetry(sid, data):
    if data:
        # The current steering angle of the car
        steering_angle = data["steering_angle"]
        # The current throttle of the car
        throttle = data["throttle"]
        # The current speed of the car
        speed = data["speed"]
        # The current image from the center camera of the car
        imgString = data["image"]
        image = Image.open(BytesIO(base64.b64decode(imgString)))
        image_array = preprocess_img(np.asarray(image))
        steering_angle = float(
            model.predict(image_array[None, :, :, :], batch_size=1))

        min_speed = 8
        max_speed = 10
        if float(speed) < min_speed:
            throttle = 1.0
        elif float(speed) > max_speed:
            throttle = -1.0
        else:
            throttle = 0.1

        print(steering_angle, throttle)
        send_control(steering_angle, throttle)

        # save frame
        if args.image_folder != '':
            timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
            image_filename = os.path.join(args.image_folder, timestamp)
            image.save('{}.jpg'.format(image_filename))
    else:
        # NOTE: DON'T EDIT THIS.
        sio.emit('manual', data={}, skip_sid=True)
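# A minimal sketch of how a telemetry handler like the one above is usually
# registered and served (the python-socketio + eventlet pattern used with the
# Udacity simulator). The port number and the send_control body are
# assumptions for illustration, not taken from the snippet above.
import socketio
import eventlet
from flask import Flask

sio = socketio.Server()
app = Flask(__name__)


def send_control(steering_angle, throttle):
    # Send the predicted controls back to the simulator.
    sio.emit('steer',
             data={'steering_angle': str(steering_angle),
                   'throttle': str(throttle)},
             skip_sid=True)


# sio.on('telemetry')(telemetry)            # register the handler above
# app = socketio.WSGIApp(sio, app)          # socketio.Middleware on old releases
# eventlet.wsgi.server(eventlet.listen(('', 4567)), app)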
def run(self, img):
    '''yolov5 trt inference func
    :param img: np img
    :return: dst_list: [(x1, y1, x2, y2, label, conf), ...]
    '''
    dst_list = []
    # pre process
    resize_img = preprocess_img(img)
    resize_img = torch.from_numpy(resize_img).to(self.device)
    # inference
    output = self.model(resize_img)
    # post process
    pred = self.post_process(output)
    for i, det in enumerate(pred):  # detections per image
        if det is not None and len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(resize_img.shape[2:], det[:, :4], img.shape).round()
            for *xyxy, conf, cls in reversed(det):
                if float('%.2f' % conf) > self.conf_thresh:
                    x1, y1, x2, y2 = int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3])
                    # label is self.names[int(cls)], score is conf
                    dst_list.append((x1, y1, x2, y2, self.names[int(cls)],
                                     float('%.2f' % conf)))
    return dst_list
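# Hypothetical usage of run() above. `Yolov5Trt` is a stand-in name for the
# owning class, which is assumed to set self.model, self.device, self.names,
# self.conf_thresh and self.post_process in its __init__; the engine path is
# illustrative only.
import cv2

# detector = Yolov5Trt(engine_path='yolov5s.trt', conf_thresh=0.25)
# img = cv2.imread('test.jpg')
# for x1, y1, x2, y2, label, conf in detector.run(img):
#     cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
#     cv2.putText(img, f'{label} {conf:.2f}', (x1, y1 - 5),
#                 cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)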
def run(self, img):
    resize_img = preprocess_img(img, self.scale_factor)
    output = self.trt.run(resize_img)
    full_mask = self.post_process(output, img.size)
    # full_mask = full_mask > self.conf_thresh
    mask_image = self.mask_to_image(full_mask)
    return mask_image
def test_ten_point(render_ground_truth=False, render_reconstruction=False):
    """
    A simple test to check if we can recover the 3D positions of 10 known 3D
    points and camera parameters given two images of the points where the
    correspondences are known to be correct.
    """
    points, colors = get_points()  # get some known 3D points, each with a color
    camera_params, focal_x, focal_y, rows, cols = get_cameras()  # get some known cameras

    # project the 3d points into each camera
    cam_1_points2d = project(points, camera_params[np.asarray([0 for _ in points])], focal_x, focal_y)
    cam_2_points2d = project(points, camera_params[np.asarray([1 for _ in points])], focal_x, focal_y)

    # draw the projected points in the camera images
    cam_1_img = utils.draw_points2d(cam_1_points2d, colors, rows, cols, show=False)
    cam_2_img = utils.draw_points2d(cam_2_points2d, colors, rows, cols, show=False)

    # find correspondences between the two images
    kp1, kp2, n_kp1, n_kp2 = matcher.find_matching_points_mock(utils.preprocess_img(cam_1_img),
                                                               utils.preprocess_img(cam_2_img))
    assert len(kp1) == len(n_kp1) == len(kp2) == len(n_kp2) == len(points)

    # keep track of which correspondence maps to which color
    kp_to_color = {i: cam_1_img[kp[1], kp[0]] for i, kp in enumerate(kp1)}

    if render_ground_truth:
        # show the ground truth geometry
        render_pts_and_cams(points, colors, camera_params[:, 3:], camera_params[:, :3],
                            focal_x, use_spheres=True)

    # run the solver with the correspondences to generate a reconstruction
    camera_kps = np.stack([n_kp1, n_kp2], axis=0)
    camera_params, points_3d, camera_indices, point_indices, points_2d, focal_length = \
        solver.get_solver_params(camera_kps)
    recon_camera_params, recon_3d_points, recon_focal_length, _ = solver.run_solver(
        camera_params, points_3d, camera_indices, point_indices, points_2d,
        focal_length, toss_outliers=False)
    recon_colors = [kp_to_color[i] for i in range(len(points_3d))]

    if render_reconstruction:
        render_pts_and_cams(recon_3d_points, recon_colors, recon_camera_params[:, 3:],
                            recon_camera_params[:, :3], recon_focal_length, use_spheres=True)

    check_image_match(recon_3d_points, recon_camera_params, recon_focal_length, recon_colors,
                      points, camera_params, focal_x, colors, rows, cols)
def worker(q):
    '''
    q: a multiprocessing.Queue object. Each item in the queue contains a
    tuple of (url, image_bytes).

    A worker continuously takes images from the work queue, preprocesses each
    image, sends it to the prediction cluster, and writes results to the
    database in batches.
    '''
    mydb = MySQLdb.connect(host=DB_HOST, port=DB_PORT, user=DB_USER,
                           passwd=DB_PASSWD, db=DB_NAME)
    mycursor = mydb.cursor()
    count = 0
    val = []
    sql = "INSERT INTO images (url, process_date, flag) VALUES (%s, %s, %s)"

    while True:
        # block=True: no exception is thrown while the queue is merely empty;
        # timeout=5: a timeout exception is thrown after 5 seconds
        try:
            url, image_bytes = q.get(block=True, timeout=5)
        except:
            break
        if url == 'start':
            print('start processing at', time())
            continue
        if url == 'done':
            break
        input = preprocess_img(image_bytes)
        payload = {"instances": [{'input_image': input.tolist()}]}
        r = requests.post(MODEL_SERVER, json=payload)
        flag = decode_response(r)
        process_date = datetime.datetime.today().strftime('%Y-%m-%d')
        val.append((url, process_date, flag))
        count += 1
        # save and commit to database after COMMIT_SIZE records are accumulated
        if count == COMMIT_SIZE:
            mycursor.executemany(sql, val)
            mydb.commit()
            val = []
            count = 0

    # save and commit the remaining records before closing the connection
    if count > 0:
        mycursor.executemany(sql, val)
        mydb.commit()
    mycursor.close()
    mydb.close()
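# A minimal sketch of how a pool of these workers might be launched, assuming
# the constants used above (DB_HOST, COMMIT_SIZE, MODEL_SERVER, ...) are
# already defined. The 'start'/'done' sentinels match what worker() checks.
import multiprocessing as mp


def start_workers(items, n_workers=4):
    q = mp.Queue()
    q.put(('start', None))
    for url, image_bytes in items:
        q.put((url, image_bytes))
    for _ in range(n_workers):
        q.put(('done', None))  # one shutdown sentinel per worker
    procs = [mp.Process(target=worker, args=(q,)) for _ in range(n_workers)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()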
def inference_image(model, logger, img=np.array(Image.open(inf_img_src).convert('RGB')),
                    compare=True, record=True, dpi=500):
    # NOTE: the default value for `img` is evaluated once, at function
    # definition time, so inf_img_src is read when the module is loaded.
    if compare:
        assert img.shape[1] == IMG_DIM * 2
        img, mask = split_img(img, IMG_DIM)
    orig_img = img.copy()
    img = preprocess_img(img)
    img = img.to(device)

    # Inference:
    y_pred = model(img)
    y_pred = torch.argmax(y_pred, dim=1)
    y_pred = y_pred[0].cpu().detach().numpy()

    plt.figure(figsize=(IMG_DIM / dpi, IMG_DIM / dpi), dpi=dpi)
    plt.figimage(y_pred)
    plt.axis('off')
    buf = io.BytesIO()
    plt.savefig(buf, format='jpg', dpi=dpi)
    buf.seek(0)
    y_pred_out = Image.open(buf).resize((IMG_DIM, IMG_DIM), Image.LANCZOS).convert("RGB")
    y_pred_out = cv2.cvtColor(np.array(y_pred_out), cv2.COLOR_RGB2BGR)

    # compare
    if compare:
        # Get GT
        cluster_model = get_clustering_model(logger)
        mask = cv2.resize(mask, (IMG_DIM, IMG_DIM), interpolation=cv2.INTER_AREA)
        class_map = cluster_model.predict(mask.reshape(-1, 3)).reshape(IMG_DIM, IMG_DIM)

        # IoU
        intersection = np.logical_and(class_map, y_pred)
        union = np.logical_or(class_map, y_pred)
        iou_score = np.sum(intersection) / np.sum(union)

        # Visualize
        class_map_out = cv2.putText(mask, 'GT, IoU: {0}'.format(round(iou_score, 3)), (20, 20),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA)
        y_pred_out = cv2.putText(y_pred_out, 'Prediction', (20, 20),
                                 cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA)
        orig_img = cv2.putText(orig_img, 'Image', (20, 20),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA)
        y_pred_out = np.concatenate((orig_img, y_pred_out, class_map_out), axis=1)

    # Record
    if record:
        cv2.imwrite(inf_out_img_src, y_pred_out)
        logger.info("(3) Inference Finished. Output image: {0}".format(inf_out_img_src))
    cv2.imshow("Output", y_pred_out)
    return y_pred_out
def transform_img(img_path, out_dir, img_size):
    img_np = preprocess_img(get_img(img_path, size=img_size))
    img_np = np.expand_dims(img_np, 0)

    # generator
    gen = get_module(img_np.shape, ctx)
    gen.load_params(args.checkpoint)

    data = mx.nd.array(img_np)
    gen.forward(mx.io.DataBatch([data], [0]), is_train=False)

    save_file = os.path.basename(os.path.normpath(img_path))
    save_output(gen, os.path.join(out_dir, save_file))
def ENAS(train_data):
    clf = ak.ImageClassifier(verbose=True)
    fold = FLAGS.enas_fold
    kf = KFold(n_splits=fold, shuffle=True, random_state=100)
    for _, test_index in kf.split(train_data):
        debug_data = train_data.iloc[test_index]
        break
    print('train size', debug_data.shape[0])
    x_train = preprocess_img(debug_data['img'])
    category = debug_data['class_id'].unique()
    print('class size ', category.shape[0])
    category_dict = dict((category[i], i) for i in range(category.shape[0]))
    y_train = debug_data['class_id'].apply(lambda id: category_dict[id]).values
    clf.fit(x_train, y_train, time_limit=FLAGS.enas_time)
def __getitem__(self, idx):
    img_path = self.img_paths[idx]
    img = cv2.imread(img_path)

    # mirror img with a 50% chance
    if self.mirror:
        if random.random() > 0.5:
            img = img[:, ::-1, :]

    # resize
    img = cv2.resize(img, (self.size, self.size))

    # normalize
    img = preprocess_img(img)
    return torch.tensor(img.astype(np.float32))
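# Hedged usage sketch for a Dataset built around the __getitem__ above. The
# class name ImageDataset and its constructor arguments are assumptions
# standing in for whatever the surrounding project defines.
from torch.utils.data import DataLoader

# dataset = ImageDataset(img_paths=glob.glob('data/*.jpg'), size=128, mirror=True)
# loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)
# for batch in loader:
#     pass  # batch: float32 tensor of preprocessed images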
def get_model_compatible_input(gray_frame, face):
    img_arr = utils.align_face(gray_frame, face, desiredLeftEye)
    img_arr = utils.preprocess_img(img_arr, resize=False)
    landmarks = shape_predictor(gray_frame, face)
    roi1, roi2 = utils.extract_roi1_roi2(gray_frame, landmarks)
    roi1 = np.expand_dims(roi1, 0)
    roi2 = np.expand_dims(roi2, 0)
    roi1 = roi1 / 255.
    roi2 = roi2 / 255.
    return [img_arr, roi1, roi2]
def yield_from_dir(in_dir):
    files = get_imagenames(in_dir)
    for fn, fpath in enumerate(files):
        if not args.gray:
            # Open image as a CxHxW torch.Tensor
            img = cv2.imread(fpath)
            # from HxWxC to CxHxW, RGB image
            img = (cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).transpose(2, 0, 1)
        else:
            # from HxWxC to CxHxW grayscale image (C=1)
            img = cv2.imread(fpath, cv2.IMREAD_GRAYSCALE)
        img, expanded_h, expanded_w = preprocess_img(img, expand_if_needed=False,
                                                     expand_axis0=False)
        yield fpath, img
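# Example of consuming the generator above; args.gray, get_imagenames and
# preprocess_img are assumed to be defined in the surrounding script, and the
# folder name is hypothetical:
# for fpath, img in yield_from_dir('input_frames/'):
#     print(fpath, img.shape)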
def __init__(self, config):
    """
    Initializes the model
    :param config: A model configuration object of type Config
    """
    self.config = config
    self.input_real, self.input_z = model_inputs(self.config.real_dim, self.config.z_dim)

    G_model = generator(self.input_z)
    logits_real = discriminator(preprocess_img(self.input_real))
    logits_fake = discriminator(G_model, reuse=True)

    self.D_loss, self.G_loss = wgangp_loss(logits_real, logits_fake, self.config.batch_size,
                                           self.input_real, G_model)
    self.D_opt, self.G_opt = model_opt(self.D_loss, self.G_loss, self.config.lr,
                                       self.config.beta1)
def haar_detector(frame):
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    face_frame = np.zeros(gray_frame.shape, dtype="uint8")
    offset = 15
    x_pos, y_pos = 10, 40
    faces = cascade_detector.detectMultiScale(gray_frame, 1.32, 5)
    for idx, face in enumerate(faces):
        if hist_eq:
            gray_frame = cv2.equalizeHist(gray_frame)
        img_arr = utils.align_face(gray_frame, utils.bb_to_rect(face), desiredLeftEye)
        face_frame = cv2.resize(img_arr, (48, 48), interpolation=cv2.INTER_CUBIC)
        img_arr = utils.preprocess_img(img_arr, resize=False)
        predicted_proba = model.predict(img_arr)
        predicted_label = np.argmax(predicted_proba[0])

        x, y, w, h = face
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        text = f"Person {idx+1}: {label2text[predicted_label]}"
        utils.draw_text_with_backgroud(frame, text, x + 5, y, font_scale=0.4)

        text = f"Person {idx+1} : "
        y_pos = y_pos + 2 * offset
        utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                       font_scale=0.3, box_coords_2=(2, -2))
        for k, v in label2text.items():
            text = f"{v}: {round(predicted_proba[0][k]*100, 3)}%"
            y_pos = y_pos + offset
            utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                           font_scale=0.3, box_coords_2=(2, -2))
    return frame, face_frame
def dlib_detector(frame_orig):
    frame = frame_orig.copy()
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    offset = 15
    x_pos, y_pos = 10, 40
    faces = hog_detector(gray_frame)
    for idx, face in enumerate(faces):
        if hist_eq:
            gray_frame = cv2.equalizeHist(gray_frame)
        img_arr = utils.align_face(gray_frame, face, desiredLeftEye)
        img_arr = utils.preprocess_img(img_arr, resize=False)
        predicted_proba = model.predict(img_arr)
        predicted_label = np.argmax(predicted_proba[0])

        x, y, w, h = rect_to_bb(face)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        text = f"Person {idx+1}: {label2text[predicted_label]}"
        utils.draw_text_with_backgroud(frame, text, x + 5, y, font_scale=0.4)

        text = f"Person {idx+1} : "
        y_pos = y_pos + 2 * offset
        utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                       font_scale=0.3, box_coords_2=(2, -2))
        for k, v in label2text.items():
            text = f"{v}: {round(predicted_proba[0][k]*100, 3)}%"
            y_pos = y_pos + offset
            utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                           font_scale=0.3, box_coords_2=(2, -2))
    return frame
def main():
    args = parse_args()
    raw_img = cv2.imread(args.input, 1)
    raw_img = cv2.resize(raw_img, (224, 224), interpolation=cv2.INTER_LINEAR)
    raw_img = np.float32(raw_img) / 255
    image, norm_image = preprocess_img(raw_img)

    model = models.__dict__[args.arch](pretrained=True).eval()
    model = model.cuda()
    gc = GradCAM(model, target_layer=args.target_layer)
    heatmap = gc(norm_image.cuda(), class_idx=args.cls_idx).cpu().data
    cam = show_cam(image, heatmap, args.output)

    if args.ins_del:
        blur = lambda x: gaussian_blur2d(x, kernel_size=(51, 51), sigma=(50., 50.))
        insertion = CausalMetric(model, 'ins', 224 * 2, substrate_fn=blur)
        deletion = CausalMetric(model, 'del', 224 * 2, substrate_fn=torch.zeros_like)

        out_video_path = './VIDEO'
        check_path_exist(out_video_path)
        ins_path = os.path.join(out_video_path, "ins")
        del_path = os.path.join(out_video_path, "del")
        check_path_exist(ins_path)
        check_path_exist(del_path)

        norm_image = norm_image.cpu()
        heatmap = heatmap.cpu().numpy()
        ins_score = insertion.evaluate(norm_image, mask=heatmap, cls_idx=None, save_to=ins_path)
        del_score = deletion.evaluate(norm_image, mask=heatmap, cls_idx=None, save_to=del_path)
        print("\nDeletion - {:.5f}\nInsertion - {:.5f}".format(auc(del_score), auc(ins_score)))

        # generate video
        video_ins = os.path.join(ins_path, args.input.split('/')[-1].split('.')[0] + '.avi')
        video_del = os.path.join(del_path, args.input.split('/')[-1].split('.')[0] + '.avi')
        cmd_str_ins = 'ffmpeg -f image2 -i {}/%06d.jpg -b 5000k -r 30 -c:v mpeg4 {} -y'.format(ins_path, video_ins)
        cmd_str_del = 'ffmpeg -f image2 -i {}/%06d.jpg -b 5000k -r 30 -c:v mpeg4 {} -y'.format(del_path, video_del)
        os.system(cmd_str_ins)
        os.system(cmd_str_del)
def __getitem__(self, idx):
    img_path = self.img_paths[idx]
    img = cv2.imread(img_path)

    # center crop
    h, w = img.shape[:2]
    min_side = min(h, w)
    top, bot = (h - min_side) // 2, h - (h - min_side) // 2
    left, right = (w - min_side) // 2, w - (w - min_side) // 2
    img = img[top:bot, left:right, :]

    # mirror img with a 50% chance
    if self.mirror:
        if random.random() > 0.5:
            img = img[:, ::-1, :]

    # resize
    img = cv2.resize(img, (self.size, self.size))

    # normalize
    img = preprocess_img(img)
    return torch.tensor(img.astype(np.float32))
def DNN_DataSet(self, df):
    """Preprocess the 'img' column of df into model-ready inputs."""
    return preprocess_img(df['img'])
def classify(image):
    model = get_model("efficientnet-b0")
    img = preprocess_img(image)
    return predict(model, img)
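# Hedged usage sketch for classify(); get_model, preprocess_img and predict
# are assumed to come from the surrounding module, and the sample path is
# hypothetical:
# from PIL import Image
# print(classify(Image.open('sample.jpg')))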
def dnn_detector(frame):
    frame_height = frame.shape[0]
    frame_width = frame.shape[1]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), [104, 117, 123], False, False)
    net.setInput(blob)
    detections = net.forward()

    bboxes = []
    idx = 0
    offset = 15
    x_pos, y_pos = 10, 40
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    face_frame = np.zeros(gray_frame.shape, dtype="uint8")
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > conf_threshold:
            idx += 1
            x1 = int(detections[0, 0, i, 3] * frame_width)
            y1 = int(detections[0, 0, i, 4] * frame_height)
            x2 = int(detections[0, 0, i, 5] * frame_width)
            y2 = int(detections[0, 0, i, 6] * frame_height)
            bboxes.append([x1, y1, x2, y2])
            face = [x1, y1, x2 - x1, y2 - y1]

            if hist_eq:
                gray_frame = cv2.equalizeHist(gray_frame)
            img_arr = utils.align_face(gray_frame, utils.bb_to_rect(face), desiredLeftEye)
            face_frame = cv2.resize(img_arr, (48, 48), interpolation=cv2.INTER_CUBIC)
            img_arr = utils.preprocess_img(img_arr, resize=False)
            predicted_proba = model.predict(img_arr)
            predicted_label = np.argmax(predicted_proba[0])

            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
            text = f"Person {idx}: {label2text[predicted_label]}"
            utils.draw_text_with_backgroud(frame, text, x1 + 5, y1, font_scale=0.4)

            text = f"Person {idx} : "
            y_pos = y_pos + 2 * offset
            utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                           font_scale=0.3, box_coords_2=(2, -2))
            for k, v in label2text.items():
                text = f"{v}: {round(predicted_proba[0][k]*100, 3)}%"
                y_pos = y_pos + offset
                utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                               font_scale=0.3, box_coords_2=(2, -2))
    return frame, face_frame
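# A minimal webcam loop that detectors like haar_detector() and dnn_detector()
# above are typically driven by. This is a sketch, not part of the original
# script; it assumes the detector globals (net, model, label2text, ...) have
# already been initialised, and that the detector returns (frame, face_frame).
import cv2


def run_webcam(detector):
    cap = cv2.VideoCapture(0)
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        out_frame, face_frame = detector(frame)  # annotated frame + last face crop
        cv2.imshow("emotion", out_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

# run_webcam(dnn_detector)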
def main(args):
    for arg in vars(args):
        print(arg, getattr(args, arg))
    model_name = args.model_name
    img_path = args.img_path
    img_label_path = 'imagenet.json'
    true_class = args.true_label
    adversarial_label = args.adv_label
    demo_epoch = args.epoch
    demo_eps = args.eps
    demo_lr = args.lr
    label_num = args.label_num
    lambda_up, lambda_down, lambda_label_loss = args.lambda_up, args.lambda_down, args.lambda_label_loss

    # load model
    sess, graph, img_size, images_v, logits = load_pretrain_model(model_name)
    probs = tf.nn.softmax(logits)
    print("successfully loaded model")

    if args.write_summary:
        unique_path_name = "up{}down{}ce{}epoch{}lr{}".format(
            args.lambda_up, args.lambda_down, args.lambda_label_loss, args.epoch, args.lr)
        final_summary_path = os.path.join(args.summary_path, unique_path_name)
        if not os.path.exists(final_summary_path):
            os.makedirs(final_summary_path)
        summary_writer = tf.summary.FileWriter(final_summary_path, graph)

    global_step = tf.Variable(0, name="global_step", trainable=False)
    step_init = tf.variables_initializer([global_step])
    y_hat = tf.placeholder(tf.int32, ())
    label_logits = tf.gather_nd(logits, [[0, y_hat]])

    img = PIL.Image.open(img_path)
    img = preprocess_img(img, img_size)
    batch_img = np.expand_dims(img, 0)
    imagenet_label = load_imagenet_label(img_label_path)

    # -------------------
    # Step 1: classify the image with the original model
    p = sess.run(probs, feed_dict={images_v: batch_img})[0]
    predict_label = np.argmax(p)
    # classify(img, p, imagenet_label, correct_class=true_class, is_cluster=True)

    # -------------------
    # Step 2: construct adversarial examples
    image_pl = tf.placeholder(tf.float32, (1, img_size, img_size, 3))
    assign_op = tf.assign(images_v, image_pl)
    learning_rate = tf.placeholder(tf.float32, ())
    var_eps = tf.placeholder(tf.float32, ())
    labels = tf.one_hot(y_hat, label_num)
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)[0]
    projected = tf.clip_by_value(
        tf.clip_by_value(images_v, image_pl - var_eps, image_pl + var_eps), 0, 1)
    with tf.control_dependencies([projected]):
        project_step = tf.assign(images_v, projected)

    # initialization step
    _ = sess.run([assign_op, step_init], feed_dict={image_pl: batch_img})

    # construct targeted attack
    # feed_dict_optim = {image_pl: batch_img,
    #                    y_hat: adversarial_label,
    #                    learning_rate: demo_lr}
    # feed_dict_proj = {image_pl: batch_img,
    #                   var_eps: demo_eps}
    # optim_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, var_list=[images_v])
    # model_train(sess=sess,
    #             optim_step=optim_step,
    #             project_step=project_step,
    #             loss=loss,
    #             feed_dict_optim=feed_dict_optim,
    #             feed_dict_project=feed_dict_proj,
    #             epoch=10)
    # adv_img = np.squeeze(images_v.eval(), 0)
    # adv_prob = sess.run(probs, feed_dict={images_v: np.expand_dims(adv_img, 0)})
    # classify(adv_img, adv_prob[0], imagenet_label, correct_class=281, target_class=adversarial_label)
    #
    # show the saliency map
    # feed_dict_gradient = {y_hat: true_class}
    # _ = show_gradient_map(graph=graph,
    #                       sess=sess,
    #                       y=label_logits,
    #                       x=images_v,
    #                       img=img,
    #                       is_integrated=False,
    #                       is_smooth=False,
    #                       feed_dict=feed_dict_gradient)

    # ---------------
    # use gradient descent to control the saliency map
    # original gradient intensity
    map3D, map_grey = show_gradient_map(graph=graph,
                                        sess=sess,
                                        y=label_logits,
                                        x=images_v,
                                        img=img,
                                        is_integrated=False,
                                        is_smooth=True,
                                        feed_dict={y_hat: true_class},
                                        is_cluster=args.is_cluster)
    center_more, radius_more = (100, 110), 10
    center_less, radius_less = (100, 70), 10
    gradient_more = calculate_region_importance(map_grey, center_more, radius_more)
    gradient_less = calculate_region_importance(map_grey, center_less, radius_less)
    print("region 1 gradient intensity %.3f, region 2 gradient intensity %.3f"
          % (gradient_more, gradient_less))

    # construct new loss function
    grad_map = tf.gradients(label_logits, images_v)[0]
    to_down_gradient = calculate_img_region_importance(grad_map, center_more, radius_more)
    to_up_gradient = calculate_img_region_importance(grad_map, center_less, radius_less)
    grad_loss = -lambda_up * to_up_gradient + lambda_down * to_down_gradient
    final_loss = grad_loss + lambda_label_loss * loss

    if args.write_summary:
        up_gradient_summary = tf.summary.scalar("up_gradient", to_up_gradient)
        down_gradient_summary = tf.summary.scalar("down_gradient", to_down_gradient)
        loss_summary = tf.summary.scalar("loss", loss)
        train_summary_op = tf.summary.merge_all()

    change_grad_optim_step = tf.train.GradientDescentOptimizer(
        learning_rate=demo_lr).minimize(final_loss, var_list=[images_v],
                                        global_step=global_step)

    for i in range(demo_epoch):
        if args.write_summary:
            _, _loss, step, summary_str = sess.run(
                [change_grad_optim_step, final_loss, global_step, train_summary_op],
                feed_dict={image_pl: batch_img,
                           y_hat: true_class,
                           learning_rate: demo_lr})
            summary_writer.add_summary(summary_str, global_step=step)
        else:
            _, _loss, step = sess.run(
                [change_grad_optim_step, final_loss, global_step],
                feed_dict={image_pl: batch_img,
                           y_hat: true_class,
                           learning_rate: demo_lr})
        sess.run([project_step], feed_dict={image_pl: batch_img, var_eps: demo_eps})
        print("%d loss = %g" % (i, _loss))

        if i % args.image_interval == 0:
            adv_img = np.squeeze(images_v.eval(), 0)
            # check the prediction result
            p_adv = sess.run(probs, feed_dict={images_v: batch_img})[0]
            predict_label_adv = np.argmax(p_adv)
            # classify(adv_img, p_adv, imagenet_label, correct_class=true_class, is_cluster=args.is_cluster)

            # check the gradient map
            map3D_adv, map_grey_adv = show_gradient_map(graph=graph,
                                                        sess=sess,
                                                        y=label_logits,
                                                        x=images_v,
                                                        img=adv_img,
                                                        is_integrated=False,
                                                        is_smooth=False,
                                                        feed_dict={y_hat: true_class},
                                                        is_cluster=args.is_cluster)
            adv_gradient_more = calculate_region_importance(map_grey_adv, center_more, radius_more)
            adv_gradient_less = calculate_region_importance(map_grey_adv, center_less, radius_less)
            if args.write_summary:
                map_grey_adv = tf.expand_dims(tf.expand_dims(map_grey_adv, 0), 3)
                adv_map_sum = tf.summary.image('adv_map' + str(i),
                                               tf.convert_to_tensor(map_grey_adv))
                adv_str = sess.run(adv_map_sum)
                summary_writer.add_summary(adv_str)
            print("Adversarial Case: predict label: %d, big region gradient intensity: %.3f, small region gradient intensity: %.3f"
                  % (predict_label_adv, adv_gradient_more, adv_gradient_less))
            print("Normal Case: predict label: %d, big region gradient intensity: %.3f, small region gradient intensity: %.3f"
                  % (predict_label, gradient_more, gradient_less))

    # write original map
    map_grey = tf.expand_dims(tf.expand_dims(map_grey, 0), 3)
    orig_map_sum = tf.summary.image('orig_map', tf.convert_to_tensor(map_grey))
    orig_str = sess.run(orig_map_sum)
    summary_writer.add_summary(orig_str)
parser.add_argument('path', type=str, nargs=1, help='path to pdf file', metavar='--p')
args = parser.parse_args()
pdf_path = args.path[0]

doc = fitz.open(pdf_path)
clf = joblib.load('SVMcls.pkl')
for i, page in enumerate(doc):
    print('Converting page no. {} to image'.format(i + 1))
    zoom = 2  # zoom factor
    mat = fitz.Matrix(zoom, zoom)
    pixmap = page.getPixmap(matrix=mat)
    page_im = pix2np(pixmap)

    prep_im = preprocess_img(page_im)
    rects = get_bounding_rects(prep_im)
    bboxes = []
    print('Detected {} candidate segments.'.format(len(rects)))
    for rect in rects:
        x, y, w, h = rect
        crop = prep_im[y:y + h, x:x + w]
        hist = np.reshape(get_hist(crop), (1, -1))
        pred = clf.predict(hist)
        if pred == 1:
            bboxes.append((x, y, w, h))
    print('Found {} handsigns at page {}'.format(len(bboxes), i + 1))
    if bboxes:
        for bbox in bboxes:
            x, y, w, h = bbox
            cv2.rectangle(page_im, (x, y), (x + w, y + h), (0, 255, 0), 2)
args = parse_arguments('')
model_name = args.model_name
img_path = args.img_path
img_label_path = 'imagenet.json'
true_class = args.true_label
adversarial_label = args.adv_label
label_num = args.label_num
lambda_up, lambda_down, lambda_label_loss = args.lambda_up, args.lambda_down, args.lambda_label_loss

sess, graph, img_size, images_pl, logits = load_pretrain_model(model_name, is_explain=True)
y_label = tf.placeholder(dtype=tf.int32, shape=())
img_label = load_imagenet_label(img_label_path)
img = PIL.Image.open(img_path)
img = preprocess_img(img, img_size)
# new_img = np.load('big_vgg16_30_0.0001_1000_0.001_0.03_3000.npy')  # 258
new_img = np.load('vgg16_60_70_35_45_30_0.0001_800_0.0_0.0_9000.npy')  # 208
batch_img = np.expand_dims(img, 0)
new_batch_img = np.expand_dims(new_img, 0)

true_class = 208
label_logits = logits[0, true_class]
gradient_saliency = saliency.GradientSaliency(graph, sess, label_logits, images_pl)  # 1951/1874

attributions = OrderedDict()
with DeepExplain(session=sess) as de:
    ori_attributions = {
        # Gradient-based
if frames_q:
    imgs = image2pipe.utils.yield_from_queue(frames_q)
else:
    # Get ordered list of filenames
    print("\tOpen sequence in folder: ", args.read_path)
    imgs = yield_from_dir(args.read_path)

seq_list = []
seq_outnames = []
for fn_or_fpath, img in imgs:
    if type(fn_or_fpath) is int:
        fpath = "%06d.png" % fn_or_fpath
        # from HxWxC to CxHxW, RGB image
        img = img.transpose(2, 0, 1)
        img, expanded_h, expanded_w = preprocess_img(img,
                                                     expand_if_needed=False,
                                                     expand_axis0=False)
    else:
        fpath = fn_or_fpath
    print("Load img:", fpath, img.shape)

    seq_list.append(img)
    seq_outnames.append(os.path.basename(fpath))
    seq = np.stack(seq_list, axis=0)
    # return seq, expanded_h, expanded_w

    if len(seq_list) == NUM_IN_FR_EXT:
        print("Infer batch ...")
        seq = torch.from_numpy(seq).to(device)
        seq_time = time.time()
mod.symbol.save(model_save_path + '.json')


if __name__ == "__main__":
    parser = build_parser()
    args = parser.parse_args()
    check_opts(args)

    # init
    ctx = mx.gpu(args.gpu) if args.gpu >= 0 else mx.cpu()
    ctx = mx.cpu()  # NOTE: this overrides the GPU context chosen above
    vgg_params = mx.nd.load(args.vgg_path)

    # init style
    print('load style image', args.style_image)
    style_np = preprocess_img(get_img(args.style_image))
    style_np = np.expand_dims(style_np, 0)
    dshape = style_np.shape
    style_exec = get_style_excutor(vgg_params, dshape, ctx)
    style_exec.data[:] = mx.nd.array(style_np)
    style_exec.executor.forward()
    style_array = [
        mx.nd.repeat(arr.copyto(ctx), axis=0, repeats=args.batch_size)
        for arr in style_exec.outputs
    ]
    del style_exec

    # TRAIN_SHAPE = (256, 256)
    dshape = (args.batch_size, 3, *TRAIN_SHAPE)
def main(args):
    for arg in vars(args):
        print(arg, getattr(args, arg))
    model_name = args.model_name
    img_path = args.img_path
    img_label_path = 'imagenet.json'
    true_class = args.true_label
    adversarial_label = args.adv_label
    label_num = args.label_num
    lambda_up, lambda_down, lambda_label_loss = args.lambda_up, args.lambda_down, args.lambda_label_loss
    # model_name = 'inception_v3'
    # img_path = './picture/dog_cat.jpg'
    # img_label_path = 'imagenet.json'
    # true_class = 208

    sess, graph, img_size, images_pl, logits = load_pretrain_model(model_name, is_explain=True)
    y_label = tf.placeholder(dtype=tf.int32, shape=())
    label_logits = logits[0, y_label]

    if len(args.imp) > 0:
        img = np.load(args.imp)
        init_epoch = int(args.imp[:-4].split('_')[-1])
        loss_list = list(np.load('loss_' + args.imp))
    else:
        img = PIL.Image.open(img_path)
        img = preprocess_img(img, img_size)
        init_epoch = 0
        loss_list = []
    old_img = np.array(img)
    batch_img = np.expand_dims(img, 0)
    # new_img = np.load('vgg16_30_0.0004_1000_0.001_0.03_4000.npy')
    # new_batch_img = np.concatenate((np.expand_dims(new_img, 0), batch_img), axis=0)
    # new_batch_img = np.expand_dims(new_img, 0)
    # all_img = np.concatenate((batch_img, new_batch_img))

    imagenet_label = load_imagenet_label(img_label_path)
    prob = tf.nn.softmax(logits)
    _prob = sess.run(prob, feed_dict={images_pl: batch_img})[0]
    # classify(img, _prob, imagenet_label, 1, 1)

    ####
    # deep explain
    # from deepexplain.tensorflow import DeepExplain
    # label_logits = logits[0, 208]
    # with DeepExplain(session=sess) as de:
    #     attributions = {
    #         # Gradient-based
    #         # NOTE: reduce_max is used to select the output unit for the class predicted by the classifier
    #         # For an example of how to use the ground-truth labels instead, see mnist_cnn_keras notebook
    #         'Saliency maps': de.explain('saliency', label_logits, images_pl, batch_img),
    #         'Gradient * Input': de.explain('grad*input', label_logits, images_pl, batch_img),
    #         # 'Integrated Gradients': de.explain('intgrad', label_logits, images_pl, new_batch_img),
    #         'Epsilon-LRP': de.explain('elrp', label_logits, images_pl, batch_img),
    #         'DeepLIFT (Rescale)': de.explain('deeplift', label_logits, images_pl, batch_img),
    #         # Perturbation-based (comment out to evaluate, but this will take a while!)
    #         # 'Occlusion [15x15]': de.explain('occlusion', label_logits, images_pl, batch_img, window_shape=(15,15,3), step=4)
    #     }
    ####
    # new_attributions = {
    #     # Gradient-based
    #     # NOTE: reduce_max is used to select the output unit for the class predicted by the classifier
    #     # For an example of how to use the ground-truth labels instead, see mnist_cnn_keras notebook
    #     'Saliency maps': de.explain('saliency', label_logits, images_pl, new_batch_img),
    #     'Gradient * Input': de.explain('grad*input', label_logits, images_pl, new_batch_img),
    #     # 'Integrated Gradients': de.explain('intgrad', label_logits, images_pl, new_batch_img),
    #     'Epsilon-LRP': de.explain('elrp', label_logits, images_pl, new_batch_img),
    #     'DeepLIFT (Rescale)': de.explain('deeplift', label_logits, images_pl, new_batch_img),
    #     # Perturbation-based (comment out to evaluate, but this will take a while!)
    #     # 'Occlusion [15x15]': de.explain('occlusion', label_logits, images_pl, batch_img, window_shape=(15,15,3), step=4)
    # }
    ####
    # attributions['Saliency maps'] = np.concatenate((attributions['Saliency maps'], new_attributions['Saliency maps']), axis=0)
    # attributions['Gradient * Input'] = np.concatenate((attributions['Gradient * Input'], new_attributions['Gradient * Input']), axis=0)
    # attributions['Epsilon-LRP'] = np.concatenate((attributions['Epsilon-LRP'], new_attributions['Epsilon-LRP']), axis=0)
    # attributions['DeepLIFT (Rescale)'] = np.concatenate((attributions['DeepLIFT (Rescale)'], new_attributions['DeepLIFT (Rescale)']), axis=0)
    #
    # n_cols = int(len(attributions)) + 1
    # n_rows = 2
    # fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(3 * n_cols, 3 * n_rows))
    # for i, xi in enumerate(all_img):
    #     # xi = (xi - np.min(xi))
    #     # xi /= np.max(xi)
    #     ax = axes.flatten()[i * n_cols]
    #     ax.imshow(xi)
    #     ax.set_title('Original')
    #     ax.axis('off')
    #     for j, a in enumerate(attributions):
    #         axj = axes.flatten()[i * n_cols + j + 1]
    #         plot(attributions[a][i], xi=xi, axis=axj, dilation=.5, percentile=99, alpha=.2).set_title(a)
    ######

    label_logits = logits[0, 208]
    with DeepExplain(session=sess) as de:
        dlift = de.explain('deeplift', label_logits, images_pl, batch_img)

    grad_map_tensor = tf.gradients(label_logits, images_pl)[0]
    grad_map = sess.run(grad_map_tensor,
                        feed_dict={images_pl: np.expand_dims(img, 0), y_label: true_class})

    gradient_saliency = saliency.GradientSaliency(graph, sess, label_logits, images_pl)  # 1951/1874
    vanilla_mask_3d = gradient_saliency.GetMask(img, feed_dict={y_label: true_class})  # better
    vanilla_mask_grayscale = saliency.VisualizeImageGrayscale(vanilla_mask_3d)
    # smoothgrad_mask_3d = gradient_saliency.GetSmoothedMask(img, feed_dict={y_label: true_class})  # much clear, 2204/2192
    # smoothgrad_mask_grayscale = saliency.VisualizeImageGrayscale(smoothgrad_mask_3d)
    #
    # new_img = np.load('vgg16_60_70_35_45_30_0.0001_800_0.0_0.0_9000.npy')
    # new_grad_map = sess.run(grad_map_tensor, feed_dict={images_pl: np.expand_dims(new_img, 0), y_label: true_class})
    # new_vanilla_mask_3d = gradient_saliency.GetMask(new_img, feed_dict={y_label: true_class})  # better
    # new_vanilla_mask_grayscale = saliency.VisualizeImageGrayscale(new_vanilla_mask_3d)
    # new_smoothgrad_mask_3d = gradient_saliency.GetSmoothedMask(new_img, feed_dict={y_label: true_class})  # much clear, 2204/2192
    # new_smoothgrad_mask_grayscale = saliency.VisualizeImageGrayscale(new_smoothgrad_mask_3d)

    # to_dec_center = (60, 70)
    to_dec_center = (100, 65)
    # to_dec_radius = (35, 45)
    to_dec_radius = (80, 60)
    to_inc_center = (120, 170)
    to_inc_radius = (40, 30)
    _map = vanilla_mask_grayscale
    print(calculate_region_importance(_map, to_dec_center, to_dec_radius))
    print(calculate_region_importance(_map, to_inc_center, to_inc_radius))

    # construct to_inc_region and to_dec_region
    to_dec_region = calculate_img_region_importance(grad_map_tensor, to_dec_center, to_dec_radius)
    to_inc_region = calculate_img_region_importance(grad_map_tensor, to_inc_center, to_inc_radius)

    # try NES (Natural evolutionary strategies)
    N = args.N
    sigma = args.sigma
    epsilon = round(args.eps, 2)
    epoch = args.epoch
    eta = args.lr
    # loss = to_dec_region / to_inc_region
    # old_loss = sess.run(loss, feed_dict={images_pl: np.expand_dims(img, 0), y_label: true_class})
    old_loss = calculate_deeplift_loss(dlift, to_dec_center, to_dec_radius,
                                       to_inc_center, to_inc_radius)
    num_list = '_'.join(['big', model_name, str(N), str(eta), str(epoch), str(sigma), str(epsilon)])
    print(num_list)

    for i in range(epoch):
        delta = np.random.randn(int(N / 2), img_size * img_size * 3)
        delta = np.concatenate((delta, -delta), axis=0)
        grad_sum = 0
        f_value_list = []
        for idelta in delta:
            img_plus = np.clip(img + sigma * idelta.reshape(img_size, img_size, 3), 0, 1)
            # f_value = sess.run(loss, feed_dict={images_pl: np.expand_dims(img_plus, 0), y_label: true_class})
            with DeepExplain(session=sess) as de:
                dlift = de.explain('deeplift', label_logits, images_pl,
                                   np.expand_dims(img_plus, 0))
                f_value = calculate_deeplift_loss(dlift, to_dec_center, to_dec_radius,
                                                  to_inc_center, to_inc_radius)
            f_value_list.append(f_value)
            grad_sum += f_value * idelta.reshape(img_size, img_size, 3)
        grad_sum = grad_sum / (N * sigma)
        new_img = np.clip(np.clip(img - eta * grad_sum, old_img - epsilon, old_img + epsilon), 0, 1)
        # new_loss, new_logits = sess.run([loss, logits],
        #                                 feed_dict={images_pl: np.expand_dims(new_img, 0), y_label: true_class})
        with DeepExplain(session=sess) as de:
            dlift = de.explain('deeplift', label_logits, images_pl, np.expand_dims(new_img, 0))
            new_loss = calculate_deeplift_loss(dlift, to_dec_center, to_dec_radius,
                                               to_inc_center, to_inc_radius)
        loss_list.append(new_loss)
        print("epoch:{} new:{}, old:{}, {}".format(i, new_loss, old_loss, np.argmax(_prob)))
        sys.stdout.flush()
        img = np.array(new_img)
        if i % args.image_interval == 0:
            temp_name = num_list + '_' + str(i + init_epoch)
            np.save(temp_name, new_img)
        if i % args.image_interval == 0:
            np.save('loss_' + temp_name, loss_list)

    np.save(num_list + '_' + str(epoch + init_epoch), new_img)
    np.save('loss_' + num_list + '_' + str(epoch + init_epoch), loss_list)
mask_imgs = np.array(mask_imgs)
mask_dilated_imgs = np.array(mask_dilated_imgs)
naive_imgs = np.array(naive_imgs)

# particular case
style_img = style_imgs[file_index]
naive_img_o = naive_imgs[file_index]
mask_img = mask_imgs[file_index]
mask_dilated_img = mask_dilated_imgs[file_index]

mask_img = mask_dilated_img / 255.0
mask_img = np.expand_dims(mask_img, axis=0)
mask_img = tf.cast(mask_img, tf.float32)

naive_img = K.variable(utils.preprocess_img(naive_img_o))
style_img = K.variable(utils.preprocess_img(style_img))
img_rows, img_cols = naive_img.shape[1], naive_img.shape[2]
fusion_img = K.placeholder((1, img_rows, img_cols, 3))

# combine the 3 images into a single Keras tensor
input_tensor = K.concatenate([naive_img, style_img, fusion_img], axis=0)

# build the vgg16 network with our 3 images as input
# the model will be loaded with pre-trained ImageNet weights
model = VGG16(input_tensor=input_tensor, weights='imagenet', include_top=False)
print('Model loaded.')

# get the symbolic outputs of each "key" layer (we gave them unique names).
outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
def run_gan(dataset, discriminator, generator, num_epoch=10):
    """Helper function for training GANs"""
    tf.reset_default_graph()

    # number of images for each batch
    batch_size = 128
    # noise dimension
    noise_dim = 96

    # shape of train images
    img_shape = list(dataset[0].shape)
    height = img_shape[0]
    width = img_shape[1]
    channels = img_shape[2]

    # check image shape
    assert height == 32, 'Error: image height should be 32'
    assert width == 32, 'Error: image width should be 32'

    # placeholder for images from the training dataset
    placeholder_size = [None] + img_shape
    x = tf.placeholder(tf.float32, placeholder_size)
    # random noise fed into our generator
    z = sample_noise(batch_size, noise_dim)
    # generated images
    G_sample = generator(z, channels)

    with tf.variable_scope('') as scope:
        img_preproc = preprocess_img(x)
        logits_real = discriminator(img_preproc)
        # Re-use discriminator weights on new inputs
        scope.reuse_variables()
        logits_fake = discriminator(G_sample)

    # get solvers
    D_solver, G_solver = get_solvers()

    # get discriminator and generator loss
    D_loss, G_loss = gan_loss(logits_real, logits_fake)

    # Get the list of variables for the discriminator and generator
    D_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'discriminator')
    G_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'generator')

    # setup training steps
    D_train_step = D_solver.minimize(D_loss, var_list=D_vars)
    G_train_step = G_solver.minimize(G_loss, var_list=G_vars)

    with get_session() as sess:
        sess.run(tf.global_variables_initializer())
        train_gan(sess, G_train_step, G_loss, G_sample, D_train_step, D_loss,
                  x, dataset, batch_size=batch_size, num_epoch=num_epoch)
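# In CS231n-style GAN code like run_gan() above, preprocess_img conventionally
# rescales pixel values from [0, 1] to [-1, 1] to match a tanh generator
# output. A minimal sketch under that assumption; the project's real
# definition may differ:
def preprocess_img_sketch(x):
    # map [0, 1] -> [-1, 1]
    return 2.0 * x - 1.0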
import utils
import cv2
import os
import json
import constant

if __name__ == "__main__":
    imgurl = 'input\\13.png'
    img = cv2.imread(imgurl)
    img = utils.preprocess_img(img)

    # get bounding box of all rectangles
    bounding_boxes = utils.box_extraction(img)
    print(f'[INFO] bounding_boxes: {len(bounding_boxes)}')

    # get invoice data
    data = utils.get_invoice_data(bounding_boxes, img)

    # write output to json
    base = os.path.basename(imgurl)
    output = constant.OUTPUT_PATH + os.path.splitext(base)[0] + '.json'
    with open(output, 'w+') as fp:
        json.dump(data, fp, indent=4)
    print(f'[INFO] exported to {output}')
def main():
    args = parse_args()
    raw_img = cv2.imread(args.input, 1)
    raw_img = cv2.resize(raw_img, (224, 224), interpolation=cv2.INTER_LINEAR)
    raw_img = np.float32(raw_img) / 255
    image, norm_image = preprocess_img(raw_img)

    model = models.__dict__[args.arch](pretrained=True).eval()
    model = model.cuda()
    rise = RISE(model, input_size=(224, 224), batch_size=40)
    rise.generate_masks()
    gd = GradCAM(model, target_layer=args.target_layer)
    gc = GroupCAM(model, target_layer=args.target_layer)

    rise_heatmap = rise(norm_image.cuda(), class_idx=args.cls_idx).cpu().data
    gd_heatmap = gd(norm_image.cuda(), class_idx=args.cls_idx).cpu().data
    gc_heatmap = gc(norm_image.cuda(), class_idx=args.cls_idx).cpu().data

    if args.output is not None:
        rise_cam = show_cam(image, rise_heatmap, "rise_base.png")
        gd_cam = show_cam(image, gd_heatmap, "gd_base.png")
        gc_cam = show_cam(image, gc_heatmap, "gc_base.png")

    if args.ins_del:
        blur = lambda x: gaussian_blur2d(x, kernel_size=(51, 51), sigma=(50., 50.))
        insertion = CausalMetric(model, 'ins', 224 * 2, substrate_fn=blur)
        deletion = CausalMetric(model, 'del', 224 * 2, substrate_fn=torch.zeros_like)

        norm_image = norm_image.cpu()
        gd_heatmap = gd_heatmap.cpu().numpy()
        gc_heatmap = gc_heatmap.cpu().numpy()
        rise_heatmap = rise_heatmap.cpu().numpy()

        gc_ins_score = insertion.evaluate(norm_image, mask=gc_heatmap, cls_idx=None)
        gd_ins_score = insertion.evaluate(norm_image, mask=gd_heatmap, cls_idx=None)
        rise_ins_score = insertion.evaluate(norm_image, mask=rise_heatmap, cls_idx=None)
        gc_del_score = deletion.evaluate(norm_image, mask=gc_heatmap, cls_idx=None)
        gd_del_score = deletion.evaluate(norm_image, mask=gd_heatmap, cls_idx=None)
        rise_del_score = deletion.evaluate(norm_image, mask=rise_heatmap, cls_idx=None)

        legend = ["RISE", "Grad-CAM", "Group-CAM"]
        ins_scores = [auc(rise_ins_score), auc(gd_ins_score), auc(gc_ins_score)]
        del_scores = [auc(rise_del_score), auc(gd_del_score), auc(gc_del_score)]
        ins_scores = [round(i * 100, 2) for i in ins_scores]
        del_scores = [round(i * 100, 2) for i in del_scores]
        ins_legend = [i + ": " + str(j) for i, j in zip(legend, ins_scores)]
        del_legend = [i + ": " + str(j) for i, j in zip(legend, del_scores)]

        n_steps = len(gd_ins_score)
        x = np.arange(n_steps) / n_steps
        plt.figure(figsize=(12, 5))
        plt.xlim(-0.1, 1.1)
        plt.ylim(0, 1.05)

        plt.subplot(121)
        plt.plot(x, rise_ins_score)
        plt.plot(x, gd_ins_score)
        plt.plot(x, gc_ins_score)
        plt.xticks(fontsize=15)
        plt.yticks(fontsize=15)
        plt.legend(ins_legend, loc='best', fontsize=15)
        plt.title("Insertion Curve", fontsize=15)

        plt.subplot(122)
        plt.plot(x, rise_del_score)
        plt.plot(x, gd_del_score)
        plt.plot(x, gc_del_score)
        plt.xticks(fontsize=15)
        plt.yticks(fontsize=15)
        plt.legend(del_legend, loc='best', fontsize=15)
        plt.title("Deletion Curve", fontsize=15)
        plt.show()
def screen(self):
    return utils.preprocess_img(self._previous_screen, self._screen)