def telemetry(sid, data):
    if data:
        # The current steering angle of the car
        steering_angle = data["steering_angle"]
        # The current throttle of the car
        throttle = data["throttle"]
        # The current speed of the car
        speed = data["speed"]
        # The current image from the center camera of the car
        imgString = data["image"]
        image = Image.open(BytesIO(base64.b64decode(imgString)))
        image_array = preprocess_img(np.asarray(image))
        steering_angle = float(
            model.predict(image_array[None, :, :, :], batch_size=1))

        min_speed = 8
        max_speed = 10
        if float(speed) < min_speed:
            throttle = 1.0
        elif float(speed) > max_speed:
            throttle = -1.0
        else:
            throttle = 0.1

        print(steering_angle, throttle)
        send_control(steering_angle, throttle)

        # save frame
        if args.image_folder != '':
            timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
            image_filename = os.path.join(args.image_folder, timestamp)
            image.save('{}.jpg'.format(image_filename))
    else:
        # NOTE: DON'T EDIT THIS.
        sio.emit('manual', data={}, skip_sid=True)
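# A minimal sketch of how a telemetry handler like the one above is usually
# registered and served (the python-socketio + eventlet pattern used with the
# Udacity simulator). The port number and the send_control body are
# assumptions for illustration, not taken from the snippet above.
import socketio
import eventlet
from flask import Flask

sio = socketio.Server()
app = Flask(__name__)


def send_control(steering_angle, throttle):
    # Send the predicted controls back to the simulator.
    sio.emit('steer',
             data={'steering_angle': str(steering_angle),
                   'throttle': str(throttle)},
             skip_sid=True)


# sio.on('telemetry')(telemetry)            # register the handler above
# app = socketio.WSGIApp(sio, app)          # socketio.Middleware on old releases
# eventlet.wsgi.server(eventlet.listen(('', 4567)), app)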
def run(self, img):
    '''yolov5 trt inference func
    :param img: np img
    :return: dst_list: [(x1, y1, x2, y2, label, conf), ...]
    '''
    dst_list = []
    # pre process
    resize_img = preprocess_img(img)
    resize_img = torch.from_numpy(resize_img).to(self.device)
    # inference
    output = self.model(resize_img)
    # post process
    pred = self.post_process(output)
    for i, det in enumerate(pred):  # detections per image
        if det is not None and len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(resize_img.shape[2:], det[:, :4], img.shape).round()
            for *xyxy, conf, cls in reversed(det):
                if float('%.2f' % conf) > self.conf_thresh:
                    x1, y1, x2, y2 = int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3])
                    # label is self.names[int(cls)], score is conf
                    dst_list.append((x1, y1, x2, y2, self.names[int(cls)],
                                     float('%.2f' % conf)))
    return dst_list
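# Hypothetical usage of run() above. `Yolov5Trt` is a stand-in name for the
# owning class, which is assumed to set self.model, self.device, self.names,
# self.conf_thresh and self.post_process in its __init__; the engine path is
# illustrative only.
import cv2

# detector = Yolov5Trt(engine_path='yolov5s.trt', conf_thresh=0.25)
# img = cv2.imread('test.jpg')
# for x1, y1, x2, y2, label, conf in detector.run(img):
#     cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
#     cv2.putText(img, f'{label} {conf:.2f}', (x1, y1 - 5),
#                 cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)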
def run(self, img):
    resize_img = preprocess_img(img, self.scale_factor)
    output = self.trt.run(resize_img)
    full_mask = self.post_process(output, img.size)
    # full_mask = full_mask > self.conf_thresh
    mask_image = self.mask_to_image(full_mask)
    return mask_image
def test_ten_point(render_ground_truth=False, render_reconstruction=False):
    """
    A simple test to check if we can recover the 3D positions of 10 known 3D
    points and camera parameters given two images of the points where the
    correspondences are known to be correct.
    """
    points, colors = get_points()  # get some known 3D points, each with a color
    camera_params, focal_x, focal_y, rows, cols = get_cameras()  # get some known cameras

    # project the 3d points into each camera
    cam_1_points2d = project(points, camera_params[np.asarray([0 for _ in points])], focal_x, focal_y)
    cam_2_points2d = project(points, camera_params[np.asarray([1 for _ in points])], focal_x, focal_y)

    # draw the projected points in the camera images
    cam_1_img = utils.draw_points2d(cam_1_points2d, colors, rows, cols, show=False)
    cam_2_img = utils.draw_points2d(cam_2_points2d, colors, rows, cols, show=False)

    # find correspondences between the two images
    kp1, kp2, n_kp1, n_kp2 = matcher.find_matching_points_mock(utils.preprocess_img(cam_1_img),
                                                               utils.preprocess_img(cam_2_img))
    assert len(kp1) == len(n_kp1) == len(kp2) == len(n_kp2) == len(points)

    # keep track of which correspondence maps to which color
    kp_to_color = {i: cam_1_img[kp[1], kp[0]] for i, kp in enumerate(kp1)}

    if render_ground_truth:
        # show the ground truth geometry
        render_pts_and_cams(points, colors, camera_params[:, 3:], camera_params[:, :3],
                            focal_x, use_spheres=True)

    # run the solver with the correspondences to generate a reconstruction
    camera_kps = np.stack([n_kp1, n_kp2], axis=0)
    camera_params, points_3d, camera_indices, point_indices, points_2d, focal_length = \
        solver.get_solver_params(camera_kps)
    recon_camera_params, recon_3d_points, recon_focal_length, _ = solver.run_solver(
        camera_params, points_3d, camera_indices, point_indices, points_2d,
        focal_length, toss_outliers=False)
    recon_colors = [kp_to_color[i] for i in range(len(points_3d))]

    if render_reconstruction:
        render_pts_and_cams(recon_3d_points, recon_colors, recon_camera_params[:, 3:],
                            recon_camera_params[:, :3], recon_focal_length, use_spheres=True)

    check_image_match(recon_3d_points, recon_camera_params, recon_focal_length, recon_colors,
                      points, camera_params, focal_x, colors, rows, cols)
def worker(q):
    '''
    q: a multiprocessing.Queue object. Each item in the queue contains a
    tuple of (url, image_bytes).

    A worker continuously takes images from the work queue, preprocesses each
    image, sends it to the prediction cluster, and writes results to the
    database in batches.
    '''
    mydb = MySQLdb.connect(host=DB_HOST, port=DB_PORT, user=DB_USER,
                           passwd=DB_PASSWD, db=DB_NAME)
    mycursor = mydb.cursor()
    count = 0
    val = []
    sql = "INSERT INTO images (url, process_date, flag) VALUES (%s, %s, %s)"

    while True:
        # block=True: no exception is thrown while the queue is merely empty;
        # timeout=5: a timeout exception is thrown after 5 seconds
        try:
            url, image_bytes = q.get(block=True, timeout=5)
        except:
            break
        if url == 'start':
            print('start processing at', time())
            continue
        if url == 'done':
            break
        input = preprocess_img(image_bytes)
        payload = {"instances": [{'input_image': input.tolist()}]}
        r = requests.post(MODEL_SERVER, json=payload)
        flag = decode_response(r)
        process_date = datetime.datetime.today().strftime('%Y-%m-%d')
        val.append((url, process_date, flag))
        count += 1
        # save and commit to database after COMMIT_SIZE records are accumulated
        if count == COMMIT_SIZE:
            mycursor.executemany(sql, val)
            mydb.commit()
            val = []
            count = 0

    # save and commit the remaining records before closing the connection
    if count > 0:
        mycursor.executemany(sql, val)
        mydb.commit()
    mycursor.close()
    mydb.close()
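# A minimal sketch of how a pool of these workers might be launched, assuming
# the constants used above (DB_HOST, COMMIT_SIZE, MODEL_SERVER, ...) are
# already defined. The 'start'/'done' sentinels match what worker() checks.
import multiprocessing as mp


def start_workers(items, n_workers=4):
    q = mp.Queue()
    q.put(('start', None))
    for url, image_bytes in items:
        q.put((url, image_bytes))
    for _ in range(n_workers):
        q.put(('done', None))  # one shutdown sentinel per worker
    procs = [mp.Process(target=worker, args=(q,)) for _ in range(n_workers)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()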
def inference_image(model, logger, img=np.array(Image.open(inf_img_src).convert('RGB')),
                    compare=True, record=True, dpi=500):
    # NOTE: the default value for `img` is evaluated once, at function
    # definition time, so inf_img_src is read when the module is loaded.
    if compare:
        assert img.shape[1] == IMG_DIM * 2
        img, mask = split_img(img, IMG_DIM)
    orig_img = img.copy()
    img = preprocess_img(img)
    img = img.to(device)

    # Inference:
    y_pred = model(img)
    y_pred = torch.argmax(y_pred, dim=1)
    y_pred = y_pred[0].cpu().detach().numpy()

    plt.figure(figsize=(IMG_DIM / dpi, IMG_DIM / dpi), dpi=dpi)
    plt.figimage(y_pred)
    plt.axis('off')
    buf = io.BytesIO()
    plt.savefig(buf, format='jpg', dpi=dpi)
    buf.seek(0)
    y_pred_out = Image.open(buf).resize((IMG_DIM, IMG_DIM), Image.LANCZOS).convert("RGB")
    y_pred_out = cv2.cvtColor(np.array(y_pred_out), cv2.COLOR_RGB2BGR)

    # compare
    if compare:
        # Get GT
        cluster_model = get_clustering_model(logger)
        mask = cv2.resize(mask, (IMG_DIM, IMG_DIM), interpolation=cv2.INTER_AREA)
        class_map = cluster_model.predict(mask.reshape(-1, 3)).reshape(IMG_DIM, IMG_DIM)

        # IoU
        intersection = np.logical_and(class_map, y_pred)
        union = np.logical_or(class_map, y_pred)
        iou_score = np.sum(intersection) / np.sum(union)

        # Visualize
        class_map_out = cv2.putText(mask, 'GT, IoU: {0}'.format(round(iou_score, 3)), (20, 20),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA)
        y_pred_out = cv2.putText(y_pred_out, 'Prediction', (20, 20),
                                 cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA)
        orig_img = cv2.putText(orig_img, 'Image', (20, 20),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA)
        y_pred_out = np.concatenate((orig_img, y_pred_out, class_map_out), axis=1)

    # Record
    if record:
        cv2.imwrite(inf_out_img_src, y_pred_out)
        logger.info("(3) Inference Finished. Output image: {0}".format(inf_out_img_src))
    cv2.imshow("Output", y_pred_out)
    return y_pred_out
def transform_img(img_path, out_dir, img_size):
    img_np = preprocess_img(get_img(img_path, size=img_size))
    img_np = np.expand_dims(img_np, 0)

    # generator
    gen = get_module(img_np.shape, ctx)
    gen.load_params(args.checkpoint)

    data = mx.nd.array(img_np)
    gen.forward(mx.io.DataBatch([data], [0]), is_train=False)

    save_file = os.path.basename(os.path.normpath(img_path))
    save_output(gen, os.path.join(out_dir, save_file))
def ENAS(train_data):
    clf = ak.ImageClassifier(verbose=True)
    fold = FLAGS.enas_fold
    kf = KFold(n_splits=fold, shuffle=True, random_state=100)
    for _, test_index in kf.split(train_data):
        debug_data = train_data.iloc[test_index]
        break
    print('train size', debug_data.shape[0])
    x_train = preprocess_img(debug_data['img'])
    category = debug_data['class_id'].unique()
    print('class size ', category.shape[0])
    category_dict = dict((category[i], i) for i in range(category.shape[0]))
    y_train = debug_data['class_id'].apply(lambda id: category_dict[id]).values
    clf.fit(x_train, y_train, time_limit=FLAGS.enas_time)
def __getitem__(self, idx):
    img_path = self.img_paths[idx]
    img = cv2.imread(img_path)

    # mirror img with a 50% chance
    if self.mirror:
        if random.random() > 0.5:
            img = img[:, ::-1, :]

    # resize
    img = cv2.resize(img, (self.size, self.size))

    # normalize
    img = preprocess_img(img)
    return torch.tensor(img.astype(np.float32))
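# Hedged usage sketch for a Dataset built around the __getitem__ above. The
# class name ImageDataset and its constructor arguments are assumptions
# standing in for whatever the surrounding project defines.
from torch.utils.data import DataLoader

# dataset = ImageDataset(img_paths=glob.glob('data/*.jpg'), size=128, mirror=True)
# loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)
# for batch in loader:
#     pass  # batch: float32 tensor of preprocessed images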
def get_model_compatible_input(gray_frame, face):
    img_arr = utils.align_face(gray_frame, face, desiredLeftEye)
    img_arr = utils.preprocess_img(img_arr, resize=False)
    landmarks = shape_predictor(gray_frame, face)
    roi1, roi2 = utils.extract_roi1_roi2(gray_frame, landmarks)
    roi1 = np.expand_dims(roi1, 0)
    roi2 = np.expand_dims(roi2, 0)
    roi1 = roi1 / 255.
    roi2 = roi2 / 255.
    return [img_arr, roi1, roi2]
def yield_from_dir(in_dir):
    files = get_imagenames(in_dir)
    for fn, fpath in enumerate(files):
        if not args.gray:
            # Open image as a CxHxW torch.Tensor
            img = cv2.imread(fpath)
            # from HxWxC to CxHxW, RGB image
            img = (cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).transpose(2, 0, 1)
        else:
            # from HxWxC to CxHxW grayscale image (C=1)
            img = cv2.imread(fpath, cv2.IMREAD_GRAYSCALE)
        img, expanded_h, expanded_w = preprocess_img(img, expand_if_needed=False,
                                                     expand_axis0=False)
        yield fpath, img
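# Example of consuming the generator above; args.gray, get_imagenames and
# preprocess_img are assumed to be defined in the surrounding script, and the
# folder name is hypothetical:
# for fpath, img in yield_from_dir('input_frames/'):
#     print(fpath, img.shape)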
def __init__(self, config):
    """
    Initializes the model
    :param config: A model configuration object of type Config
    """
    self.config = config
    self.input_real, self.input_z = model_inputs(self.config.real_dim, self.config.z_dim)

    G_model = generator(self.input_z)
    logits_real = discriminator(preprocess_img(self.input_real))
    logits_fake = discriminator(G_model, reuse=True)

    self.D_loss, self.G_loss = wgangp_loss(logits_real, logits_fake, self.config.batch_size,
                                           self.input_real, G_model)
    self.D_opt, self.G_opt = model_opt(self.D_loss, self.G_loss, self.config.lr,
                                       self.config.beta1)
def haar_detector(frame):
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    face_frame = np.zeros(gray_frame.shape, dtype="uint8")
    offset = 15
    x_pos, y_pos = 10, 40
    faces = cascade_detector.detectMultiScale(gray_frame, 1.32, 5)
    for idx, face in enumerate(faces):
        if hist_eq:
            gray_frame = cv2.equalizeHist(gray_frame)
        img_arr = utils.align_face(gray_frame, utils.bb_to_rect(face), desiredLeftEye)
        face_frame = cv2.resize(img_arr, (48, 48), interpolation=cv2.INTER_CUBIC)
        img_arr = utils.preprocess_img(img_arr, resize=False)
        predicted_proba = model.predict(img_arr)
        predicted_label = np.argmax(predicted_proba[0])

        x, y, w, h = face
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        text = f"Person {idx+1}: {label2text[predicted_label]}"
        utils.draw_text_with_backgroud(frame, text, x + 5, y, font_scale=0.4)

        text = f"Person {idx+1} : "
        y_pos = y_pos + 2 * offset
        utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                       font_scale=0.3, box_coords_2=(2, -2))
        for k, v in label2text.items():
            text = f"{v}: {round(predicted_proba[0][k]*100, 3)}%"
            y_pos = y_pos + offset
            utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                           font_scale=0.3, box_coords_2=(2, -2))
    return frame, face_frame
def dlib_detector(frame_orig):
    frame = frame_orig.copy()
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    offset = 15
    x_pos, y_pos = 10, 40
    faces = hog_detector(gray_frame)
    for idx, face in enumerate(faces):
        if hist_eq:
            gray_frame = cv2.equalizeHist(gray_frame)
        img_arr = utils.align_face(gray_frame, face, desiredLeftEye)
        img_arr = utils.preprocess_img(img_arr, resize=False)
        predicted_proba = model.predict(img_arr)
        predicted_label = np.argmax(predicted_proba[0])

        x, y, w, h = rect_to_bb(face)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        text = f"Person {idx+1}: {label2text[predicted_label]}"
        utils.draw_text_with_backgroud(frame, text, x + 5, y, font_scale=0.4)

        text = f"Person {idx+1} : "
        y_pos = y_pos + 2 * offset
        utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                       font_scale=0.3, box_coords_2=(2, -2))
        for k, v in label2text.items():
            text = f"{v}: {round(predicted_proba[0][k]*100, 3)}%"
            y_pos = y_pos + offset
            utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                           font_scale=0.3, box_coords_2=(2, -2))
    return frame
def main():
    args = parse_args()
    raw_img = cv2.imread(args.input, 1)
    raw_img = cv2.resize(raw_img, (224, 224), interpolation=cv2.INTER_LINEAR)
    raw_img = np.float32(raw_img) / 255
    image, norm_image = preprocess_img(raw_img)

    model = models.__dict__[args.arch](pretrained=True).eval()
    model = model.cuda()
    gc = GradCAM(model, target_layer=args.target_layer)
    heatmap = gc(norm_image.cuda(), class_idx=args.cls_idx).cpu().data
    cam = show_cam(image, heatmap, args.output)

    if args.ins_del:
        blur = lambda x: gaussian_blur2d(x, kernel_size=(51, 51), sigma=(50., 50.))
        insertion = CausalMetric(model, 'ins', 224 * 2, substrate_fn=blur)
        deletion = CausalMetric(model, 'del', 224 * 2, substrate_fn=torch.zeros_like)

        out_video_path = './VIDEO'
        check_path_exist(out_video_path)
        ins_path = os.path.join(out_video_path, "ins")
        del_path = os.path.join(out_video_path, "del")
        check_path_exist(ins_path)
        check_path_exist(del_path)

        norm_image = norm_image.cpu()
        heatmap = heatmap.cpu().numpy()
        ins_score = insertion.evaluate(norm_image, mask=heatmap, cls_idx=None, save_to=ins_path)
        del_score = deletion.evaluate(norm_image, mask=heatmap, cls_idx=None, save_to=del_path)
        print("\nDeletion - {:.5f}\nInsertion - {:.5f}".format(auc(del_score), auc(ins_score)))

        # generate video
        video_ins = os.path.join(ins_path, args.input.split('/')[-1].split('.')[0] + '.avi')
        video_del = os.path.join(del_path, args.input.split('/')[-1].split('.')[0] + '.avi')
        cmd_str_ins = 'ffmpeg -f image2 -i {}/%06d.jpg -b 5000k -r 30 -c:v mpeg4 {} -y'.format(ins_path, video_ins)
        cmd_str_del = 'ffmpeg -f image2 -i {}/%06d.jpg -b 5000k -r 30 -c:v mpeg4 {} -y'.format(del_path, video_del)
        os.system(cmd_str_ins)
        os.system(cmd_str_del)
def __getitem__(self, idx):
    img_path = self.img_paths[idx]
    img = cv2.imread(img_path)

    # center crop
    h, w = img.shape[:2]
    min_side = min(h, w)
    top, bot = (h - min_side) // 2, h - (h - min_side) // 2
    left, right = (w - min_side) // 2, w - (w - min_side) // 2
    img = img[top:bot, left:right, :]

    # mirror img with a 50% chance
    if self.mirror:
        if random.random() > 0.5:
            img = img[:, ::-1, :]

    # resize
    img = cv2.resize(img, (self.size, self.size))

    # normalize
    img = preprocess_img(img)
    return torch.tensor(img.astype(np.float32))
def DNN_DataSet(self, df):
    """Preprocess the 'img' column of df into model-ready inputs."""
    return preprocess_img(df['img'])
def classify(image):
    model = get_model("efficientnet-b0")
    img = preprocess_img(image)
    return predict(model, img)
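# Hedged usage sketch for classify(); get_model, preprocess_img and predict
# are assumed to come from the surrounding module, and the sample path is
# hypothetical:
# from PIL import Image
# print(classify(Image.open('sample.jpg')))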
def dnn_detector(frame):
    frame_height = frame.shape[0]
    frame_width = frame.shape[1]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), [104, 117, 123], False, False)
    net.setInput(blob)
    detections = net.forward()

    bboxes = []
    idx = 0
    offset = 15
    x_pos, y_pos = 10, 40
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    face_frame = np.zeros(gray_frame.shape, dtype="uint8")
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > conf_threshold:
            idx += 1
            x1 = int(detections[0, 0, i, 3] * frame_width)
            y1 = int(detections[0, 0, i, 4] * frame_height)
            x2 = int(detections[0, 0, i, 5] * frame_width)
            y2 = int(detections[0, 0, i, 6] * frame_height)
            bboxes.append([x1, y1, x2, y2])
            face = [x1, y1, x2 - x1, y2 - y1]

            if hist_eq:
                gray_frame = cv2.equalizeHist(gray_frame)
            img_arr = utils.align_face(gray_frame, utils.bb_to_rect(face), desiredLeftEye)
            face_frame = cv2.resize(img_arr, (48, 48), interpolation=cv2.INTER_CUBIC)
            img_arr = utils.preprocess_img(img_arr, resize=False)
            predicted_proba = model.predict(img_arr)
            predicted_label = np.argmax(predicted_proba[0])

            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
            text = f"Person {idx}: {label2text[predicted_label]}"
            utils.draw_text_with_backgroud(frame, text, x1 + 5, y1, font_scale=0.4)

            text = f"Person {idx} : "
            y_pos = y_pos + 2 * offset
            utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                           font_scale=0.3, box_coords_2=(2, -2))
            for k, v in label2text.items():
                text = f"{v}: {round(predicted_proba[0][k]*100, 3)}%"
                y_pos = y_pos + offset
                utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                               font_scale=0.3, box_coords_2=(2, -2))
    return frame, face_frame
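# A minimal webcam loop that detectors like haar_detector() and dnn_detector()
# above are typically driven by. This is a sketch, not part of the original
# script; it assumes the detector globals (net, model, label2text, ...) have
# already been initialised, and that the detector returns (frame, face_frame).
import cv2


def run_webcam(detector):
    cap = cv2.VideoCapture(0)
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        out_frame, face_frame = detector(frame)  # annotated frame + last face crop
        cv2.imshow("emotion", out_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

# run_webcam(dnn_detector)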
def main(args):
    for arg in vars(args):
        print(arg, getattr(args, arg))
    model_name = args.model_name
    img_path = args.img_path
    img_label_path = 'imagenet.json'
    true_class = args.true_label
    adversarial_label = args.adv_label
    demo_epoch = args.epoch
    demo_eps = args.eps
    demo_lr = args.lr
    label_num = args.label_num
    lambda_up, lambda_down, lambda_label_loss = args.lambda_up, args.lambda_down, args.lambda_label_loss

    # load model
    sess, graph, img_size, images_v, logits = load_pretrain_model(model_name)
    probs = tf.nn.softmax(logits)
    print("successfully loaded model")

    if args.write_summary:
        unique_path_name = "up{}down{}ce{}epoch{}lr{}".format(
            args.lambda_up, args.lambda_down, args.lambda_label_loss, args.epoch, args.lr)
        final_summary_path = os.path.join(args.summary_path, unique_path_name)
        if not os.path.exists(final_summary_path):
            os.makedirs(final_summary_path)
        summary_writer = tf.summary.FileWriter(final_summary_path, graph)

    global_step = tf.Variable(0, name="global_step", trainable=False)
    step_init = tf.variables_initializer([global_step])
    y_hat = tf.placeholder(tf.int32, ())
    label_logits = tf.gather_nd(logits, [[0, y_hat]])

    img = PIL.Image.open(img_path)
    img = preprocess_img(img, img_size)
    batch_img = np.expand_dims(img, 0)
    imagenet_label = load_imagenet_label(img_label_path)

    # -------------------
    # Step 1: classify the image with the original model
    p = sess.run(probs, feed_dict={images_v: batch_img})[0]
    predict_label = np.argmax(p)
    # classify(img, p, imagenet_label, correct_class=true_class, is_cluster=True)

    # -------------------
    # Step 2: construct adversarial examples
    image_pl = tf.placeholder(tf.float32, (1, img_size, img_size, 3))
    assign_op = tf.assign(images_v, image_pl)
    learning_rate = tf.placeholder(tf.float32, ())
    var_eps = tf.placeholder(tf.float32, ())
    labels = tf.one_hot(y_hat, label_num)
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)[0]
    projected = tf.clip_by_value(
        tf.clip_by_value(images_v, image_pl - var_eps, image_pl + var_eps), 0, 1)
    with tf.control_dependencies([projected]):
        project_step = tf.assign(images_v, projected)

    # initialization step
    _ = sess.run([assign_op, step_init], feed_dict={image_pl: batch_img})

    # construct targeted attack
    # feed_dict_optim = {image_pl: batch_img,
    #                    y_hat: adversarial_label,
    #                    learning_rate: demo_lr}
    # feed_dict_proj = {image_pl: batch_img,
    #                   var_eps: demo_eps}
    # optim_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, var_list=[images_v])
    # model_train(sess=sess,
    #             optim_step=optim_step,
    #             project_step=project_step,
    #             loss=loss,
    #             feed_dict_optim=feed_dict_optim,
    #             feed_dict_project=feed_dict_proj,
    #             epoch=10)
    # adv_img = np.squeeze(images_v.eval(), 0)
    # adv_prob = sess.run(probs, feed_dict={images_v: np.expand_dims(adv_img, 0)})
    # classify(adv_img, adv_prob[0], imagenet_label, correct_class=281, target_class=adversarial_label)
    #
    # show the saliency map
    # feed_dict_gradient = {y_hat: true_class}
    # _ = show_gradient_map(graph=graph,
    #                       sess=sess,
    #                       y=label_logits,
    #                       x=images_v,
    #                       img=img,
    #                       is_integrated=False,
    #                       is_smooth=False,
    #                       feed_dict=feed_dict_gradient)

    # ---------------
    # use gradient descent to control the saliency map
    # original gradient intensity
    map3D, map_grey = show_gradient_map(graph=graph,
                                        sess=sess,
                                        y=label_logits,
                                        x=images_v,
                                        img=img,
                                        is_integrated=False,
                                        is_smooth=True,
                                        feed_dict={y_hat: true_class},
                                        is_cluster=args.is_cluster)
    center_more, radius_more = (100, 110), 10
    center_less, radius_less = (100, 70), 10
    gradient_more = calculate_region_importance(map_grey, center_more, radius_more)
    gradient_less = calculate_region_importance(map_grey, center_less, radius_less)
    print("region 1 gradient intensity %.3f, region 2 gradient intensity %.3f"
          % (gradient_more, gradient_less))

    # construct new loss function
    grad_map = tf.gradients(label_logits, images_v)[0]
    to_down_gradient = calculate_img_region_importance(grad_map, center_more, radius_more)
    to_up_gradient = calculate_img_region_importance(grad_map, center_less, radius_less)
    grad_loss = -lambda_up * to_up_gradient + lambda_down * to_down_gradient
    final_loss = grad_loss + lambda_label_loss * loss

    if args.write_summary:
        up_gradient_summary = tf.summary.scalar("up_gradient", to_up_gradient)
        down_gradient_summary = tf.summary.scalar("down_gradient", to_down_gradient)
        loss_summary = tf.summary.scalar("loss", loss)
        train_summary_op = tf.summary.merge_all()

    change_grad_optim_step = tf.train.GradientDescentOptimizer(
        learning_rate=demo_lr).minimize(final_loss, var_list=[images_v],
                                        global_step=global_step)

    for i in range(demo_epoch):
        if args.write_summary:
            _, _loss, step, summary_str = sess.run(
                [change_grad_optim_step, final_loss, global_step, train_summary_op],
                feed_dict={image_pl: batch_img,
                           y_hat: true_class,
                           learning_rate: demo_lr})
            summary_writer.add_summary(summary_str, global_step=step)
        else:
            _, _loss, step = sess.run(
                [change_grad_optim_step, final_loss, global_step],
                feed_dict={image_pl: batch_img,
                           y_hat: true_class,
                           learning_rate: demo_lr})
        sess.run([project_step], feed_dict={image_pl: batch_img, var_eps: demo_eps})
        print("%d loss = %g" % (i, _loss))

        if i % args.image_interval == 0:
            adv_img = np.squeeze(images_v.eval(), 0)
            # check the prediction result
            p_adv = sess.run(probs, feed_dict={images_v: batch_img})[0]
            predict_label_adv = np.argmax(p_adv)
            # classify(adv_img, p_adv, imagenet_label, correct_class=true_class, is_cluster=args.is_cluster)

            # check the gradient map
            map3D_adv, map_grey_adv = show_gradient_map(graph=graph,
                                                        sess=sess,
                                                        y=label_logits,
                                                        x=images_v,
                                                        img=adv_img,
                                                        is_integrated=False,
                                                        is_smooth=False,
                                                        feed_dict={y_hat: true_class},
                                                        is_cluster=args.is_cluster)
            adv_gradient_more = calculate_region_importance(map_grey_adv, center_more, radius_more)
            adv_gradient_less = calculate_region_importance(map_grey_adv, center_less, radius_less)
            if args.write_summary:
                map_grey_adv = tf.expand_dims(tf.expand_dims(map_grey_adv, 0), 3)
                adv_map_sum = tf.summary.image('adv_map' + str(i),
                                               tf.convert_to_tensor(map_grey_adv))
                adv_str = sess.run(adv_map_sum)
                summary_writer.add_summary(adv_str)
            print("Adversarial Case: predict label: %d, big region gradient intensity: %.3f, small region gradient intensity: %.3f"
                  % (predict_label_adv, adv_gradient_more, adv_gradient_less))
            print("Normal Case: predict label: %d, big region gradient intensity: %.3f, small region gradient intensity: %.3f"
                  % (predict_label, gradient_more, gradient_less))

    # write original map
    map_grey = tf.expand_dims(tf.expand_dims(map_grey, 0), 3)
    orig_map_sum = tf.summary.image('orig_map', tf.convert_to_tensor(map_grey))
    orig_str = sess.run(orig_map_sum)
    summary_writer.add_summary(orig_str)
parser.add_argument('path', type=str, nargs=1, help='path to pdf file', metavar='--p')
args = parser.parse_args()
pdf_path = args.path[0]

doc = fitz.open(pdf_path)
clf = joblib.load('SVMcls.pkl')
for i, page in enumerate(doc):
    print('Converting page no. {} to image'.format(i + 1))
    zoom = 2  # zoom factor
    mat = fitz.Matrix(zoom, zoom)
    pixmap = page.getPixmap(matrix=mat)
    page_im = pix2np(pixmap)

    prep_im = preprocess_img(page_im)
    rects = get_bounding_rects(prep_im)
    bboxes = []
    print('Detected {} candidate segments.'.format(len(rects)))
    for rect in rects:
        x, y, w, h = rect
        crop = prep_im[y:y + h, x:x + w]
        hist = np.reshape(get_hist(crop), (1, -1))
        pred = clf.predict(hist)
        if pred == 1:
            bboxes.append((x, y, w, h))
    print('Found {} handsigns at page {}'.format(len(bboxes), i + 1))
    if bboxes:
        for bbox in bboxes:
            x, y, w, h = bbox
            cv2.rectangle(page_im, (x, y), (x + w, y + h), (0, 255, 0), 2)
args = parse_arguments('')
model_name = args.model_name
img_path = args.img_path
img_label_path = 'imagenet.json'
true_class = args.true_label
adversarial_label = args.adv_label
label_num = args.label_num
lambda_up, lambda_down, lambda_label_loss = args.lambda_up, args.lambda_down, args.lambda_label_loss

sess, graph, img_size, images_pl, logits = load_pretrain_model(model_name, is_explain=True)
y_label = tf.placeholder(dtype=tf.int32, shape=())
img_label = load_imagenet_label(img_label_path)
img = PIL.Image.open(img_path)
img = preprocess_img(img, img_size)
# new_img = np.load('big_vgg16_30_0.0001_1000_0.001_0.03_3000.npy')  # 258
new_img = np.load('vgg16_60_70_35_45_30_0.0001_800_0.0_0.0_9000.npy')  # 208
batch_img = np.expand_dims(img, 0)
new_batch_img = np.expand_dims(new_img, 0)

true_class = 208
label_logits = logits[0, true_class]
gradient_saliency = saliency.GradientSaliency(graph, sess, label_logits, images_pl)  # 1951/1874

attributions = OrderedDict()
with DeepExplain(session=sess) as de:
    ori_attributions = {
        # Gradient-based
if frames_q:
    imgs = image2pipe.utils.yield_from_queue(frames_q)
else:
    # Get ordered list of filenames
    print("\tOpen sequence in folder: ", args.read_path)
    imgs = yield_from_dir(args.read_path)

seq_list = []
seq_outnames = []
for fn_or_fpath, img in imgs:
    if type(fn_or_fpath) is int:
        fpath = "%06d.png" % fn_or_fpath
        # from HxWxC to CxHxW, RGB image
        img = img.transpose(2, 0, 1)
        img, expanded_h, expanded_w = preprocess_img(img,
                                                     expand_if_needed=False,
                                                     expand_axis0=False)
    else:
        fpath = fn_or_fpath
    print("Load img:", fpath, img.shape)

    seq_list.append(img)
    seq_outnames.append(os.path.basename(fpath))
    seq = np.stack(seq_list, axis=0)
    # return seq, expanded_h, expanded_w

    if len(seq_list) == NUM_IN_FR_EXT:
        print("Infer batch ...")
        seq = torch.from_numpy(seq).to(device)
        seq_time = time.time()
mod.symbol.save(model_save_path + '.json')


if __name__ == "__main__":
    parser = build_parser()
    args = parser.parse_args()
    check_opts(args)

    # init
    ctx = mx.gpu(args.gpu) if args.gpu >= 0 else mx.cpu()
    ctx = mx.cpu()  # NOTE: this overrides the GPU context chosen above
    vgg_params = mx.nd.load(args.vgg_path)

    # init style
    print('load style image', args.style_image)
    style_np = preprocess_img(get_img(args.style_image))
    style_np = np.expand_dims(style_np, 0)
    dshape = style_np.shape
    style_exec = get_style_excutor(vgg_params, dshape, ctx)
    style_exec.data[:] = mx.nd.array(style_np)
    style_exec.executor.forward()
    style_array = [
        mx.nd.repeat(arr.copyto(ctx), axis=0, repeats=args.batch_size)
        for arr in style_exec.outputs
    ]
    del style_exec

    # TRAIN_SHAPE = (256, 256)
    dshape = (args.batch_size, 3, *TRAIN_SHAPE)
def main(args):
    for arg in vars(args):
        print(arg, getattr(args, arg))
    model_name = args.model_name
    img_path = args.img_path
    img_label_path = 'imagenet.json'
    true_class = args.true_label
    adversarial_label = args.adv_label
    label_num = args.label_num
    lambda_up, lambda_down, lambda_label_loss = args.lambda_up, args.lambda_down, args.lambda_label_loss
    # model_name = 'inception_v3'
    # img_path = './picture/dog_cat.jpg'
    # img_label_path = 'imagenet.json'
    # true_class = 208

    sess, graph, img_size, images_pl, logits = load_pretrain_model(model_name, is_explain=True)
    y_label = tf.placeholder(dtype=tf.int32, shape=())
    label_logits = logits[0, y_label]

    if len(args.imp) > 0:
        img = np.load(args.imp)
        init_epoch = int(args.imp[:-4].split('_')[-1])
        loss_list = list(np.load('loss_' + args.imp))
    else:
        img = PIL.Image.open(img_path)
        img = preprocess_img(img, img_size)
        init_epoch = 0
        loss_list = []
    old_img = np.array(img)
    batch_img = np.expand_dims(img, 0)
    # new_img = np.load('vgg16_30_0.0004_1000_0.001_0.03_4000.npy')
    # new_batch_img = np.concatenate((np.expand_dims(new_img, 0), batch_img), axis=0)
    # new_batch_img = np.expand_dims(new_img, 0)
    # all_img = np.concatenate((batch_img, new_batch_img))

    imagenet_label = load_imagenet_label(img_label_path)
    prob = tf.nn.softmax(logits)
    _prob = sess.run(prob, feed_dict={images_pl: batch_img})[0]
    # classify(img, _prob, imagenet_label, 1, 1)

    ####
    # deep explain
    # from deepexplain.tensorflow import DeepExplain
    # label_logits = logits[0, 208]
    # with DeepExplain(session=sess) as de:
    #     attributions = {
    #         # Gradient-based
    #         # NOTE: reduce_max is used to select the output unit for the class predicted by the classifier
    #         # For an example of how to use the ground-truth labels instead, see mnist_cnn_keras notebook
    #         'Saliency maps': de.explain('saliency', label_logits, images_pl, batch_img),
    #         'Gradient * Input': de.explain('grad*input', label_logits, images_pl, batch_img),
    #         # 'Integrated Gradients': de.explain('intgrad', label_logits, images_pl, new_batch_img),
    #         'Epsilon-LRP': de.explain('elrp', label_logits, images_pl, batch_img),
    #         'DeepLIFT (Rescale)': de.explain('deeplift', label_logits, images_pl, batch_img),
    #         # Perturbation-based (comment out to evaluate, but this will take a while!)
    #         # 'Occlusion [15x15]': de.explain('occlusion', label_logits, images_pl, batch_img, window_shape=(15,15,3), step=4)
    #     }
    ####
    # new_attributions = {
    #     # Gradient-based
    #     # NOTE: reduce_max is used to select the output unit for the class predicted by the classifier
    #     # For an example of how to use the ground-truth labels instead, see mnist_cnn_keras notebook
    #     'Saliency maps': de.explain('saliency', label_logits, images_pl, new_batch_img),
    #     'Gradient * Input': de.explain('grad*input', label_logits, images_pl, new_batch_img),
    #     # 'Integrated Gradients': de.explain('intgrad', label_logits, images_pl, new_batch_img),
    #     'Epsilon-LRP': de.explain('elrp', label_logits, images_pl, new_batch_img),
    #     'DeepLIFT (Rescale)': de.explain('deeplift', label_logits, images_pl, new_batch_img),
    #     # Perturbation-based (comment out to evaluate, but this will take a while!)
    #     # 'Occlusion [15x15]': de.explain('occlusion', label_logits, images_pl, batch_img, window_shape=(15,15,3), step=4)
    # }
    ####
    # attributions['Saliency maps'] = np.concatenate((attributions['Saliency maps'], new_attributions['Saliency maps']), axis=0)
    # attributions['Gradient * Input'] = np.concatenate((attributions['Gradient * Input'], new_attributions['Gradient * Input']), axis=0)
    # attributions['Epsilon-LRP'] = np.concatenate((attributions['Epsilon-LRP'], new_attributions['Epsilon-LRP']), axis=0)
    # attributions['DeepLIFT (Rescale)'] = np.concatenate((attributions['DeepLIFT (Rescale)'], new_attributions['DeepLIFT (Rescale)']), axis=0)
    #
    # n_cols = int(len(attributions)) + 1
    # n_rows = 2
    # fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(3 * n_cols, 3 * n_rows))
    # for i, xi in enumerate(all_img):
    #     # xi = (xi - np.min(xi))
    #     # xi /= np.max(xi)
    #     ax = axes.flatten()[i * n_cols]
    #     ax.imshow(xi)
    #     ax.set_title('Original')
    #     ax.axis('off')
    #     for j, a in enumerate(attributions):
    #         axj = axes.flatten()[i * n_cols + j + 1]
    #         plot(attributions[a][i], xi=xi, axis=axj, dilation=.5, percentile=99, alpha=.2).set_title(a)
    ######

    label_logits = logits[0, 208]
    with DeepExplain(session=sess) as de:
        dlift = de.explain('deeplift', label_logits, images_pl, batch_img)

    grad_map_tensor = tf.gradients(label_logits, images_pl)[0]
    grad_map = sess.run(grad_map_tensor,
                        feed_dict={images_pl: np.expand_dims(img, 0), y_label: true_class})

    gradient_saliency = saliency.GradientSaliency(graph, sess, label_logits, images_pl)  # 1951/1874
    vanilla_mask_3d = gradient_saliency.GetMask(img, feed_dict={y_label: true_class})  # better
    vanilla_mask_grayscale = saliency.VisualizeImageGrayscale(vanilla_mask_3d)
    # smoothgrad_mask_3d = gradient_saliency.GetSmoothedMask(img, feed_dict={y_label: true_class})  # much clear, 2204/2192
    # smoothgrad_mask_grayscale = saliency.VisualizeImageGrayscale(smoothgrad_mask_3d)
    #
    # new_img = np.load('vgg16_60_70_35_45_30_0.0001_800_0.0_0.0_9000.npy')
    # new_grad_map = sess.run(grad_map_tensor, feed_dict={images_pl: np.expand_dims(new_img, 0), y_label: true_class})
    # new_vanilla_mask_3d = gradient_saliency.GetMask(new_img, feed_dict={y_label: true_class})  # better
    # new_vanilla_mask_grayscale = saliency.VisualizeImageGrayscale(new_vanilla_mask_3d)
    # new_smoothgrad_mask_3d = gradient_saliency.GetSmoothedMask(new_img, feed_dict={y_label: true_class})  # much clear, 2204/2192
    # new_smoothgrad_mask_grayscale = saliency.VisualizeImageGrayscale(new_smoothgrad_mask_3d)

    # to_dec_center = (60, 70)
    to_dec_center = (100, 65)
    # to_dec_radius = (35, 45)
    to_dec_radius = (80, 60)
    to_inc_center = (120, 170)
    to_inc_radius = (40, 30)
    _map = vanilla_mask_grayscale
    print(calculate_region_importance(_map, to_dec_center, to_dec_radius))
    print(calculate_region_importance(_map, to_inc_center, to_inc_radius))

    # construct to_inc_region and to_dec_region
    to_dec_region = calculate_img_region_importance(grad_map_tensor, to_dec_center, to_dec_radius)
    to_inc_region = calculate_img_region_importance(grad_map_tensor, to_inc_center, to_inc_radius)

    # try NES (Natural evolutionary strategies)
    N = args.N
    sigma = args.sigma
    epsilon = round(args.eps, 2)
    epoch = args.epoch
    eta = args.lr
    # loss = to_dec_region / to_inc_region
    # old_loss = sess.run(loss, feed_dict={images_pl: np.expand_dims(img, 0), y_label: true_class})
    old_loss = calculate_deeplift_loss(dlift, to_dec_center, to_dec_radius,
                                       to_inc_center, to_inc_radius)
    num_list = '_'.join(['big', model_name, str(N), str(eta), str(epoch), str(sigma), str(epsilon)])
    print(num_list)

    for i in range(epoch):
        delta = np.random.randn(int(N / 2), img_size * img_size * 3)
        delta = np.concatenate((delta, -delta), axis=0)
        grad_sum = 0
        f_value_list = []
        for idelta in delta:
            img_plus = np.clip(img + sigma * idelta.reshape(img_size, img_size, 3), 0, 1)
            # f_value = sess.run(loss, feed_dict={images_pl: np.expand_dims(img_plus, 0), y_label: true_class})
            with DeepExplain(session=sess) as de:
                dlift = de.explain('deeplift', label_logits, images_pl,
                                   np.expand_dims(img_plus, 0))
                f_value = calculate_deeplift_loss(dlift, to_dec_center, to_dec_radius,
                                                  to_inc_center, to_inc_radius)
            f_value_list.append(f_value)
            grad_sum += f_value * idelta.reshape(img_size, img_size, 3)
        grad_sum = grad_sum / (N * sigma)
        new_img = np.clip(np.clip(img - eta * grad_sum, old_img - epsilon, old_img + epsilon), 0, 1)
        # new_loss, new_logits = sess.run([loss, logits],
        #                                 feed_dict={images_pl: np.expand_dims(new_img, 0), y_label: true_class})
        with DeepExplain(session=sess) as de:
            dlift = de.explain('deeplift', label_logits, images_pl, np.expand_dims(new_img, 0))
            new_loss = calculate_deeplift_loss(dlift, to_dec_center, to_dec_radius,
                                               to_inc_center, to_inc_radius)
        loss_list.append(new_loss)
        print("epoch:{} new:{}, old:{}, {}".format(i, new_loss, old_loss, np.argmax(_prob)))
        sys.stdout.flush()
        img = np.array(new_img)
        if i % args.image_interval == 0:
            temp_name = num_list + '_' + str(i + init_epoch)
            np.save(temp_name, new_img)
        if i % args.image_interval == 0:
            np.save('loss_' + temp_name, loss_list)

    np.save(num_list + '_' + str(epoch + init_epoch), new_img)
    np.save('loss_' + num_list + '_' + str(epoch + init_epoch), loss_list)
mask_imgs = np.array(mask_imgs)
mask_dilated_imgs = np.array(mask_dilated_imgs)
naive_imgs = np.array(naive_imgs)

# particular case
style_img = style_imgs[file_index]
naive_img_o = naive_imgs[file_index]
mask_img = mask_imgs[file_index]
mask_dilated_img = mask_dilated_imgs[file_index]

mask_img = mask_dilated_img / 255.0
mask_img = np.expand_dims(mask_img, axis=0)
mask_img = tf.cast(mask_img, tf.float32)

naive_img = K.variable(utils.preprocess_img(naive_img_o))
style_img = K.variable(utils.preprocess_img(style_img))
img_rows, img_cols = naive_img.shape[1], naive_img.shape[2]
fusion_img = K.placeholder((1, img_rows, img_cols, 3))

# combine the 3 images into a single Keras tensor
input_tensor = K.concatenate([naive_img, style_img, fusion_img], axis=0)

# build the vgg16 network with our 3 images as input
# the model will be loaded with pre-trained ImageNet weights
model = VGG16(input_tensor=input_tensor, weights='imagenet', include_top=False)
print('Model loaded.')

# get the symbolic outputs of each "key" layer (we gave them unique names).
outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
def run_gan(dataset, discriminator, generator, num_epoch=10):
    """Helper function for training GANs"""
    tf.reset_default_graph()

    # number of images for each batch
    batch_size = 128
    # noise dimension
    noise_dim = 96

    # shape of train images
    img_shape = list(dataset[0].shape)
    height = img_shape[0]
    width = img_shape[1]
    channels = img_shape[2]

    # check image shape
    assert height == 32, 'Error: image height should be 32'
    assert width == 32, 'Error: image width should be 32'

    # placeholder for images from the training dataset
    placeholder_size = [None] + img_shape
    x = tf.placeholder(tf.float32, placeholder_size)
    # random noise fed into our generator
    z = sample_noise(batch_size, noise_dim)
    # generated images
    G_sample = generator(z, channels)

    with tf.variable_scope('') as scope:
        img_preproc = preprocess_img(x)
        logits_real = discriminator(img_preproc)
        # Re-use discriminator weights on new inputs
        scope.reuse_variables()
        logits_fake = discriminator(G_sample)

    # get solvers
    D_solver, G_solver = get_solvers()

    # get discriminator and generator loss
    D_loss, G_loss = gan_loss(logits_real, logits_fake)

    # Get the list of variables for the discriminator and generator
    D_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'discriminator')
    G_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'generator')

    # setup training steps
    D_train_step = D_solver.minimize(D_loss, var_list=D_vars)
    G_train_step = G_solver.minimize(G_loss, var_list=G_vars)

    with get_session() as sess:
        sess.run(tf.global_variables_initializer())
        train_gan(sess, G_train_step, G_loss, G_sample, D_train_step, D_loss,
                  x, dataset, batch_size=batch_size, num_epoch=num_epoch)
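# In CS231n-style GAN code like run_gan() above, preprocess_img conventionally
# rescales pixel values from [0, 1] to [-1, 1] to match a tanh generator
# output. A minimal sketch under that assumption; the project's real
# definition may differ:
def preprocess_img_sketch(x):
    # map [0, 1] -> [-1, 1]
    return 2.0 * x - 1.0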
import utils
import cv2
import os
import json
import constant

if __name__ == "__main__":
    imgurl = 'input\\13.png'
    img = cv2.imread(imgurl)
    img = utils.preprocess_img(img)

    # get bounding box of all rectangles
    bounding_boxes = utils.box_extraction(img)
    print(f'[INFO] bounding_boxes: {len(bounding_boxes)}')

    # get invoice data
    data = utils.get_invoice_data(bounding_boxes, img)

    # write output to json
    base = os.path.basename(imgurl)
    output = constant.OUTPUT_PATH + os.path.splitext(base)[0] + '.json'
    with open(output, 'w+') as fp:
        json.dump(data, fp, indent=4)
    print(f'[INFO] exported to {output}')
def main():
    args = parse_args()
    raw_img = cv2.imread(args.input, 1)
    raw_img = cv2.resize(raw_img, (224, 224), interpolation=cv2.INTER_LINEAR)
    raw_img = np.float32(raw_img) / 255
    image, norm_image = preprocess_img(raw_img)

    model = models.__dict__[args.arch](pretrained=True).eval()
    model = model.cuda()
    rise = RISE(model, input_size=(224, 224), batch_size=40)
    rise.generate_masks()
    gd = GradCAM(model, target_layer=args.target_layer)
    gc = GroupCAM(model, target_layer=args.target_layer)

    rise_heatmap = rise(norm_image.cuda(), class_idx=args.cls_idx).cpu().data
    gd_heatmap = gd(norm_image.cuda(), class_idx=args.cls_idx).cpu().data
    gc_heatmap = gc(norm_image.cuda(), class_idx=args.cls_idx).cpu().data

    if args.output is not None:
        rise_cam = show_cam(image, rise_heatmap, "rise_base.png")
        gd_cam = show_cam(image, gd_heatmap, "gd_base.png")
        gc_cam = show_cam(image, gc_heatmap, "gc_base.png")

    if args.ins_del:
        blur = lambda x: gaussian_blur2d(x, kernel_size=(51, 51), sigma=(50., 50.))
        insertion = CausalMetric(model, 'ins', 224 * 2, substrate_fn=blur)
        deletion = CausalMetric(model, 'del', 224 * 2, substrate_fn=torch.zeros_like)

        norm_image = norm_image.cpu()
        gd_heatmap = gd_heatmap.cpu().numpy()
        gc_heatmap = gc_heatmap.cpu().numpy()
        rise_heatmap = rise_heatmap.cpu().numpy()

        gc_ins_score = insertion.evaluate(norm_image, mask=gc_heatmap, cls_idx=None)
        gd_ins_score = insertion.evaluate(norm_image, mask=gd_heatmap, cls_idx=None)
        rise_ins_score = insertion.evaluate(norm_image, mask=rise_heatmap, cls_idx=None)
        gc_del_score = deletion.evaluate(norm_image, mask=gc_heatmap, cls_idx=None)
        gd_del_score = deletion.evaluate(norm_image, mask=gd_heatmap, cls_idx=None)
        rise_del_score = deletion.evaluate(norm_image, mask=rise_heatmap, cls_idx=None)

        legend = ["RISE", "Grad-CAM", "Group-CAM"]
        ins_scores = [auc(rise_ins_score), auc(gd_ins_score), auc(gc_ins_score)]
        del_scores = [auc(rise_del_score), auc(gd_del_score), auc(gc_del_score)]
        ins_scores = [round(i * 100, 2) for i in ins_scores]
        del_scores = [round(i * 100, 2) for i in del_scores]
        ins_legend = [i + ": " + str(j) for i, j in zip(legend, ins_scores)]
        del_legend = [i + ": " + str(j) for i, j in zip(legend, del_scores)]

        n_steps = len(gd_ins_score)
        x = np.arange(n_steps) / n_steps
        plt.figure(figsize=(12, 5))
        plt.xlim(-0.1, 1.1)
        plt.ylim(0, 1.05)

        plt.subplot(121)
        plt.plot(x, rise_ins_score)
        plt.plot(x, gd_ins_score)
        plt.plot(x, gc_ins_score)
        plt.xticks(fontsize=15)
        plt.yticks(fontsize=15)
        plt.legend(ins_legend, loc='best', fontsize=15)
        plt.title("Insertion Curve", fontsize=15)

        plt.subplot(122)
        plt.plot(x, rise_del_score)
        plt.plot(x, gd_del_score)
        plt.plot(x, gc_del_score)
        plt.xticks(fontsize=15)
        plt.yticks(fontsize=15)
        plt.legend(del_legend, loc='best', fontsize=15)
        plt.title("Deletion Curve", fontsize=15)
        plt.show()
def screen(self):
    return utils.preprocess_img(self._previous_screen, self._screen)