def net_fatory(net_name, inputs, train_model, FC=False):
    if net_name == 'vgg_16':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            net, end_points = vgg.vgg_16(inputs,
                                         num_classes=None,
                                         is_training=train_model,
                                         fc_flage=FC)
    elif net_name == 'vgg_19':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            net, end_points = vgg.vgg_19(inputs,
                                         num_classes=None,
                                         is_training=train_model,
                                         fc_flage=FC)
    elif net_name == 'resnet_v2_50':
        with slim.arg_scope(resnet_arg_scope()):
            net, end_points = resnet_v2.resnet_v2_50(inputs=inputs,
                                                     num_classes=None,
                                                     is_training=train_model,
                                                     global_pool=False)
    elif net_name == 'resnet_v2_152':
        with slim.arg_scope(resnet_arg_scope()):
            net, end_points = resnet_v2.resnet_v2_152(inputs=inputs,
                                                      num_classes=None,
                                                      is_training=train_model,
                                                      global_pool=False)
    return net, end_points
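# Hedged usage sketch (not part of the original snippet): builds a VGG-16
# backbone through net_fatory; assumes TF 1.x with tf.contrib.slim and the
# slim vgg/resnet_v2 model definitions imported as above.
import tensorflow as tf

example_inputs = tf.placeholder(tf.float32, shape=[None, 224, 224, 3])
example_net, example_end_points = net_fatory('vgg_16', example_inputs,
                                             train_model=True, FC=False)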
def extract_image_features(inputs, reuse=True):
    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, end_points = vgg.vgg_19(inputs,
                                   spatial_squeeze=False,
                                   is_training=False,
                                   reuse=reuse)
    return end_points
def perceptual_loss(real, fake, network="vgg_16"):
    if params.loss.vgg_w <= 0.0:
        return 0.0

    real = real * params.learning.image_std + params.learning.image_mean
    fake = fake * params.learning.image_std + params.learning.image_mean
    real = utils.perceptual_loss_image_preprocess(real)
    fake = utils.perceptual_loss_image_preprocess(fake)
    image = tf.concat([real, fake], axis=0)

    with tf.variable_scope("perceptual_loss"):
        if network == "vgg_16":
            with slim.arg_scope(vgg.vgg_arg_scope()):
                conv1, conv2, conv3 = vgg.vgg_16(image)
        elif network == "vgg_19":
            with slim.arg_scope(vgg.vgg_arg_scope()):
                conv1, conv2, conv3 = vgg.vgg_19(image)
        else:
            raise NotImplementedError("")

    losses = []
    for i, features in enumerate([conv1, conv2, conv3]):
        real, fake = tf.split(features, 2, 0)
        losses.append(params.loss.perceptual_loss.weights[i] *
                      tf.reduce_mean(tf.square(real - fake)))
    return losses[0] + losses[1] + losses[2]
def testNoClasses(self):
    batch_size = 5
    height, width = 224, 224
    num_classes = None
    with self.test_session():
        inputs = tf.random_uniform((batch_size, height, width, 3))
        net, end_points = vgg.vgg_19(inputs, num_classes)
        expected_names = [
            'vgg_19/conv1/conv1_1',
            'vgg_19/conv1/conv1_2',
            'vgg_19/pool1',
            'vgg_19/conv2/conv2_1',
            'vgg_19/conv2/conv2_2',
            'vgg_19/pool2',
            'vgg_19/conv3/conv3_1',
            'vgg_19/conv3/conv3_2',
            'vgg_19/conv3/conv3_3',
            'vgg_19/conv3/conv3_4',
            'vgg_19/pool3',
            'vgg_19/conv4/conv4_1',
            'vgg_19/conv4/conv4_2',
            'vgg_19/conv4/conv4_3',
            'vgg_19/conv4/conv4_4',
            'vgg_19/pool4',
            'vgg_19/conv5/conv5_1',
            'vgg_19/conv5/conv5_2',
            'vgg_19/conv5/conv5_3',
            'vgg_19/conv5/conv5_4',
            'vgg_19/pool5',
            'vgg_19/fc6',
            'vgg_19/fc7',
        ]
        self.assertSetEqual(set(end_points.keys()), set(expected_names))
        self.assertTrue(net.op.name.startswith('vgg_19/fc7'))
def style_loss(self, styled_vgg, style_image, layer_names, style_weight, sess):
    style_image_placeholder = tf.placeholder('float', shape=style_image.shape)
    with slim.arg_scope(vgg.vgg_arg_scope(reuse=True)):
        _, style_image_vgg = vgg.vgg_19(style_image_placeholder,
                                        num_classes=0,
                                        is_training=False)
    style_loss = 0
    preprocessed_style_image = style_image - np.array([
        ctx.params.R_MEAN, ctx.params.G_MEAN, ctx.params.B_MEAN
    ]).reshape([1, 1, 1, 3])

    for layer_name in layer_names:
        style_image_gram = self.gram_matrix_for_style_image(
            style_image_vgg[layer_name], style_image_placeholder,
            preprocessed_style_image, sess)
        input_image_gram = self.gram_matrix_for_input_image(
            styled_vgg[layer_name])
        style_loss += (2 * tf.nn.l2_loss(
            input_image_gram - np.expand_dims(style_image_gram, 0)) /
                       style_image_gram.size)
    return style_weight * style_loss
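# Hedged sketch of a Gram-matrix helper like the gram_matrix_for_* methods
# referenced above (they are not shown in the original snippet and may differ).
# Assumes a [batch, height, width, channels] feature map.
import tensorflow as tf

def gram_matrix(features):
    shape = tf.shape(features)
    flat = tf.reshape(features, [shape[0], shape[1] * shape[2], shape[3]])
    size = tf.cast(shape[1] * shape[2] * shape[3], tf.float32)
    # [batch, C, C] Gram matrix, normalized by the feature-map size.
    return tf.matmul(flat, flat, transpose_a=True) / size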
def main():
    images_placeholder = tf.placeholder(tf.float32,
                                        shape=(None, IMAGE_SIZE, IMAGE_SIZE, 3))
    _, end_points = vgg_19(images_placeholder, num_classes=None,
                           is_training=False)

    dataset_image_codes, dataset_image_files = get_dataset_image_codes(
        images_placeholder, end_points)
    print(dataset_image_codes.shape)

    images = [os.path.join(FILES_DIR, f'image_{i}.jpg') for i in range(1, 5)]
    query_image_codes = get_query_image_code(images, images_placeholder,
                                             end_points)
    print(query_image_codes.shape)

    neighbors_count = 2
    nearest_neighbors = NearestNeighbors(
        n_neighbors=neighbors_count, metric='cosine').fit(dataset_image_codes)
    _, indices = nearest_neighbors.kneighbors(query_image_codes)

    space = 10
    result_image_size = ((neighbors_count + 1) * (IMAGE_SIZE + space) - space,
                         len(images) * (IMAGE_SIZE + space) - space)
    result_image = Image.new('RGB', result_image_size, 'white')
    for i, filename in enumerate(images):
        query_image = rescale_image(Image.open(filename))
        draw = ImageDraw.Draw(query_image)
        draw.line((0, 0, query_image.width - 1, 0, query_image.width - 1,
                   query_image.height - 1, 0, query_image.height - 1, 0, 0),
                  fill='red', width=1)
        result_image.paste(query_image, (0, i * (IMAGE_SIZE + space)))
        for j in range(neighbors_count):
            neighbor_image = Image.open(dataset_image_files[indices[i][j]])
            result_image.paste(neighbor_image,
                               ((j + 1) * (IMAGE_SIZE + space),
                                i * (IMAGE_SIZE + space)))
    result_image.show()
    result_image.save(os.path.join(FILES_DIR, 'result.jpg'))
def _buildGraph(self):
    x_in = tf.placeholder(tf.float32,
                          shape=[None,  # enables variable batch size
                                 self.input_dim[0]],
                          name="x")
    x_in_reshape = tf.reshape(x_in,
                              [-1, self.input_dim[1], self.input_dim[2], 3])

    dropout = tf.placeholder_with_default(1., shape=[], name="dropout")

    y_in = tf.placeholder(dtype=tf.int8, name="y")
    onehot_labels = tf.one_hot(indices=tf.cast(y_in, tf.int32), depth=2)

    is_train = tf.placeholder_with_default(True, shape=[], name="is_train")

    logits, nett, ww = vgg.vgg_19(x_in_reshape,
                                  num_classes=2,
                                  is_training=is_train,
                                  dropout_keep_prob=dropout,
                                  spatial_squeeze=True,
                                  scope='vgg19')
    pred = tf.nn.softmax(logits, name="prediction")

    global_step = tf.Variable(0, trainable=False)

    pred_cost = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,
                                                logits=logits)
    # Summary tag corrected: this graph is VGG-19, not InceptionV3.
    tf.summary.scalar("vgg19_cost", pred_cost)

    train_op = tf.contrib.layers.optimize_loss(loss=pred_cost,
                                               learning_rate=self.learning_rate,
                                               global_step=global_step,
                                               optimizer="Adam")

    merged_summary = tf.summary.merge_all()

    return (x_in, dropout, is_train, y_in, logits, nett, ww, pred, pred_cost,
            global_step, train_op, merged_summary)
def train_model():
    torch.backends.cudnn.deterministic = True
    device = torch.device("cuda")

    print("CUDA visible devices: " + str(torch.cuda.device_count()))
    print("CUDA Device Name: " + str(torch.cuda.get_device_name(device)))

    # Creating dataset loaders
    train_dataset = LoadData(dataset_dir, TRAIN_SIZE, dslr_scale, test=False)
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size,
                              shuffle=True, num_workers=1,
                              pin_memory=True, drop_last=True)

    test_dataset = LoadData(dataset_dir, TEST_SIZE, dslr_scale, test=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=1,
                             shuffle=False, num_workers=1,
                             pin_memory=True, drop_last=False)

    visual_dataset = LoadVisualData(dataset_dir, 10, dslr_scale, level)
    visual_loader = DataLoader(dataset=visual_dataset, batch_size=1,
                               shuffle=False, num_workers=0,
                               pin_memory=True, drop_last=False)

    # Creating image processing network and optimizer
    generator = PyNET(level=level, instance_norm=True,
                      instance_norm_level_1=True).to(device)
    generator = torch.nn.DataParallel(generator)

    optimizer = Adam(params=generator.parameters(), lr=learning_rate)

    # Restoring the variables
    if level < 5:
        generator.load_state_dict(
            torch.load("models/pynet_level_" + str(level + 1) +
                       "_epoch_" + str(restore_epoch) + ".pth"),
            strict=False)

    # Losses
    VGG_19 = vgg_19(device)
    MSE_loss = torch.nn.MSELoss()
    MS_SSIM = MSSSIM()

    # Train the network
    for epoch in range(num_train_epochs):
        torch.cuda.empty_cache()
        train_iter = iter(train_loader)

        for i in range(len(train_loader)):
            optimizer.zero_grad()
            x, y = next(train_iter)
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)

            enhanced = generator(x)

            # MSE Loss
            loss_mse = MSE_loss(enhanced, y)

            # VGG Loss
            if level < 5:
                enhanced_vgg = VGG_19(normalize_batch(enhanced))
                target_vgg = VGG_19(normalize_batch(y))
                loss_content = MSE_loss(enhanced_vgg, target_vgg)

            # Total Loss
            if level == 5 or level == 4:
                total_loss = loss_mse
            if level == 3 or level == 2:
                total_loss = loss_mse * 10 + loss_content
            if level == 1:
                total_loss = loss_mse * 10 + loss_content
            if level == 0:
                loss_ssim = MS_SSIM(enhanced, y)
                total_loss = loss_mse + loss_content + (1 - loss_ssim) * 0.4

            # Perform the optimization step
            total_loss.backward()
            optimizer.step()

            if i == 0:
                # Save the model that corresponds to the current epoch
                generator.eval().cpu()
                torch.save(generator.state_dict(),
                           "models/pynet_level_" + str(level) +
                           "_epoch_" + str(epoch) + ".pth")
                generator.to(device).train()

                # Save visual results for several test images
                generator.eval()
                with torch.no_grad():
                    visual_iter = iter(visual_loader)
                    for j in range(len(visual_loader)):
                        torch.cuda.empty_cache()
                        raw_image = next(visual_iter)
                        raw_image = raw_image.to(device, non_blocking=True)

                        enhanced = generator(raw_image.detach())
                        enhanced = np.asarray(
                            to_image(torch.squeeze(enhanced.detach().cpu())))

                        imageio.imwrite("results/pynet_img_" + str(j) +
                                        "_level_" + str(level) +
                                        "_epoch_" + str(epoch) + ".jpg",
                                        enhanced)

                # Evaluate the model
                loss_mse_eval = 0
                loss_psnr_eval = 0
                loss_vgg_eval = 0
                loss_ssim_eval = 0

                generator.eval()
                with torch.no_grad():
                    test_iter = iter(test_loader)
                    for j in range(len(test_loader)):
                        x, y = next(test_iter)
                        x = x.to(device, non_blocking=True)
                        y = y.to(device, non_blocking=True)

                        enhanced = generator(x)

                        loss_mse_temp = MSE_loss(enhanced, y).item()
                        loss_mse_eval += loss_mse_temp
                        loss_psnr_eval += 20 * math.log10(
                            1.0 / math.sqrt(loss_mse_temp))

                        if level < 2:
                            loss_ssim_eval += MS_SSIM(y, enhanced)

                        if level < 5:
                            enhanced_vgg_eval = VGG_19(
                                normalize_batch(enhanced)).detach()
                            target_vgg_eval = VGG_19(
                                normalize_batch(y)).detach()
                            loss_vgg_eval += MSE_loss(enhanced_vgg_eval,
                                                      target_vgg_eval).item()

                loss_mse_eval = loss_mse_eval / TEST_SIZE
                loss_psnr_eval = loss_psnr_eval / TEST_SIZE
                loss_vgg_eval = loss_vgg_eval / TEST_SIZE
                loss_ssim_eval = loss_ssim_eval / TEST_SIZE

                if level < 2:
                    print("Epoch %d, mse: %.4f, psnr: %.4f, vgg: %.4f, "
                          "ms-ssim: %.4f" % (epoch, loss_mse_eval,
                                             loss_psnr_eval, loss_vgg_eval,
                                             loss_ssim_eval))
                elif level < 5:
                    print("Epoch %d, mse: %.4f, psnr: %.4f, vgg: %.4f" %
                          (epoch, loss_mse_eval, loss_psnr_eval,
                           loss_vgg_eval))
                else:
                    print("Epoch %d, mse: %.4f, psnr: %.4f" %
                          (epoch, loss_mse_eval, loss_psnr_eval))

                generator.train()
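# Hedged sketch of the normalize_batch helper used above (not shown in the
# original snippet): standard ImageNet normalization for torchvision VGG
# feature extractors, assuming the input tensor is already scaled to [0, 1].
def normalize_batch(batch):
    mean = batch.new_tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
    std = batch.new_tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
    return (batch - mean) / std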
def train_model():
    torch.backends.cudnn.deterministic = True
    device = torch.device("cuda")

    print("CUDA visible devices: " + str(torch.cuda.device_count()))
    print("CUDA Device Name: " + str(torch.cuda.get_device_name(device)))

    # Creating dataset loaders
    train_dataset = LoadTrainData(opt.dataroot, TRAIN_SIZE, test=False)
    train_loader = DataLoader(dataset=train_dataset, batch_size=opt.batch_size,
                              shuffle=True, num_workers=1,
                              pin_memory=True, drop_last=True)

    test_dataset = LoadTrainData(opt.dataroot, TEST_SIZE, test=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=1,
                             shuffle=False, num_workers=1,
                             pin_memory=True, drop_last=False)

    # Creating image processing network and optimizer
    generator = MWRCAN().to(device)
    generator = torch.nn.DataParallel(generator)
    # generator.load_state_dict(torch.load('./ckpt/Track1/mwcnnvggssim4_epoch_60.pth'))

    optimizer = Adam(params=generator.parameters(), lr=opt.lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     [50, 100, 150, 200],
                                                     gamma=0.5)

    # Losses
    VGG_19 = vgg_19(device)
    MSE_loss = torch.nn.MSELoss()
    MS_SSIM = MSSSIM()
    L1_loss = torch.nn.L1Loss()

    # Train the network
    for epoch in range(opt.epochs):
        print("lr = %.8f" % (scheduler.get_lr()[0]))
        torch.cuda.empty_cache()
        generator.to(device).train()

        i = 0
        for x, y in train_loader:
            optimizer.zero_grad()
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)

            enhanced = generator(x)

            loss_l1 = L1_loss(enhanced, y)

            enhanced_vgg = VGG_19(normalize_batch(enhanced))
            target_vgg = VGG_19(normalize_batch(y))
            loss_content = L1_loss(enhanced_vgg, target_vgg)

            loss_ssim = MS_SSIM(enhanced, y)

            total_loss = loss_l1 + loss_content + (1 - loss_ssim) * 0.15

            if i % 100 == 0:
                print("Epoch %d_%d, L1: %.4f, vgg: %.4f, SSIM: %.4f, total: %.4f" %
                      (epoch, i, loss_l1, loss_content, (1 - loss_ssim) * 0.15,
                       total_loss))

            total_loss.backward()
            optimizer.step()
            i = i + 1

        scheduler.step()

        # Save the model that corresponds to the current epoch
        generator.eval().cpu()
        torch.save(generator.state_dict(),
                   os.path.join(opt.save_model_path,
                                "mwrcan_epoch_" + str(epoch) + ".pth"))

        # Evaluate the model
        loss_psnr_eval = 0
        generator.to(device)
        generator.eval()
        with torch.no_grad():
            for x, y in test_loader:
                x = x.to(device, non_blocking=True)
                y = y.to(device, non_blocking=True)

                enhanced = generator(x)
                enhanced = torch.clamp(torch.round(enhanced * 255),
                                       min=0, max=255) / 255
                y = torch.clamp(torch.round(y * 255), min=0, max=255) / 255

                loss_mse_temp = MSE_loss(enhanced, y).item()
                loss_psnr_eval += 20 * math.log10(1.0 / math.sqrt(loss_mse_temp))

        loss_psnr_eval = loss_psnr_eval / TEST_SIZE
        print("Epoch %d, psnr: %.4f" % (epoch, loss_psnr_eval))
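# Hedged sketch of the vgg_19(device) perceptual-loss extractor called in the
# training loops above (the original module is not shown and may cut the
# network at a different layer). Built on torchvision's pretrained VGG-19.
from torchvision.models import vgg19

def vgg_19(device):
    # Keep features up to relu5_4 and freeze them; callers compare these
    # activations between the enhanced and target images.
    features = vgg19(pretrained=True).features[:36].eval().to(device)
    for param in features.parameters():
        param.requires_grad = False
    return features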
def main(): yolo = YOLO() max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) parser = argparse.ArgumentParser( description='Training codes for Openpose using Tensorflow') parser.add_argument('--checkpoint_path', type=str, default='checkpoints/train/2018-12-13-16-56-49/') parser.add_argument('--backbone_net_ckpt_path', type=str, default='checkpoints/vgg/vgg_19.ckpt') parser.add_argument('--image', type=str, default=None) # parser.add_argument('--run_model', type=str, default='img') parser.add_argument('--video', type=str, default=None) parser.add_argument('--train_vgg', type=bool, default=True) parser.add_argument('--use_bn', type=bool, default=False) parser.add_argument('--save_video', type=str, default='result/our.mp4') args = parser.parse_args() checkpoint_path = args.checkpoint_path logger.info('checkpoint_path: ' + checkpoint_path) with tf.name_scope('inputs'): raw_img = tf.placeholder(tf.float32, shape=[None, None, None, 3]) img_size = tf.placeholder(dtype=tf.int32, shape=(2, ), name='original_image_size') img_normalized = raw_img / 255 - 0.5 # define vgg19 with slim.arg_scope(vgg.vgg_arg_scope()): vgg_outputs, end_points = vgg.vgg_19(img_normalized) # get net graph logger.info('initializing model...') net = PafNet(inputs_x=vgg_outputs, use_bn=args.use_bn) hm_pre, cpm_pre, added_layers_out = net.gen_net() hm_up = tf.image.resize_area(hm_pre[5], img_size) cpm_up = tf.image.resize_area(cpm_pre[5], img_size) # hm_up = hm_pre[5] # cpm_up = cpm_pre[5] smoother = Smoother({'data': hm_up}, 25, 3.0) gaussian_heatMat = smoother.get_output() max_pooled_in_tensor = tf.nn.pool(gaussian_heatMat, window_shape=(3, 3), pooling_type='MAX', padding='SAME') tensor_peaks = tf.where(tf.equal(gaussian_heatMat, max_pooled_in_tensor), gaussian_heatMat, tf.zeros_like(gaussian_heatMat)) logger.info('initialize saver...') # trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='openpose_layers') # trainable_var_list = [] trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='openpose_layers') if args.train_vgg: trainable_var_list = trainable_var_list + tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19') restorer = tf.train.Saver(tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19'), name='vgg_restorer') saver = tf.train.Saver(trainable_var_list) logger.info('initialize session...') config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(tf.group(tf.global_variables_initializer())) logger.info('restoring vgg weights...') restorer.restore(sess, args.backbone_net_ckpt_path) logger.info('restoring from checkpoint...') #saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir=checkpoint_path)) saver.restore(sess, args.checkpoint_path + 'model-59000.ckpt') logger.info('initialization done') writeVideo_flag = True if args.image is None: if args.video is not None: cap = cv2.VideoCapture(args.video) w = int(cap.get(3)) h = int(cap.get(4)) else: cap = cv2.VideoCapture("images/video.mp4") #cap = cv2.VideoCapture("rtsp://*****:*****@192.168.43.51:554//Streaming/Channels/1") #cap = cv2.VideoCapture("http://*****:*****@192.168.1.111:8081") #cap = cv2.VideoCapture("rtsp://*****:*****@192.168.1.106:554//Streaming/Channels/1") _, image = 
cap.read() #print(_,image) if image is None: logger.error("Can't read video") sys.exit(-1) fps = cap.get(cv2.CAP_PROP_FPS) ori_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) ori_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) #print(fps,ori_w,ori_h) if args.save_video is not None: fourcc = cv2.VideoWriter_fourcc(*'MP4V') video_saver = cv2.VideoWriter('result/our.mp4', fourcc, fps, (ori_w, ori_h)) logger.info('record vide to %s' % args.save_video) logger.info('fps@%f' % fps) size = [int(654 * (ori_h / ori_w)), 654] h = int(654 * (ori_h / ori_w)) time_n = time.time() #print(time_n) max_boxs = 0 person_track = {} yolo2 = YOLO2() while True: face = [] cur1 = conn.cursor() # 获取一个游标 sql = "select * from worker" cur1.execute(sql) data = cur1.fetchall() for d in data: # 注意int类型需要使用str函数转义 name = str(d[1]) + '_' + d[2] face.append(name) cur1.close() # 关闭游标 _, image_fist = cap.read() #穿戴安全措施情况检测 img = Image.fromarray( cv2.cvtColor(image_fist, cv2.COLOR_BGR2RGB)) image, wear = yolo2.detect_image(img) image = np.array(image) image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # # 获取警戒线 cv2.line(image, (837, 393), (930, 300), (0, 255, 255), 3) transboundaryline = t.line_detect_possible_demo(image) #openpose二维姿态检测 img = np.array(cv2.resize(image, (654, h))) # cv2.imshow('raw', img) img_corner = np.array( cv2.resize(image, (360, int(360 * (ori_h / ori_w))))) img = img[np.newaxis, :] peaks, heatmap, vectormap = sess.run( [tensor_peaks, hm_up, cpm_up], feed_dict={ raw_img: img, img_size: size }) bodys = PoseEstimator.estimate_paf(peaks[0], heatmap[0], vectormap[0]) image, person = TfPoseEstimator.draw_humans(image, bodys, imgcopy=False) #取10右脚 13左脚 foot = [] if len(person) > 0: for p in person: foot_lr = [] if 10 in p and 13 in p: foot_lr.append(p[10]) foot_lr.append(p[13]) if len(foot_lr) > 1: foot.append(foot_lr) fps = round(1 / (time.time() - time_n), 2) image = cv2.putText(image, str(fps) + 'fps', (10, 15), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 255, 255)) time_n = time.time() #deep目标检测 image2 = Image.fromarray(image_fist) boxs = yolo.detect_image(image2) features = encoder(image, boxs) detections = [ Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features) ] boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression( boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] if len(boxs) > max_boxs: max_boxs = len(boxs) # print(max_boxs) # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if max_boxs < track.track_id: tracker.tracks.remove(track) tracker._next_id = max_boxs + 1 if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() PointX = bbox[0] + ((bbox[2] - bbox[0]) / 2) PointY = bbox[3] if track.track_id not in person_track: track2 = copy.deepcopy(track) person_track[track.track_id] = track2 else: track2 = copy.deepcopy(track) bbox2 = person_track[track.track_id].to_tlbr() PointX2 = bbox2[0] + ((bbox2[2] - bbox2[0]) / 2) PointY2 = bbox2[3] distance = math.sqrt( pow(PointX - PointX2, 2) + pow(PointY - PointY2, 2)) if distance < 120: person_track[track.track_id] = track2 else: # print('last',track.track_id) dis = {} for key in person_track: bbox3 = person_track[key].to_tlbr() PointX3 = bbox3[0] + ( (bbox3[2] - bbox3[0]) / 2) PointY3 = bbox3[3] d = math.sqrt( pow(PointX3 - PointX, 2) + pow(PointY3 - PointY, 2)) dis[key] = d dis = sorted(dis.items(), key=operator.itemgetter(1), reverse=False) track2.track_id = 
dis[0][0] person_track[dis[0][0]] = track2 tracker.tracks.remove(track) tracker.tracks.append(person_track[track.track_id]) # 写入class try: box_title = face[track2.track_id - 1] except Exception as e: box_title = str(track2.track_id) + "_" + "unknow" if box_title not in workers: wid = box_title.split('_')[0] localtime = time.asctime(time.localtime(time.time())) workers[box_title] = wk.Worker() workers[box_title].set(box_title, localtime, (int(PointX), int(PointY))) cur2 = conn.cursor() # 获取一个游标 sql2 = "UPDATE worker SET in_time='" + localtime + "' WHERE worker_id= '" + wid + "'" cur2.execute(sql2) cur2.close() # 关闭游标 else: localtime = time.asctime(time.localtime(time.time())) yoloPoint = (int(PointX), int(PointY)) foot_dic = {} wear_dic = {} for f in foot: fp = [] footCenter = ((f[0][0] + f[1][0]) / 2, (f[0][1] + f[1][1]) / 2) foot_dis = int( math.sqrt( pow(footCenter[0] - yoloPoint[0], 2) + pow(footCenter[1] - yoloPoint[1], 2))) #print(foot_dis) fp.append(f) fp.append(footCenter) foot_dic[foot_dis] = fp #print(box_title, 'sss', foot_dic) foot_dic = sorted(foot_dic.items(), key=operator.itemgetter(0), reverse=False) workers[box_title].current_point = foot_dic[0][1][1] workers[box_title].track_point.append( workers[box_title].current_point) #print(box_title,'sss',foot_dic[0][1][1]) mytrack = str(workers[box_title].track_point) wid = box_title.split('_')[0] #卡尔曼滤波预测 if wid not in KalmanNmae: myKalman(wid) if wid not in lmp: setLMP(wid) cpx, cpy = predict(workers[box_title].current_point[0], workers[box_title].current_point[1], wid) if cpx[0] == 0.0 or cpy[0] == 0.0: cpx[0] = workers[box_title].current_point[0] cpy[0] = workers[box_title].current_point[1] workers[box_title].next_point = (int(cpx), int(cpy)) workers[box_title].current_footR = foot_dic[0][1][0][0] workers[box_title].current_footL = foot_dic[0][1][0][1] cur3 = conn.cursor() # 获取一个游标 sql = "UPDATE worker SET current_point= '" + str( workers[box_title].current_point ) + "' , current_footR = '" + str( workers[box_title].current_footR ) + "',current_footL = '" + str( workers[box_title].current_footL ) + "',track_point = '" + mytrack + "',next_point = '" + str( workers[box_title].next_point ) + "' WHERE worker_id= '" + wid + "'" cur3.execute(sql) cur3.close() #写入安全措施情况 if len(wear) > 0: for w in wear: wear_dis = int( math.sqrt( pow(w[0] - yoloPoint[0], 2) + pow(w[1] - yoloPoint[1], 2))) wear_dic[wear_dis] = w wear_dic = sorted(wear_dic.items(), key=operator.itemgetter(0), reverse=False) if wear_dic[0][0] < 120: cur4 = conn.cursor() # 获取一个游标 if wear[wear_dic[0][1]] == 1: if len(workers[box_title].wear['no helmet'] ) == 0: workers[box_title].wear[ 'no helmet'].append(localtime) sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no_helmet',abnormal_time = '" + localtime + "'" cur4.execute(sql) cur4.close() # 关闭游标 else: if localtime not in workers[ box_title].wear['no helmet']: workers[box_title].wear[ 'no helmet'].append(localtime) sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no_helmet',abnormal_time = '" + localtime + "'" cur4.execute(sql) cur4.close() # 关闭游标 elif wear[wear_dic[0][1]] == 2: if len(workers[box_title]. 
wear['no work cloths']) == 0: workers[box_title].wear[ 'no work cloths'].append(localtime) sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no work cloths',abnormal_time = '" + localtime + "'" cur4.execute(sql) cur4.close() # 关闭游标 else: if localtime not in workers[ box_title].wear[ 'no work cloths']: workers[box_title].wear[ 'no work cloths'].append( localtime) sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no work cloths',abnormal_time = '" + localtime + "'" cur4.execute(sql) cur4.close() # 关闭游标 elif wear[wear_dic[0][1]] == 3: if len(workers[box_title]. wear['unsafe wear']) == 0: workers[box_title].wear[ 'unsafe wear'].append(localtime) sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'unsafe wear',abnormal_time = '" + localtime + "'" cur4.execute(sql) cur4.close() # 关闭游标 else: if localtime not in workers[ box_title].wear['unsafe wear']: workers[box_title].wear[ 'unsafe wear'].append( localtime) sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'unsafe wear',abnormal_time = '" + localtime + "'" cur4.execute(sql) cur4.close() # 关闭游标 #写入越线情况 if len(workers[box_title].track_point) > 4: for i in range(len(transboundaryline)): p1 = (transboundaryline[i][0], transboundaryline[i][1]) p2 = (transboundaryline[i][2], transboundaryline[i][3]) p3 = workers[box_title].track_point[-2] p4 = workers[box_title].track_point[-1] a = t.IsIntersec(p1, p2, p3, p4) if a == '有交点': cur5 = conn.cursor() # 获取一个游标 cur6 = conn.cursor() # 获取一个游标 cur5.execute( "select time from transboundary where worker_id = '" + wid + "' ") qurrytime = cur5.fetchone() cur5.close() # 关闭游标 if qurrytime == None: print('越线') sql = "INSERT INTO transboundary SET worker_id = '" + wid + "',time = '" + localtime + "'" cur6.execute(sql) cur6.close() # 关闭游标 else: temp1 = 0 for qt in qurrytime: if qt == localtime: temp1 = 1 if temp1 == 0: print('越线') sql = "INSERT INTO transboundary SET worker_id = '" + wid + "',time = '" + localtime + "'" cur6.execute(sql) cur6.close() # 关闭游标 if len(workers[box_title].track_point) >= 20: workers[box_title].previous_point = workers[ box_title].track_point[-5] conn.commit() try: cv2.putText(image, face[track2.track_id - 1], (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2) except Exception as e: cv2.putText(image, "unknow", (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2) if args.video is not None: image[27:img_corner.shape[0] + 27, :img_corner.shape[1]] = img_corner # [3:-10, :] cv2.imshow(' ', image) if args.save_video is not None: video_saver.write(image) cv2.waitKey(1) else: image = common.read_imgfile(args.image) size = [image.shape[0], image.shape[1]] if image is None: logger.error('Image can not be read, path=%s' % args.image) sys.exit(-1) h = int(654 * (size[0] / size[1])) img = np.array(cv2.resize(image, (654, h))) cv2.imshow('ini', img) img = img[np.newaxis, :] peaks, heatmap, vectormap = sess.run( [tensor_peaks, hm_up, cpm_up], feed_dict={ raw_img: img, img_size: size }) cv2.imshow('in', vectormap[0, :, :, 0]) bodys = PoseEstimator.estimate_paf(peaks[0], heatmap[0], vectormap[0]) image = TfPoseEstimator.draw_humans(image, bodys, imgcopy=False) cv2.imshow(' ', image) cv2.waitKey(0)
                l = loss(logits, y)
                optimizer.zero_grad()
                l.backward()
                optimizer.step()  # perform the gradient-descent step
                if i % 50 == 0:
                    acc = (logits.argmax(1) == y).float().mean()
                    print("Epochs[{}/{}]---batch {}---acc {:.4}---loss {:.4}".format(
                        epoch + 1, self.epochs, i, acc, l))
            self.net.eval()  # switch to evaluation mode
            print("Epochs[{}/{}]--acc on test {:.4}".format(
                epoch + 1, self.epochs,
                self.evaluate(test_iter, self.net, device)))
            self.net.train()  # switch back to training mode

    @staticmethod
    def evaluate(data_iter, net, device):
        with torch.no_grad():
            acc_sum, n = 0.0, 0
            for x, y in data_iter:
                x, y = x.to(device), y.to(device)
                logits = net(x)
                acc_sum += (logits.argmax(1) == y).float().sum().item()
                n += len(y)
            return acc_sum / n


if __name__ == '__main__':
    model = vgg_19(num_classes=10)
    vgg19 = MyModel(model=model, batch_size=128, epochs=5, learning_rate=0.001)
    vgg19.train()
    # print(model)
def train(): parser = argparse.ArgumentParser( description='Training codes for Openpose using Tensorflow') parser.add_argument('--batch_size', type=str, default=10) parser.add_argument('--continue_training', type=bool, default=False) parser.add_argument('--checkpoint_path', type=str, default='checkpoints/train/') parser.add_argument('--backbone_net_ckpt_path', type=str, default='checkpoints/vgg/vgg_19.ckpt') parser.add_argument('--train_vgg', type=bool, default=True) parser.add_argument( '--annot_path', type=str, default= '/run/user/1000/gvfs/smb-share:server=server,share=data/yzy/dataset/' 'Realtime_Multi-Person_Pose_Estimation-master/training/dataset/COCO/annotations/' ) parser.add_argument( '--img_path', type=str, default= '/run/user/1000/gvfs/smb-share:server=server,share=data/yzy/dataset/' 'Realtime_Multi-Person_Pose_Estimation-master/training/dataset/COCO/images/' ) # parser.add_argument('--annot_path_val', type=str, # default='/run/user/1000/gvfs/smb-share:server=192.168.1.2,share=data/yzy/dataset/' # 'Realtime_Multi-Person_Pose_Estimation-master/training/dataset/COCO/annotations/' # 'person_keypoints_val2017.json') # parser.add_argument('--img_path_val', type=str, # default='/run/user/1000/gvfs/smb-share:server=192.168.1.2,share=data/yzy/dataset/' # 'Realtime_Multi-Person_Pose_Estimation-master/training/dataset/COCO/images/val2017/') parser.add_argument('--save_checkpoint_frequency', type=str, default=1000) parser.add_argument('--save_summary_frequency', type=str, default=100) parser.add_argument('--stage_num', type=str, default=6) parser.add_argument('--hm_channels', type=str, default=19) parser.add_argument('--paf_channels', type=str, default=38) parser.add_argument('--input-width', type=int, default=368) parser.add_argument('--input-height', type=int, default=368) parser.add_argument('--max_echos', type=str, default=5) parser.add_argument('--use_bn', type=bool, default=False) parser.add_argument('--loss_func', type=str, default='l2') args = parser.parse_args() if not args.continue_training: start_time = time.localtime(time.time()) checkpoint_path = args.checkpoint_path + ('%d-%d-%d-%d-%d-%d' % start_time[0:6]) os.mkdir(checkpoint_path) else: checkpoint_path = args.checkpoint_path logger = logging.getLogger('train') logger.setLevel(logging.DEBUG) fh = logging.FileHandler(checkpoint_path + '/train_log.log') fh.setLevel(logging.DEBUG) ch = logging.StreamHandler() ch.setLevel(logging.DEBUG) formatter = logging.Formatter( '[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s') fh.setFormatter(formatter) ch.setFormatter(formatter) logger.addHandler(ch) logger.addHandler(fh) logger.info(args) logger.info('checkpoint_path: ' + checkpoint_path) # define input placeholder with tf.name_scope('inputs'): raw_img = tf.placeholder(tf.float32, shape=[args.batch_size, 368, 368, 3]) # mask_hm = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, 46, 46, args.hm_channels]) # mask_paf = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, 46, 46, args.paf_channels]) hm = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, 46, 46, args.hm_channels]) paf = tf.placeholder( dtype=tf.float32, shape=[args.batch_size, 46, 46, args.paf_channels]) # defien data loader logger.info('initializing data loader...') set_network_input_wh(args.input_width, args.input_height) scale = 8 set_network_scale(scale) df = get_dataflow_batch(args.annot_path, True, args.batch_size, img_path=args.img_path) steps_per_echo = df.size() enqueuer = DataFlowToQueue(df, [raw_img, hm, paf], queue_size=100) q_inp, 
q_heat, q_vect = enqueuer.dequeue() q_inp_split, q_heat_split, q_vect_split = tf.split(q_inp, 1), tf.split( q_heat, 1), tf.split(q_vect, 1) img_normalized = q_inp_split[0] / 255 - 0.5 # [-0.5, 0.5] df_valid = get_dataflow_batch(args.annot_path, False, args.batch_size, img_path=args.img_path) df_valid.reset_state() validation_cache = [] logger.info('initializing model...') # define vgg19 with slim.arg_scope(vgg.vgg_arg_scope()): vgg_outputs, end_points = vgg.vgg_19(img_normalized) # get net graph net = PafNet(inputs_x=vgg_outputs, stage_num=args.stage_num, hm_channel_num=args.hm_channels, use_bn=args.use_bn) hm_pre, paf_pre, added_layers_out = net.gen_net() # two kinds of loss losses = [] with tf.name_scope('loss'): for idx, (l1, l2), in enumerate(zip(hm_pre, paf_pre)): if args.loss_func == 'square': hm_loss = tf.reduce_sum( tf.square(tf.concat(l1, axis=0) - q_heat_split[0])) paf_loss = tf.reduce_sum( tf.square(tf.concat(l2, axis=0) - q_vect_split[0])) losses.append(tf.reduce_sum([hm_loss, paf_loss])) logger.info('use square loss') else: hm_loss = tf.nn.l2_loss( tf.concat(l1, axis=0) - q_heat_split[0]) paf_loss = tf.nn.l2_loss( tf.concat(l2, axis=0) - q_vect_split[0]) losses.append(tf.reduce_mean([hm_loss, paf_loss])) logger.info('use l2 loss') loss = tf.reduce_sum(losses) / args.batch_size global_step = tf.Variable(0, name='global_step', trainable=False) learning_rate = tf.train.exponential_decay(1e-4, global_step, steps_per_echo, 0.5, staircase=True) trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='openpose_layers') if args.train_vgg: trainable_var_list = trainable_var_list + tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19') with tf.name_scope('train'): train = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=1e-8).minimize( loss=loss, global_step=global_step, var_list=trainable_var_list) logger.info('initialize saver...') restorer = tf.train.Saver(tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19'), name='vgg_restorer') saver = tf.train.Saver(trainable_var_list) logger.info('initialize tensorboard') tf.summary.scalar("lr", learning_rate) tf.summary.scalar("loss2", loss) tf.summary.histogram('img_normalized', img_normalized) tf.summary.histogram('vgg_outputs', vgg_outputs) tf.summary.histogram('added_layers_out', added_layers_out) tf.summary.image('vgg_out', tf.transpose(vgg_outputs[0:1, :, :, :], perm=[3, 1, 2, 0]), max_outputs=512) tf.summary.image('added_layers_out', tf.transpose(added_layers_out[0:1, :, :, :], perm=[3, 1, 2, 0]), max_outputs=128) tf.summary.image('paf_gt', tf.transpose(q_vect_split[0][0:1, :, :, :], perm=[3, 1, 2, 0]), max_outputs=38) tf.summary.image('hm_gt', tf.transpose(q_heat_split[0][0:1, :, :, :], perm=[3, 1, 2, 0]), max_outputs=19) for i in range(args.stage_num): tf.summary.image('hm_pre_stage_%d' % i, tf.transpose(hm_pre[i][0:1, :, :, :], perm=[3, 1, 2, 0]), max_outputs=19) tf.summary.image('paf_pre_stage_%d' % i, tf.transpose(paf_pre[i][0:1, :, :, :], perm=[3, 1, 2, 0]), max_outputs=38) tf.summary.image('input', img_normalized, max_outputs=4) logger.info('initialize session...') merged = tf.summary.merge_all() config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: writer = tf.summary.FileWriter(checkpoint_path, sess.graph) sess.run(tf.group(tf.global_variables_initializer())) if args.backbone_net_ckpt_path is not None: logger.info('restoring vgg weights from %s' % args.backbone_net_ckpt_path) restorer.restore(sess, 
args.backbone_net_ckpt_path) if args.continue_training: saver.restore( sess, tf.train.latest_checkpoint(checkpoint_dir=checkpoint_path)) logger.info('restoring from checkpoint...') logger.info('start training...') coord = tf.train.Coordinator() enqueuer.set_coordinator(coord) enqueuer.start() while True: best_checkpoint = float('inf') for _ in tqdm(range(steps_per_echo), ): total_loss, _, gs_num = sess.run([loss, train, global_step]) echo = gs_num / steps_per_echo if gs_num % args.save_summary_frequency == 0: total_loss, gs_num, summary, lr = sess.run( [loss, global_step, merged, learning_rate]) writer.add_summary(summary, gs_num) logger.info('echos=%f, setp=%d, total_loss=%f, lr=%f' % (echo, gs_num, total_loss, lr)) if gs_num % args.save_checkpoint_frequency == 0: valid_loss = 0 if len(validation_cache) == 0: for images_test, heatmaps, vectmaps in tqdm( df_valid.get_data()): validation_cache.append( (images_test, heatmaps, vectmaps)) df_valid.reset_state() del df_valid df_valid = None for images_test, heatmaps, vectmaps in validation_cache: valid_loss += sess.run(loss, feed_dict={ q_inp: images_test, q_vect: vectmaps, q_heat: heatmaps }) if valid_loss / len(validation_cache) <= best_checkpoint: best_checkpoint = valid_loss / len(validation_cache) saver.save(sess, save_path=checkpoint_path + '/' + 'model', global_step=gs_num) logger.info( 'best_checkpoint = %f, saving checkpoint to ' % best_checkpoint + checkpoint_path + '/' + 'model-%d' % gs_num) else: logger.info('loss = %f drop' % valid_loss / len(validation_cache)) if echo >= args.max_echos: sess.close() return 0
content_input = tf.placeholder(tf.float32, shape=(1, None, None, 3),
                               name='content_input')
style_input = tf.placeholder(tf.float32, shape=(1, None, None, 3),
                             name='style_input')

# switch RGB to BGR
content = tf.reverse(content_input, axis=[-1])
style = tf.reverse(style_input, axis=[-1])

# preprocess image
content = vgg.preprocess(content)
style = vgg.preprocess(style)

encoder_content, encoder_content_points = vgg.vgg_19(
    content, reuse=False, final_endpoint="conv4_1")
encoder_style, encoder_style_points = vgg.vgg_19(
    style, reuse=True, final_endpoint="conv4_1")

# pass the encoded images to AdaIN
target_features = AdaIN(encoder_content, encoder_style)

# decode target features back to image
with tf.variable_scope("decoder_target"):
    # alpha = 0.8
    # target_features = (1 - alpha) * encoder_content + alpha * target_features  # content-style trade-off
    generated_img = decoder.decode(target_features)

# deprocess image
generated_img = vgg.deprocess(generated_img)
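# Hedged sketch of the AdaIN(content, style) operation referenced above (the
# original implementation is not shown): align the per-channel mean and
# standard deviation of the content features to those of the style features.
import tensorflow as tf

def AdaIN(content_feat, style_feat, epsilon=1e-5):
    c_mean, c_var = tf.nn.moments(content_feat, axes=[1, 2], keep_dims=True)
    s_mean, s_var = tf.nn.moments(style_feat, axes=[1, 2], keep_dims=True)
    normalized = (content_feat - c_mean) / tf.sqrt(c_var + epsilon)
    return normalized * tf.sqrt(s_var + epsilon) + s_mean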
def style_transfer(content_image_filename, style_image_filename, model_filename, tensorboard_path, learning_rate, learning_rate_decay_factor, decay_steps, max_iteration): # image process resized_width = 1000 raw_content_image = Image.open(content_image_filename) raw_style_image = Image.open(style_image_filename) if raw_style_image.mode == 'L': raw_content_image = raw_content_image.convert('L') raw_content_image = raw_content_image.convert('RGB') raw_style_image = raw_style_image.convert('RGB') raw_content_image = raw_content_image.resize( (resized_width, int(resized_width * raw_content_image.height / raw_content_image.width)), resample=Image.LANCZOS) raw_style_image = raw_style_image.resize( (resized_width, int(resized_width * raw_style_image.height / raw_style_image.width)), resample=Image.LANCZOS) content_image = np.array(raw_content_image, dtype=np.float32) style_image = np.array(raw_style_image, dtype=np.float32) if len(content_image.shape) != 3 or content_image.shape[2] != 3 or len( style_image.shape) != 3 or style_image.shape[2] != 3: print 'image format error!' return model_layers, mean_pixel = vgg.load_model_data(model_filename) # content image features mean_content_image = np.array([content_image - mean_pixel], dtype=np.float32) content_features, _ = vgg.vgg_19(mean_content_image, model_layers) # style image features mean_style_image = np.array([style_image - mean_pixel], dtype=np.float32) _, style_features = vgg.vgg_19(mean_style_image, model_layers) style_gram_features = [] for features in style_features: features = tf.reshape(features, shape=[-1, features.get_shape()[-1].value]) features_size = reduce(lambda x, y: x.value * y.value, features.get_shape()) gram = tf.matmul(tf.transpose(features), features) / features_size style_gram_features.append(gram) # generated image features initial_image = tf.random_normal( (1, ) + content_image.shape, dtype=tf.float32) * 0.256 generated_image = tf.Variable(initial_image) generated_content_features, generated_style_features = vgg.vgg_19( generated_image, model_layers) generated_gram_features = [] for features in generated_style_features: features = tf.reshape(features, shape=[-1, features.get_shape()[-1].value]) features_size = reduce(lambda x, y: x.value * y.value, features.get_shape()) gram = tf.matmul(tf.transpose(features), features) / features_size generated_gram_features.append(gram) # content loss content_weight = 5.0 content_loss = 0.0 for (content_feature, generated_content_feature) in zip(content_features, generated_content_features): content_feature_size = reduce(lambda x, y: x * y, content_feature.get_shape()).value content_loss += 2 * tf.nn.l2_loss( generated_content_feature - content_feature) / content_feature_size content_loss *= content_weight # style loss style_weight = 500.0 style_layer_weight = 0.2 style_loss = 0.0 for (style_gram_feature, generated_gram_feature) in zip(style_gram_features, generated_gram_features): style_gram_size = reduce(lambda x, y: x.value * y.value, style_gram_feature.get_shape()) style_loss += style_layer_weight * 2 * tf.nn.l2_loss( generated_gram_feature - style_gram_feature) / style_gram_size style_loss *= style_weight # tv loss tv_weight = 100.0 tv_x_size = reduce(mul, (x.value for x in generated_image[:, :, 1:, :].get_shape()), 1) tv_y_size = reduce(mul, (y.value for y in generated_image[:, 1:, :, :].get_shape()), 1) tv_loss = tv_weight * 2 * ( (tf.nn.l2_loss(generated_image[:, :, 1:, :] - generated_image[:, :, :content_image.shape[1] - 1, :]) / tv_x_size) + (tf.nn.l2_loss(generated_image[:, 
1:, :, :] - generated_image[:, :content_image.shape[0] - 1, :, :]) / tv_y_size)) # photorealism regularization, mattion laplacian matrix matting_laplacian_weight = 50000.0 laplacian_content_image = raw_content_image.resize( (10, int(10 * raw_content_image.height / raw_content_image.width)), resample=Image.LANCZOS) laplacian_content_image = np.array(laplacian_content_image, dtype=np.float32) laplacian_generated_image = tf.image.resize_bilinear( generated_image, size=laplacian_content_image.shape[0:2]) matting_laplacian_matrix = image_utils.compute_matting_laplacian( laplacian_content_image) matting_laplacian_matrix1 = image_utils.getlaplacian( laplacian_content_image, np.zeros(shape=(laplacian_content_image.shape[0:2]))) matting_laplacian_sparse_tensor = tf.SparseTensor( indices=np.array( [matting_laplacian_matrix.row, matting_laplacian_matrix.col]).T, values=matting_laplacian_matrix.data, dense_shape=matting_laplacian_matrix.shape) matting_laplacian_tensor = tf.sparse_tensor_to_dense( matting_laplacian_sparse_tensor, default_value=0.0, validate_indices=False) matting_laplacian_loss = 0.0 for dim in range(3): dim_generated_image = tf.slice(laplacian_generated_image, [0, 0, 0, dim], [-1, -1, -1, 1]) dim_generated_image = tf.reshape(dim_generated_image, shape=[-1, 1]) dim_generated_image_product = tf.matmul( tf.matmul(dim_generated_image, matting_laplacian_tensor, transpose_a=True), dim_generated_image) dim_generated_image_product = tf.reshape(dim_generated_image_product, shape=[]) matting_laplacian_loss += dim_generated_image_product matting_laplacian_loss *= matting_laplacian_weight # total loss loss = content_loss + style_loss + tv_loss # optimizer with tf.device('/cpu:0'): global_step = tf.Variable(0, name='global_step', trainable=False) lr = tf.train.exponential_decay(learning_rate, global_step, decay_steps, learning_rate_decay_factor, staircase=True) optimizer = tf.train.AdamOptimizer(lr) train_op = optimizer.minimize(loss, global_step=global_step) # summary tf.summary.scalar('loss', loss) tf.summary.scalar('lr', lr) if not os.path.exists(tensorboard_path): os.makedirs(tensorboard_path) init_op = tf.global_variables_initializer() with tf.Session() as sess: summary_op = tf.summary.merge_all() writer = tf.summary.FileWriter(tensorboard_path, sess.graph) sess.run(init_op) start_time = datetime.datetime.now() for i in range(max_iteration): _, loss_value, step, summary_value, lr_value = sess.run( [train_op, loss, global_step, summary_op, lr]) end_time = datetime.datetime.now() print('[{}] Step: {}, loss: {}, lr: {}'.format( end_time - start_time, step, loss_value, lr_value)) writer.add_summary(summary_value, step) if step % 100 == 0 or i == max_iteration - 1: stylized_image = generated_image.eval() stylized_image = stylized_image.reshape( content_image.shape) + mean_pixel stylized_image = np.clip(stylized_image, 0, 255).astype(np.uint8) Image.fromarray(stylized_image).save('data/stylized_' + str(step) + '.jpg', quality=95) print 'success saved stylized_' + str(step) + '.jpg to data/' start_time = end_time print 'style transfer done!'
def train_model(): torch.backends.cudnn.deterministic = True device = torch.device("cuda") print("CUDA visible devices: " + str(torch.cuda.device_count())) print("CUDA Device Name: " + str(torch.cuda.get_device_name(device))) # Creating dataset loaders train_dataset = LoadTrainData(opt.dataroot, TRAIN_SIZE, test=False) train_loader = DataLoader(dataset=train_dataset, batch_size=opt.batch_size, shuffle=True, num_workers=1, pin_memory=True, drop_last=True) test_dataset = LoadTrainData(opt.dataroot, TEST_SIZE, test=True) test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False, num_workers=1, pin_memory=True, drop_last=False) # Creating image processing network and optimizer generator = MWRCAN().to(device) generator = torch.nn.DataParallel(generator) generator.load_state_dict( torch.load('./ckpt/Track1/mwcnnvggssim4_epoch_60.pth')) # generator.load_state_dict(torch.load('./ckpt/Track2/G_epoch_46.pth')) disc = Discriminator().to(device) disc = torch.nn.DataParallel(disc) # disc.load_state_dict(torch.load('./ckpt/Track2/D_epoch_46.pth')) optimizer_g = Adam(params=generator.parameters(), lr=opt.lr) scheduler_g = torch.optim.lr_scheduler.MultiStepLR(optimizer_g, [50, 100, 150, 200], gamma=0.5) optimizer_d = Adam(params=disc.parameters(), lr=opt.lr * 2) scheduler_d = torch.optim.lr_scheduler.MultiStepLR(optimizer_d, [50, 100, 150, 200], gamma=0.5) VGG_19 = vgg_19(device) MSE_loss = torch.nn.MSELoss() MS_SSIM = MSSSIM() L1_loss = torch.nn.L1Loss() # Train the network for epoch in range(opt.epochs): generator.to(device).train() disc.to(device).train() print("generator lr = %.8f; discriminator lr = %.8f" % (scheduler_g.get_lr()[0], scheduler_d.get_lr()[0])) torch.cuda.empty_cache() i = 0 for x, y in train_loader: one = Variable(torch.cuda.FloatTensor(x.shape[0], 1).fill_(1.0), requires_grad=False) zero = Variable(torch.cuda.FloatTensor(x.shape[0], 1).fill_(0.0), requires_grad=False) x = x.to(device, non_blocking=True) y = y.to(device, non_blocking=True) optimizer_g.zero_grad() enhanced = generator(x) fake_label = disc(enhanced).mean() loss_l1 = L1_loss(enhanced, y) enhanced_vgg = VGG_19(normalize_batch(enhanced)) target_vgg = VGG_19(normalize_batch(y)) loss_content = L1_loss(enhanced_vgg, target_vgg) loss_ssim = MS_SSIM(enhanced, y) adversarial_loss = MSE_loss(one, fake_label) g_loss = loss_l1 + loss_content + ( 1 - loss_ssim) * 0.15 + adversarial_loss * 0.1 g_loss.backward() optimizer_g.step() optimizer_d.zero_grad() real_label = disc(y).mean() fake_label = disc(enhanced.detach()).mean() d_loss = MSE_loss(one, real_label) + MSE_loss(fake_label, zero) d_loss.backward() optimizer_d.step() # Perform the optimization step if i % 100 == 0: #print(loss_ssim) print( "Epoch %d_%d, L1: %.4f, vgg: %.4f, SSIM: %.4f, adv: %.4f, g_loss: %.4f" % (epoch, i, loss_l1, loss_content, (1 - loss_ssim) * 0.15, adversarial_loss * 0.1, g_loss)) print("Epoch %d_%d, d_loss: %.4f" % (epoch, i, d_loss)) i = i + 1 scheduler_g.step() scheduler_d.step() # Save the model that corresponds to the current epoch generator.eval().cpu() disc.eval().cpu() torch.save( generator.state_dict(), os.path.join(opt.save_model_path, "g_epoch_" + str(epoch) + ".pth")) torch.save( disc.state_dict(), os.path.join(opt.save_model_path, "d_epoch_" + str(epoch) + ".pth")) # Evaluate the model generator.to(device) disc.to(device) generator.eval() disc.eval() loss_psnr_eval = 0 with torch.no_grad(): for x, y in test_loader: x = x.to(device, non_blocking=True) y = y.to(device, non_blocking=True) enhanced = generator(x) enhanced = 
torch.clamp( torch.round(enhanced * 255), min=0, max=255) / 255 y = torch.clamp(torch.round(y * 255), min=0, max=255) / 255 loss_mse_temp = MSE_loss(enhanced, y).item() loss_psnr_eval += 20 * math.log10( 1.0 / math.sqrt(loss_mse_temp)) loss_psnr_eval = loss_psnr_eval / TEST_SIZE print("Epoch %d, psnr: %.4f" % (epoch, loss_psnr_eval))
def main(): session = tf.Session() ### This section deals with preprocessing for the VGG network ### # Used for variable clipping. For imagenet metamers we always optimize a varible input # bounded between 0-1 and then rescale before going into the network. min_image = 0 max_image = 1 # Parameters for preprocessing VGG, the input images are between 0-255 subtract_value = 0 multiply_value = 255 # The mean channel values used for VGG preprocessing means = [123.68, 116.779, 103.939] # Make an input variable for the network (will be optimized) # Include constraint to maintain variable between min_image and max_image imgs = tf.Variable( np.random.random([1, 224, 224, 3]), dtype=tf.float32, constraint=lambda t: tf.clip_by_value(t, min_image, max_image)) # apply the vgg preprocessing for loaded checkpoint img_preproc = tf.subtract(imgs, subtract_value) img_preproc = tf.multiply(img_preproc, multiply_value) img_preproc = mean_image_subtraction(img_preproc, means) ### Now build the model, and load the saved checkpoint ### # Get vgg.py from https://github.com/tensorflow/models/tree/master/research/slim logits, nets = vgg.vgg_19(img_preproc, is_training=False, scope='vgg_19') # Make a saver and load the checkpoint # model checkpoint http://download.tensorflow.org/models/vgg_19_2016_08_28.tar.gz saver = tf.train.Saver(var_list=tf.get_collection( tf.GraphKeys.GLOBAL_VARIABLES, scope='vgg_19')) saver.restore(session, 'vgg_19.ckpt') ### Include pointers to the layers we will use for metamer generation ### ### Much of this block is bookkeeping to make accessing intermediate layers easier ### # The layers that were used in Feather et al. 2019 ('jitted_relu' was removed for figure labels) metamer_layers = [ 'conv1_2_jittered_relu', 'conv2_2_jittered_relu', 'conv3_4_jittered_relu', 'conv4_4_jittered_relu', 'conv5_4_jittered_relu', 'fc6_jittered_relu', 'fc7_jittered_relu', 'fc8' ] nets['input_image'] = imgs nets['image_prepoc'] = img_preproc nets['logits'] = logits nets['min_image'] = min_image nets['max_image'] = max_image nets['subtract_value'] = subtract_value nets['multiply_value'] = multiply_value nets['visualization_input'] = nets['input_image'] nets['predictions'] = tf.nn.softmax(logits) nets['class_labels_key'] = class_names nets['imagenet_logits'] = nets['logits'] # Some of the tfslim networks have an offset for the class index. The saved VGG model does not. nets['class_index_offset'] = int(0) ### The following dictionaries and code are used to grab the activations of the conv layers ### ### before the non-linearity is applied, so that we can apply the modified gradient ReLU. ### ### The activations after the modified gradient ReLU will be the same as the activations ### ### after the normal gradient ReLU. 
### # Get the pre-relu layers and add them to nets, format <layer_name_in_nets>:<name_in_graph> layers_pre_relu = { 'conv1_2_prerelu': 'vgg_19/conv1/conv1_2/BiasAdd:0', 'conv2_2_prerelu': 'vgg_19/conv2/conv2_2/BiasAdd:0', 'conv3_4_prerelu': 'vgg_19/conv3/conv3_4/BiasAdd:0', 'conv4_4_prerelu': 'vgg_19/conv4/conv4_4/BiasAdd:0', 'conv5_4_prerelu': 'vgg_19/conv5/conv5_4/BiasAdd:0', 'fc6_prerelu': 'vgg_19/fc6/BiasAdd:0', 'fc7_prerelu': 'vgg_19/fc7/BiasAdd:0' } # remap some of the names in nets for easy access # for VGG, all of these values are after the non-linearity is applied nets['conv1_2'] = nets['vgg_19/conv1/conv1_2'] nets['conv2_2'] = nets['vgg_19/conv2/conv2_2'] nets['conv3_4'] = nets['vgg_19/conv3/conv3_4'] nets['conv4_4'] = nets['vgg_19/conv4/conv4_4'] nets['conv5_4'] = nets['vgg_19/conv5/conv5_4'] nets['fc6'] = nets['vgg_19/fc6'] nets['fc7'] = nets['vgg_19/fc7'] nets['fc8'] = nets['vgg_19/fc8'] add_jitter_layers = [ 'conv1_2', 'conv2_2', 'conv3_4', 'conv4_4', 'conv5_4', 'fc6', 'fc7' ] # This logic is helpful for networks such as ResNet, where we modify the relu for all of the layers # that will be concatenated at the end of the block. for layer_key, layer_name in layers_pre_relu.items(): if type(layer_name) is list: concat_layers_mixed = [] # Some of the mixed layers have layers that are mixed... need to jitter the relu for each? for concat_layer in layer_name: concat_layers_mixed.append( tf.get_default_graph().get_tensor_by_name(concat_layer)) nets[layer_key] = concat_layers_mixed else: nets[layer_key] = tf.get_default_graph().get_tensor_by_name( layer_name) for layer in add_jitter_layers: nets, net_layer_name = add_jitter_relu_to_layer_vgg(nets, layer) # Initialize the input variable, check if other things aren't initialized (useful for generating # metamers from a random network) uninitialized = tf.report_uninitialized_variables().eval(session=session) print('##### \n UNINITIALIZED VARIABLES ARE:') print(uninitialized) print('#####') all_variables = tf.global_variables() init_op = tf.variables_initializer([ var for var in all_variables if any([ var_name.decode('utf-8') in var.name for var_name in uninitialized.tolist() ]) ]) init_op.run(session=session) ### Remaining code block runs some sanity checks with an example image. ### # Pull in an example image that is classified correctly (it is an airplane from imagenet). image_path = 'assets/airplane.png' image_class = 'airliner' image_dict = metamer_helpers.use_image_path_specified_image( image_path, image_class=image_class, im_shape=224) # Normalize between 0-1, since our variables are normalized to those values. image_dict['image'] = ( image_dict['image'] - image_dict['min_value_image_set'] ) / (image_dict['max_value_image_set'] - image_dict['min_value_image_set']) eval_predictions = session.run(nets['predictions'], feed_dict={ imgs: [image_dict['image']] }).ravel() sorted_predictions = np.argsort(eval_predictions)[::-1] prediction_check_msg = 'Predicted image for airliner example is %s with %f prob' % ( class_names[sorted_predictions[0] + nets['class_index_offset']], eval_predictions[sorted_predictions[0]]) predicted_class = class_names[sorted_predictions[0] + nets['class_index_offset']] assert predicted_class == image_class, prediction_check_msg # Make sure that the activations are the same between the normal relu and the modified gradient # relu for an example layer. 
same_layers = { 'normal_relu': nets['conv3_4'], 'modified_grad_relu': nets['conv3_4_jittered_relu'] } check_relu = session.run(same_layers, feed_dict={imgs: [image_dict['image']]}) relu_check_msg = ( 'The activations after the modified gradient ReLU do not ' 'match the activations after the normal gradient ReLU.') assert np.all(check_relu['normal_relu'] == check_relu['modified_grad_relu']), relu_check_msg return nets, session, metamer_layers
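# Hedged illustration of a "modified gradient ReLU" of the kind the snippet
# above checks for (add_jitter_relu_to_layer_vgg is not shown; its actual
# gradient rule may differ). The forward output matches tf.nn.relu exactly,
# only the backward pass is replaced, so the activation equality check above
# would still hold.
import tensorflow as tf

@tf.custom_gradient
def relu_with_modified_grad(x):
    y = tf.nn.relu(x)

    def grad(dy):
        # Example: pass gradients through a sigmoid-shaped mask instead of the
        # hard 0/1 step of the standard ReLU.
        return dy * tf.nn.sigmoid(x)

    return y, grad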
def run_hand(all_video_list, video_output_parent_path, use_bn, train_vgg, checkpoint_path, backbone_net_ckpt_path): with tf.name_scope('inputs'): raw_img = tf.placeholder(tf.float32, shape=[None, None, None, 3]) img_size = tf.placeholder(dtype=tf.int32, shape=(2, ), name='original_image_size') img_normalized = raw_img / 255 - 0.5 # define vgg19 with slim.arg_scope(vgg.vgg_arg_scope()): vgg_outputs, end_points = vgg.vgg_19(img_normalized) # get net graph logger.info('initializing model...') net = PafNet(inputs_x=vgg_outputs, hm_channel_num=2, use_bn=use_bn) hm_pre, added_layers_out = net.gen_hand_net() hm_up = tf.image.resize_area(hm_pre[5], img_size) # cpm_up = tf.image.resize_area(cpm_pre[5], img_size) # hm_up = hm_pre[5] # cpm_up = cpm_pre[5] smoother = Smoother({'data': hm_up}, 25, 3.0) gaussian_heatMat = smoother.get_output() max_pooled_in_tensor = tf.nn.pool(gaussian_heatMat, window_shape=(3, 3), pooling_type='MAX', padding='SAME') tensor_peaks = tf.where(tf.equal(gaussian_heatMat, max_pooled_in_tensor), gaussian_heatMat, tf.zeros_like(gaussian_heatMat)) logger.info('initialize saver...') # trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='openpose_layers') # trainable_var_list = [] trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='openpose_layers') if train_vgg: trainable_var_list = trainable_var_list + tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19') restorer = tf.train.Saver(tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19'), name='vgg_restorer') saver = tf.train.Saver(trainable_var_list) logger.info('initialize session...') config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(tf.group(tf.global_variables_initializer())) logger.info('restoring vgg weights...') restorer.restore(sess, backbone_net_ckpt_path) logger.info('restoring from checkpoint...') saver.restore( sess, tf.train.latest_checkpoint(checkpoint_dir=checkpoint_path)) logger.info('initialization done') action_list = all_video_list.keys() for action in action_list: video_list = all_video_list[action] for video in video_list: dir_name = video.split('/') name = dir_name[-2] save_path = video_output_parent_path + '/' + name anno_loader = cut_body_part(anno_file=save_path + '/' + action + '_lstm.json', coco_images=save_path + '/pics/') img_info = [] anno_info = [] for img, hand_list, img_meta, anno in tqdm( anno_loader.crop_part()): for hand in hand_list: position = hand['position'] ori_h = position[3] - position[1] + 1 ori_w = position[2] - position[0] + 1 peaks_origin, heatmap_origin = sess.run( [ tensor_peaks, hm_up, ], feed_dict={ raw_img: hand['hand'][np.newaxis, :, :, :], img_size: [ori_h, ori_w] }) re_origin = np.where( peaks_origin[0, :, :, 0] == np.max(peaks_origin[0, :, :, 0])) peaks_flip, heatmap_flip = sess.run( [ tensor_peaks, hm_up, ], feed_dict={ raw_img: np.fliplr(hand['hand'][np.newaxis, :, :, :]), img_size: [ori_h, ori_w] }) peaks_flip = np.fliplr(peaks_flip) re_flip = np.where( peaks_flip[0, :, :, 0] == np.max(peaks_flip[0, :, :, 0])) anno['keypoints'][hand['idx'] * 3] = int( position[0] + (re_origin[1][0] + re_flip[1][0]) / 2) anno['keypoints'][hand['idx'] * 3 + 1] = int( position[1] + (re_origin[0][0] + re_flip[0][0]) / 2) anno_info.append(anno) img_info.append(img_meta) ref = {"images": img_info, "annotations": anno_info} with open( save_path + '/' + action + '.json'.split('.')[0] + '_hand_coco' + '.json', "w") as f: json.dump(ref, f) print('writed 
to ' + save_path + '/' + action + '.json'.split('.')[0] + '_hand_coco' + '.json')
def run(): #Create log_dir for evaluation information if not os.path.exists(log_eval): os.mkdir(log_eval) #Just construct the graph from scratch again with tf.Graph().as_default() as graph: tf.logging.set_verbosity(tf.logging.INFO) #Get the dataset first and load one batch of validation images and labels tensors. Set is_training as False so as to use the evaluation preprocessing dataset = get_split('validation', dataset_dir) images, raw_images, labels = load_batch(dataset, batch_size=batch_size, is_training=False) imagescam, rcam, lcam = load_batch(dataset, batch_size=batch_size, is_training=False, cam=True) #Create some information about the training steps num_batches_per_epoch = dataset.num_samples / batch_size num_steps_per_epoch = num_batches_per_epoch #placedholders for CAM compute y_ = tf.placeholder(tf.int64, [None]) x = tf.placeholder_with_default(images, (None, 224, 224, 3)) #Now create the inference model but set is_training=False with slim.arg_scope(vgg_arg_scope()): logits, end_points = vgg_19(x, num_classes=dataset.num_classes, is_training=False, global_pool=True) #Get the class activation maps class_activation_map = get_class_map(1, end_points['vgg_19/conv6'], 224) # #get all the variables to restore from the checkpoint file and create the saver function to restore variables_to_restore = slim.get_variables_to_restore() saver = tf.train.Saver(variables_to_restore) def restore_fn(sess): return saver.restore(sess, checkpoint_file) #Just define the metrics to track without the loss or whatsoever predictions = tf.argmax(end_points['vgg_19/fc8'], 1) # accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(predictions, labels) accuracy, accuracy_update = tf.metrics.accuracy(labels, predictions) metrics_op = tf.group(accuracy_update) #Create the global step and an increment op for monitoring global_step = tf.train.get_or_create_global_step() global_step_op = tf.assign( global_step, global_step + 1 ) #no apply_gradient method so manually increasing the global_step #placedholders for CAM compute y = logits #Create a evaluation step function def eval_step(sess, metrics_op, global_step): ''' Simply takes in a session, runs the metrics op and some logging information. 
''' start_time = time.time() print 'start', start_time _ = sess.run(metrics_op) global_step_count = sess.run(global_step_op) accuracy_value = sess.run(accuracy) time_elapsed = time.time() - start_time #Log some information logging.info( 'Global Step %s: Streaming Accuracy: %.4f (%.2f sec/step)', global_step_count, accuracy_value, time_elapsed) print 'starting cam inspection' #produce and save CAMs every 10 steps inspect_class_activation_map(sess, class_activation_map, end_points['vgg_19/conv6'], imagescam, lcam, global_step_count, batch_size, x, y_, y) print 'ending cam inspection' return accuracy_value #Define some scalar quantities to monitor tf.summary.scalar('Validation_Accuracy', accuracy) my_summary_op = tf.summary.merge_all() #Get your supervisor #sv = tf.train.Supervisor(logdir = log_eval, summary_op = None, saver = None, init_fn = restore_fn) #global_step_tensor = tf.Variable(7530, trainable=False, name='global_step') #Now we are ready to run in one session with tf.Session() as sess: sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) restore_fn(sess) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) while not coord.should_stop(): #from tensorflow.python import debug as tfdb #sess = tfdb.LocalCLIDebugWrapperSession(sess) #tf.train.global_step(sess, global_step_tensor) for step in xrange(num_steps_per_epoch * num_epochs): print global_step sess.run(global_step) #print vital information every start of the epoch as always if step % num_batches_per_epoch == 0: logging.info('Epoch: %s/%s', step / num_batches_per_epoch + 1, num_epochs) logging.info('Current Streaming Accuracy: %.4f', sess.run(accuracy)) #Compute summaries every 10 steps and continue evaluating if step % 10 == 0: print 'mod 10' eval_step(sess, metrics_op=metrics_op, global_step=global_step) summaries = sess.run(my_summary_op) #sv.summary_computed(sess, summaries) #Otherwise just run as per normal else: print 'next step' eval_step(sess, metrics_op=metrics_op, global_step=global_step) coord.request_stop() #At the end of all the evaluation, show the final accuracy logging.info('Final Streaming Accuracy: %.4f', sess.run(accuracy)) #Now we want to visualize the last batch's images just to see what our model has predicted raw_images, labels, predictions = sess.run( [raw_images, labels, predictions]) for i in range(10): image, label, prediction = raw_images[i], labels[ i], predictions[i] prediction_name, label_name = dataset.labels_to_name[ prediction], dataset.labels_to_name[label] text = 'Prediction: %s \n Ground Truth: %s' % (prediction_name, label_name) img_plot = plt.imshow(image) #Set up the plot and hide axes plt.title(text) img_plot.axes.get_yaxis().set_ticks([]) img_plot.axes.get_xaxis().set_ticks([]) plt.show() coord.request_stop() coord.join(threads) logging.info( 'Model evaluation has completed! Visit TensorBoard for more information regarding your evaluation.' )