Example #1
def net_fatory(net_name, inputs, train_model, FC=False):
    if net_name == 'vgg_16':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            net, end_points = vgg.vgg_16(inputs,
                                         num_classes=None,
                                         is_training=train_model,
                                         fc_flage=FC)
    elif net_name == 'vgg_19':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            net, end_points = vgg.vgg_19(inputs,
                                         num_classes=None,
                                         is_training=train_model,
                                         fc_flage=FC)
    elif net_name == 'resnet_v2_50':
        with slim.arg_scope(resnet_arg_scope()):
            net, end_points = resnet_v2.resnet_v2_50(inputs=inputs,
                                                     num_classes=None,
                                                     is_training=train_model,
                                                     global_pool=False)
    elif net_name == 'resnet_v2_152':
        with slim.arg_scope(resnet_arg_scope()):
            net, end_points = resnet_v2.resnet_v2_152(inputs=inputs,
                                                      num_classes=None,
                                                      is_training=train_model,
                                                      global_pool=False)

    else:
        raise ValueError('Unsupported net_name: %s' % net_name)

    return net, end_points
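
A minimal usage sketch for the factory above (an illustration, not part of the original source; it assumes the same slim/vgg/resnet_v2 imports and a TF1-style placeholder input):

# Hypothetical driver code -- the input shape and printed endpoints are assumptions.
import tensorflow as tf

inputs = tf.placeholder(tf.float32, shape=[None, 224, 224, 3], name='inputs')

# Build a headless ResNet-50 backbone (num_classes=None, no global pooling) in inference mode.
net, end_points = net_fatory('resnet_v2_50', inputs, False, FC=False)
print(net.get_shape().as_list())      # spatial feature map, e.g. [None, 7, 7, 2048]
print(sorted(end_points.keys())[:5])  # a few of the available endpoint names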
Example #2
def extract_image_features(inputs, reuse=True):
    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, end_points = vgg.vgg_19(inputs,
                                   spatial_squeeze=False,
                                   is_training=False,
                                   reuse=reuse)
    return end_points
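
The returned end_points dictionary maps slim scope names to tensors, so a specific feature layer can be looked up by name once the VGG-19 weights are restored. A small sketch of that pattern (the checkpoint path is hypothetical; the 'vgg_19/conv5/conv5_4' key follows the endpoint naming listed in Example #4 below):

import numpy as np
import tensorflow as tf

images = tf.placeholder(tf.float32, shape=[None, 224, 224, 3])
end_points = extract_image_features(images, reuse=False)  # first call has to create the variables
conv5 = end_points['vgg_19/conv5/conv5_4']

saver = tf.train.Saver(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='vgg_19'))
with tf.Session() as sess:
    saver.restore(sess, '/path/to/vgg_19.ckpt')  # hypothetical checkpoint location
    features = sess.run(conv5, feed_dict={images: np.zeros((1, 224, 224, 3), np.float32)})
    print(features.shape)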
Example #3
def perceptual_loss(real, fake, network="vgg_16"):
    if params.loss.vgg_w <= 0.0:
        return 0.0

    real = real * params.learning.image_std + params.learning.image_mean
    fake = fake * params.learning.image_std + params.learning.image_mean
    real = utils.perceptual_loss_image_preprocess(real)
    fake = utils.perceptual_loss_image_preprocess(fake)
    image = tf.concat([real, fake], axis=0)

    with tf.variable_scope("perceptual_loss"):
        if network == "vgg_16":
            with slim.arg_scope(vgg.vgg_arg_scope()):
                conv1, conv2, conv3 = vgg.vgg_16(image)
        elif network == "vgg_19":
            with slim.arg_scope(vgg.vgg_arg_scope()):
                conv1, conv2, conv3 = vgg.vgg_19(image)
        else:
            raise NotImplementedError("Unsupported network: %s" % network)

        losses = []
        for i, features in enumerate([conv1, conv2, conv3]):
            real, fake = tf.split(features, 2, 0)
            losses.append(params.loss.perceptual_loss.weights[i] *
                          tf.reduce_mean(tf.square(real - fake)))

        return losses[0] + losses[1] + losses[2]
Example #4
 def testNoClasses(self):
     batch_size = 5
     height, width = 224, 224
     num_classes = None
     with self.test_session():
         inputs = tf.random_uniform((batch_size, height, width, 3))
         net, end_points = vgg.vgg_19(inputs, num_classes)
         expected_names = [
             'vgg_19/conv1/conv1_1',
             'vgg_19/conv1/conv1_2',
             'vgg_19/pool1',
             'vgg_19/conv2/conv2_1',
             'vgg_19/conv2/conv2_2',
             'vgg_19/pool2',
             'vgg_19/conv3/conv3_1',
             'vgg_19/conv3/conv3_2',
             'vgg_19/conv3/conv3_3',
             'vgg_19/conv3/conv3_4',
             'vgg_19/pool3',
             'vgg_19/conv4/conv4_1',
             'vgg_19/conv4/conv4_2',
             'vgg_19/conv4/conv4_3',
             'vgg_19/conv4/conv4_4',
             'vgg_19/pool4',
             'vgg_19/conv5/conv5_1',
             'vgg_19/conv5/conv5_2',
             'vgg_19/conv5/conv5_3',
             'vgg_19/conv5/conv5_4',
             'vgg_19/pool5',
             'vgg_19/fc6',
             'vgg_19/fc7',
         ]
         self.assertSetEqual(set(end_points.keys()), set(expected_names))
         self.assertTrue(net.op.name.startswith('vgg_19/fc7'))
Example #5
    def style_loss(self, styled_vgg, style_image, layer_names, style_weight,
                   sess):
        style_image_placeholder = tf.placeholder('float',
                                                 shape=style_image.shape)

        with slim.arg_scope(vgg.vgg_arg_scope(reuse=True)):
            _, style_image_vgg = vgg.vgg_19(style_image_placeholder,
                                            num_classes=0,
                                            is_training=False)

        style_loss = 0
        preprocessed_style_image = style_image - np.array([
            ctx.params.R_MEAN, ctx.params.G_MEAN, ctx.params.B_MEAN
        ]).reshape([1, 1, 1, 3])

        for layer_name in layer_names:
            style_image_gram = self.gram_matrix_for_style_image(
                style_image_vgg[layer_name], style_image_placeholder,
                preprocessed_style_image, sess)

            input_image_gram = self.gram_matrix_for_input_image(
                styled_vgg[layer_name])

            style_loss += (2 *
                           tf.nn.l2_loss(input_image_gram -
                                         np.expand_dims(style_image_gram, 0)) /
                           style_image_gram.size)
        return style_weight * style_loss
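
The gram_matrix_for_style_image and gram_matrix_for_input_image helpers are not shown here. As a reference, a minimal sketch of the usual Gram-matrix computation such helpers wrap (normalised by the number of activations; an assumption, not the author's exact code):

import tensorflow as tf

def gram_matrix(features):
    # features: [N, H, W, C] activations from one VGG layer.
    shape = tf.shape(features)
    n, h, w, c = shape[0], shape[1], shape[2], shape[3]
    flat = tf.reshape(features, [n, h * w, c])
    gram = tf.matmul(flat, flat, transpose_a=True)  # [N, C, C]
    return gram / tf.cast(h * w * c, tf.float32)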
Example #6
def main():
    images_placeholder = tf.placeholder(tf.float32,
                                        shape=(None, IMAGE_SIZE, IMAGE_SIZE,
                                               3))
    _, end_points = vgg_19(images_placeholder,
                           num_classes=None,
                           is_training=False)

    dataset_image_codes, dataset_image_files = get_dataset_image_codes(
        images_placeholder, end_points)
    print(dataset_image_codes.shape)

    images = [os.path.join(FILES_DIR, f'image_{i}.jpg') for i in range(1, 5)]
    query_image_codes = get_query_image_code(images, images_placeholder,
                                             end_points)
    print(query_image_codes.shape)

    neighbors_count = 2
    nearest_neighbors = NearestNeighbors(
        n_neighbors=neighbors_count, metric='cosine').fit(dataset_image_codes)
    _, indices = nearest_neighbors.kneighbors(query_image_codes)

    space = 10
    result_image_size = ((neighbors_count + 1) * (IMAGE_SIZE + space) - space,
                         len(images) * (IMAGE_SIZE + space) - space)

    result_image = Image.new('RGB', result_image_size, 'white')
    for i, filename in enumerate(images):
        query_image = rescale_image(Image.open(filename))
        draw = ImageDraw.Draw(query_image)
        draw.line((0, 0, query_image.width - 1, 0, query_image.width - 1,
                   query_image.height - 1, 0, query_image.height - 1, 0, 0),
                  fill='red',
                  width=1)
        result_image.paste(query_image, (0, i * (IMAGE_SIZE + space)))
        for j in range(neighbors_count):
            neighbor_image = Image.open(dataset_image_files[indices[i][j]])
            result_image.paste(neighbor_image,
                               ((j + 1) * (IMAGE_SIZE + space), i *
                                (IMAGE_SIZE + space)))

    result_image.show()
    result_image.save(os.path.join(FILES_DIR, 'result.jpg'))
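
get_dataset_image_codes and get_query_image_code are project helpers that are not shown. With num_classes=None the last endpoint produced by vgg_19 is 'vgg_19/fc7' (as listed in Example #4), so an image "code" is presumably that activation flattened into a vector. A hedged sketch of such a helper:

import numpy as np

def compute_image_codes(batch, images_placeholder, end_points, sess):
    # batch: float32 array of shape [N, IMAGE_SIZE, IMAGE_SIZE, 3].
    fc7 = end_points['vgg_19/fc7']
    codes = sess.run(fc7, feed_dict={images_placeholder: batch})
    return codes.reshape(codes.shape[0], -1)  # one flat code vector per image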
Example #7
    def _buildGraph(self):
        x_in = tf.placeholder(tf.float32, shape=[None,  # enables variable batch size
                                                 self.input_dim[0]], name="x")
        x_in_reshape = tf.reshape(x_in, [-1, self.input_dim[1], self.input_dim[2], 3])

        dropout = tf.placeholder_with_default(1., shape=[], name="dropout")

        y_in = tf.placeholder(dtype=tf.int8, name="y")

        onehot_labels = tf.one_hot(indices=tf.cast(y_in, tf.int32), depth=2)

        is_train = tf.placeholder_with_default(True, shape=[], name="is_train")

        logits, nett, ww = vgg.vgg_19(x_in_reshape,
                                      num_classes=2,
                                      is_training=is_train,
                                      dropout_keep_prob=dropout,
                                      spatial_squeeze=True,
                                      scope='vgg19')

        pred = tf.nn.softmax(logits, name="prediction")

        global_step = tf.Variable(0, trainable=False)

        pred_cost = tf.losses.softmax_cross_entropy(
            onehot_labels=onehot_labels, logits=logits)

        tf.summary.scalar("InceptionV3_cost", pred_cost)

        train_op = tf.contrib.layers.optimize_loss(
            loss=pred_cost,
            learning_rate=self.learning_rate,
            global_step=global_step,
            optimizer="Adam")

        merged_summary = tf.summary.merge_all()

        return (x_in, dropout, is_train,
                y_in, logits, nett, ww, pred, pred_cost,
                global_step, train_op, merged_summary)
Example #8
def train_model():

    torch.backends.cudnn.deterministic = True
    device = torch.device("cuda")

    print("CUDA visible devices: " + str(torch.cuda.device_count()))
    print("CUDA Device Name: " + str(torch.cuda.get_device_name(device)))

    # Creating dataset loaders

    train_dataset = LoadData(dataset_dir, TRAIN_SIZE, dslr_scale, test=False)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=1,
                              pin_memory=True,
                              drop_last=True)

    test_dataset = LoadData(dataset_dir, TEST_SIZE, dslr_scale, test=True)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=1,
                             shuffle=False,
                             num_workers=1,
                             pin_memory=True,
                             drop_last=False)

    visual_dataset = LoadVisualData(dataset_dir, 10, dslr_scale, level)
    visual_loader = DataLoader(dataset=visual_dataset,
                               batch_size=1,
                               shuffle=False,
                               num_workers=0,
                               pin_memory=True,
                               drop_last=False)

    # Creating image processing network and optimizer

    generator = PyNET(level=level,
                      instance_norm=True,
                      instance_norm_level_1=True).to(device)
    generator = torch.nn.DataParallel(generator)

    optimizer = Adam(params=generator.parameters(), lr=learning_rate)

    # Restoring the variables

    if level < 5:
        generator.load_state_dict(
            torch.load("models/pynet_level_" + str(level + 1) + "_epoch_" +
                       str(restore_epoch) + ".pth"),
            strict=False)

    # Losses

    VGG_19 = vgg_19(device)
    MSE_loss = torch.nn.MSELoss()
    MS_SSIM = MSSSIM()

    # Train the network

    for epoch in range(num_train_epochs):

        torch.cuda.empty_cache()

        train_iter = iter(train_loader)
        for i in range(len(train_loader)):

            optimizer.zero_grad()
            x, y = next(train_iter)

            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)

            enhanced = generator(x)

            # MSE Loss
            loss_mse = MSE_loss(enhanced, y)

            # VGG Loss

            if level < 5:
                enhanced_vgg = VGG_19(normalize_batch(enhanced))
                target_vgg = VGG_19(normalize_batch(y))
                loss_content = MSE_loss(enhanced_vgg, target_vgg)

            # Total Loss

            if level == 5 or level == 4:
                total_loss = loss_mse
            if level == 3 or level == 2:
                total_loss = loss_mse * 10 + loss_content
            if level == 1:
                total_loss = loss_mse * 10 + loss_content
            if level == 0:
                loss_ssim = MS_SSIM(enhanced, y)
                total_loss = loss_mse + loss_content + (1 - loss_ssim) * 0.4

            # Perform the optimization step

            total_loss.backward()
            optimizer.step()

            if i == 0:

                # Save the model that corresponds to the current epoch

                generator.eval().cpu()
                torch.save(
                    generator.state_dict(), "models/pynet_level_" +
                    str(level) + "_epoch_" + str(epoch) + ".pth")
                generator.to(device).train()

                # Save visual results for several test images

                generator.eval()
                with torch.no_grad():

                    visual_iter = iter(visual_loader)
                    for j in range(len(visual_loader)):

                        torch.cuda.empty_cache()

                        raw_image = next(visual_iter)
                        raw_image = raw_image.to(device, non_blocking=True)

                        enhanced = generator(raw_image.detach())
                        enhanced = np.asarray(
                            to_image(torch.squeeze(enhanced.detach().cpu())))

                        imageio.imwrite(
                            "results/pynet_img_" + str(j) + "_level_" +
                            str(level) + "_epoch_" + str(epoch) + ".jpg",
                            enhanced)

                # Evaluate the model

                loss_mse_eval = 0
                loss_psnr_eval = 0
                loss_vgg_eval = 0
                loss_ssim_eval = 0

                generator.eval()
                with torch.no_grad():

                    test_iter = iter(test_loader)
                    for j in range(len(test_loader)):

                        x, y = next(test_iter)
                        x = x.to(device, non_blocking=True)
                        y = y.to(device, non_blocking=True)
                        enhanced = generator(x)

                        loss_mse_temp = MSE_loss(enhanced, y).item()

                        loss_mse_eval += loss_mse_temp
                        loss_psnr_eval += 20 * math.log10(
                            1.0 / math.sqrt(loss_mse_temp))

                        if level < 2:
                            loss_ssim_eval += MS_SSIM(y, enhanced)

                        if level < 5:
                            enhanced_vgg_eval = VGG_19(
                                normalize_batch(enhanced)).detach()
                            target_vgg_eval = VGG_19(
                                normalize_batch(y)).detach()

                            loss_vgg_eval += MSE_loss(enhanced_vgg_eval,
                                                      target_vgg_eval).item()

                loss_mse_eval = loss_mse_eval / TEST_SIZE
                loss_psnr_eval = loss_psnr_eval / TEST_SIZE
                loss_vgg_eval = loss_vgg_eval / TEST_SIZE
                loss_ssim_eval = loss_ssim_eval / TEST_SIZE

                if level < 2:
                    print(
                        "Epoch %d, mse: %.4f, psnr: %.4f, vgg: %.4f, ms-ssim: %.4f"
                        % (epoch, loss_mse_eval, loss_psnr_eval, loss_vgg_eval,
                           loss_ssim_eval))
                elif level < 5:
                    print(
                        "Epoch %d, mse: %.4f, psnr: %.4f, vgg: %.4f" %
                        (epoch, loss_mse_eval, loss_psnr_eval, loss_vgg_eval))
                else:
                    print("Epoch %d, mse: %.4f, psnr: %.4f" %
                          (epoch, loss_mse_eval, loss_psnr_eval))

                generator.train()
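
The vgg_19(device) factory and normalize_batch used above come from the project's own vgg module. A rough torchvision-based stand-in, assuming the usual "frozen features up to relu5_4 plus ImageNet normalisation" setup (the original implementation may differ):

from torchvision import models

def vgg_19(device):
    # VGG-19 convolutional features up to relu5_4, frozen for use as a perceptual loss.
    features = models.vgg19(pretrained=True).features[:36].to(device)
    for p in features.parameters():
        p.requires_grad = False
    return features.eval()

def normalize_batch(batch):
    # ImageNet mean/std normalisation for an [N, 3, H, W] batch in [0, 1].
    mean = batch.new_tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
    std = batch.new_tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
    return (batch - mean) / std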
Example #9
def train_model():
    torch.backends.cudnn.deterministic = True
    device = torch.device("cuda")

    print("CUDA visible devices: " + str(torch.cuda.device_count()))
    print("CUDA Device Name: " + str(torch.cuda.get_device_name(device)))

    # Creating dataset loaders
    train_dataset = LoadTrainData(opt.dataroot, TRAIN_SIZE, test=False)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=opt.batch_size,
                              shuffle=True,
                              num_workers=1,
                              pin_memory=True,
                              drop_last=True)
    test_dataset = LoadTrainData(opt.dataroot, TEST_SIZE, test=True)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=1,
                             shuffle=False,
                             num_workers=1,
                             pin_memory=True,
                             drop_last=False)

    # Creating image processing network and optimizer
    generator = MWRCAN().to(device)
    generator = torch.nn.DataParallel(generator)
    #generator.load_state_dict(torch.load('./ckpt/Track1/mwcnnvggssim4_epoch_60.pth'))

    optimizer = Adam(params=generator.parameters(), lr=opt.lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     [50, 100, 150, 200],
                                                     gamma=0.5)

    # Losses
    VGG_19 = vgg_19(device)
    MSE_loss = torch.nn.MSELoss()
    MS_SSIM = MSSSIM()
    L1_loss = torch.nn.L1Loss()

    # Train the network
    for epoch in range(opt.epochs):
        print("lr =  %.8f" % (scheduler.get_lr()[0]))
        torch.cuda.empty_cache()
        generator.to(device).train()
        i = 0
        for x, y in train_loader:
            optimizer.zero_grad()
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)
            enhanced = generator(x)

            loss_l1 = L1_loss(enhanced, y)

            enhanced_vgg = VGG_19(normalize_batch(enhanced))
            target_vgg = VGG_19(normalize_batch(y))
            loss_content = L1_loss(enhanced_vgg, target_vgg)

            loss_ssim = MS_SSIM(enhanced, y)

            total_loss = loss_l1 + loss_content + (1 - loss_ssim) * 0.15
            if i % 100 == 0:
                print(
                    "Epoch %d_%d, L1: %.4f, vgg: %.4f, SSIM: %.4f, total: %.4f"
                    % (epoch, i, loss_l1, loss_content,
                       (1 - loss_ssim) * 0.15, total_loss))
            total_loss.backward()
            optimizer.step()
            i = i + 1
        scheduler.step()

        # Save the model that corresponds to the current epoch
        generator.eval().cpu()
        torch.save(
            generator.state_dict(),
            os.path.join(opt.save_model_path,
                         "mwrcan_epoch_" + str(epoch) + ".pth"))

        # Evaluate the model
        loss_psnr_eval = 0
        generator.to(device)
        generator.eval()
        with torch.no_grad():
            for x, y in test_loader:
                x = x.to(device, non_blocking=True)
                y = y.to(device, non_blocking=True)
                enhanced = generator(x)
                enhanced = torch.clamp(
                    torch.round(enhanced * 255), min=0, max=255) / 255
                y = torch.clamp(torch.round(y * 255), min=0, max=255) / 255
                loss_mse_temp = MSE_loss(enhanced, y).item()
                loss_psnr_eval += 20 * math.log10(
                    1.0 / math.sqrt(loss_mse_temp))
        loss_psnr_eval = loss_psnr_eval / TEST_SIZE
        print("Epoch %d, psnr: %.4f" % (epoch, loss_psnr_eval))
Example #10
def main():

    yolo = YOLO()
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    parser = argparse.ArgumentParser(
        description='Training codes for Openpose using Tensorflow')
    parser.add_argument('--checkpoint_path',
                        type=str,
                        default='checkpoints/train/2018-12-13-16-56-49/')
    parser.add_argument('--backbone_net_ckpt_path',
                        type=str,
                        default='checkpoints/vgg/vgg_19.ckpt')
    parser.add_argument('--image', type=str, default=None)
    # parser.add_argument('--run_model', type=str, default='img')
    parser.add_argument('--video', type=str, default=None)
    parser.add_argument('--train_vgg', type=bool, default=True)
    parser.add_argument('--use_bn', type=bool, default=False)
    parser.add_argument('--save_video', type=str, default='result/our.mp4')
    args = parser.parse_args()
    checkpoint_path = args.checkpoint_path
    logger.info('checkpoint_path: ' + checkpoint_path)

    with tf.name_scope('inputs'):
        raw_img = tf.placeholder(tf.float32, shape=[None, None, None, 3])
        img_size = tf.placeholder(dtype=tf.int32,
                                  shape=(2, ),
                                  name='original_image_size')

    img_normalized = raw_img / 255 - 0.5

    # define vgg19
    with slim.arg_scope(vgg.vgg_arg_scope()):
        vgg_outputs, end_points = vgg.vgg_19(img_normalized)

    # get net graph
    logger.info('initializing model...')
    net = PafNet(inputs_x=vgg_outputs, use_bn=args.use_bn)
    hm_pre, cpm_pre, added_layers_out = net.gen_net()
    hm_up = tf.image.resize_area(hm_pre[5], img_size)
    cpm_up = tf.image.resize_area(cpm_pre[5], img_size)
    # hm_up = hm_pre[5]
    # cpm_up = cpm_pre[5]
    smoother = Smoother({'data': hm_up}, 25, 3.0)
    gaussian_heatMat = smoother.get_output()

    max_pooled_in_tensor = tf.nn.pool(gaussian_heatMat,
                                      window_shape=(3, 3),
                                      pooling_type='MAX',
                                      padding='SAME')
    tensor_peaks = tf.where(tf.equal(gaussian_heatMat, max_pooled_in_tensor),
                            gaussian_heatMat, tf.zeros_like(gaussian_heatMat))

    logger.info('initialize saver...')
    # trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='openpose_layers')
    # trainable_var_list = []
    trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope='openpose_layers')
    if args.train_vgg:
        trainable_var_list = trainable_var_list + tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19')

    restorer = tf.train.Saver(tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19'),
                              name='vgg_restorer')
    saver = tf.train.Saver(trainable_var_list)

    logger.info('initialize session...')
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.group(tf.global_variables_initializer()))
        logger.info('restoring vgg weights...')
        restorer.restore(sess, args.backbone_net_ckpt_path)
        logger.info('restoring from checkpoint...')
        #saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir=checkpoint_path))
        saver.restore(sess, args.checkpoint_path + 'model-59000.ckpt')
        logger.info('initialization done')
        writeVideo_flag = True
        if args.image is None:
            if args.video is not None:
                cap = cv2.VideoCapture(args.video)
                w = int(cap.get(3))
                h = int(cap.get(4))

            else:
                cap = cv2.VideoCapture("images/video.mp4")
                #cap = cv2.VideoCapture("rtsp://*****:*****@192.168.43.51:554//Streaming/Channels/1")
                #cap = cv2.VideoCapture("http://*****:*****@192.168.1.111:8081")
                #cap = cv2.VideoCapture("rtsp://*****:*****@192.168.1.106:554//Streaming/Channels/1")
            _, image = cap.read()
            #print(_,image)
            if image is None:
                logger.error("Can't read video")
                sys.exit(-1)
            fps = cap.get(cv2.CAP_PROP_FPS)
            ori_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            ori_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            #print(fps,ori_w,ori_h)
            if args.save_video is not None:
                fourcc = cv2.VideoWriter_fourcc(*'MP4V')
                video_saver = cv2.VideoWriter('result/our.mp4', fourcc, fps,
                                              (ori_w, ori_h))
                logger.info('record video to %s' % args.save_video)
            logger.info('fps@%f' % fps)
            size = [int(654 * (ori_h / ori_w)), 654]
            h = int(654 * (ori_h / ori_w))
            time_n = time.time()
            #print(time_n)

            max_boxs = 0
            person_track = {}
            yolo2 = YOLO2()

            while True:
                face = []
                cur1 = conn.cursor()  # get a cursor
                sql = "select * from worker"
                cur1.execute(sql)
                data = cur1.fetchall()
                for d in data:
                    # note: int fields must be converted with str()
                    name = str(d[1]) + '_' + d[2]

                    face.append(name)
                cur1.close()  # close the cursor

                _, image_fist = cap.read()
                # detect whether protective gear is worn

                img = Image.fromarray(
                    cv2.cvtColor(image_fist, cv2.COLOR_BGR2RGB))
                image, wear = yolo2.detect_image(img)
                image = np.array(image)
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # # get the warning line
                cv2.line(image, (837, 393), (930, 300), (0, 255, 255), 3)
                transboundaryline = t.line_detect_possible_demo(image)

                # OpenPose 2D pose estimation
                img = np.array(cv2.resize(image, (654, h)))
                # cv2.imshow('raw', img)
                img_corner = np.array(
                    cv2.resize(image, (360, int(360 * (ori_h / ori_w)))))
                img = img[np.newaxis, :]
                peaks, heatmap, vectormap = sess.run(
                    [tensor_peaks, hm_up, cpm_up],
                    feed_dict={
                        raw_img: img,
                        img_size: size
                    })
                bodys = PoseEstimator.estimate_paf(peaks[0], heatmap[0],
                                                   vectormap[0])

                image, person = TfPoseEstimator.draw_humans(image,
                                                            bodys,
                                                            imgcopy=False)
                # take keypoint 10 (right foot) and 13 (left foot)

                foot = []
                if len(person) > 0:
                    for p in person:
                        foot_lr = []
                        if 10 in p and 13 in p:
                            foot_lr.append(p[10])
                            foot_lr.append(p[13])

                        if len(foot_lr) > 1:
                            foot.append(foot_lr)

                fps = round(1 / (time.time() - time_n), 2)
                image = cv2.putText(image,
                                    str(fps) + 'fps', (10, 15),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                    (255, 255, 255))
                time_n = time.time()

                # Deep SORT object detection
                image2 = Image.fromarray(image_fist)
                boxs = yolo.detect_image(image2)
                features = encoder(image, boxs)
                detections = [
                    Detection(bbox, 1.0, feature)
                    for bbox, feature in zip(boxs, features)
                ]
                boxes = np.array([d.tlwh for d in detections])
                scores = np.array([d.confidence for d in detections])
                indices = preprocessing.non_max_suppression(
                    boxes, nms_max_overlap, scores)
                detections = [detections[i] for i in indices]
                if len(boxs) > max_boxs:
                    max_boxs = len(boxs)
                # print(max_boxs)

                # Call the tracker
                tracker.predict()
                tracker.update(detections)

                for track in tracker.tracks:

                    if max_boxs < track.track_id:
                        tracker.tracks.remove(track)
                        tracker._next_id = max_boxs + 1

                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue

                    bbox = track.to_tlbr()
                    PointX = bbox[0] + ((bbox[2] - bbox[0]) / 2)
                    PointY = bbox[3]

                    if track.track_id not in person_track:
                        track2 = copy.deepcopy(track)
                        person_track[track.track_id] = track2

                    else:

                        track2 = copy.deepcopy(track)
                        bbox2 = person_track[track.track_id].to_tlbr()
                        PointX2 = bbox2[0] + ((bbox2[2] - bbox2[0]) / 2)
                        PointY2 = bbox2[3]
                        distance = math.sqrt(
                            pow(PointX - PointX2, 2) +
                            pow(PointY - PointY2, 2))
                        if distance < 120:
                            person_track[track.track_id] = track2

                        else:

                            # print('last',track.track_id)
                            dis = {}
                            for key in person_track:
                                bbox3 = person_track[key].to_tlbr()
                                PointX3 = bbox3[0] + (
                                    (bbox3[2] - bbox3[0]) / 2)
                                PointY3 = bbox3[3]

                                d = math.sqrt(
                                    pow(PointX3 - PointX, 2) +
                                    pow(PointY3 - PointY, 2))
                                dis[key] = d
                            dis = sorted(dis.items(),
                                         key=operator.itemgetter(1),
                                         reverse=False)

                            track2.track_id = dis[0][0]
                            person_track[dis[0][0]] = track2
                            tracker.tracks.remove(track)
                            tracker.tracks.append(person_track[track.track_id])

                    # write the class (identity) info

                    try:
                        box_title = face[track2.track_id - 1]
                    except Exception as e:
                        box_title = str(track2.track_id) + "_" + "unknow"
                    if box_title not in workers:
                        wid = box_title.split('_')[0]
                        localtime = time.asctime(time.localtime(time.time()))
                        workers[box_title] = wk.Worker()
                        workers[box_title].set(box_title, localtime,
                                               (int(PointX), int(PointY)))
                        cur2 = conn.cursor()  # get a cursor
                        sql2 = "UPDATE worker SET in_time='" + localtime + "' WHERE worker_id= '" + wid + "'"
                        cur2.execute(sql2)
                        cur2.close()  # close the cursor

                    else:
                        localtime = time.asctime(time.localtime(time.time()))
                        yoloPoint = (int(PointX), int(PointY))
                        foot_dic = {}
                        wear_dic = {}

                        for f in foot:
                            fp = []
                            footCenter = ((f[0][0] + f[1][0]) / 2,
                                          (f[0][1] + f[1][1]) / 2)
                            foot_dis = int(
                                math.sqrt(
                                    pow(footCenter[0] - yoloPoint[0], 2) +
                                    pow(footCenter[1] - yoloPoint[1], 2)))
                            #print(foot_dis)
                            fp.append(f)
                            fp.append(footCenter)
                            foot_dic[foot_dis] = fp

                        #print(box_title, 'sss', foot_dic)
                        foot_dic = sorted(foot_dic.items(),
                                          key=operator.itemgetter(0),
                                          reverse=False)
                        workers[box_title].current_point = foot_dic[0][1][1]
                        workers[box_title].track_point.append(
                            workers[box_title].current_point)

                        #print(box_title,'sss',foot_dic[0][1][1])
                        mytrack = str(workers[box_title].track_point)
                        wid = box_title.split('_')[0]
                        # Kalman filter prediction
                        if wid not in KalmanNmae:
                            myKalman(wid)
                        if wid not in lmp:
                            setLMP(wid)
                        cpx, cpy = predict(workers[box_title].current_point[0],
                                           workers[box_title].current_point[1],
                                           wid)

                        if cpx[0] == 0.0 or cpy[0] == 0.0:
                            cpx[0] = workers[box_title].current_point[0]
                            cpy[0] = workers[box_title].current_point[1]
                        workers[box_title].next_point = (int(cpx), int(cpy))

                        workers[box_title].current_footR = foot_dic[0][1][0][0]
                        workers[box_title].current_footL = foot_dic[0][1][0][1]
                        cur3 = conn.cursor()  # get a cursor
                        sql = "UPDATE worker SET current_point= '" + str(
                            workers[box_title].current_point
                        ) + "' , current_footR = '" + str(
                            workers[box_title].current_footR
                        ) + "',current_footL = '" + str(
                            workers[box_title].current_footL
                        ) + "',track_point = '" + mytrack + "',next_point = '" + str(
                            workers[box_title].next_point
                        ) + "' WHERE worker_id= '" + wid + "'"
                        cur3.execute(sql)
                        cur3.close()
                        # record the safety-gear status
                        if len(wear) > 0:
                            for w in wear:
                                wear_dis = int(
                                    math.sqrt(
                                        pow(w[0] - yoloPoint[0], 2) +
                                        pow(w[1] - yoloPoint[1], 2)))
                                wear_dic[wear_dis] = w
                            wear_dic = sorted(wear_dic.items(),
                                              key=operator.itemgetter(0),
                                              reverse=False)

                            if wear_dic[0][0] < 120:
                                cur4 = conn.cursor()  # get a cursor

                                if wear[wear_dic[0][1]] == 1:
                                    if len(workers[box_title].wear['no helmet']
                                           ) == 0:
                                        workers[box_title].wear[
                                            'no helmet'].append(localtime)

                                        sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no_helmet',abnormal_time = '" + localtime + "'"
                                        cur4.execute(sql)
                                        cur4.close()  # close the cursor

                                    else:
                                        if localtime not in workers[
                                                box_title].wear['no helmet']:

                                            workers[box_title].wear[
                                                'no helmet'].append(localtime)
                                            sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no_helmet',abnormal_time = '" + localtime + "'"
                                            cur4.execute(sql)
                                            cur4.close()  # close the cursor

                                elif wear[wear_dic[0][1]] == 2:
                                    if len(workers[box_title].
                                           wear['no work cloths']) == 0:
                                        workers[box_title].wear[
                                            'no work cloths'].append(localtime)
                                        sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no work cloths',abnormal_time = '" + localtime + "'"
                                        cur4.execute(sql)
                                        cur4.close()  # close the cursor
                                    else:
                                        if localtime not in workers[
                                                box_title].wear[
                                                    'no work cloths']:
                                            workers[box_title].wear[
                                                'no work cloths'].append(
                                                    localtime)
                                            sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no work cloths',abnormal_time = '" + localtime + "'"
                                            cur4.execute(sql)
                                            cur4.close()  # close the cursor
                                elif wear[wear_dic[0][1]] == 3:
                                    if len(workers[box_title].
                                           wear['unsafe wear']) == 0:
                                        workers[box_title].wear[
                                            'unsafe wear'].append(localtime)
                                        sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'unsafe wear',abnormal_time = '" + localtime + "'"
                                        cur4.execute(sql)
                                        cur4.close()  # close the cursor
                                    else:
                                        if localtime not in workers[
                                                box_title].wear['unsafe wear']:
                                            workers[box_title].wear[
                                                'unsafe wear'].append(
                                                    localtime)
                                            sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'unsafe wear',abnormal_time = '" + localtime + "'"
                                            cur4.execute(sql)
                                            cur4.close()  # close the cursor

                        # record line-crossing events

                        if len(workers[box_title].track_point) > 4:

                            for i in range(len(transboundaryline)):
                                p1 = (transboundaryline[i][0],
                                      transboundaryline[i][1])
                                p2 = (transboundaryline[i][2],
                                      transboundaryline[i][3])
                                p3 = workers[box_title].track_point[-2]
                                p4 = workers[box_title].track_point[-1]
                                a = t.IsIntersec(p1, p2, p3, p4)
                                if a == '有交点':  # an intersection was found

                                    cur5 = conn.cursor()  # get a cursor
                                    cur6 = conn.cursor()  # get a cursor
                                    cur5.execute(
                                        "select time from transboundary where worker_id = '"
                                        + wid + "' ")
                                    qurrytime = cur5.fetchone()
                                    cur5.close()  # close the cursor
                                    if qurrytime is None:
                                        print('line crossed')
                                        sql = "INSERT INTO transboundary SET worker_id = '" + wid + "',time = '" + localtime + "'"
                                        cur6.execute(sql)
                                        cur6.close()  # close the cursor
                                    else:
                                        temp1 = 0
                                        for qt in qurrytime:

                                            if qt == localtime:
                                                temp1 = 1
                                        if temp1 == 0:
                                            print('line crossed')
                                            sql = "INSERT INTO transboundary SET worker_id = '" + wid + "',time = '" + localtime + "'"
                                            cur6.execute(sql)
                                            cur6.close()  # close the cursor
                        if len(workers[box_title].track_point) >= 20:
                            workers[box_title].previous_point = workers[
                                box_title].track_point[-5]
                    conn.commit()
                    try:
                        cv2.putText(image, face[track2.track_id - 1],
                                    (int(bbox[0]), int(bbox[1])), 0,
                                    5e-3 * 200, (0, 255, 0), 2)
                    except Exception as e:
                        cv2.putText(image, "unknow",
                                    (int(bbox[0]), int(bbox[1])), 0,
                                    5e-3 * 200, (0, 255, 0), 2)

                if args.video is not None:
                    image[27:img_corner.shape[0] +
                          27, :img_corner.shape[1]] = img_corner  # [3:-10, :]
                cv2.imshow(' ', image)
                if args.save_video is not None:
                    video_saver.write(image)
                cv2.waitKey(1)
        else:

                image = common.read_imgfile(args.image)
                size = [image.shape[0], image.shape[1]]
                if image is None:
                    logger.error('Image can not be read, path=%s' % args.image)
                    sys.exit(-1)
                h = int(654 * (size[0] / size[1]))
                img = np.array(cv2.resize(image, (654, h)))
                cv2.imshow('ini', img)
                img = img[np.newaxis, :]
                peaks, heatmap, vectormap = sess.run(
                    [tensor_peaks, hm_up, cpm_up],
                    feed_dict={
                        raw_img: img,
                        img_size: size
                    })
                cv2.imshow('in', vectormap[0, :, :, 0])
                bodys = PoseEstimator.estimate_paf(peaks[0], heatmap[0],
                                                   vectormap[0])
                image = TfPoseEstimator.draw_humans(image,
                                                    bodys,
                                                    imgcopy=False)
                cv2.imshow(' ', image)
                cv2.waitKey(0)
Example #11
                l = loss(logits, y)
                optimizer.zero_grad()
                l.backward()
                optimizer.step()  # perform the gradient-descent step
                if i % 50 == 0:
                    acc = (logits.argmax(1) == y).float().mean()
                    print("Epochs[{}/{}]---batch {}---acc {:.4}---loss {:.4}".format(
                        epoch + 1, self.epochs, i, acc, l))
            self.net.eval()  # switch to evaluation mode
            print("Epochs[{}/{}]--acc on test {:.4}".format(epoch + 1, self.epochs,
                                                            self.evaluate(test_iter, self.net, device)))
            self.net.train()  # switch back to training mode

    @staticmethod
    def evaluate(data_iter, net, device):
        with torch.no_grad():
            acc_sum, n = 0.0, 0
            for x, y in data_iter:
                x, y = x.to(device), y.to(device)
                logits = net(x)
                acc_sum += (logits.argmax(1) == y).float().sum().item()
                n += len(y)
            return acc_sum / n


if __name__ == '__main__':
    model = vgg_19(num_classes=10)
    vgg19 = MyModel(model=model, batch_size=128, epochs=5, learning_rate=0.001)
    vgg19.train()
    # print(model)
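
The vgg_19 constructor in this example comes from the surrounding project. A torchvision-based stand-in with a 10-way head might look like this (an assumption, not the author's definition):

from torchvision import models

def vgg_19(num_classes=10):
    # Randomly initialised VGG-19 with a num_classes-way classifier.
    return models.vgg19(num_classes=num_classes)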
Example #12
def train():
    parser = argparse.ArgumentParser(
        description='Training codes for Openpose using Tensorflow')
    parser.add_argument('--batch_size', type=int, default=10)
    parser.add_argument('--continue_training', type=bool, default=False)
    parser.add_argument('--checkpoint_path',
                        type=str,
                        default='checkpoints/train/')
    parser.add_argument('--backbone_net_ckpt_path',
                        type=str,
                        default='checkpoints/vgg/vgg_19.ckpt')
    parser.add_argument('--train_vgg', type=bool, default=True)
    parser.add_argument(
        '--annot_path',
        type=str,
        default=
        '/run/user/1000/gvfs/smb-share:server=server,share=data/yzy/dataset/'
        'Realtime_Multi-Person_Pose_Estimation-master/training/dataset/COCO/annotations/'
    )
    parser.add_argument(
        '--img_path',
        type=str,
        default=
        '/run/user/1000/gvfs/smb-share:server=server,share=data/yzy/dataset/'
        'Realtime_Multi-Person_Pose_Estimation-master/training/dataset/COCO/images/'
    )
    # parser.add_argument('--annot_path_val', type=str,
    #                     default='/run/user/1000/gvfs/smb-share:server=192.168.1.2,share=data/yzy/dataset/'
    #                             'Realtime_Multi-Person_Pose_Estimation-master/training/dataset/COCO/annotations/'
    #                             'person_keypoints_val2017.json')
    # parser.add_argument('--img_path_val', type=str,
    #                     default='/run/user/1000/gvfs/smb-share:server=192.168.1.2,share=data/yzy/dataset/'
    #                             'Realtime_Multi-Person_Pose_Estimation-master/training/dataset/COCO/images/val2017/')
    parser.add_argument('--save_checkpoint_frequency', type=int, default=1000)
    parser.add_argument('--save_summary_frequency', type=int, default=100)
    parser.add_argument('--stage_num', type=int, default=6)
    parser.add_argument('--hm_channels', type=int, default=19)
    parser.add_argument('--paf_channels', type=int, default=38)
    parser.add_argument('--input-width', type=int, default=368)
    parser.add_argument('--input-height', type=int, default=368)
    parser.add_argument('--max_echos', type=int, default=5)
    parser.add_argument('--use_bn', type=bool, default=False)
    parser.add_argument('--loss_func', type=str, default='l2')
    args = parser.parse_args()

    if not args.continue_training:
        start_time = time.localtime(time.time())
        checkpoint_path = args.checkpoint_path + ('%d-%d-%d-%d-%d-%d' %
                                                  start_time[0:6])
        os.mkdir(checkpoint_path)
    else:
        checkpoint_path = args.checkpoint_path

    logger = logging.getLogger('train')
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler(checkpoint_path + '/train_log.log')
    fh.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    logger.addHandler(fh)
    logger.info(args)
    logger.info('checkpoint_path: ' + checkpoint_path)

    # define input placeholder
    with tf.name_scope('inputs'):
        raw_img = tf.placeholder(tf.float32,
                                 shape=[args.batch_size, 368, 368, 3])
        # mask_hm = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, 46, 46, args.hm_channels])
        # mask_paf = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, 46, 46, args.paf_channels])
        hm = tf.placeholder(dtype=tf.float32,
                            shape=[args.batch_size, 46, 46, args.hm_channels])
        paf = tf.placeholder(
            dtype=tf.float32,
            shape=[args.batch_size, 46, 46, args.paf_channels])

    # define data loader
    logger.info('initializing data loader...')
    set_network_input_wh(args.input_width, args.input_height)
    scale = 8
    set_network_scale(scale)
    df = get_dataflow_batch(args.annot_path,
                            True,
                            args.batch_size,
                            img_path=args.img_path)
    steps_per_echo = df.size()
    enqueuer = DataFlowToQueue(df, [raw_img, hm, paf], queue_size=100)
    q_inp, q_heat, q_vect = enqueuer.dequeue()
    q_inp_split, q_heat_split, q_vect_split = tf.split(q_inp, 1), tf.split(
        q_heat, 1), tf.split(q_vect, 1)
    img_normalized = q_inp_split[0] / 255 - 0.5  # [-0.5, 0.5]

    df_valid = get_dataflow_batch(args.annot_path,
                                  False,
                                  args.batch_size,
                                  img_path=args.img_path)
    df_valid.reset_state()
    validation_cache = []

    logger.info('initializing model...')
    # define vgg19
    with slim.arg_scope(vgg.vgg_arg_scope()):
        vgg_outputs, end_points = vgg.vgg_19(img_normalized)

    # get net graph
    net = PafNet(inputs_x=vgg_outputs,
                 stage_num=args.stage_num,
                 hm_channel_num=args.hm_channels,
                 use_bn=args.use_bn)
    hm_pre, paf_pre, added_layers_out = net.gen_net()

    # two kinds of loss
    losses = []
    with tf.name_scope('loss'):
        for idx, (l1, l2) in enumerate(zip(hm_pre, paf_pre)):
            if args.loss_func == 'square':
                hm_loss = tf.reduce_sum(
                    tf.square(tf.concat(l1, axis=0) - q_heat_split[0]))
                paf_loss = tf.reduce_sum(
                    tf.square(tf.concat(l2, axis=0) - q_vect_split[0]))
                losses.append(tf.reduce_sum([hm_loss, paf_loss]))
                logger.info('use square loss')
            else:
                hm_loss = tf.nn.l2_loss(
                    tf.concat(l1, axis=0) - q_heat_split[0])
                paf_loss = tf.nn.l2_loss(
                    tf.concat(l2, axis=0) - q_vect_split[0])
                losses.append(tf.reduce_mean([hm_loss, paf_loss]))
                logger.info('use l2 loss')
        loss = tf.reduce_sum(losses) / args.batch_size

    global_step = tf.Variable(0, name='global_step', trainable=False)
    learning_rate = tf.train.exponential_decay(1e-4,
                                               global_step,
                                               steps_per_echo,
                                               0.5,
                                               staircase=True)
    trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope='openpose_layers')
    if args.train_vgg:
        trainable_var_list = trainable_var_list + tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19')
    with tf.name_scope('train'):
        train = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       epsilon=1e-8).minimize(
                                           loss=loss,
                                           global_step=global_step,
                                           var_list=trainable_var_list)
    logger.info('initialize saver...')
    restorer = tf.train.Saver(tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19'),
                              name='vgg_restorer')
    saver = tf.train.Saver(trainable_var_list)

    logger.info('initialize tensorboard')
    tf.summary.scalar("lr", learning_rate)
    tf.summary.scalar("loss2", loss)
    tf.summary.histogram('img_normalized', img_normalized)
    tf.summary.histogram('vgg_outputs', vgg_outputs)
    tf.summary.histogram('added_layers_out', added_layers_out)
    tf.summary.image('vgg_out',
                     tf.transpose(vgg_outputs[0:1, :, :, :], perm=[3, 1, 2,
                                                                   0]),
                     max_outputs=512)
    tf.summary.image('added_layers_out',
                     tf.transpose(added_layers_out[0:1, :, :, :],
                                  perm=[3, 1, 2, 0]),
                     max_outputs=128)
    tf.summary.image('paf_gt',
                     tf.transpose(q_vect_split[0][0:1, :, :, :],
                                  perm=[3, 1, 2, 0]),
                     max_outputs=38)
    tf.summary.image('hm_gt',
                     tf.transpose(q_heat_split[0][0:1, :, :, :],
                                  perm=[3, 1, 2, 0]),
                     max_outputs=19)
    for i in range(args.stage_num):
        tf.summary.image('hm_pre_stage_%d' % i,
                         tf.transpose(hm_pre[i][0:1, :, :, :],
                                      perm=[3, 1, 2, 0]),
                         max_outputs=19)
        tf.summary.image('paf_pre_stage_%d' % i,
                         tf.transpose(paf_pre[i][0:1, :, :, :],
                                      perm=[3, 1, 2, 0]),
                         max_outputs=38)
    tf.summary.image('input', img_normalized, max_outputs=4)

    logger.info('initialize session...')
    merged = tf.summary.merge_all()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        writer = tf.summary.FileWriter(checkpoint_path, sess.graph)
        sess.run(tf.group(tf.global_variables_initializer()))
        if args.backbone_net_ckpt_path is not None:
            logger.info('restoring vgg weights from %s' %
                        args.backbone_net_ckpt_path)
            restorer.restore(sess, args.backbone_net_ckpt_path)
        if args.continue_training:
            saver.restore(
                sess,
                tf.train.latest_checkpoint(checkpoint_dir=checkpoint_path))
            logger.info('restoring from checkpoint...')
        logger.info('start training...')
        coord = tf.train.Coordinator()
        enqueuer.set_coordinator(coord)
        enqueuer.start()
        while True:
            best_checkpoint = float('inf')
            for _ in tqdm(range(steps_per_echo), ):
                total_loss, _, gs_num = sess.run([loss, train, global_step])
                echo = gs_num / steps_per_echo

                if gs_num % args.save_summary_frequency == 0:
                    total_loss, gs_num, summary, lr = sess.run(
                        [loss, global_step, merged, learning_rate])
                    writer.add_summary(summary, gs_num)
                    logger.info('echos=%f, step=%d, total_loss=%f, lr=%f' %
                                (echo, gs_num, total_loss, lr))

                if gs_num % args.save_checkpoint_frequency == 0:
                    valid_loss = 0
                    if len(validation_cache) == 0:
                        for images_test, heatmaps, vectmaps in tqdm(
                                df_valid.get_data()):
                            validation_cache.append(
                                (images_test, heatmaps, vectmaps))
                        df_valid.reset_state()
                        del df_valid
                        df_valid = None

                    for images_test, heatmaps, vectmaps in validation_cache:
                        valid_loss += sess.run(loss,
                                               feed_dict={
                                                   q_inp: images_test,
                                                   q_vect: vectmaps,
                                                   q_heat: heatmaps
                                               })

                    if valid_loss / len(validation_cache) <= best_checkpoint:
                        best_checkpoint = valid_loss / len(validation_cache)
                        saver.save(sess,
                                   save_path=checkpoint_path + '/' + 'model',
                                   global_step=gs_num)
                        logger.info(
                            'best_checkpoint = %f, saving checkpoint to ' %
                            best_checkpoint + checkpoint_path + '/' +
                            'model-%d' % gs_num)

                    else:
                        logger.info('validation loss = %f, no improvement' %
                                    (valid_loss / len(validation_cache)))

                if echo >= args.max_echos:
                    sess.close()
                    return 0
        content_input = tf.placeholder(tf.float32,
                                       shape=(1, None, None, 3),
                                       name='content_input')
        style_input = tf.placeholder(tf.float32,
                                     shape=(1, None, None, 3),
                                     name='style_input')

        # switch RGB to BGR
        content = tf.reverse(content_input, axis=[-1])
        style = tf.reverse(style_input, axis=[-1])

        # preprocess image
        content = vgg.preprocess(content)
        style = vgg.preprocess(style)

        encoder_content, encoder_content_points = vgg.vgg_19(
            content, reuse=False, final_endpoint="conv4_1")
        encoder_style, encoder_style_points = vgg.vgg_19(
            style, reuse=True, final_endpoint="conv4_1")

        # pass the encoded images to AdaIN
        target_features = AdaIN(encoder_content, encoder_style)

        # decode target features back to image
        with tf.variable_scope("decoder_target"):
            #alpha = 0.8
            #target_features=(1-alpha)*encoder_content+alpha*target_features #content-style trade-off
            generated_img = decoder.decode(target_features)

            # deprocess image
            generated_img = vgg.deprocess(generated_img)
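
# Editor's note: AdaIN() above is assumed to implement adaptive instance
# normalization (Huang & Belongie, 2017): the content features are normalized
# per channel and re-scaled with the style features' channel-wise statistics.
# A minimal sketch under that assumption (names are illustrative, not the
# original helper):
def adain_sketch(content_feat, style_feat, epsilon=1e-5):
    # channel-wise statistics over the spatial dimensions (NHWC layout assumed)
    c_mean, c_var = tf.nn.moments(content_feat, axes=[1, 2], keep_dims=True)
    s_mean, s_var = tf.nn.moments(style_feat, axes=[1, 2], keep_dims=True)
    normalized = (content_feat - c_mean) / tf.sqrt(c_var + epsilon)
    return normalized * tf.sqrt(s_var + epsilon) + s_mean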
Example #14
0
def style_transfer(content_image_filename, style_image_filename,
                   model_filename, tensorboard_path, learning_rate,
                   learning_rate_decay_factor, decay_steps, max_iteration):
    # image preprocessing
    resized_width = 1000
    raw_content_image = Image.open(content_image_filename)
    raw_style_image = Image.open(style_image_filename)
    if raw_style_image.mode == 'L':
        raw_content_image = raw_content_image.convert('L')
    raw_content_image = raw_content_image.convert('RGB')
    raw_style_image = raw_style_image.convert('RGB')
    raw_content_image = raw_content_image.resize(
        (resized_width,
         int(resized_width * raw_content_image.height /
             raw_content_image.width)),
        resample=Image.LANCZOS)
    raw_style_image = raw_style_image.resize(
        (resized_width,
         int(resized_width * raw_style_image.height / raw_style_image.width)),
        resample=Image.LANCZOS)
    content_image = np.array(raw_content_image, dtype=np.float32)
    style_image = np.array(raw_style_image, dtype=np.float32)
    if len(content_image.shape) != 3 or content_image.shape[2] != 3 or len(
            style_image.shape) != 3 or style_image.shape[2] != 3:
        print('image format error!')
        return

    model_layers, mean_pixel = vgg.load_model_data(model_filename)

    # content image features
    mean_content_image = np.array([content_image - mean_pixel],
                                  dtype=np.float32)
    content_features, _ = vgg.vgg_19(mean_content_image, model_layers)

    # style image features
    mean_style_image = np.array([style_image - mean_pixel], dtype=np.float32)
    _, style_features = vgg.vgg_19(mean_style_image, model_layers)
    style_gram_features = []
    for features in style_features:
        features = tf.reshape(features,
                              shape=[-1, features.get_shape()[-1].value])
        features_size = reduce(lambda x, y: x.value * y.value,
                               features.get_shape())
        gram = tf.matmul(tf.transpose(features), features) / features_size
        style_gram_features.append(gram)

    # generated image features
    initial_image = tf.random_normal(
        (1, ) + content_image.shape, dtype=tf.float32) * 0.256
    generated_image = tf.Variable(initial_image)
    generated_content_features, generated_style_features = vgg.vgg_19(
        generated_image, model_layers)
    generated_gram_features = []
    for features in generated_style_features:
        features = tf.reshape(features,
                              shape=[-1, features.get_shape()[-1].value])
        features_size = reduce(lambda x, y: x.value * y.value,
                               features.get_shape())
        gram = tf.matmul(tf.transpose(features), features) / features_size
        generated_gram_features.append(gram)

    # content loss
    content_weight = 5.0
    content_loss = 0.0
    for (content_feature,
         generated_content_feature) in zip(content_features,
                                           generated_content_features):
        content_feature_size = reduce(lambda x, y: x * y,
                                      content_feature.get_shape()).value
        content_loss += 2 * tf.nn.l2_loss(
            generated_content_feature - content_feature) / content_feature_size
    content_loss *= content_weight

    # style loss
    style_weight = 500.0
    style_layer_weight = 0.2
    style_loss = 0.0
    for (style_gram_feature,
         generated_gram_feature) in zip(style_gram_features,
                                        generated_gram_features):
        style_gram_size = reduce(lambda x, y: x.value * y.value,
                                 style_gram_feature.get_shape())
        style_loss += style_layer_weight * 2 * tf.nn.l2_loss(
            generated_gram_feature - style_gram_feature) / style_gram_size
    style_loss *= style_weight

    # tv loss
    tv_weight = 100.0
    tv_x_size = reduce(mul,
                       (x.value
                        for x in generated_image[:, :, 1:, :].get_shape()), 1)
    tv_y_size = reduce(mul,
                       (y.value
                        for y in generated_image[:, 1:, :, :].get_shape()), 1)
    tv_loss = tv_weight * 2 * (
        (tf.nn.l2_loss(generated_image[:, :, 1:, :] -
                       generated_image[:, :, :content_image.shape[1] - 1, :]) /
         tv_x_size) +
        (tf.nn.l2_loss(generated_image[:, 1:, :, :] -
                       generated_image[:, :content_image.shape[0] - 1, :, :]) /
         tv_y_size))

    # photorealism regularization via the matting Laplacian matrix
    matting_laplacian_weight = 50000.0
    laplacian_content_image = raw_content_image.resize(
        (10, int(10 * raw_content_image.height / raw_content_image.width)),
        resample=Image.LANCZOS)
    laplacian_content_image = np.array(laplacian_content_image,
                                       dtype=np.float32)
    laplacian_generated_image = tf.image.resize_bilinear(
        generated_image, size=laplacian_content_image.shape[0:2])

    matting_laplacian_matrix = image_utils.compute_matting_laplacian(
        laplacian_content_image)

    matting_laplacian_sparse_tensor = tf.SparseTensor(
        indices=np.array(
            [matting_laplacian_matrix.row, matting_laplacian_matrix.col]).T,
        values=matting_laplacian_matrix.data,
        dense_shape=matting_laplacian_matrix.shape)
    matting_laplacian_tensor = tf.sparse_tensor_to_dense(
        matting_laplacian_sparse_tensor,
        default_value=0.0,
        validate_indices=False)
    matting_laplacian_loss = 0.0
    for dim in range(3):
        dim_generated_image = tf.slice(laplacian_generated_image,
                                       [0, 0, 0, dim], [-1, -1, -1, 1])
        dim_generated_image = tf.reshape(dim_generated_image, shape=[-1, 1])
        dim_generated_image_product = tf.matmul(
            tf.matmul(dim_generated_image,
                      matting_laplacian_tensor,
                      transpose_a=True), dim_generated_image)
        dim_generated_image_product = tf.reshape(dim_generated_image_product,
                                                 shape=[])
        matting_laplacian_loss += dim_generated_image_product
    matting_laplacian_loss *= matting_laplacian_weight

    # total loss (includes the matting Laplacian regularization computed above)
    loss = content_loss + style_loss + tv_loss + matting_laplacian_loss

    # optimizer
    with tf.device('/cpu:0'):
        global_step = tf.Variable(0, name='global_step', trainable=False)
    lr = tf.train.exponential_decay(learning_rate,
                                    global_step,
                                    decay_steps,
                                    learning_rate_decay_factor,
                                    staircase=True)
    optimizer = tf.train.AdamOptimizer(lr)
    train_op = optimizer.minimize(loss, global_step=global_step)

    # summary
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('lr', lr)
    if not os.path.exists(tensorboard_path):
        os.makedirs(tensorboard_path)

    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        summary_op = tf.summary.merge_all()
        writer = tf.summary.FileWriter(tensorboard_path, sess.graph)
        sess.run(init_op)
        start_time = datetime.datetime.now()
        for i in range(max_iteration):
            _, loss_value, step, summary_value, lr_value = sess.run(
                [train_op, loss, global_step, summary_op, lr])
            end_time = datetime.datetime.now()
            print('[{}] Step: {}, loss: {}, lr: {}'.format(
                end_time - start_time, step, loss_value, lr_value))
            writer.add_summary(summary_value, step)
            if step % 100 == 0 or i == max_iteration - 1:
                stylized_image = generated_image.eval()
                stylized_image = stylized_image.reshape(
                    content_image.shape) + mean_pixel
                stylized_image = np.clip(stylized_image, 0,
                                         255).astype(np.uint8)
                Image.fromarray(stylized_image).save('data/stylized_' +
                                                     str(step) + '.jpg',
                                                     quality=95)
                print('saved stylized_' + str(step) + '.jpg to data/')
            start_time = end_time
    print('style transfer done!')
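
# Editor's note: a hypothetical invocation of the function above, assuming the
# VGG-19 weights file expected by vgg.load_model_data() is available locally
# (paths and hyper-parameters below are illustrative only):
#
# style_transfer(content_image_filename='data/content.jpg',
#                style_image_filename='data/style.jpg',
#                model_filename='imagenet-vgg-verydeep-19.mat',
#                tensorboard_path='logs/style_transfer',
#                learning_rate=10.0,
#                learning_rate_decay_factor=0.9,
#                decay_steps=100,
#                max_iteration=1000)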
Example #15
0
def train_model():
    torch.backends.cudnn.deterministic = True
    device = torch.device("cuda")

    print("CUDA visible devices: " + str(torch.cuda.device_count()))
    print("CUDA Device Name: " + str(torch.cuda.get_device_name(device)))

    # Creating dataset loaders
    train_dataset = LoadTrainData(opt.dataroot, TRAIN_SIZE, test=False)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=opt.batch_size,
                              shuffle=True,
                              num_workers=1,
                              pin_memory=True,
                              drop_last=True)
    test_dataset = LoadTrainData(opt.dataroot, TEST_SIZE, test=True)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=1,
                             shuffle=False,
                             num_workers=1,
                             pin_memory=True,
                             drop_last=False)

    # Creating image processing network and optimizer
    generator = MWRCAN().to(device)
    generator = torch.nn.DataParallel(generator)
    generator.load_state_dict(
        torch.load('./ckpt/Track1/mwcnnvggssim4_epoch_60.pth'))
    # generator.load_state_dict(torch.load('./ckpt/Track2/G_epoch_46.pth'))
    disc = Discriminator().to(device)
    disc = torch.nn.DataParallel(disc)
    # disc.load_state_dict(torch.load('./ckpt/Track2/D_epoch_46.pth'))

    optimizer_g = Adam(params=generator.parameters(), lr=opt.lr)
    scheduler_g = torch.optim.lr_scheduler.MultiStepLR(optimizer_g,
                                                       [50, 100, 150, 200],
                                                       gamma=0.5)
    optimizer_d = Adam(params=disc.parameters(), lr=opt.lr * 2)
    scheduler_d = torch.optim.lr_scheduler.MultiStepLR(optimizer_d,
                                                       [50, 100, 150, 200],
                                                       gamma=0.5)

    VGG_19 = vgg_19(device)
    MSE_loss = torch.nn.MSELoss()
    MS_SSIM = MSSSIM()
    L1_loss = torch.nn.L1Loss()

    # Train the network
    for epoch in range(opt.epochs):
        generator.to(device).train()
        disc.to(device).train()
        print("generator lr =  %.8f; discriminator lr =  %.8f" %
              (scheduler_g.get_lr()[0], scheduler_d.get_lr()[0]))
        torch.cuda.empty_cache()
        i = 0
        for x, y in train_loader:
            one = Variable(torch.cuda.FloatTensor(x.shape[0], 1).fill_(1.0),
                           requires_grad=False)
            zero = Variable(torch.cuda.FloatTensor(x.shape[0], 1).fill_(0.0),
                            requires_grad=False)
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)
            optimizer_g.zero_grad()
            enhanced = generator(x)
            fake_label = disc(enhanced).mean()

            loss_l1 = L1_loss(enhanced, y)
            enhanced_vgg = VGG_19(normalize_batch(enhanced))
            target_vgg = VGG_19(normalize_batch(y))
            loss_content = L1_loss(enhanced_vgg, target_vgg)
            loss_ssim = MS_SSIM(enhanced, y)
            adversarial_loss = MSE_loss(one, fake_label)

            g_loss = loss_l1 + loss_content + (
                1 - loss_ssim) * 0.15 + adversarial_loss * 0.1
            g_loss.backward()
            optimizer_g.step()

            optimizer_d.zero_grad()
            real_label = disc(y).mean()
            fake_label = disc(enhanced.detach()).mean()
            d_loss = MSE_loss(one, real_label) + MSE_loss(fake_label, zero)
            d_loss.backward()
            optimizer_d.step()

            # Log training losses every 100 iterations
            if i % 100 == 0:
                #print(loss_ssim)
                print(
                    "Epoch %d_%d, L1: %.4f, vgg: %.4f, SSIM: %.4f, adv: %.4f, g_loss: %.4f"
                    % (epoch, i, loss_l1, loss_content,
                       (1 - loss_ssim) * 0.15, adversarial_loss * 0.1, g_loss))
                print("Epoch %d_%d, d_loss: %.4f" % (epoch, i, d_loss))
            i = i + 1

        scheduler_g.step()
        scheduler_d.step()

        # Save the model that corresponds to the current epoch
        generator.eval().cpu()
        disc.eval().cpu()
        torch.save(
            generator.state_dict(),
            os.path.join(opt.save_model_path,
                         "g_epoch_" + str(epoch) + ".pth"))
        torch.save(
            disc.state_dict(),
            os.path.join(opt.save_model_path,
                         "d_epoch_" + str(epoch) + ".pth"))

        # Evaluate the model
        generator.to(device)
        disc.to(device)
        generator.eval()
        disc.eval()
        loss_psnr_eval = 0
        with torch.no_grad():
            for x, y in test_loader:
                x = x.to(device, non_blocking=True)
                y = y.to(device, non_blocking=True)
                enhanced = generator(x)
                enhanced = torch.clamp(
                    torch.round(enhanced * 255), min=0, max=255) / 255
                y = torch.clamp(torch.round(y * 255), min=0, max=255) / 255
                loss_mse_temp = MSE_loss(enhanced, y).item()
                loss_psnr_eval += 20 * math.log10(
                    1.0 / math.sqrt(loss_mse_temp))
        loss_psnr_eval = loss_psnr_eval / TEST_SIZE
        print("Epoch %d, psnr: %.4f" % (epoch, loss_psnr_eval))
Example #16
0
def main():

    session = tf.Session()

    ### This section deals with preprocessing for the VGG network ###

    # Used for variable clipping. For imagenet metamers we always optimize a variable input
    # bounded between 0-1 and then rescale before going into the network.
    min_image = 0
    max_image = 1

    # Parameters for preprocessing VGG, the input images are between 0-255
    subtract_value = 0
    multiply_value = 255

    # The mean channel values used for VGG preprocessing
    means = [123.68, 116.779, 103.939]

    # Make an input variable for the network (will be optimized)
    # Include constraint to maintain variable between min_image and max_image
    imgs = tf.Variable(
        np.random.random([1, 224, 224, 3]),
        dtype=tf.float32,
        constraint=lambda t: tf.clip_by_value(t, min_image, max_image))

    # apply the vgg preprocessing for loaded checkpoint
    img_preproc = tf.subtract(imgs, subtract_value)
    img_preproc = tf.multiply(img_preproc, multiply_value)
    img_preproc = mean_image_subtraction(img_preproc, means)

    ### Now build the model, and load the saved checkpoint ###

    # Get vgg.py from https://github.com/tensorflow/models/tree/master/research/slim
    logits, nets = vgg.vgg_19(img_preproc, is_training=False, scope='vgg_19')

    # Make a saver and load the checkpoint
    # model checkpoint http://download.tensorflow.org/models/vgg_19_2016_08_28.tar.gz
    saver = tf.train.Saver(var_list=tf.get_collection(
        tf.GraphKeys.GLOBAL_VARIABLES, scope='vgg_19'))
    saver.restore(session, 'vgg_19.ckpt')

    ### Include pointers to the layers we will use for metamer generation ###
    ### Much of this block is bookkeeping to make accessing intermediate layers easier ###

    # The layers that were used in Feather et al. 2019 (the '_jittered_relu' suffix was removed for figure labels)
    metamer_layers = [
        'conv1_2_jittered_relu', 'conv2_2_jittered_relu',
        'conv3_4_jittered_relu', 'conv4_4_jittered_relu',
        'conv5_4_jittered_relu', 'fc6_jittered_relu', 'fc7_jittered_relu',
        'fc8'
    ]

    nets['input_image'] = imgs
    nets['image_prepoc'] = img_preproc
    nets['logits'] = logits
    nets['min_image'] = min_image
    nets['max_image'] = max_image
    nets['subtract_value'] = subtract_value
    nets['multiply_value'] = multiply_value
    nets['visualization_input'] = nets['input_image']
    nets['predictions'] = tf.nn.softmax(logits)
    nets['class_labels_key'] = class_names
    nets['imagenet_logits'] = nets['logits']

    # Some of the tfslim networks have an offset for the class index. The saved VGG model does not.
    nets['class_index_offset'] = int(0)

    ### The following dictionaries and code are used to grab the activations of the conv layers ###
    ### before the non-linearity is applied, so that we can apply the modified gradient ReLU.   ###
    ### The activations after the modified gradient ReLU will be the same as the activations    ###
    ### after the normal gradient ReLU.                                                         ###

    # Get the pre-relu layers and add them to nets, format <layer_name_in_nets>:<name_in_graph>
    layers_pre_relu = {
        'conv1_2_prerelu': 'vgg_19/conv1/conv1_2/BiasAdd:0',
        'conv2_2_prerelu': 'vgg_19/conv2/conv2_2/BiasAdd:0',
        'conv3_4_prerelu': 'vgg_19/conv3/conv3_4/BiasAdd:0',
        'conv4_4_prerelu': 'vgg_19/conv4/conv4_4/BiasAdd:0',
        'conv5_4_prerelu': 'vgg_19/conv5/conv5_4/BiasAdd:0',
        'fc6_prerelu': 'vgg_19/fc6/BiasAdd:0',
        'fc7_prerelu': 'vgg_19/fc7/BiasAdd:0'
    }

    # remap some of the names in nets for easy access
    # for VGG, all of these values are after the non-linearity is applied
    nets['conv1_2'] = nets['vgg_19/conv1/conv1_2']
    nets['conv2_2'] = nets['vgg_19/conv2/conv2_2']
    nets['conv3_4'] = nets['vgg_19/conv3/conv3_4']
    nets['conv4_4'] = nets['vgg_19/conv4/conv4_4']
    nets['conv5_4'] = nets['vgg_19/conv5/conv5_4']
    nets['fc6'] = nets['vgg_19/fc6']
    nets['fc7'] = nets['vgg_19/fc7']
    nets['fc8'] = nets['vgg_19/fc8']

    add_jitter_layers = [
        'conv1_2', 'conv2_2', 'conv3_4', 'conv4_4', 'conv5_4', 'fc6', 'fc7'
    ]

    # This logic is helpful for networks such as ResNet, where we modify the relu for all of the layers
    # that will be concatenated at the end of the block.
    for layer_key, layer_name in layers_pre_relu.items():
        if type(layer_name) is list:
            concat_layers_mixed = []
            # Some mixed layers are themselves built from mixed layers, so the modified ReLU is applied to each branch.
            for concat_layer in layer_name:
                concat_layers_mixed.append(
                    tf.get_default_graph().get_tensor_by_name(concat_layer))
            nets[layer_key] = concat_layers_mixed
        else:
            nets[layer_key] = tf.get_default_graph().get_tensor_by_name(
                layer_name)

    for layer in add_jitter_layers:
        nets, net_layer_name = add_jitter_relu_to_layer_vgg(nets, layer)

    # Initialize the input variable, check if other things aren't initialized (useful for generating
    # metamers from a random network)
    uninitialized = tf.report_uninitialized_variables().eval(session=session)
    print('##### \n UNINITIALIZED VARIABLES ARE:')
    print(uninitialized)
    print('#####')
    all_variables = tf.global_variables()
    init_op = tf.variables_initializer([
        var for var in all_variables if any([
            var_name.decode('utf-8') in var.name
            for var_name in uninitialized.tolist()
        ])
    ])
    init_op.run(session=session)

    ### Remaining code block runs some sanity checks with an example image. ###

    # Pull in an example image that is classified correctly (it is an airplane from imagenet).
    image_path = 'assets/airplane.png'
    image_class = 'airliner'
    image_dict = metamer_helpers.use_image_path_specified_image(
        image_path, image_class=image_class, im_shape=224)

    # Normalize between 0-1, since our variables are normalized to those values.
    image_dict['image'] = (
        image_dict['image'] - image_dict['min_value_image_set']
    ) / (image_dict['max_value_image_set'] - image_dict['min_value_image_set'])

    eval_predictions = session.run(nets['predictions'],
                                   feed_dict={
                                       imgs: [image_dict['image']]
                                   }).ravel()
    sorted_predictions = np.argsort(eval_predictions)[::-1]
    prediction_check_msg = 'Predicted image for airliner example is %s with %f prob' % (
        class_names[sorted_predictions[0] + nets['class_index_offset']],
        eval_predictions[sorted_predictions[0]])
    predicted_class = class_names[sorted_predictions[0] +
                                  nets['class_index_offset']]
    assert predicted_class == image_class, prediction_check_msg

    # Make sure that the activations are the same between the normal relu and the modified gradient
    # relu for an example layer.
    same_layers = {
        'normal_relu': nets['conv3_4'],
        'modified_grad_relu': nets['conv3_4_jittered_relu']
    }
    check_relu = session.run(same_layers,
                             feed_dict={imgs: [image_dict['image']]})
    relu_check_msg = (
        'The activations after the modified gradient ReLU do not '
        'match the activations after the normal gradient ReLU.')
    assert np.all(check_relu['normal_relu'] ==
                  check_relu['modified_grad_relu']), relu_check_msg

    return nets, session, metamer_layers
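
# Editor's note: mean_image_subtraction() used above is assumed to behave like
# the helper of the same name in the tensorflow/models slim preprocessing code:
# it subtracts a per-channel mean from an image tensor. A minimal sketch:
def mean_image_subtraction_sketch(images, means):
    # images: float tensor [..., H, W, C]; means: list of C per-channel means
    channels = tf.split(images, num_or_size_splits=len(means), axis=-1)
    for i in range(len(means)):
        channels[i] = channels[i] - means[i]
    return tf.concat(channels, axis=-1)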
Example #17
0
def run_hand(all_video_list, video_output_parent_path, use_bn, train_vgg,
             checkpoint_path, backbone_net_ckpt_path):
    with tf.name_scope('inputs'):
        raw_img = tf.placeholder(tf.float32, shape=[None, None, None, 3])
        img_size = tf.placeholder(dtype=tf.int32,
                                  shape=(2, ),
                                  name='original_image_size')

    img_normalized = raw_img / 255 - 0.5

    # define vgg19
    with slim.arg_scope(vgg.vgg_arg_scope()):
        vgg_outputs, end_points = vgg.vgg_19(img_normalized)

    # get net graph
    logger.info('initializing model...')
    net = PafNet(inputs_x=vgg_outputs, hm_channel_num=2, use_bn=use_bn)
    hm_pre, added_layers_out = net.gen_hand_net()

    hm_up = tf.image.resize_area(hm_pre[5], img_size)
    # cpm_up = tf.image.resize_area(cpm_pre[5], img_size)
    # hm_up = hm_pre[5]
    # cpm_up = cpm_pre[5]
    smoother = Smoother({'data': hm_up}, 25, 3.0)
    gaussian_heatMat = smoother.get_output()

    max_pooled_in_tensor = tf.nn.pool(gaussian_heatMat,
                                      window_shape=(3, 3),
                                      pooling_type='MAX',
                                      padding='SAME')
    tensor_peaks = tf.where(tf.equal(gaussian_heatMat, max_pooled_in_tensor),
                            gaussian_heatMat, tf.zeros_like(gaussian_heatMat))

    logger.info('initialize saver...')
    # trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='openpose_layers')
    # trainable_var_list = []
    trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope='openpose_layers')
    if train_vgg:
        trainable_var_list = trainable_var_list + tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19')

    restorer = tf.train.Saver(tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19'),
                              name='vgg_restorer')
    saver = tf.train.Saver(trainable_var_list)
    logger.info('initialize session...')
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.group(tf.global_variables_initializer()))
        logger.info('restoring vgg weights...')
        restorer.restore(sess, backbone_net_ckpt_path)
        logger.info('restoring from checkpoint...')
        saver.restore(
            sess, tf.train.latest_checkpoint(checkpoint_dir=checkpoint_path))
        logger.info('initialization done')
        action_list = all_video_list.keys()
        for action in action_list:
            video_list = all_video_list[action]
            for video in video_list:
                dir_name = video.split('/')
                name = dir_name[-2]
                save_path = video_output_parent_path + '/' + name

                anno_loader = cut_body_part(anno_file=save_path + '/' +
                                            action + '_lstm.json',
                                            coco_images=save_path + '/pics/')
                img_info = []
                anno_info = []
                for img, hand_list, img_meta, anno in tqdm(
                        anno_loader.crop_part()):
                    for hand in hand_list:
                        position = hand['position']
                        ori_h = position[3] - position[1] + 1
                        ori_w = position[2] - position[0] + 1
                        peaks_origin, heatmap_origin = sess.run(
                            [
                                tensor_peaks,
                                hm_up,
                            ],
                            feed_dict={
                                raw_img: hand['hand'][np.newaxis, :, :, :],
                                img_size: [ori_h, ori_w]
                            })
                        re_origin = np.where(
                            peaks_origin[0, :, :,
                                         0] == np.max(peaks_origin[0, :, :,
                                                                   0]))
                        peaks_flip, heatmap_flip = sess.run(
                            [
                                tensor_peaks,
                                hm_up,
                            ],
                            feed_dict={
                                raw_img:
                                np.fliplr(hand['hand'][np.newaxis, :, :, :]),
                                img_size: [ori_h, ori_w]
                            })
                        peaks_flip = np.fliplr(peaks_flip)
                        re_flip = np.where(
                            peaks_flip[0, :, :,
                                       0] == np.max(peaks_flip[0, :, :, 0]))
                        anno['keypoints'][hand['idx'] * 3] = int(
                            position[0] +
                            (re_origin[1][0] + re_flip[1][0]) / 2)
                        anno['keypoints'][hand['idx'] * 3 + 1] = int(
                            position[1] +
                            (re_origin[0][0] + re_flip[0][0]) / 2)
                    anno_info.append(anno)
                    img_info.append(img_meta)
                ref = {"images": img_info, "annotations": anno_info}
                out_json_path = save_path + '/' + action + '_hand_coco.json'
                with open(out_json_path, "w") as f:
                    json.dump(ref, f)
                    print('wrote to ' + out_json_path)
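
# Editor's note: the Smoother class used above is assumed to apply a fixed
# Gaussian blur (kernel size 25, sigma 3.0) to every heatmap channel. A minimal
# depthwise-convolution sketch under that assumption (the heatmap channel count
# must be statically known):
def gaussian_smooth_sketch(heatmaps, kernel_size=25, sigma=3.0):
    x = np.arange(kernel_size, dtype=np.float32) - (kernel_size - 1) / 2.0
    g = np.exp(-(x ** 2) / (2.0 * sigma ** 2))
    kernel_2d = np.outer(g, g)
    kernel_2d /= kernel_2d.sum()
    channels = heatmaps.get_shape().as_list()[-1]
    # depthwise filter shape: [height, width, in_channels, channel_multiplier]
    kernel = np.tile(kernel_2d[:, :, np.newaxis, np.newaxis], (1, 1, channels, 1))
    return tf.nn.depthwise_conv2d(heatmaps,
                                  tf.constant(kernel, dtype=tf.float32),
                                  strides=[1, 1, 1, 1],
                                  padding='SAME')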
Example #18
0
def run():
    #Create log_dir for evaluation information
    if not os.path.exists(log_eval):
        os.mkdir(log_eval)

    #Just construct the graph from scratch again
    with tf.Graph().as_default() as graph:
        tf.logging.set_verbosity(tf.logging.INFO)
        #Get the dataset first and load one batch of validation images and labels tensors. Set is_training as False so as to use the evaluation preprocessing
        dataset = get_split('validation', dataset_dir)
        images, raw_images, labels = load_batch(dataset,
                                                batch_size=batch_size,
                                                is_training=False)
        imagescam, rcam, lcam = load_batch(dataset,
                                           batch_size=batch_size,
                                           is_training=False,
                                           cam=True)
        #Create some information about the training steps
        num_batches_per_epoch = dataset.num_samples / batch_size
        num_steps_per_epoch = num_batches_per_epoch

        # placeholders for CAM computation
        y_ = tf.placeholder(tf.int64, [None])
        x = tf.placeholder_with_default(images, (None, 224, 224, 3))

        #Now create the inference model but set is_training=False
        with slim.arg_scope(vgg_arg_scope()):
            logits, end_points = vgg_19(x,
                                        num_classes=dataset.num_classes,
                                        is_training=False,
                                        global_pool=True)

        #Get the class activation maps
        class_activation_map = get_class_map(1, end_points['vgg_19/conv6'],
                                             224)

        # get all the variables to restore from the checkpoint file and create the saver function to restore them
        variables_to_restore = slim.get_variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        def restore_fn(sess):
            return saver.restore(sess, checkpoint_file)

        # Just define the metrics to track, without the loss or anything else
        predictions = tf.argmax(end_points['vgg_19/fc8'], 1)
        # accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(predictions, labels)
        accuracy, accuracy_update = tf.metrics.accuracy(labels, predictions)
        metrics_op = tf.group(accuracy_update)

        #Create the global step and an increment op for monitoring
        global_step = tf.train.get_or_create_global_step()
        global_step_op = tf.assign(
            global_step, global_step + 1
        )  # no apply_gradients call here, so the global_step is incremented manually

        # logits tensor reused for CAM computation
        y = logits

        #Create a evaluation step function
        def eval_step(sess, metrics_op, global_step):
            '''
            Runs the metrics op, advances the global step counter, and logs the streaming accuracy.
            '''
            start_time = time.time()
            print 'start', start_time
            _ = sess.run(metrics_op)
            global_step_count = sess.run(global_step_op)
            accuracy_value = sess.run(accuracy)
            time_elapsed = time.time() - start_time

            #Log some information
            logging.info(
                'Global Step %s: Streaming Accuracy: %.4f (%.2f sec/step)',
                global_step_count, accuracy_value, time_elapsed)

            print 'starting cam inspection'
            #produce and save CAMs every 10 steps
            inspect_class_activation_map(sess, class_activation_map,
                                         end_points['vgg_19/conv6'], imagescam,
                                         lcam, global_step_count, batch_size,
                                         x, y_, y)

            print 'ending cam inspection'
            return accuracy_value

        #Define some scalar quantities to monitor
        tf.summary.scalar('Validation_Accuracy', accuracy)
        my_summary_op = tf.summary.merge_all()

        #Get your supervisor
        #sv = tf.train.Supervisor(logdir = log_eval, summary_op = None, saver = None, init_fn = restore_fn)

        #global_step_tensor = tf.Variable(7530, trainable=False, name='global_step')
        #Now we are ready to run in one session
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())
            restore_fn(sess)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            while not coord.should_stop():

                #from tensorflow.python import debug as tfdb
                #sess = tfdb.LocalCLIDebugWrapperSession(sess)
                #tf.train.global_step(sess, global_step_tensor)
                for step in xrange(num_steps_per_epoch * num_epochs):
                    print global_step
                    sess.run(global_step)

                    # print vital information at the start of every epoch
                    if step % num_batches_per_epoch == 0:
                        logging.info('Epoch: %s/%s',
                                     step / num_batches_per_epoch + 1,
                                     num_epochs)
                        logging.info('Current Streaming Accuracy: %.4f',
                                     sess.run(accuracy))

                    #Compute summaries every 10 steps and continue evaluating
                    if step % 10 == 0:
                        print 'mod 10'
                        eval_step(sess,
                                  metrics_op=metrics_op,
                                  global_step=global_step)
                        summaries = sess.run(my_summary_op)
                        #sv.summary_computed(sess, summaries)

                    #Otherwise just run as per normal
                    else:
                        print 'next step'
                        eval_step(sess,
                                  metrics_op=metrics_op,
                                  global_step=global_step)

            coord.request_stop()
            #At the end of all the evaluation, show the final accuracy
            logging.info('Final Streaming Accuracy: %.4f', sess.run(accuracy))

            #Now we want to visualize the last batch's images just to see what our model has predicted
            raw_images, labels, predictions = sess.run(
                [raw_images, labels, predictions])
            for i in range(10):
                image, label, prediction = raw_images[i], labels[
                    i], predictions[i]
                prediction_name, label_name = dataset.labels_to_name[
                    prediction], dataset.labels_to_name[label]
                text = 'Prediction: %s \n Ground Truth: %s' % (prediction_name,
                                                               label_name)
                img_plot = plt.imshow(image)

                #Set up the plot and hide axes
                plt.title(text)
                img_plot.axes.get_yaxis().set_ticks([])
                img_plot.axes.get_xaxis().set_ticks([])
                plt.show()
            coord.request_stop()
            coord.join(threads)

            logging.info(
                'Model evaluation has completed! Visit TensorBoard for more information regarding your evaluation.'
            )
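
# Editor's note: get_class_map() used above is not shown in this snippet. Class
# activation maps (Zhou et al., 2016) are usually a weighted sum of the last
# conv feature map, using the classifier weights of the requested class. A
# minimal sketch under that assumption (the variable-scope name below is an
# illustrative guess, not necessarily the one in the original code):
def get_class_map_sketch(label, conv_features, output_size, fc_scope='vgg_19/fc8'):
    with tf.variable_scope(fc_scope, reuse=True):
        fc_weights = tf.get_variable('weights')  # assumed shape [1, 1, C, num_classes]
    class_weights = tf.reshape(fc_weights[:, :, :, label], [-1, 1])  # [C, 1]
    num_channels = conv_features.get_shape().as_list()[-1]
    flat = tf.reshape(conv_features, [-1, num_channels])             # [N*H*W, C]
    cam = tf.matmul(flat, class_weights)                             # [N*H*W, 1]
    shape = tf.shape(conv_features)
    cam = tf.reshape(cam, [shape[0], shape[1], shape[2], 1])
    return tf.image.resize_bilinear(cam, [output_size, output_size])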