Example No. 1
def main():
    """Create the model and start the training."""
    writer = SummaryWriter(args.snapshot_dir)

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    cudnn.enabled = True

    deeplab = Res_CE2P(num_classes=args.num_classes)

    saved_state_dict = torch.load(args.restore_from)
    new_params = deeplab.state_dict().copy()

    # Copy pretrained weights, skipping the 'layer5' classifier head so it is
    # re-initialized for args.num_classes; the leading module prefix is
    # stripped from each checkpoint key.
    for i in saved_state_dict:
        i_parts = i.split('.')
        if i_parts[1] != 'layer5':
            new_params['.'.join(i_parts[1:])] = saved_state_dict[i]
    # When resuming mid-training the snapshot already matches the model and is
    # loaded as-is; a fresh run starts from the filtered pretrained weights.
    if args.start_iters > 0:
        deeplab.load_state_dict(saved_state_dict)
    else:
        deeplab.load_state_dict(new_params)
    print(deeplab)
    model = ModelDataParallel(deeplab)
    model.train()
    model.float()
    model.cuda()

    criterion = CriterionCrossEntropyEdgeParsing()
    criterion = CriterionDataParallel(criterion)
    criterion.cuda()

    cudnn.benchmark = True

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    trainloader = data.DataLoader(
        LIPParsingEdgeDataSet(args.data_dir,
                              args.data_list,
                              max_iters=args.num_steps * args.batch_size,
                              crop_size=input_size,
                              scale=args.random_scale,
                              mirror=args.random_mirror,
                              mean=IMG_MEAN),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=8,
        pin_memory=True)

    optimizer = optim.SGD(
        [{
            'params': filter(lambda p: p.requires_grad, deeplab.parameters()),
            'lr': args.learning_rate
        }],
        lr=args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
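    # Note: the explicit 'lr' in the param group takes precedence over the
    # top-level lr keyword, so the two are redundant here; adjust_learning_rate
    # below updates the group learning rate every iteration anyway.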
    optimizer.zero_grad()

    for i_iter, batch in enumerate(trainloader):
        i_iter += args.start_iters
        images, labels, edges, _, _ = batch
        # Variable is the legacy (pre-0.4) PyTorch wrapper; on modern PyTorch
        # the .cuda() tensors could be used directly.
        images = Variable(images.cuda())
        labels = Variable(labels.long().cuda())
        edges = Variable(edges.long().cuda())

        optimizer.zero_grad()
        lr = adjust_learning_rate(optimizer, i_iter)
        preds = model(images)
        loss = criterion(preds, [labels, edges])
        loss.backward()
        optimizer.step()

        if i_iter % 100 == 0:
            writer.add_scalar('learning_rate', lr, i_iter)
            writer.add_scalar('loss', loss.data.cpu().numpy(), i_iter)

        if i_iter % args.save_pred_every == 0:
            images_inv = inv_preprocess(images, args.save_num_images, IMG_MEAN)
            labels_colors = decode_labels(labels, args.save_num_images,
                                          args.num_classes)
            edges_colors = decode_labels(edges, args.save_num_images, 2)
            if isinstance(preds, list):
                preds = preds[0]
            preds_colors = decode_predictions(preds[1], args.save_num_images,
                                              args.num_classes)
            # The edge map is binary, so decode it with 2 classes (matching
            # edges_colors above) rather than args.num_classes.
            pred_edges_colors = decode_predictions(preds[2],
                                                   args.save_num_images, 2)
            for index, (img, lab) in enumerate(zip(images_inv, labels_colors)):
                writer.add_image('Images/' + str(index), img, i_iter)
                writer.add_image('Labels/' + str(index), lab, i_iter)
                writer.add_image('preds/' + str(index), preds_colors[index],
                                 i_iter)
                writer.add_image('edges/' + str(index), edges_colors[index],
                                 i_iter)
                writer.add_image('pred_edges/' + str(index),
                                 pred_edges_colors[index], i_iter)

        print('iter = {} of {} completed, loss = {}, learning_rate = {:e}'.
              format(i_iter, args.num_steps,
                     loss.data.cpu().numpy(), lr))

        if i_iter >= args.num_steps - 1:
            print('save model ...')
            torch.save(
                deeplab.state_dict(),
                osp.join(args.snapshot_dir,
                         'LIP_' + str(args.num_steps) + '.pth'))
            break

        if i_iter % args.save_pred_every == 0:
            print('taking snapshot ...')
            torch.save(
                deeplab.state_dict(),
                osp.join(args.snapshot_dir, 'LIP_' + str(i_iter) + '.pth'))

    # `start` is assumed to be captured at module level with
    # timeit.default_timer() before main() runs.
    end = timeit.default_timer()
    print(end - start, 'seconds')
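The loop above calls adjust_learning_rate without showing its definition. A minimal sketch follows, assuming the polynomial ("poly") decay schedule commonly used with DeepLab-style parsing models; the power of 0.9 and the reliance on the global args are assumptions, not part of the example.

def adjust_learning_rate(optimizer, i_iter, power=0.9):
    """Sketch of poly decay: lr = base_lr * (1 - i_iter / max_iter) ** power."""
    # Assumes the same global `args` used by main() above.
    lr = args.learning_rate * ((1 - float(i_iter) / args.num_steps) ** power)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr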
Example No. 2
def crf(fn_im, fn_anno, fn_output, num_classes=n_classes, use_2d=True):
    # Assumes module-level imports: numpy as np, pydensecrf.densecrf as dcrf,
    # unary_from_labels / create_pairwise_gaussian / create_pairwise_bilateral
    # from pydensecrf.utils, PIL.Image, cv2, and an imread such as
    # skimage.io.imread.
    img = imread(fn_im)

    # Convert the annotation's RGB color to a single 32-bit integer color 0xBBGGRR
    anno_rgb = imread(fn_anno).astype(np.uint32)
    anno_lbl = anno_rgb[:, :, 0] + \
        (anno_rgb[:, :, 1] << 8) + (anno_rgb[:, :, 2] << 16)

    # Convert the 32bit integer color to 1, 2, ... labels.
    # Note that all-black, i.e. the value 0 for background will stay 0.
    colors, labels = np.unique(anno_lbl, return_inverse=True)

    # In the reference pydensecrf example, HAS_UNK = (0 in colors) marks the
    # all-black background as an "unknown" label to be removed; here it is
    # hard-coded to False, so black is treated as an ordinary class.
    HAS_UNK = False

    # And create a mapping back from the labels to 32bit integer colors.
    colorize = np.empty((len(colors), 3), np.uint8)
    colorize[:, 0] = (colors & 0x0000FF)
    colorize[:, 1] = (colors & 0x00FF00) >> 8
    colorize[:, 2] = (colors & 0xFF0000) >> 16

    # In the reference example the number of classes is computed from the
    # label image, subtracting one so the count excludes the value 0 (which
    # stands for "unknown"); here it is supplied via the num_classes argument.

    if use_2d:
        # Setting up the CRF model

        d = dcrf.DenseCRF2D(img.shape[1], img.shape[0], num_classes)

        # get unary potentials (neg log probability)
        U = unary_from_labels(labels,
                              num_classes,
                              gt_prob=0.7,
                              zero_unsure=False)
        d.setUnaryEnergy(U)

        # This adds the color-independent term; features are the locations only.
        d.addPairwiseGaussian(sxy=(3, 3),
                              compat=3,
                              kernel=dcrf.DIAG_KERNEL,
                              normalization=dcrf.NORMALIZE_SYMMETRIC)

        # This adds the color-dependent term, i.e. features are (x,y,r,g,b).
        d.addPairwiseBilateral(sxy=(10, 10),
                               srgb=(13, 13, 13),
                               rgbim=img,
                               compat=10,
                               kernel=dcrf.DIAG_KERNEL,
                               normalization=dcrf.NORMALIZE_SYMMETRIC)

    else:
        # print("Using generic 2D functions")

        # Example using the DenseCRF class and the util functions
        d = dcrf.DenseCRF(img.shape[1] * img.shape[0], num_classes)

        # get unary potentials (neg log probability)
        U = unary_from_labels(labels,
                              num_classes,
                              gt_prob=0.7,
                              zero_unsure=HAS_UNK)
        d.setUnaryEnergy(U)

        # This creates the color-independent features and adds them to the CRF.
        feats = create_pairwise_gaussian(sdims=(3, 3), shape=img.shape[:2])
        d.addPairwiseEnergy(feats,
                            compat=3,
                            kernel=dcrf.DIAG_KERNEL,
                            normalization=dcrf.NORMALIZE_SYMMETRIC)

        # This creates the color-dependent features and adds them to the CRF.
        feats = create_pairwise_bilateral(sdims=(10, 10),
                                          schan=(13, 13, 13),
                                          img=img,
                                          chdim=2)
        d.addPairwiseEnergy(feats,
                            compat=10,
                            kernel=dcrf.DIAG_KERNEL,
                            normalization=dcrf.NORMALIZE_SYMMETRIC)

    # Run five inference steps.
    Q = d.inference(5)

    # Find out the most probable class for each pixel.
    MAP = np.argmax(Q, axis=0)

    # Convert the MAP (labels) back to the corresponding colors and save the image.
    # Note that there is no "unknown" here anymore, no matter what we had at first.
    MAP = colorize[MAP, :]

    crfimage = MAP.reshape(img.shape)

    msk = utils.decode_labels(crfimage, num_classes=num_classes)
    parsing_im = Image.fromarray(msk)
    parsing_im.save(fn_output + '_vis.png')
    cv2.imwrite(fn_output + '.png', crfimage[:, :, 0])
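A minimal usage sketch: the function takes the original photo, the matching color-coded annotation produced by the parser, and an output prefix. The paths below are hypothetical.

# Hypothetical paths, for illustration only.
crf('inputs/person.jpg',           # original RGB image
    'outputs/person_parsing.png',  # color-coded label map from the parser
    'outputs/person_crf')          # writes person_crf.png and person_crf_vis.png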
Example No. 3
def process():
    # Flask view; relies on module-level globals (APP_ROOT, request, sess,
    # pred_all, pred_scores, pred_edge, update_op, image_list, the streaming
    # metric ops, coord, and threads).
    target = os.path.join(APP_ROOT)

    # create image directory if not found
    if not os.path.isdir(target):
        os.mkdir(target)

    # retrieve file from html file-picker
    upload = request.files.getlist("image")[0]
    print("File name: {}".format(upload.filename))
    filename = upload.filename

    # file support verification
    ext = os.path.splitext(filename)[1]
    if (ext == ".jpg") or (ext == ".jpeg") or (ext == ".png") or (ext == ".bmp"):
        print("File accepted")
    else:
        return render_template("error.html", message="The selected file is not supported"), 400

    # save file (note: `destination` is built but the upload is written to a
    # fixed temp path instead)
    destination = "/".join([target, filename])
    upload.save("static/images/temp_person_image.jpg")

    im = Image.open("static/images/temp_person_image.jpg")
    if im.mode in ("RGBA", "P"):
        im = im.convert("RGB")

    im.save("datasets/CIHP/images/person.jpg")
    makedataset()

    # Recreate the output directory for the parsing results.
    parsing_dir = 'dataset/parse_cihp'
    if os.path.exists(parsing_dir):
        shutil.rmtree(parsing_dir)
    os.makedirs(parsing_dir)
    # Iterate over evaluation steps.
    for step in range(NUM_STEPS):
        parsing_, scores, edge_, _ = sess.run(
            [pred_all, pred_scores, pred_edge, update_op])
        if step % 100 == 0:
            print('step {:d}'.format(step))
            print(image_list[step])
        img_split = image_list[step].split('/')
        img_id = img_split[-1][:-4]

        # Save a color visualization of the parsing, resized to 192x256.
        msk = decode_labels(parsing_, num_classes=N_CLASSES)
        parsing_im = Image.fromarray(msk[0])
        parsing_im.save('dataset/parse_cihp/person_vis.png')
        im = Image.open('dataset/parse_cihp/person_vis.png')
        new_width = 192
        new_height = 256
        im = im.resize((new_width, new_height), Image.ANTIALIAS)
        im.save('dataset/parse_cihp/person_vis.png')

        # Save the raw label map at the same size.
        cv2.imwrite('dataset/parse_cihp/person.png', parsing_[0, :, :, 0])
        im = Image.open('dataset/parse_cihp/person.png')
        im = im.resize((new_width, new_height), Image.ANTIALIAS)
        im.save('dataset/parse_cihp/person.png')

        #sio.savemat('{}/{}.mat'.format(parsing_dir, img_id), {'data': scores[0,:,:]})

        #cv2.imwrite('dataset/cloth_mask/person_mask.png', edge_[0,:,:,0] * 255)

    res_mIou = mIoU.eval(session=sess)
    res_macc = macc.eval(session=sess)
    res_recall = recall.eval(session=sess)
    res_precision = precision.eval(session=sess)
    f1 = 2 * res_precision * res_recall / (res_precision + res_recall)
    print('Mean IoU: {:.4f}, Mean Acc: {:.4f}'.format(res_mIou, res_macc))
    print('Recall: {:.4f}, Precision: {:.4f}, F1 score: {:.4f}'.format(res_recall, res_precision, f1))

    coord.request_stop()
    coord.join(threads)
    return render_template('processing.html')
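For context, a minimal sketch of how a view like this is typically wired into a Flask app; the route, port, and APP_ROOT definition are assumptions, not part of the example.

import os
from flask import Flask, request, render_template

app = Flask(__name__)
APP_ROOT = os.path.dirname(os.path.abspath(__file__))

@app.route('/process', methods=['POST'])
def process_route():
    # Delegates to the process() view defined above.
    return process()

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)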
Example No. 4
def main():
    """Create the model and start the evaluation process."""

    # Create queue coordinator.
    coord = tf.train.Coordinator()
    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(DATA_DIR, LIST_PATH, DATA_ID_LIST, None, False,
                             False, False, coord)
        image, label, edge_gt = reader.image, reader.label, reader.edge
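        # Horizontally flipped copy (axis 1 is width in HWC layout) for
        # test-time left-right flip augmentation.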
        image_rev = tf.reverse(image, tf.stack([1]))
        image_list = reader.image_list

    image_batch = tf.stack([image, image_rev])
    label_batch = tf.expand_dims(label, dim=0)  # Add one batch dimension.
    edge_gt_batch = tf.expand_dims(edge_gt, dim=0)
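    # Build a multi-scale pyramid of the (original, flipped) pair; the network
    # is evaluated at every scale and the outputs are averaged below.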
    h_orig, w_orig = tf.to_float(tf.shape(image_batch)[1]), tf.to_float(
        tf.shape(image_batch)[2])
    image_batch050 = tf.image.resize_images(
        image_batch,
        tf.stack([
            tf.to_int32(tf.multiply(h_orig, 0.50)),
            tf.to_int32(tf.multiply(w_orig, 0.50))
        ]))
    image_batch075 = tf.image.resize_images(
        image_batch,
        tf.stack([
            tf.to_int32(tf.multiply(h_orig, 0.75)),
            tf.to_int32(tf.multiply(w_orig, 0.75))
        ]))
    image_batch125 = tf.image.resize_images(
        image_batch,
        tf.stack([
            tf.to_int32(tf.multiply(h_orig, 1.25)),
            tf.to_int32(tf.multiply(w_orig, 1.25))
        ]))
    image_batch150 = tf.image.resize_images(
        image_batch,
        tf.stack([
            tf.to_int32(tf.multiply(h_orig, 1.50)),
            tf.to_int32(tf.multiply(w_orig, 1.50))
        ]))
    image_batch175 = tf.image.resize_images(
        image_batch,
        tf.stack([
            tf.to_int32(tf.multiply(h_orig, 1.75)),
            tf.to_int32(tf.multiply(w_orig, 1.75))
        ]))

    # Create network.
    with tf.variable_scope('', reuse=False):
        net_100 = PGNModel({'data': image_batch},
                           is_training=False,
                           n_classes=N_CLASSES)
    with tf.variable_scope('', reuse=True):
        net_050 = PGNModel({'data': image_batch050},
                           is_training=False,
                           n_classes=N_CLASSES)
    with tf.variable_scope('', reuse=True):
        net_075 = PGNModel({'data': image_batch075},
                           is_training=False,
                           n_classes=N_CLASSES)
    with tf.variable_scope('', reuse=True):
        net_125 = PGNModel({'data': image_batch125},
                           is_training=False,
                           n_classes=N_CLASSES)
    with tf.variable_scope('', reuse=True):
        net_150 = PGNModel({'data': image_batch150},
                           is_training=False,
                           n_classes=N_CLASSES)
    with tf.variable_scope('', reuse=True):
        net_175 = PGNModel({'data': image_batch175},
                           is_training=False,
                           n_classes=N_CLASSES)
    # parsing net

    parsing_out1_050 = net_050.layers['parsing_fc']
    parsing_out1_075 = net_075.layers['parsing_fc']
    parsing_out1_100 = net_100.layers['parsing_fc']
    parsing_out1_125 = net_125.layers['parsing_fc']
    parsing_out1_150 = net_150.layers['parsing_fc']
    parsing_out1_175 = net_175.layers['parsing_fc']

    parsing_out2_050 = net_050.layers['parsing_rf_fc']
    parsing_out2_075 = net_075.layers['parsing_rf_fc']
    parsing_out2_100 = net_100.layers['parsing_rf_fc']
    parsing_out2_125 = net_125.layers['parsing_rf_fc']
    parsing_out2_150 = net_150.layers['parsing_rf_fc']
    parsing_out2_175 = net_175.layers['parsing_rf_fc']

    # edge net
    edge_out2_100 = net_100.layers['edge_rf_fc']
    edge_out2_125 = net_125.layers['edge_rf_fc']
    edge_out2_150 = net_150.layers['edge_rf_fc']
    edge_out2_175 = net_175.layers['edge_rf_fc']

    # Resize every scale back to the input resolution and average.
    output_size = tf.shape(image_batch)[1:3]
    parsing_out1 = tf.reduce_mean(tf.stack([
        tf.image.resize_images(parsing_out1_050, output_size),
        tf.image.resize_images(parsing_out1_075, output_size),
        tf.image.resize_images(parsing_out1_100, output_size),
        tf.image.resize_images(parsing_out1_125, output_size),
        tf.image.resize_images(parsing_out1_150, output_size),
        tf.image.resize_images(parsing_out1_175, output_size)
    ]), axis=0)

    parsing_out2 = tf.reduce_mean(tf.stack([
        tf.image.resize_images(parsing_out2_050, output_size),
        tf.image.resize_images(parsing_out2_075, output_size),
        tf.image.resize_images(parsing_out2_100, output_size),
        tf.image.resize_images(parsing_out2_125, output_size),
        tf.image.resize_images(parsing_out2_150, output_size),
        tf.image.resize_images(parsing_out2_175, output_size)
    ]), axis=0)

    edge_out2_100 = tf.image.resize_images(edge_out2_100, output_size)
    edge_out2_125 = tf.image.resize_images(edge_out2_125, output_size)
    edge_out2_150 = tf.image.resize_images(edge_out2_150, output_size)
    edge_out2_175 = tf.image.resize_images(edge_out2_175, output_size)
    edge_out2 = tf.reduce_mean(
        tf.stack([edge_out2_100, edge_out2_125, edge_out2_150, edge_out2_175]),
        axis=0)

    # Fuse the two parsing heads, then average the original prediction with
    # the un-flipped prediction from the mirrored input.
    raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2]), axis=0)
    head_output, tail_output = tf.unstack(raw_output, num=2, axis=0)
    tail_list = tf.unstack(tail_output, num=20, axis=2)
    # Undo the horizontal flip for the mirrored branch: the left/right paired
    # part channels must be swapped before the spatial flip is reversed.
    tail_list_rev = [None] * 20
    for xx in range(14):
        tail_list_rev[xx] = tail_list[xx]
    tail_list_rev[14] = tail_list[15]
    tail_list_rev[15] = tail_list[14]
    tail_list_rev[16] = tail_list[17]
    tail_list_rev[17] = tail_list[16]
    tail_list_rev[18] = tail_list[19]
    tail_list_rev[19] = tail_list[18]
    tail_output_rev = tf.stack(tail_list_rev, axis=2)
    tail_output_rev = tf.reverse(tail_output_rev, tf.stack([1]))

    raw_output_all = tf.reduce_mean(tf.stack([head_output, tail_output_rev]),
                                    axis=0)
    raw_output_all = tf.expand_dims(raw_output_all, dim=0)
    pred_scores = tf.reduce_max(raw_output_all, axis=3)
    raw_output_all = tf.argmax(raw_output_all, axis=3)
    pred_all = tf.expand_dims(raw_output_all, dim=3)  # Create 4-d tensor.

    raw_edge = tf.reduce_mean(tf.stack([edge_out2]), axis=0)  # single-element stack: effectively a pass-through
    head_output, tail_output = tf.unstack(raw_edge, num=2, axis=0)
    tail_output_rev = tf.reverse(tail_output, tf.stack([1]))
    raw_edge_all = tf.reduce_mean(tf.stack([head_output, tail_output_rev]),
                                  axis=0)
    raw_edge_all = tf.expand_dims(raw_edge_all, dim=0)
    pred_edge = tf.sigmoid(raw_edge_all)
    res_edge = tf.cast(tf.greater(pred_edge, 0.5), tf.int32)

    # prepare ground truth
    preds = tf.reshape(pred_all, [-1])
    gt = tf.reshape(label_batch, [-1])
    weights = tf.cast(
        tf.less_equal(gt, N_CLASSES - 1),
        tf.int32)  # Ignoring all labels greater than or equal to n_classes.
    mIoU, update_op_iou = tf.contrib.metrics.streaming_mean_iou(
        preds, gt, num_classes=N_CLASSES, weights=weights)
    macc, update_op_acc = tf.contrib.metrics.streaming_accuracy(
        preds, gt, weights=weights)

    # precision and recall
    recall, update_op_recall = tf.contrib.metrics.streaming_recall(
        res_edge, edge_gt_batch)
    precision, update_op_precision = tf.contrib.metrics.streaming_precision(
        res_edge, edge_gt_batch)

    update_op = tf.group(update_op_iou, update_op_acc, update_op_recall,
                         update_op_precision)

    # Which variables to load.
    restore_var = tf.global_variables()
    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)
    sess.run(tf.local_variables_initializer())

    # Load weights.
    loader = tf.train.Saver(var_list=restore_var)
    if RESTORE_FROM is not None:
        if load(loader, sess, RESTORE_FROM):
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed...")

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Recreate the output directory for the parsing results.
    parsing_dir = 'dataset/parse_cihp'
    if os.path.exists(parsing_dir):
        shutil.rmtree(parsing_dir)
    os.makedirs(parsing_dir)

    # Iterate over evaluation steps.
    for step in range(NUM_STEPS):
        parsing_, scores, edge_, _ = sess.run(
            [pred_all, pred_scores, pred_edge, update_op])
        if step % 100 == 0:
            print('step {:d}'.format(step))
            print(image_list[step])
        img_split = image_list[step].split('/')
        img_id = img_split[-1][:-4]

        msk = decode_labels(parsing_, num_classes=N_CLASSES)
        parsing_im = Image.fromarray(msk[0])
        parsing_im.save('dataset/parse_cihp/person_vis.png')
        im = Image.open('dataset/parse_cihp/person_vis.png')
        new_width = 192
        new_height = 256
        im = im.resize((new_width, new_height), Image.ANTIALIAS)
        im.save('dataset/parse_cihp/person_vis.png')

        cv2.imwrite('dataset/parse_cihp/person.png', parsing_[0, :, :, 0])
        im = Image.open('dataset/parse_cihp/person.png')
        im = im.resize((new_width, new_height), Image.ANTIALIAS)
        im.save('dataset/parse_cihp/person.png')

        #sio.savemat('{}/{}.mat'.format(parsing_dir, img_id), {'data': scores[0,:,:]})

        #cv2.imwrite('dataset/cloth_mask/person_mask.png', edge_[0,:,:,0] * 255)

    res_mIou = mIoU.eval(session=sess)
    res_macc = macc.eval(session=sess)
    res_recall = recall.eval(session=sess)
    res_precision = precision.eval(session=sess)
    f1 = 2 * res_precision * res_recall / (res_precision + res_recall)
    print('Mean IoU: {:.4f}, Mean Acc: {:.4f}'.format(res_mIou, res_macc))
    print('Recall: {:.4f}, Precision: {:.4f}, F1 score: {:.4f}'.format(
        res_recall, res_precision, f1))

    coord.request_stop()
    coord.join(threads)
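The script calls a load helper that is not shown. Below is a minimal sketch following the usual TF1 Saver-restore pattern; the real helper in the source repository may differ.

def load(saver, sess, ckpt_path):
    """Sketch: restore weights from ckpt_path and return True on success."""
    ckpt = tf.train.get_checkpoint_state(ckpt_path)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Restored model parameters from {}".format(
            ckpt.model_checkpoint_path))
        return True
    return False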