Example #1
    def forward(self, x, target=None):
        # backbone
        _, _, C_5 = self.backbone(x)
        # _, _, C_5 = backbone(images)

        # head
        C_5 = self.SPP(C_5)
        C_5 = self.SAM(C_5)
        C_5 = self.conv_set(C_5)

        """
        SPP = SPP.cuda()
        SAM = SAM.cuda()
        conv_set = conv_set.cuda()
        C_5 = SPP(C_5)
        C_5 = SAM(C_5)
        C_5 = conv_set(C_5)
        
        """

        # pred
        prediction = self.pred(C_5)
        # prediction = pred(C_5)
        prediction = prediction.view(C_5.size(0), 1 + self.num_classes + 4, -1).permute(0, 2, 1)
        # prediction = prediction.view(C_5.size(0), 1 + 20 + 4, -1).permute(0, 2, 1)
        B, HW, C = prediction.size()

        # Divide the prediction into conf_pred, cls_pred and txtytwth_pred
        # [B, H*W, 1]
        conf_pred = prediction[:, :, :1]
        # [B, H*W, num_cls]
        cls_pred = prediction[:, :, 1 : 1 + self.num_classes]
        # cls_pred = prediction[:, :, 1: 1 + 20]
        # [B, H*W, 4]
        txtytwth_pred = prediction[:, :, 1 + self.num_classes:]
        # txtytwth_pred = prediction[:, :, 1 + 20:]


        # test
        if not self.trainable:
            with torch.no_grad():
                # batch size = 1
                all_conf = torch.sigmoid(conf_pred)[0]           # index 0: batch size is 1 at test time.
                all_bbox = torch.clamp((self.decode_boxes(txtytwth_pred) / self.scale_torch)[0], 0., 1.)
                all_class = (torch.softmax(cls_pred[0, :, :], 1) * all_conf)
                
                # separate box pred and class conf
                all_conf = all_conf.to('cpu').numpy()
                all_class = all_class.to('cpu').numpy()
                all_bbox = all_bbox.to('cpu').numpy()
                
                bboxes, scores, cls_inds = self.postprocess(all_bbox, all_class)

                return bboxes, scores, cls_inds
        else:
            conf_loss, cls_loss, txtytwth_loss, total_loss = tools.loss(pred_conf=conf_pred, pred_cls=cls_pred,
                                                                        pred_txtytwth=txtytwth_pred,
                                                                        label=target)

            return conf_loss, cls_loss, txtytwth_loss, total_loss
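
The tools.loss called in this example is not shown on this page. Below is a minimal sketch of a function with the same signature and return values, assuming a label layout of [objectness, class index, tx, ty, tw, th] per cell and at least one positive cell per batch; the real repository's masking and weighting scheme may differ.

import torch
import torch.nn.functional as F

def loss(pred_conf, pred_cls, pred_txtytwth, label):
    # assumed label layout per cell: [objectness, class index, tx, ty, tw, th]
    gt_obj = label[:, :, 0]                    # [B, H*W]
    gt_cls = label[:, :, 1].long()             # [B, H*W]
    gt_box = label[:, :, 2:6]                  # [B, H*W, 4]
    pos = gt_obj > 0.                          # positive cells

    # objectness: binary cross-entropy on the raw confidence logits
    conf_loss = F.binary_cross_entropy_with_logits(pred_conf[..., 0], gt_obj)

    # classification and box regression only on positive cells
    cls_loss = F.cross_entropy(pred_cls[pos], gt_cls[pos])
    txtytwth_loss = F.mse_loss(pred_txtytwth[pos], gt_box[pos])

    total_loss = conf_loss + cls_loss + txtytwth_loss
    return conf_loss, cls_loss, txtytwth_loss, total_loss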
Example #2
def train_running():
    with tf.name_scope('input'):

        train_batch, train_label_batch, _ = input_data.get_batch(train_txt, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
        val_batch, val_label_batch, _ = input_data.get_batch(val_txt, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

    x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    y_ = tf.placeholder(tf.int32, shape=[BATCH_SIZE])

    model = models.model(x, N_CLASSES)
    model.AlexNet()
    logits = model.fc3

    loss = tools.loss(logits, y_)
    acc = tools.accuracy(logits, y_)
    train_op = tools.optimize(loss, LEARNING_RATE)

    with tf.Session() as sess:
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        summary_op = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
        val_writer = tf.summary.FileWriter(logs_val_dir, sess.graph)

        try:
            for step in np.arange(MAX_STEP):
                if coord.should_stop():
                    break

                tra_images, tra_labels = sess.run([train_batch, train_label_batch])
                _, tra_loss, tra_acc = sess.run([train_op, loss, acc],
                                                feed_dict={x: tra_images, y_: tra_labels})

                if step % 50 == 0:
                    print('Step %d, train loss = %.4f, train accuracy = %.2f%%' % (step, tra_loss, tra_acc))
                    summary_str = sess.run(summary_op, feed_dict={x: tra_images, y_: tra_labels})
                    train_writer.add_summary(summary_str, step)
                #
                if step % 200 == 0 or (step + 1) == MAX_STEP:
                    val_images, val_labels = sess.run([val_batch, val_label_batch])
                    val_loss, val_acc = sess.run([loss, acc],
                                                 feed_dict={x: val_images, y_: val_labels})

                    print('**  Step %d, val loss = %.4f, val accuracy = %.2f%%  **' % (step, val_loss, val_acc))
                    summary_str = sess.run(summary_op, feed_dict={x: val_images, y_: val_labels})
                    val_writer.add_summary(summary_str, step)
                    #
                if step % 2000 == 0 or (step + 1) == MAX_STEP:
                    checkpoint_path = os.path.join(model_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)

        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()
        coord.join(threads)
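
Here tools.loss and tools.accuracy are used with integer class labels (y_ is an int32 vector of shape [BATCH_SIZE]). A hedged sketch of that pair under this assumption, using sparse softmax cross-entropy and in-top-k accuracy scaled to percent to match the print format; the real tools module may differ.

import tensorflow as tf

def loss(logits, labels):
    with tf.name_scope('loss'):
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels)
        loss = tf.reduce_mean(cross_entropy, name='loss')
        tf.summary.scalar('loss', loss)
        return loss

def accuracy(logits, labels):
    with tf.name_scope('accuracy'):
        correct = tf.nn.in_top_k(logits, labels, 1)
        acc = tf.reduce_mean(tf.cast(correct, tf.float32)) * 100.0
        tf.summary.scalar('accuracy', acc)
        return acc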
Example #3
def run(batch_size=300, learning_rate=0.01):
    #region create network
    data, label = CifarInput.read_cifar10(
        r"C:\Projects\Programming\CsDiscount\cifar-10-binary", True,
        batch_size, True)
    logit = CapsNet.CapsNet(data, batch_size)
    reconstruction = tools.decoder(logit)
    reconstruction_p = tf.placeholder(dtype=tf.float32,
                                      shape=[batch_size, 32, 32, 3])
    print("Network Created")
    #endregion

    #region create optimizer
    global_step = tf.Variable(0, trainable=False, name="global_step")
    loss = tools.loss(logit, label, data, reconstruction_p, batch_size)
    accuracy = tools.accuracy(logit, label)
    train_op = tools.optimize(loss, learning_rate, global_step)
    print("Optimizer Created")
    #endregion

    #region create sessions, queues and savers
    sess = tf.Session()
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()
    train_summary_writer = tf.summary.FileWriter(train_log_dir)
    sess.run(init)
    print("Sessions, Queues and Savers Created")
    #endregion

    for x in range(1000):
        print(x)
        reconstruction_run = sess.run(reconstruction)
        sess.run(train_op, feed_dict={reconstruction_p: reconstruction_run})
        if x % 5 == 0:
            mainwindow.newimg(reconstruction_run[0])

        if x % 100 == 0:
            print(sess.run(accuracy))
            checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
            saver.save(sess, save_path=checkpoint_path, global_step=x)
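
This example feeds the decoder output back through a placeholder so that tools.loss can combine a margin loss on the capsule logits with a reconstruction term. A sketch of such a loss, assuming integer labels and the margin constants from the original CapsNet paper; the repository's version may differ.

import tensorflow as tf

def loss(logit, label, data, reconstruction, batch_size,
         m_plus=0.9, m_minus=0.1, lam=0.5, recon_weight=0.0005):
    # margin loss on the capsule lengths (logit assumed [batch, n_class])
    t = tf.one_hot(label, depth=logit.get_shape().as_list()[-1])
    margin = t * tf.square(tf.maximum(0., m_plus - logit)) + \
             lam * (1. - t) * tf.square(tf.maximum(0., logit - m_minus))
    margin_loss = tf.reduce_mean(tf.reduce_sum(margin, axis=1))

    # reconstruction regularizer: MSE between the decoder output and the input
    recon_loss = tf.reduce_sum(tf.square(reconstruction - data)) / batch_size
    return margin_loss + recon_weight * recon_loss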
Example #4
    def forward(self, x, target=None):
        # backbone
        _, _, C_5 = self.backbone(x)

        # pred
        prediction = self.pred(C_5)

        # reshape the [B, C, H, W] prediction into [B, H*W, C]
        prediction = prediction.view(C_5.size(0), 1 + self.num_classes + 4, -1).permute(0, 2, 1)
        B, HW, C = prediction.size()

        # [B, H*W, 1]
        conf_pred = prediction[:, :, :1]
        # [B, H*W, num_cls]
        cls_pred = prediction[:, :, 1: 1 + self.num_classes]
        # [B, H*W, 4]
        txtytwth_pred = prediction[:, :, 1 + self.num_classes:]

        # test
        if not self.trainable:
            with torch.no_grad():
                # batch size = 1
                all_conf = torch.sigmoid(conf_pred)[0]
                # all values must lie in [0, 1]
                all_bbox = torch.clamp((self.decode_boxes(txtytwth_pred) / self.scale_torch)[0], 0., 1.)

                all_class = (torch.softmax(cls_pred[0, :, :], 1) * all_conf)

                # separate box pred and class conf
                all_conf = all_conf.to('cpu').numpy()
                all_class = all_class.to('cpu').numpy()
                all_bbox = all_bbox.to('cpu').numpy()

                bboxes, scores, cls_inds = self.postprocess(all_bbox, all_class)

                return bboxes, scores, cls_inds
        else:
            conf_loss, cls_loss, txtytwth_loss, total_loss = tools.loss(pred_conf=conf_pred, pred_cls=cls_pred,
                                                                        pred_txtytwth=txtytwth_pred,
                                                                        label=target)

            return conf_loss, cls_loss, txtytwth_loss, total_loss
Example #5
    def forward(self, x, target=None):
        # backbone
        C4, C5 = self.backbone(x)

        # head
        C5 = self.convsets_1(C5)

        # route from 16th layer in darknet
        C4 = self.reorg(self.route_layer(C4))

        # route concatenate
        C5 = torch.cat([C4, C5], dim=1)
        C5 = self.convsets_2(C5)
        prediction = self.pred_(C5)

        B, abC, H, W = prediction.size()

        # [B, anchor_n * C, H, W] -> [B, H, W, anchor_n * C] -> [B, H*W, anchor_n * C]
        prediction = prediction.permute(0, 2, 3, 1).contiguous().view(B, H*W, abC)

        # Divide the prediction into conf_pred, cls_pred and txtytwth_pred
        # [B, H*W*anchor_n, 1]
        conf_pred = prediction[:, :, :1 * self.anchor_number].contiguous().view(B, H*W*self.anchor_number, 1)
        # [B, H*W*anchor_n, num_cls]
        cls_pred = prediction[:, :, 1 * self.anchor_number : (1 + self.num_classes) * self.anchor_number].contiguous().view(B, H*W*self.anchor_number, self.num_classes)
        # [B, H*W, anchor_n * 4]
        txtytwth_pred = prediction[:, :, (1 + self.num_classes) * self.anchor_number:].contiguous()
        
        # test
        if not self.trainable:
            txtytwth_pred = txtytwth_pred.view(B, H*W, self.anchor_number, 4)
            with torch.no_grad():
                # batch size = 1                
                all_obj = torch.sigmoid(conf_pred)[0]           # index 0: batch size is 1 at test time.
                all_bbox = torch.clamp((self.decode_boxes(txtytwth_pred) / self.scale_torch)[0], 0., 1.)
                all_class = (torch.softmax(cls_pred[0, :, :], 1) * all_obj)
                # separate box pred and class conf
                all_obj = all_obj.to('cpu').numpy()
                all_class = all_class.to('cpu').numpy()
                all_bbox = all_bbox.to('cpu').numpy()

                bboxes, scores, cls_inds = self.postprocess(all_bbox, all_class)

                return bboxes, scores, cls_inds

        else:
            txtytwth_pred = txtytwth_pred.view(B, H*W, self.anchor_number, 4)
            # decode bbox, and remember to cancel its grad since we set iou as the label of objectness.
            with torch.no_grad():
                x1y1x2y2_pred = (self.decode_boxes(txtytwth_pred) / self.scale_torch).view(-1, 4)

            txtytwth_pred = txtytwth_pred.view(B, H*W*self.anchor_number, 4)

            x1y1x2y2_gt = target[:, :, 7:].view(-1, 4)

            # compute iou
            iou = tools.iou_score(x1y1x2y2_pred, x1y1x2y2_gt).view(B, H*W*self.anchor_number, 1)
            # print(iou.min(), iou.max())

            # we set iou between pred bbox and gt bbox as conf label. 
            # [obj, cls, txtytwth, x1y1x2y2] -> [conf, obj, cls, txtytwth]
            target = torch.cat([iou, target[:, :, :7]], dim=2)

            conf_loss, cls_loss, txtytwth_loss, total_loss = tools.loss(pred_conf=conf_pred, pred_cls=cls_pred,
                                                                        pred_txtytwth=txtytwth_pred,
                                                                        label=target,
                                                                        num_classes=self.num_classes)

            return conf_loss, cls_loss, txtytwth_loss, total_loss
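
tools.iou_score above takes flattened [N, 4] boxes in x1y1x2y2 format and returns a per-box IoU. A minimal sketch consistent with that call; the repository version may broadcast differently.

import torch

def iou_score(bboxes_a, bboxes_b):
    # intersection corners
    tl = torch.max(bboxes_a[:, :2], bboxes_b[:, :2])   # [N, 2]
    br = torch.min(bboxes_a[:, 2:], bboxes_b[:, 2:])   # [N, 2]

    area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], dim=1)
    area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], dim=1)

    en = (tl < br).type(tl.type()).prod(dim=1)         # 1 where the boxes overlap
    area_i = torch.prod(br - tl, dim=1) * en
    return area_i / (area_a + area_b - area_i + 1e-14)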
Example #6
def loss(xs, xt, ys):
    
    def mean_sigmoid_cross_entropy_with_logits(logit, truth):
            '''
            truth: 0. or 1.
            '''
            return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logit,
                                                                          labels=truth * tf.ones_like(logit)))
        
    #source domain
    with tf.variable_scope("Encoder"):
        z_mu, z_lv, y_predict = Encoder(xs, z_dim=256,n_class=10)

    z = tools.GaussianSampleLayer(z_mu, z_lv)
    # direct sample
    z_direct = tf.random_normal(shape=tf.shape(z_mu))

    with tf.variable_scope("Generator"):
        xz = Generator(z_direct)
    with tf.variable_scope("Discriminator"):
        logit_fake_xz,_ = Discriminator(xz)
    #
    with tf.variable_scope("Generator", reuse=True):
        xh = Generator(z)
    with tf.variable_scope("Discriminator", reuse=True):
        logit_true, x_through_D = Discriminator(xs, reuse=True)
    with tf.variable_scope("Discriminator", reuse=True):
        logit_fake, xh_through_D = Discriminator(xh, reuse=True)
    
    # target domain
    with tf.variable_scope("Encoder", reuse=True):
        z_mu_t,_, yt_predict = Encoder(xt, z_dim=256,n_class=10)
    with tf.variable_scope("Generator", reuse=True):
        xh_t = Generator(z_mu_t, reuse=True)
    with tf.variable_scope("Discriminator", reuse=True):
        logit_fake_t, _ = Discriminator(xh_t, reuse=True)
    
    loss = dict()
    loss['D_real'] = \
                mean_sigmoid_cross_entropy_with_logits(logit_true, 1.)
    loss['D_fake'] = 0.5 * (
                mean_sigmoid_cross_entropy_with_logits(logit_fake, 0.) +\
                mean_sigmoid_cross_entropy_with_logits(logit_fake_xz, 0.))
    loss['G_fake'] = 0.5 * (
                mean_sigmoid_cross_entropy_with_logits(logit_fake, 1.) +\
                mean_sigmoid_cross_entropy_with_logits(logit_fake_xz, 1.))
    loss['KL(z)'] = tf.reduce_mean(
                    tools.GaussianKLD(
                        z_mu, z_lv,
                        tf.zeros_like(z_mu), tf.zeros_like(z_lv)))
    
    loss['Dis'] = - tf.reduce_mean(
                    tools.GaussianLogDensity(
                        x_through_D,
                        xh_through_D,
                        tf.zeros_like(xh_through_D)))
    ys = tf.cast(ys, tf.float32)
    y_predict= tf.cast(y_predict, tf.float32)
    loss['C_predict'] = tools.loss(ys,y_predict )
    loss['MMD'] = tf.abs(tf.reduce_mean(tf.abs(z_mu-z_mu_t)))
    loss['D_fake_t'] = mean_sigmoid_cross_entropy_with_logits(logit_fake_t, 0.)
    
    return loss, yt_predict
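
The Gaussian helpers called here (tools.GaussianSampleLayer, tools.GaussianKLD, tools.GaussianLogDensity) follow the standard VAE formulation with a log-variance parameterization. Hedged sketches under that assumption; the bodies are not from this page.

import tensorflow as tf

EPSILON = 1e-6

def GaussianSampleLayer(z_mu, z_lv):
    # reparameterization trick: z = mu + sigma * eps
    eps = tf.random_normal(tf.shape(z_mu))
    return z_mu + tf.exp(0.5 * z_lv) * eps

def GaussianKLD(mu1, lv1, mu2, lv2):
    # KL(N(mu1, v1) || N(mu2, v2)), summed over feature dimensions
    v1, v2 = tf.exp(lv1), tf.exp(lv2)
    kld = 0.5 * (lv2 - lv1 + (v1 + tf.square(mu1 - mu2)) / v2 - 1.)
    return tf.reduce_sum(kld, axis=-1)

def GaussianLogDensity(x, mu, log_var):
    # log N(x; mu, exp(log_var)), summed over feature dimensions
    c = tf.log(2. * 3.141592653589793)
    var = tf.exp(log_var)
    log_prob = -0.5 * (c + log_var + tf.square(x - mu) / (var + EPSILON))
    return tf.reduce_sum(log_prob, axis=-1)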
Example #7
def mytrain():
    # pre_trained_weights = './VGG16_pretrain/vgg16.npy'
    data_dir = '/content/data/'
    train_log_dir = './logs2/train/'
    val_log_dir = './logs2/val/'

    with tf.name_scope('input'):
        train_image_batch, train_label_batch = input_data.read_cifar10(
            data_dir, is_train=True, batch_size=BATCH_SIZE, shuffle=True)

        val_image_batch, val_label_batch = input_data.read_cifar10(
            data_dir, is_train=False, batch_size=BATCH_SIZE, shuffle=False)

    x = tf.placeholder(dtype=tf.float32, shape=[BATCH_SIZE, IMG_H, IMG_W, 3])
    y_ = tf.placeholder(dtype=tf.int32, shape=[BATCH_SIZE, N_CLASSES])

    # build the graph on the placeholders so both train and val batches can be fed
    logits = VGG.Myvgg(x, N_CLASSES, IS_PRETRAIN)
    loss = tools.loss(logits, y_)
    accuracy = tools.accuracy(logits, y_)
    my_global_step = tf.Variable(0, trainable=False, name='global_step')
    train_op = tools.optimize(loss, learning_rate, my_global_step)

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    # load pretrain weights
    # tools.load_with_skip(pre_trained_weights, sess, ['fc6', 'fc7', 'fc8'])

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    train_summary_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
    val_summary_writer = tf.summary.FileWriter(val_log_dir, sess.graph)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break

            train_images, train_labels = sess.run(
                [train_image_batch, train_label_batch])
            #print(train_images.shape,train_labels)
            _, train_loss, train_accuracy = sess.run(
                [train_op, loss, accuracy],
                feed_dict={
                    x: train_images,
                    y_: train_labels
                })

            if step % 128 == 0 or (step + 1) == MAX_STEP:
                print("Step: %d, loss: %.8f, accuracy: %.4f%%" %
                      (step, train_loss, train_accuracy))

                summary_str = sess.run(summary_op, feed_dict={x: train_images, y_: train_labels})
                train_summary_writer.add_summary(summary_str, step)

            if step % 128 == 0 or (step + 1) == MAX_STEP:
                val_images, val_labels = sess.run(
                    [val_image_batch, val_label_batch])
                val_loss, val_accuracy = sess.run([loss, accuracy],
                                                  feed_dict={
                                                      x: val_images,
                                                      y_: val_labels
                                                  })
                print("** Step: %d, loss: %.8f, test_accuracy: %.4f%%" %
                      (step, val_loss, val_accuracy))
                summary_str = sess.run(summary_op, feed_dict={x: val_images, y_: val_labels})
                val_summary_writer.add_summary(summary_str, step)

            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, save_path=checkpoint_path, global_step=step)

    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()

    coord.join(threads)
    sess.close()
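
tools.optimize appears throughout these examples in both a two-argument and a three-argument form. A minimal sketch follows; the concrete optimizer is an assumption, since only the call sites are visible on this page.

import tensorflow as tf

def optimize(loss, learning_rate, global_step=None):
    with tf.name_scope('optimizer'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_op = optimizer.minimize(loss, global_step=global_step)
        return train_op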
Example #8
def test(test_dir, checkpoint_dir='./checkpoint/'):
    import json
    # predict the result
    test_images = os.listdir(test_dir)
    features = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    labels = tf.placeholder(tf.int16, shape=[BATCH_SIZE, N_CLASSES])
    # one_hot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=80)
    # train_step, cross_entropy, logits, keep_prob = network.inference(features, one_hot_labels)
    resnet = ResNet.ResNet()
    _, logits = resnet.build(features, N_CLASSES, last_layer_type="softmax")
    loss = tools.loss(logits, labels)
    accuracy = tools.accuracy(logits, labels)
    my_global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = tools.optimize(loss, learning_rate, my_global_step)
    values, indices = tf.nn.top_k(logits, 3)

    keep_prob = tf.placeholder(tf.float32)

    with tf.Session() as sess:
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print('Restore the model from checkpoint %s' %
                  ckpt.model_checkpoint_path)
            # Restores from checkpoint
            saver.restore(sess, ckpt.model_checkpoint_path)
            start_step = int(
                ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            raise Exception('no checkpoint found')

        result = []
        test_imglist = []
        for test_image in test_images:
            test_imgpath = os.path.join(test_dir, test_image)
            test_imglist.append(test_imgpath)
        image = tf.cast(test_imglist, tf.string)

        # make a input queue
        input_queue = tf.train.slice_input_producer([image])

        image_contents = tf.read_file(input_queue[0])
        image = tf.image.decode_jpeg(image_contents, channels=3)

        #################################################
        # data augmentation should go here
        #################################################
        image = tf.image.resize_image_with_crop_or_pad(image, IMG_W, IMG_H)

        image = tf.image.per_image_standardization(image)
        #    image_batch, label_batch = tf.train.batch([image, label],
        #                                              batch_size=batch_size,
        #                                              num_threads=64,
        #                                              capacity=capacity)

        image_batch = tf.train.shuffle_batch([image],
                                             batch_size=1,
                                             num_threads=64,
                                             capacity=CAPACITY,
                                             min_after_dequeue=200)
        image_batch = tf.cast(image_batch, tf.float32)

        # start the queue runners, otherwise running the batch op below blocks forever
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        img = sess.run([image_batch])

        for i in range(len(img)):
            x = img[i]

            temp_dict = {}
            # x = scene_input.img_resize(os.path.join(test_dir, test_image), IMG_W)

            # image_batch already carries a batch dimension, so feed x directly
            predictions = np.squeeze(sess.run(indices,
                                              feed_dict={
                                                  features: x,
                                                  keep_prob: 1
                                              }),
                                     axis=0)
            temp_dict['image_id'] = test_image
            temp_dict['label_id'] = predictions.tolist()
            result.append(temp_dict)
            print('image %s is %d,%d,%d' %
                  (test_image, predictions[0], predictions[1], predictions[2]))

        with open('submit.json', 'w') as f:
            json.dump(result, f)
            print('write result json, num is %d' % len(result))
Example #9
    with tf.name_scope('weight4'):
        weight4 = tools.variable_with_weight_loss(shape=[384, 192], stddev=0.04, w1=0.004)
    with tf.name_scope('bias4'):
        bias4 = tf.Variable(tf.constant(0.1, shape=[192]))
    local4 = tf.nn.relu(tf.matmul(local3, weight4) + bias4)
with tf.name_scope('inference'):
    with tf.name_scope('weight5'):
        weight5 = tools.variable_with_weight_loss(shape=[192, 10], stddev=1/192.0, w1=0.0)
    with tf.name_scope('bias5'):
        bias5 = tf.Variable(tf.constant(0.0, shape=[10]))
    logits = tf.add(tf.matmul(local4, weight5), bias5)


with tf.name_scope('loss_func'):
    # compute the total loss
    loss = tools.loss(logits, label_holder)
    tf.summary.scalar('loss', loss)

with tf.name_scope('train_step'):
    # use the Adam optimizer; the learning rate is fixed here, feel free to try decay
    # train_op = tf.train.AdamOptimizer(1e-3).minimize(loss, global_step=step)
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss, global_step=step)
    top_k_op = tf.nn.in_top_k(logits, label_holder, 1)

# create the session
sess = tf.InteractiveSession()
# initialize the variables
tf.global_variables_initializer().run()
# merge all the summaries
merged = tf.summary.merge_all()
# write the log files to LOG_DIR
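
The fragment above relies on tools.variable_with_weight_loss, which pairs with a tools.loss that sums cross-entropy with collected L2 penalties; the pattern matches the classic TensorFlow CIFAR-10 tutorial, so a sketch along those lines (the repository's actual bodies are assumptions):

import tensorflow as tf

def variable_with_weight_loss(shape, stddev, w1):
    # variable initialized from a truncated normal, with an optional L2 "weight loss"
    var = tf.Variable(tf.truncated_normal(shape, stddev=stddev))
    if w1 is not None and w1 > 0:
        weight_loss = tf.multiply(tf.nn.l2_loss(var), w1, name='weight_loss')
        tf.add_to_collection('losses', weight_loss)
    return var

def loss(logits, labels):
    labels = tf.cast(labels, tf.int64)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)
    # total loss = data loss + all collected L2 weight losses
    return tf.add_n(tf.get_collection('losses'), name='total_loss')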
Example #10
def train(net, device):
    global cfg, hr
    # set GPU

    use_focal = False
    if args.use_focal == 1:
        print("Let's use focal loss for objectness !!!")
        use_focal = True

    if args.multi_scale == 1:
        print('Let us use the multi-scale trick.')
        ms_inds = range(len(cfg['multi_scale']))
        dataset = COCODataset(data_dir=data_dir,
                              img_size=608,
                              transform=SSDAugmentation([608, 608], MEANS),
                              debug=args.debug)
    else:
        dataset = COCODataset(data_dir=data_dir,
                              img_size=cfg['min_dim'][0],
                              transform=SSDAugmentation(cfg['min_dim'], MEANS),
                              debug=args.debug)

    print("Setting Arguments.. : ", args)
    print("----------------------------------------------------------")
    print('Loading the MSCOCO dataset...')
    print('Training model on:', dataset.name)
    print('The dataset size:', len(dataset))
    print('The obj weight : ', args.obj)
    print('The noobj weight : ', args.noobj)
    print("----------------------------------------------------------")

    input_size = cfg['min_dim']
    num_classes = args.num_classes
    batch_size = args.batch_size
    save_folder = args.save_folder

    if not os.path.exists(save_folder):
        os.mkdir(save_folder)

    # using tfboard
    from tensorboardX import SummaryWriter
    c_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    log_path = 'log/yolo_v2/coco/' + c_time
    if not os.path.exists(log_path):
        os.mkdir(log_path)

    writer = SummaryWriter(log_path)

    if args.high_resolution == 1:
        hr = True

    print('Let us train yolo-v2 on the MSCOCO dataset ......')

    model = net
    model.to(device).train()

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             collate_fn=detection_collate,
                                             num_workers=args.n_cpu)

    evaluator = COCOAPIEvaluator(data_dir=data_dir,
                                 img_size=cfg['min_dim'],
                                 device=device,
                                 transform=BaseTransform(
                                     cfg['min_dim'], MEANS))

    # optimizer setup
    # optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
    #                       dampening=0, weight_decay=args.weight_decay)
    # optimizer = optim.Adam(model.parameters())
    lr = args.lr
    optimizer = optim.RMSprop(model.parameters(), lr=args.lr)

    step_index = 0
    epoch_size = len(dataset) // args.batch_size
    # each part of loss weight
    obj_w = 1.0
    cla_w = 1.0
    box_w = 2.0

    # start training loop
    iteration = 0
    for epoch in range(cfg['max_epoch']):
        batch_iterator = iter(dataloader)

        # No WarmUp strategy, or the WarmUp stage has finished.
        if epoch in cfg['lr_epoch']:
            step_index += 1
            lr = adjust_learning_rate(optimizer, args.gamma, step_index)

        # COCO evaluation
        if (epoch + 1) % args.eval_epoch == 0:
            model.trainable = False
            ap50_95, ap50 = evaluator.evaluate(model)
            print('ap50 : ', ap50)
            print('ap50_95 : ', ap50_95)
            model.trainable = True
            model.train()
            writer.add_scalar('val/COCOAP50', ap50, epoch + 1)
            writer.add_scalar('val/COCOAP50_95', ap50_95, epoch + 1)

        # subdivision loop
        optimizer.zero_grad()
        for images, targets in batch_iterator:
            iteration += 1

            # multi-scale trick
            if iteration % 10 == 0 and args.multi_scale == 1:
                ms_ind = random.sample(ms_inds, 1)[0]
                input_size = cfg['multi_scale'][int(ms_ind)]

            # multi scale
            if args.multi_scale == 1:
                images = torch.nn.functional.interpolate(images,
                                                         size=input_size,
                                                         mode='bilinear',
                                                         align_corners=True)

            targets = [label.tolist() for label in targets]
            if args.version == 'yolo_v2':
                targets = tools.gt_creator(input_size,
                                           yolo_net.stride,
                                           args.num_classes,
                                           targets,
                                           name='COCO')
            elif args.version == 'yolo_v3':
                targets = tools.multi_gt_creator(input_size,
                                                 yolo_net.stride,
                                                 args.num_classes,
                                                 targets,
                                                 name='COCO')

            targets = torch.tensor(targets).float().to(device)

            t0 = time.time()
            out = model(images.to(device))
            obj_loss, class_loss, box_loss = tools.loss(
                out,
                targets,
                num_classes=args.num_classes,
                use_focal=use_focal,
                obj=args.obj,
                noobj=args.noobj)
            total_loss = obj_w * obj_loss + cla_w * class_loss + box_w * box_loss

            # viz loss
            writer.add_scalar('object loss', obj_loss.item(), iteration)
            writer.add_scalar('class loss', class_loss.item(), iteration)
            writer.add_scalar('local loss', box_loss.item(), iteration)
            writer.add_scalar('total loss', total_loss.item(), iteration)
            # backprop
            total_loss.backward()
            optimizer.step()
            t1 = time.time()

            if iteration % 10 == 0:
                print('timer: %.4f sec.' % (t1 - t0))
                # print(obj_loss.item(), class_loss.item(), box_loss.item())
                print('Epoch[%d / %d]' % (epoch+1, cfg['max_epoch']) + ' || iter ' + repr(iteration) + \
                      ' || Loss: %.4f ||' % (total_loss.item()) + ' || lr: %.8f ||' % (lr) + ' || input size: %d ||' % input_size[0], end=' ')

        if (epoch + 1) % 10 == 0:
            print('Saving state, epoch:', epoch + 1)
            torch.save(
                yolo_net.state_dict(), save_folder + '/' + args.version + '_' +
                repr(epoch + 1) + '.pth')
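
adjust_learning_rate is called above as adjust_learning_rate(optimizer, args.gamma, step_index), which matches the SSD-style step decay lr = base_lr * gamma ** step_index. A sketch follows; the base learning rate is passed in as an assumed default rather than read from args.

def adjust_learning_rate(optimizer, gamma, step_index, base_lr=1e-3):
    # step decay: shrink the learning rate by gamma at each lr_epoch milestone
    lr = base_lr * (gamma ** step_index)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr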
Example #11
def train():
    ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)

    train_tfrecords = TFRECORDS_PATH + 'train2.tfrecords'
    val_tfrecords = TFRECORDS_PATH + 'val.tfrecords'

    train_image_batch, train_label_batch = input_data.get_batch(
        train_tfrecords, BATCH_SIZE)
    val_image_batch, val_label_batch = input_data.get_batch(
        val_tfrecords, BATCH_SIZE)

    x = tf.placeholder(tf.float32, [BATCH_SIZE, WIDTH, HEIGHT, CHANNEL],
                       name='x-input')
    y_ = tf.placeholder(tf.float32, [BATCH_SIZE, NUM_CLASSES], name='y-input')

    if ckpt and ckpt.model_checkpoint_path:
        num_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        num_step = int(num_step)
        print(num_step)
        global_step = tf.Variable(num_step, trainable=False)
    else:
        global_step = tf.Variable(0, trainable=False)

    regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
    y = model.inference(x, NUM_CLASSES, regularizer=regularizer)
    train_loss = tools.loss(logits=y, labels=y_)
    train_acc = tools.accuracy(logits=y, labels=y_)
    train_op = tools.optimizer(train_loss,
                               LEARNING_RATE,
                               global_step=global_step)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            tf.global_variables_initializer().run()

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        summary_op = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(LOG_TRAIN_PATH, sess.graph)
        val_writer = tf.summary.FileWriter(LOG_VAL_PATH, sess.graph)

        try:
            for i in range(TRAINING_STEPS):
                if coord.should_stop():
                    break

                xs, ys = sess.run([train_image_batch, train_label_batch])
                _, loss_value, acc_value, step = sess.run(
                    [train_op, train_loss, train_acc, global_step],
                    feed_dict={
                        x: xs,
                        y_: ys
                    })

                if i % 1 == 0:
                    print(
                        "After %d training step(s), loss on training batch is %g, accuracy is %g"
                        % (step, loss_value, acc_value))

                if i % 1 == 0:
                    summary_str = sess.run(summary_op,
                                           feed_dict={
                                               x: xs,
                                               y_: ys
                                           })
                    train_writer.add_summary(summary_str, step)

                if i % 5 == 0:
                    val_xs, val_ys = sess.run(
                        [val_image_batch, val_label_batch])
                    val_loss_value, val_acc_value = sess.run(
                        [train_loss, train_acc],
                        feed_dict={
                            x: val_xs,
                            y_: val_ys
                        })
                    print(
                        "After %d training step(s), valuation loss is %g, accuracy is %g"
                        % (step, val_loss_value, val_acc_value))
                    summary_str = sess.run(summary_op,
                                           feed_dict={
                                               x: val_xs,
                                               y_: val_ys
                                           })
                    val_writer.add_summary(summary_str, step)

                if i % 10 == 0 or step + 1 == TRAINING_STEPS:
                    saver.save(sess,
                               os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                               global_step=global_step)

        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')

        finally:
            coord.request_stop()

        coord.join(threads)
Example #12
def train():
    pre_trained_weights = '/home/xiaoyi/data/LED/VGG16_pretrained/vgg16.npy'
    large_dir = '/home/xiaoyi/data/LED/data/train/train_large_crop/'
    small_dir = '/home/xiaoyi/data/LED/data/train/train_small_crop/'
    val_large_dir = '/home/xiaoyi/data/LED/test/test_large/'
    val_small_dir = '/home/xiaoyi/data/LED/test/test_small/'
    train_log_dir = '/home/xiaoyi/data/LED/logs1/train/'
    val_log_dir = '/home/xiaoyi/data/LED/logs1/val/'

    with tf.name_scope('input'):
        train, train_label = input_data.get_files(large_dir, small_dir)
        train_batch, train_label_batch = input_data.get_batch(
            train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
        val, val_label = input_data.get_files(val_large_dir, val_small_dir)
        val_batch, val_label_batch = input_data.get_batch(
            val, val_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

    x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    y_ = tf.placeholder(tf.int16, shape=[BATCH_SIZE, N_CLASSES])

    # build the graph on the placeholders so both train and val batches can be fed
    logits = VGG.VGG16N(x, N_CLASSES, IS_PRETRAIN)
    loss = tools.loss(logits, y_)
    accuracy = tools.accuracy(logits, y_)
    my_global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = tools.optimize(loss, learning_rate, my_global_step)

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    tools.load_with_skip(pre_trained_weights, sess, ['fc6', 'fc7', 'fc8'])

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    tra_summary_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
    val_summary_writer = tf.summary.FileWriter(val_log_dir, sess.graph)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break

            tra_images, tra_labels = sess.run([train_batch, train_label_batch])
            _, tra_loss, tra_acc = sess.run([train_op, loss, accuracy],
                                            feed_dict={
                                                x: tra_images,
                                                y_: tra_labels
                                            })

            if step % 50 == 0 or (step + 1) == MAX_STEP:
                print('Step: %d,loss:%.4f,accuracy:%.4f%%' %
                      (step, tra_loss, tra_acc))
                summary_str = sess.run(summary_op, feed_dict={x: tra_images, y_: tra_labels})
                tra_summary_writer.add_summary(summary_str, step)

            if step % 200 == 0 or (step + 1) == MAX_STEP:
                val_images, val_labels = sess.run([val_batch, val_label_batch])
                val_loss, val_acc = sess.run([loss, accuracy],
                                             feed_dict={
                                                 x: val_images,
                                                 y_: val_labels
                                             })
                print('** Step %d,val loss = %.2f,val accuracy = %.2f%% **' %
                      (step, val_loss, val_acc))

                summary_str = sess.run(summary_op, feed_dict={x: val_images, y_: val_labels})
                val_summary_writer.add_summary(summary_str, step)

            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')

    finally:
        coord.request_stop()

    coord.join(threads)
    sess.close()
Example #13
        outputs = tools.FC_layer('fc6',
                                 outputs,
                                 out_nodes=1024,
                                 activaction_function=tf.nn.relu)
        outputs = tools.batch_norm(outputs)
        outputs = tools.FC_layer('fc7',
                                 outputs,
                                 out_nodes=1024,
                                 activaction_function=tf.nn.relu)
        outputs = tools.batch_norm(outputs)
        logits = tools.FC_layer('fc8',
                                outputs,
                                out_nodes=10,
                                activaction_function=tf.nn.softmax)

    loss = tools.loss(logits, y_)
    accuracy = tools.accuracy(logits, y_)
    my_global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = tools.optimize(loss, learning_rate, my_global_step)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    summary_op = tf.summary.merge_all()
    tra_summary_writer = tf.summary.FileWriter(train_log_dir, sess.graph)

    for step in range(MAX_STEP):
        batch_xs, batch_ys = mnist.train.next_batch(100)  # mini-batch sampling with replacement
        _train_r, _train_acc, _train_loss = sess.run(
            [train_op, accuracy, loss], feed_dict={
                x: batch_xs,
Example #14
def train(net, device):
    global cfg, hr
    # set GPU

    use_focal = False
    if args.use_focal:
        print("Let's use focal loss for objectness !!!")
        use_focal = True

    if args.multi_scale:
        print('Let us use the multi-scale trick.')
        ms_inds = range(len(cfg['multi_scale']))
        dataset = COCODataset(
                    data_dir=data_dir,
                    img_size=608,
                    transform=SSDAugmentation([608, 608], mean=(0.406, 0.456, 0.485), std=(0.225, 0.224, 0.229)),
                    debug=args.debug)
    else:
        dataset = COCODataset(
                    data_dir=data_dir,
                    img_size=cfg['min_dim'][0],
                    transform=SSDAugmentation(cfg['min_dim'], mean=(0.406, 0.456, 0.485), std=(0.225, 0.224, 0.229)),
                    debug=args.debug)
    
    print("Setting Arguments.. : ", args)
    print("----------------------------------------------------------")
    print('Loading the MSCOCO dataset...')
    print('Training model on:', dataset.name)
    print('The dataset size:', len(dataset))
    print('The obj weight : ', args.obj)
    print('The noobj weight : ', args.noobj)
    print("----------------------------------------------------------")

    input_size = cfg['min_dim']
    num_classes = args.num_classes
    batch_size = args.batch_size

    os.makedirs(args.save_folder + args.version, exist_ok=True)

    # using tfboard
    from tensorboardX import SummaryWriter
    c_time = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))
    log_path = 'log/coco/' + c_time
    os.makedirs(log_path, exist_ok=True)

    writer = SummaryWriter(log_path)

    if args.high_resolution == 1:
        hr = True

    print('Let us train yolo-v2 on the MSCOCO dataset ......')
    
    model = net
    model.to(device).train()

    dataloader = torch.utils.data.DataLoader(
                    dataset, 
                    batch_size=batch_size, 
                    shuffle=True, 
                    collate_fn=detection_collate,
                    num_workers=args.n_cpu)

    evaluator = COCOAPIEvaluator(
                    data_dir=data_dir,
                    img_size=cfg['min_dim'],
                    device=device,
                    transform=BaseTransform(cfg['min_dim'], MEANS)
                    )

    # optimizer setup
    lr = args.lr
    # optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
    #                                         weight_decay=args.weight_decay)
    # optimizer = optim.Adam(model.parameters())
    optimizer = optim.RMSprop(model.parameters(), lr=args.lr)

    step_index = 0
    epoch_size = len(dataset) // args.batch_size
    # each part of loss weight
    obj_w = 1.0
    cla_w = 1.0
    box_w = 1.0

    # start training loop
    iteration = 0
    t0 = time.time()

    for epoch in range(cfg['max_epoch']):
        batch_iterator = iter(dataloader)

        # use cos lr
        if args.cos and epoch > 20 and epoch <= cfg['max_epoch'] - 20:
            # use cos lr
            lr = cos_lr(optimizer, epoch, cfg['max_epoch'])
        elif args.cos and epoch > cfg['max_epoch'] - 20:
            lr = 0.00001  
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        # use step lr
        else:
            if epoch in cfg['lr_epoch']:
                step_index += 1
                lr = adjust_learning_rate(optimizer, args.gamma, step_index)
    
        # COCO evaluation
        if (epoch + 1) % args.eval_epoch == 0:
            model.trainable = False
            ap50_95, ap50 = evaluator.evaluate(model)
            print('ap50 : ', ap50)
            print('ap50_95 : ', ap50_95)
            model.trainable = True
            model.train()
            writer.add_scalar('val/COCOAP50', ap50, epoch + 1)
            writer.add_scalar('val/COCOAP50_95', ap50_95, epoch + 1)

        # subdivision loop
        for images, targets in batch_iterator:
            # WarmUp strategy for learning rate
            if args.no_warm_up != 'yes':
                if epoch < args.wp_epoch:
                    lr = warmup_strategy(optimizer, epoch_size, iteration)

            iteration += 1
        
            # multi-scale trick
            if iteration % 10 == 0 and args.multi_scale:
                ms_ind = random.sample(ms_inds, 1)[0]
                input_size = cfg['multi_scale'][int(ms_ind)]
            
            # multi scale
            if args.multi_scale:
                images = torch.nn.functional.interpolate(images, size=input_size, mode='bilinear', align_corners=True)

            targets = [label.tolist() for label in targets]
            if args.version == 'yolo_v2' or args.version == 'tiny_yolo_v2':
                targets = tools.gt_creator(input_size, yolo_net.stride, targets, name='COCO')
            elif args.version == 'yolo_v3' or args.version == 'tiny_yolo_v3':
                targets = tools.multi_gt_creator(input_size, yolo_net.stride, targets, name='COCO')

            targets = torch.tensor(targets).float().to(device)

            out = model(images.to(device))

            optimizer.zero_grad()

            obj_loss, class_loss, box_loss = tools.loss(out, targets, num_classes=args.num_classes, 
                                                        use_focal=use_focal,
                                                        obj=args.obj,
                                                        noobj=args.noobj)
            total_loss = obj_w * obj_loss + cla_w * class_loss + box_w * box_loss

            # viz loss
            writer.add_scalar('object loss', obj_loss.item(), iteration)
            writer.add_scalar('class loss', class_loss.item(), iteration)
            writer.add_scalar('local loss', box_loss.item(), iteration)
            writer.add_scalar('total loss', total_loss.item(), iteration)
            # backprop
            total_loss.backward()        
            optimizer.step()

            if iteration % 10 == 0:
                t1 = time.time()
                print('[Epoch %d/%d][Iter %d][lr %.8f]'
                    '[Loss: obj %.2f || cls %.2f || bbox %.2f || total %.2f || imgsize %d || time: %.2f]'
                        % (epoch+1, cfg['max_epoch'], iteration, lr,
                            obj_loss.item(), class_loss.item(), box_loss.item(), total_loss.item(), input_size[0], t1-t0),
                        flush=True)

                t0 = time.time()


        if (epoch + 1) % 10 == 0:
            print('Saving state, epoch:', epoch + 1)
            torch.save(yolo_net.state_dict(), os.path.join(args.save_folder + args.version, 
                        args.version + '_' + repr(epoch + 1) + '.pth')
                        )  
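
This variant adds cos_lr and warmup_strategy schedules. Hedged sketches matching the call sites follow; the base learning rate and warmup length are assumptions, not taken from this page.

import math

def cos_lr(optimizer, epoch, max_epoch, base_lr=1e-3):
    # cosine annealing over the training run
    lr = 0.5 * base_lr * (1. + math.cos(math.pi * epoch / max_epoch))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr

def warmup_strategy(optimizer, epoch_size, iteration, base_lr=1e-3, wp_epoch=1):
    # linearly ramp the learning rate over the first wp_epoch epochs
    lr = base_lr * min(1., iteration / (wp_epoch * epoch_size))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr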
Example #15
def train_aid():
    pre_trained_weights = r'/media/jsl/ubuntu/pretrain_weight/vgg16.npy'
    data_train_dir = os.path.join(config.aid_data_root_path, 'train')
    data_test_dir = os.path.join(config.aid_data_root_path, 'val')
    train_log_dir = os.path.join(config.aid_log_root_path, 'train')
    val_log_dir = os.path.join(config.aid_log_root_path, 'val')

    with tf.name_scope('input'):
        image_train_list, label_train_list = get_files(data_train_dir)
        image_val_list, label_val_list = get_files(data_test_dir)
        image_batch, label_batch = get_batch(image_train_list,
                                             label_train_list,
                                             config.aid_img_weight,
                                             config.aid_img_height, BATCH_SIZE,
                                             CAPACITY)
        val_image_batch, val_label_batch = get_batch(image_val_list,
                                                     label_val_list,
                                                     config.aid_img_weight,
                                                     config.aid_img_height,
                                                     BATCH_SIZE, CAPACITY)

    x = tf.placeholder(
        tf.float32,
        shape=[BATCH_SIZE, config.aid_img_weight, config.aid_img_height, 3])
    y_ = tf.placeholder(tf.int16, shape=[BATCH_SIZE, config.aid_n_class])

    logits = VGG.VGG16N(x, config.aid_n_class, IS_PRETRAIN)
    loss = tools.loss(logits, y_)
    accuracy = tools.accuracy(logits, y_)
    my_global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = tools.optimize(loss, learning_rate, my_global_step)

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    start_time = time.strftime('%Y-%m-%d %H-%M-%S',
                               time.localtime(time.time()))
    print('start_time:', start_time)

    # load the parameter file, assign the parameters, skip the specific layers
    tools.load_with_skip(pre_trained_weights, sess, ['fc6', 'fc7', 'fc8'])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    tra_summary_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
    val_summary_writer = tf.summary.FileWriter(val_log_dir, sess.graph)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break

            tra_images, tra_labels = sess.run([image_batch, label_batch])
            _, tra_loss, tra_acc = sess.run([train_op, loss, accuracy],
                                            feed_dict={
                                                x: tra_images,
                                                y_: tra_labels
                                            })
            if step % 50 == 0 or (step + 1) == MAX_STEP:
                print('Step: %d, loss: %.4f, accuracy: %.4f%%' %
                      (step, tra_loss, tra_acc))
                summary_str = sess.run(summary_op,
                                       feed_dict={
                                           x: tra_images,
                                           y_: tra_labels
                                       })
                tra_summary_writer.add_summary(summary_str, step)

            if step % 200 == 0 or (step + 1) == MAX_STEP:
                val_images, val_labels = sess.run(
                    [val_image_batch, val_label_batch])
                val_loss, val_acc = sess.run([loss, accuracy],
                                             feed_dict={
                                                 x: val_images,
                                                 y_: val_labels
                                             })
                print(
                    '**  Step %d, val loss = %.2f, val accuracy = %.2f%%  **' %
                    (step, val_loss, val_acc))

                summary_str = sess.run(summary_op,
                                       feed_dict={
                                           x: val_images,
                                           y_: val_labels
                                       })
                val_summary_writer.add_summary(summary_str, step)

            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()

    coord.join(threads)
    sess.close()
    end_time = time.strftime('%Y-%m-%d %H-%M-%S', time.localtime(time.time()))
    print('end_time:', end_time)
Example #16
    def forward(self, x, target=None):
        # backbone
        c3, c4, c5 = self.backbone(x)

        # FPN: multi-scale feature fusion
        p5 = self.conv_set_3(c5)
        p5_up = F.interpolate(self.conv_1x1_3(p5),
                              scale_factor=2.0,
                              mode='bilinear',
                              align_corners=True)

        p4 = torch.cat([c4, p5_up], 1)
        p4 = self.conv_set_2(p4)
        p4_up = F.interpolate(self.conv_1x1_2(p4),
                              scale_factor=2.0,
                              mode='bilinear',
                              align_corners=True)

        p3 = torch.cat([c3, p4_up], 1)
        p3 = self.conv_set_1(p3)

        # head
        # s = 32: predict large objects
        p5 = self.extra_conv_3(p5)
        pred_3 = self.pred_3(p5)

        # s = 16: predict medium objects
        p4 = self.extra_conv_2(p4)
        pred_2 = self.pred_2(p4)

        # s = 8: predict small objects
        p3 = self.extra_conv_1(p3)
        pred_1 = self.pred_1(p3)

        preds = [pred_1, pred_2, pred_3]
        total_conf_pred = []
        total_cls_pred = []
        total_txtytwth_pred = []
        B = HW = 0
        for pred in preds:
            B_, abC_, H_, W_ = pred.size()

            # adjust pred's shape with view to ease the later processing
            # [B, anchor_n * C, H, W] -> [B, H, W, anchor_n * C] -> [B, H*W, anchor_n*C]
            pred = pred.permute(0, 2, 3,
                                1).contiguous().view(B_, H_ * W_, abC_)

            # split pred into objectness, class, and bbox txtytwth predictions
            # [B, H*W*anchor_n, 1]
            conf_pred = pred[:, :, :1 * self.num_anchors].contiguous().view(
                B_, H_ * W_ * self.num_anchors, 1)
            # [B, H*W*anchor_n, num_cls]
            cls_pred = pred[:, :, 1 * self.num_anchors:(1 + self.num_classes) *
                            self.num_anchors].contiguous().view(
                                B_, H_ * W_ * self.num_anchors,
                                self.num_classes)
            # [B, H*W*anchor_n, 4]
            txtytwth_pred = pred[:, :, (1 + self.num_classes) *
                                 self.num_anchors:].contiguous()

            total_conf_pred.append(conf_pred)
            total_cls_pred.append(cls_pred)
            total_txtytwth_pred.append(txtytwth_pred)
            B = B_
            HW += H_ * W_

        # concatenate all results along the H*W dimension
        conf_pred = torch.cat(total_conf_pred, dim=1)
        cls_pred = torch.cat(total_cls_pred, dim=1)
        txtytwth_pred = torch.cat(total_txtytwth_pred, dim=1)

        # train
        if self.trainable:
            txtytwth_pred = txtytwth_pred.view(B, HW, self.num_anchors, 4)

            # decode x1y1x2y2 coordinates from the txtytwth predictions
            x1y1x2y2_pred = (self.decode_boxes(txtytwth_pred) /
                             self.input_size).view(-1, 4)
            x1y1x2y2_gt = target[:, :, 7:].view(-1, 4)
            # compute the IoU between pred boxes and gt boxes
            iou_pred = tools.iou_score(x1y1x2y2_pred,
                                       x1y1x2y2_gt).view(B, -1, 1)

            # gt conf; this guarantees that no gradient flows back through the IoU
            with torch.no_grad():
                gt_conf = iou_pred.clone()

            # we use the IoU between the pred box and the gt box as the objectness target.
            # [obj, cls, txtytwth, scale_weight, x1y1x2y2] -> [conf, obj, cls, txtytwth, scale_weight]
            target = torch.cat([gt_conf, target[:, :, :7]], dim=2)
            txtytwth_pred = txtytwth_pred.view(B, -1, 4)

            # compute the loss
            conf_loss, cls_loss, bbox_loss, iou_loss = tools.loss(
                pred_conf=conf_pred,
                pred_cls=cls_pred,
                pred_txtytwth=txtytwth_pred,
                pred_iou=iou_pred,
                label=target)

            return conf_loss, cls_loss, bbox_loss, iou_loss

        # test
        else:
            txtytwth_pred = txtytwth_pred.view(B, HW, self.num_anchors, 4)
            with torch.no_grad():
                # batch size = 1
                # at test time the batch size is assumed to be 1,
                # so the batch dimension is not needed; [0] removes it.
                # [B, H*W*num_anchor, 1] -> [H*W*num_anchor, 1]
                conf_pred = torch.sigmoid(conf_pred)[0]
                # [B, H*W*num_anchor, 4] -> [H*W*num_anchor, 4]
                bboxes = torch.clamp(
                    (self.decode_boxes(txtytwth_pred) / self.input_size)[0],
                    0., 1.)
                # [B, H*W*num_anchor, C] -> [H*W*num_anchor, C],
                scores = torch.softmax(cls_pred[0, :, :], dim=1) * conf_pred

                # move the predictions to the CPU for post-processing
                scores = scores.to('cpu').numpy()
                bboxes = bboxes.to('cpu').numpy()

                # post-processing
                bboxes, scores, cls_inds = self.postprocess(bboxes, scores)

                return bboxes, scores, cls_inds
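
decode_boxes is used by all of the forward methods on this page. Below is a sketch of the standard YOLOv2/v3 decoding it presumably implements; self.grid_cell, self.all_anchor_wh and self.stride are assumed precomputed attributes whose shapes broadcast against the prediction.

import torch

def decode_boxes(self, txtytwth_pred):
    # txtytwth_pred: [B, H*W, num_anchors, 4]
    # center: sigmoid offset within the cell plus the cell's grid coordinate
    xy = torch.sigmoid(txtytwth_pred[..., :2]) + self.grid_cell
    # size: exp of tw/th scaled by the anchor priors
    wh = torch.exp(txtytwth_pred[..., 2:]) * self.all_anchor_wh
    # convert [cx, cy, w, h] in grid units to x1y1x2y2 in input pixels
    x1y1 = (xy - wh * 0.5) * self.stride
    x2y2 = (xy + wh * 0.5) * self.stride
    return torch.cat([x1y1, x2y2], dim=-1).view(txtytwth_pred.size(0), -1, 4)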
Example #17
def test_dataset():
    test_img_path = r'/home/vincent/Desktop/jsl thesis/GradTest_vinny'

    test_rs_img(test_img_path)

    class_folder = 'tenniscourt'
    class_folder_path = r'/home/vincent/Desktop/jsl thesis/GradTest_vinny/UCM/dataset_rotated/validation/' + class_folder
    test_img_list, test_label_list = get_one_class_files(class_folder_path)
    # test_img_batch, test_label_batch = get_one_class_batch_new(test_img_list, test_label_list,
    #                                                            config.img_Width,
    #                                                            config.img_Height,
    #                                                            config.test_batch_size, 256)
    path_batch, image_batch, label_batch = get_one_class_batch_new(
        test_img_list, test_label_list, config.test_batch_size, 256)
    x = tf.placeholder(
        tf.float32,
        shape=[config.test_batch_size, config.img_Width, config.img_Height, 3])
    y_ = tf.placeholder(tf.int16,
                        shape=[config.test_batch_size, config.n_class])

    logits = VGG.VGG16N(x, config.n_class, False)

    predict = tf.argmax(logits, 1)
    true_label = tf.argmax(label_batch, 1)
    loss = tools.loss(logits, y_)
    accuracy = tools.accuracy(logits, y_)
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(config.checkpoint_path)

    if ckpt and ckpt.model_checkpoint_path:
        global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        print('step: ', global_step)
        with tf.Session() as sess:
            i = 0
            saver.restore(sess, ckpt.model_checkpoint_path)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            try:
                while not coord.should_stop() and i < 1:
                    path_test, img_test, label_test = sess.run(
                        [path_batch, image_batch, label_batch])
                    # derive the true labels from the batch already fetched;
                    # re-running the true_label op would pull a fresh batch
                    true_label_test = np.argmax(label_test, 1)
                    acc, pred_test = sess.run([accuracy, predict],
                                              feed_dict={
                                                  x: img_test,
                                                  y_: label_test
                                              })
                    str_acc = class_folder + str(acc) + '\r\n'
                    print(acc)
                    i += 1
                    for index in range(len(pred_test)):
                        if true_label_test[index] != pred_test[index]:
                            str_acc += path_test[index].decode(
                                'utf-8'
                            ) + ':' + config.get_class_name_by_index(
                                pred_test[index]) + '\r\n'
            except tf.errors.OutOfRangeError:
                print('done!')
            finally:
                coord.request_stop()
            coord.join(threads)
            with open(class_folder + '.txt', 'w') as fp:
                fp.write(str_acc)
def train():
    with tf.name_scope('input'):
        train, train_label, val, val_label = input_train_val_split.get_files(
            train_dir, RATIO)
        tra_image_batch, tra_label_batch = input_train_val_split.get_batch(
            train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
        val_image_batch, val_label_batch = input_train_val_split.get_batch(
            val, val_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

    x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    y_ = tf.placeholder(tf.int16, shape=[BATCH_SIZE, N_CLASSES])

    logits = VGG.VGG16N(x, N_CLASSES, IS_PRETRAIN)
    loss = tools.loss(logits, y_)
    accuracy = tools.accuracy(logits, y_)

    my_global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = tools.optimize(loss, learning_rate, my_global_step)

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        tools.load_with_skip(pre_trained_weights, sess, ['fc8'])
        print("load weights done")

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        tra_summary_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
        val_summary_writer = tf.summary.FileWriter(val_log_dir, sess.graph)

        try:
            for step in np.arange(MAX_STEP):
                if coord.should_stop():
                    break
                tra_images, tra_labels = sess.run(
                    [tra_image_batch, tra_label_batch])
                _, tra_loss, tra_acc = sess.run([train_op, loss, accuracy],
                                                feed_dict={
                                                    x: tra_images,
                                                    y_: tra_labels
                                                })
                if step % 2 == 0 or (step + 1) == MAX_STEP:

                    print('Step: %d, loss: %.4f, accuracy: %.4f%%' %
                          (step, tra_loss, tra_acc))
                    summary_str = sess.run(summary_op,
                                           feed_dict={
                                               x: tra_images,
                                               y_: tra_labels
                                           })
                    tra_summary_writer.add_summary(summary_str, step)

                if step % 4 == 0 or (step + 1) == MAX_STEP:
                    val_images, val_labels = sess.run(
                        [val_image_batch, val_label_batch])
                    val_loss, val_acc = sess.run([loss, accuracy],
                                                 feed_dict={
                                                     x: val_images,
                                                     y_: val_labels
                                                 })

                    print(
                        '**  Step %d, val loss = %.2f, val accuracy = %.2f%%  **'
                        % (step, val_loss, val_acc))
                    # fetch only the summaries here; running train_op with
                    # validation data would train on the validation set
                    summary_str = sess.run(summary_op,
                                           feed_dict={
                                               x: val_images,
                                               y_: val_labels
                                           })
                    val_summary_writer.add_summary(summary_str, step)

                if step % 8 == 0 or (step + 1) == MAX_STEP:
                    checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)

        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')

        finally:
            coord.request_stop()

        coord.join(threads)
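
Several of the TensorFlow examples on this page call tools.loss(logits, y_) with one-hot labels. Each repo ships its own tools.py, so the following is only a plausible sketch of such a helper, not the actual implementation from any of them:

import tensorflow as tf

def loss(logits, labels):
    # hypothetical tools.loss for one-hot labels: mean softmax
    # cross-entropy, logged as a scalar summary
    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=tf.cast(labels, tf.float32))
        loss_op = tf.reduce_mean(cross_entropy, name='loss')
        tf.summary.scalar('loss', loss_op)
    return loss_op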
Example #19
    def forward(self, x, target=None):
        # backbone
        c2, c3, c4, c5 = self.backbone(x)
        B = c2.size(0)

        # FPN deconv
        c5 = self.smooth5(c5)
        c4 = self.smooth4(c4 + self.deconv5(c5))
        c3 = self.smooth3(c3 + self.deconv4(c4))
        c2 = self.smooth2(c2 + self.deconv3(c3))

        # head
        conf_pred = self.conf_pred(c2)
        txty_pred = self.txty_pred(c2)
        twth_pred = self.twth_pred(c2)

        # train
        if self.trainable:
            # [B, H*W, 1]
            conf_pred = conf_pred.permute(0, 2, 3,
                                          1).contiguous().view(B, -1, 1)
            # [B, H*W, 2]
            txty_pred = txty_pred.permute(0, 2, 3,
                                          1).contiguous().view(B, -1, 2)
            # [B, H*W, 2]
            twth_pred = twth_pred.permute(0, 2, 3,
                                          1).contiguous().view(B, -1, 2)
            # [B, H*W, 4]
            txtytwth_pred = torch.cat([txty_pred, twth_pred], dim=2)

            # compute loss
            conf_loss, txtytwth_loss, total_loss = tools.loss(
                pred_conf=conf_pred,
                pred_txtytwth=txtytwth_pred,
                label=target,
                version='CenterYOLAF')

            return conf_loss, txtytwth_loss, total_loss

        # test
        else:
            with torch.no_grad():
                txtytwth_pred = torch.cat([txty_pred, twth_pred], dim=1)

                # decode
                conf_pred = torch.sigmoid(conf_pred[:, :1, :, :])
                txtytwth_pred = txtytwth_pred.permute(0, 2, 3,
                                                      1).contiguous().view(
                                                          B, -1, 4)
                # simple nms
                hmax = F.max_pool2d(conf_pred, 5, stride=1, padding=2)
                keep = (hmax == conf_pred).float()
                conf_pred *= keep
                # threshold
                conf_pred *= (conf_pred >= self.conf_thresh).float()
                # [B, C, H, W] -> [H, W]
                score = conf_pred[0, 0, :, :]
                # top K
                topk_scores, topk_inds = torch.topk(score.view(-1), self.topk)
                # decode bbox
                all_bbox = torch.clamp(
                    (self.decode_boxes(txtytwth_pred) / self.scale_torch)[0],
                    0., 1.)
                all_bbox = all_bbox[topk_inds]

                # separate box pred and class conf
                scores = topk_scores.cpu().numpy()
                bboxes = all_bbox.cpu().numpy()

                return bboxes, scores
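
The test branch above implements a CenterNet-style pseudo-NMS: max pooling keeps only the local maxima of the confidence heatmap, and top-k indexing then selects the detections. A self-contained sketch of the trick (function and argument names are illustrative):

import torch
import torch.nn.functional as F

def heatmap_topk(heatmap, k=100, kernel=5):
    # heatmap: [1, 1, H, W] confidence map after sigmoid.
    # A cell survives only if it equals the maximum of its neighborhood,
    # which suppresses adjacent near-duplicate peaks (pseudo-NMS).
    hmax = F.max_pool2d(heatmap, kernel, stride=1, padding=kernel // 2)
    peaks = heatmap * (hmax == heatmap).float()
    # flatten and pick the k strongest surviving peaks
    scores, inds = torch.topk(peaks.view(-1), k)
    return scores, inds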
def train():

    step = 0  #step
    bs = 128  #batch size
    pre_trained_weights = main_dir + 'vgg16.npy'  #vgg16 weight
    train_log_dir = main_dir + 'trainloggm1rss/tlog'  #train log path
    val_log_dir = main_dir + 'trainloggm1rss/vlog'  # val log path
    train_data_dir = main_dir + 'ymodellog'  # save model path
    #    rd=main_dir+'modellog'
    #train data
    tra_filename = np.load(main_dir + "sf_filename.npy")
    tra_label = np.load(main_dir + "sf_label.npy")
    tra_vector = np.load(main_dir + "sf_vector.npy")
    tra_4 = np.load(main_dir + "sf_4.npy")
    #val data
    val_filename = np.load(main_dir + "sf_gm1vfilename.npy")
    val_label = np.load(main_dir + "sf_gm1vlabel.npy")
    val_vector = np.load(main_dir + "sf_gm1vvector.npy")
    val_4 = np.load(main_dir + "sf_gm1v4.npy")
    with tf.Graph().as_default() as g:
        tra_image_p = tf.placeholder(tra_filename.dtype, tra_filename.shape)
        tra_label_p = tf.placeholder(tra_label.dtype, tra_label.shape)
        tra_vector_p = tf.placeholder(tra_vector.dtype, tra_vector.shape)
        tra_4_p = tf.placeholder(tra_4.dtype, tra_4.shape)
        tdataset = tf.contrib.data.Dataset.from_tensor_slices(
            (tra_image_p, tra_label_p, tra_vector_p, tra_4_p))
        tdataset = tdataset.map(pre_function, num_threads=64)
        tdataset = tdataset.shuffle(1024 * 16)
        tdataset = tdataset.repeat()  # repeat indefinitely
        tdataset = tdataset.batch(bs)
        tra_iterator = tdataset.make_initializable_iterator()

        val_image_p = tf.placeholder(val_filename.dtype, val_filename.shape)
        val_label_p = tf.placeholder(val_label.dtype, val_label.shape)
        val_vector_p = tf.placeholder(val_vector.dtype, val_vector.shape)
        val_4_p = tf.placeholder(val_4.dtype, val_4.shape)
        vdataset = tf.contrib.data.Dataset.from_tensor_slices(
            (val_image_p, val_label_p, val_vector_p, val_4_p))
        vdataset = vdataset.map(pre_function)
        vdataset = vdataset.repeat()  # repeat indefinitely
        vdataset = vdataset.batch(bs)
        val_iterator = vdataset.make_initializable_iterator()
        # Generate placeholders for the images and labels.
        x = tf.placeholder(tf.float32, shape=[bs, 224, 224, 3])
        v = tf.placeholder(tf.float32, shape=[bs, 280])
        y_ = tf.placeholder(tf.int32, shape=[bs, 2])  #??
        s_ = tf.placeholder(tf.float32, shape=[bs, 4])  #??
        BN_istrain = tf.placeholder(tf.bool)
        # Build a Graph that computes predictions from the inference model.
        logits = VGG16N.VGG16N(x, N_CLASSES, v, BN_istrain)
        # Add to the Graph the Ops for loss calculation.
        loss, mean_summary, total_loss_summary, loss_averages_op = tools.loss(
            logits, y_, s_)
        # Add to the Graph the Ops that calculate and apply gradients.
        my_global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op = tools.optimize(loss, my_global_step, loss_averages_op)
        # Add the Op to compare the logits to the labels during evaluation.
        accuracy, accuracy_summary = tools.accuracy(logits, y_)
        # Build the summary Tensor based on the TF collection of Summaries.
        summary = tf.summary.merge(
            [mean_summary, accuracy_summary, total_loss_summary])
        # Add the variable initializer Op.
        saver = tf.train.Saver(max_to_keep=100)
        init = tf.global_variables_initializer()
        # Create a saver for writing training checkpoints.
        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        # Instantiate a SummaryWriter to output summaries and the Graph.
        tra_summary_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
        val_summary_writer = tf.summary.FileWriter(val_log_dir, sess.graph)

        # And then after everything is built:
        # Run the Op to initialize the variables.
        sess.run(init)
        tools.load_with_skip(pre_trained_weights, sess, ['fc6', 'fc7', 'fc8'])
        #        sess.run(tra_iterator.initializer, feed_dict={tra_image_p: tra_filename,tra_label_p: tra_label,tra_vector_p: tra_vector})
        sess.run(val_iterator.initializer,
                 feed_dict={
                     val_image_p: val_filename,
                     val_label_p: val_label,
                     val_vector_p: val_vector,
                     val_4_p: val_4
                 })
        tra_next = tra_iterator.get_next()
        val_next = val_iterator.get_next()
        print("Reading checkpoints...")

        for epoch in range(num_epoch):
            shuu.shu()
            tra_filename = np.load(main_dir + "gm1sf_filename.npy")
            tra_label = np.load(main_dir + "gm1sf_label.npy")
            tra_vector = np.load(main_dir + "gm1sf_vector.npy")
            tra_4 = np.load(main_dir + "gm1sf_4.npy")
            sess.run(tra_iterator.initializer,
                     feed_dict={
                         tra_image_p: tra_filename,
                         tra_label_p: tra_label,
                         tra_vector_p: tra_vector,
                         tra_4_p: tra_4
                     })
            while True:
                try:
                    for step in range(MAX_STEP):
                        tra_all = sess.run(tra_next)
                        tra_i = tra_all[0]
                        tra_l = tra_all[1]
                        tra_v = tra_all[2]
                        tra_f = tra_all[3]
                        summary_str, _, tra_loss, tra_acc = sess.run(
                            [summary, train_op, loss, accuracy],
                            feed_dict={
                                x: tra_i,
                                y_: tra_l,
                                v: tra_v,
                                s_: tra_f,
                                BN_istrain: True
                            })

                        if step % 20 == 0 or (step + 1) == MAX_STEP:
                            tra_summary_writer.add_summary(summary_str, step)
#                        print ('Step: %d, loss: %.4f' % (step, tra_loss))

                        if step % 20 == 0 or (step + 1) == MAX_STEP:
                            val_all = sess.run(val_next)
                            val_i = val_all[0]
                            val_l = val_all[1]
                            val_v = val_all[2]
                            val_f = val_all[3]
                            val_loss, val_acc = sess.run(
                                [loss, accuracy],
                                feed_dict={
                                    x: val_i,
                                    y_: val_l,
                                    v: val_v,
                                    s_: val_f,
                                    BN_istrain: False
                                })
                            print(
                                '**  Step %d, val loss = %.2f, val accuracy = %.2f%%  **'
                                % (step, val_loss, val_acc))

                            summary_str = sess.run(summary,
                                                   feed_dict={
                                                       x: val_i,
                                                       y_: val_l,
                                                       v: val_v,
                                                       s_: val_f,
                                                       BN_istrain: False
                                                   })
                            val_summary_writer.add_summary(summary_str, step)


#                    if step == 99:  # Record execution stats
#                        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
#                        run_metadata = tf.RunMetadata()
#                        summary_str, _= sess.run([summary,train_op],
#                                                    feed_dict={x:tra_i, y_:tra_l, v:tra_v, BN_istrain:True},options=run_options,run_metadata=run_metadata)
#                        tra_summary_writer.add_run_metadata(run_metadata, 'step%d' % step)
#                        tra_summary_writer.add_summary(summary_str, step)
#                        print('Adding run metadata for', step)
                        if step % 10000 == 0:
                            checkpoint_path = os.path.join(
                                train_data_dir, 'model.ckpt')
                            saver.save(sess, checkpoint_path, global_step=step)

                except tf.errors.OutOfRangeError:
                    break
        sess.close()
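
The tdataset.map(pre_function, num_threads=64) call above relies on a pre_function that is not shown. Under the old tf.contrib.data API it would be a per-element preprocessing function roughly like the sketch below; the decode and resize steps and the 224x224 target size are assumptions inferred from the x placeholder above:

import tensorflow as tf

def pre_function(filename, label, vector, feat4):
    # hypothetical preprocessing: read and decode the image file,
    # resize to the network input size, scale pixels to [0, 1];
    # the label, feature vector and 4-dim target pass through unchanged
    image = tf.read_file(filename)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize_images(image, [224, 224])
    image = tf.cast(image, tf.float32) / 255.0
    return image, label, vector, feat4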
Example #21
def validate2():
    val_tfrecords = TFRECORDS_PATH + 'test5.tfrecords'

    val_image_batch, val_label_batch = input_data.get_batch(val_tfrecords,
                                                            BATCH_SIZE,
                                                            num_epochs=1)

    x = tf.placeholder(tf.float32, [BATCH_SIZE, WIDTH, HEIGHT, CHANNEL],
                       name='x-input')
    y_ = tf.placeholder(tf.float32, [BATCH_SIZE, NUM_CLASSES], name='y-input')

    y = model.inference2(x, NUM_CLASSES, evaluate=False)
    predict_y = tf.argmax(y, 1)
    val_loss = tools.loss(logits=y, labels=y_)
    val_acc = tools.accuracy(logits=y, labels=y_)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run([
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ])

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        ckpt = tf.train.get_checkpoint_state(MODEL2_SAVE_PATH)

        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                '-')[-1]

            global_score = 0.
            num_step = 0
            try:
                for i in range(TRAINING_STEPS):
                    if coord.should_stop():
                        break

                    val_xs, val_ys = sess.run(
                        [val_image_batch, val_label_batch])
                    yy, loss_value, acc_value = sess.run(
                        [predict_y, val_loss, val_acc],
                        feed_dict={
                            x: val_xs,
                            y_: val_ys
                        })
                    global_score += acc_value
                    num_step += 1
                    if i % 5 == 0:
                        print(
                            "in the %dth batch: After %s training step(s), validation accuracy = %g"
                            % (i, global_step, acc_value))


#                        plot_images(val_xs, val_ys)

            except tf.errors.OutOfRangeError:
                print("global accuracy = %g" % (global_score / num_step))
                print('Done testing -- epoch limit reached')

            finally:
                coord.request_stop()

            coord.join(threads)
Example #22
    def forward(self, x, target=None):
        # backbone
        c3, c4, c5 = self.backbone(x)

        # neck
        c5 = self.spp(c5)

        # FPN + PAN
        # head
        c6 = self.head_conv_0(c5)
        c7 = self.head_upsample_0(c6)  # s32->s16
        c8 = torch.cat([c7, c4], dim=1)
        c9 = self.head_csp_0(c8)
        # P3/8
        c10 = self.head_conv_1(c9)
        c11 = self.head_upsample_1(c10)  # s16->s8
        c12 = torch.cat([c11, c3], dim=1)
        c13 = self.head_csp_1(c12)  # to det
        # p4/16
        c14 = self.head_conv_2(c13)
        c15 = torch.cat([c14, c10], dim=1)
        c16 = self.head_csp_2(c15)  # to det
        # p5/32
        c17 = self.head_conv_3(c16)
        c18 = torch.cat([c17, c6], dim=1)
        c19 = self.head_csp_3(c18)  # to det

        # det
        pred_s = self.head_det_1(c13)
        pred_m = self.head_det_2(c16)
        pred_l = self.head_det_3(c19)

        preds = [pred_s, pred_m, pred_l]
        total_conf_pred = []
        total_cls_pred = []
        total_txtytwth_pred = []
        B = HW = 0
        for pred in preds:
            B_, abC_, H_, W_ = pred.size()

            # [B, anchor_n * C, H, W] -> [B, H, W, anchor_n * C] -> [B, H*W, anchor_n*C]
            pred = pred.permute(0, 2, 3,
                                1).contiguous().view(B_, H_ * W_, abC_)

            # Divide prediction to obj_pred, xywh_pred and cls_pred
            # [B, H*W*anchor_n, 1]
            conf_pred = pred[:, :, :1 * self.num_anchors].contiguous().view(
                B_, H_ * W_ * self.num_anchors, 1)
            # [B, H*W*anchor_n, num_cls]
            cls_pred = pred[:, :, 1 * self.num_anchors:(1 + self.num_classes) *
                            self.num_anchors].contiguous().view(
                                B_, H_ * W_ * self.num_anchors,
                                self.num_classes)
            # [B, H*W*anchor_n, 4]
            txtytwth_pred = pred[:, :, (1 + self.num_classes) *
                                 self.num_anchors:].contiguous()

            total_conf_pred.append(conf_pred)
            total_cls_pred.append(cls_pred)
            total_txtytwth_pred.append(txtytwth_pred)
            B = B_
            HW += H_ * W_

        conf_pred = torch.cat(total_conf_pred, dim=1)
        cls_pred = torch.cat(total_cls_pred, dim=1)
        txtytwth_pred = torch.cat(total_txtytwth_pred, dim=1)

        # train
        if self.trainable:
            txtytwth_pred = txtytwth_pred.view(B, HW, self.num_anchors, 4)

            # decode x1y1x2y2 box coordinates from the txtytwth predictions
            x1y1x2y2_pred = (self.decode_boxes(txtytwth_pred) /
                             self.input_size).view(-1, 4)
            x1y1x2y2_gt = target[:, :, 7:].view(-1, 4)
            # compute the IoU between predicted boxes and gt boxes
            iou_pred = tools.iou_score(x1y1x2y2_pred,
                                       x1y1x2y2_gt).view(B, -1, 1)

            # gt conf; computing it under no_grad ensures the IoU does not backpropagate gradients
            with torch.no_grad():
                gt_conf = iou_pred.clone()

            # We use the IoU between predicted boxes and gt boxes as the learning target for objectness.
            # [obj, cls, txtytwth, scale_weight, x1y1x2y2] -> [conf, obj, cls, txtytwth, scale_weight]
            target = torch.cat([gt_conf, target[:, :, :7]], dim=2)
            txtytwth_pred = txtytwth_pred.view(B, -1, 4)

            # compute the loss
            conf_loss, cls_loss, bbox_loss, iou_loss = tools.loss(
                pred_conf=conf_pred,
                pred_cls=cls_pred,
                pred_txtytwth=txtytwth_pred,
                pred_iou=iou_pred,
                label=target)

            return conf_loss, cls_loss, bbox_loss, iou_loss

        # test
        else:
            txtytwth_pred = txtytwth_pred.view(B, HW, self.num_anchors, 4)
            with torch.no_grad():
                # batch size = 1
                # At test time the author assumes the batch size is 1,
                # so the batch dimension is unnecessary and [0] removes it.
                # [B, H*W*num_anchor, 1] -> [H*W*num_anchor, 1]
                conf_pred = torch.sigmoid(conf_pred)[0]
                # [B, H*W*num_anchor, 4] -> [H*W*num_anchor, 4]
                bboxes = torch.clamp(
                    (self.decode_boxes(txtytwth_pred) / self.input_size)[0],
                    0., 1.)
                # [B, H*W*num_anchor, C] -> [H*W*num_anchor, C],
                scores = torch.softmax(cls_pred[0, :, :], dim=1) * conf_pred

                # move the predictions to the CPU for post-processing
                scores = scores.to('cpu').numpy()
                bboxes = bboxes.to('cpu').numpy()

                # post-processing
                bboxes, scores, cls_inds = self.postprocess(bboxes, scores)

                return bboxes, scores, cls_inds
    def forward(self, x, target=None):
        # backbone
        c2, c3, c4, c5 = self.backbone(x)
        B = c5.size(0)

        # bottom-up
        p5 = self.spp(c5)
        p4 = self.deconv5(p5)
        p3 = self.deconv4(p4)
        p2 = self.deconv3(p3)

        # head
        cls_pred = self.cls_pred(p2)
        txty_pred = self.txty_pred(p2)
        twth_pred = self.twth_pred(p2)

        # train
        if self.trainable:
            # [B, H*W, num_classes]
            cls_pred = cls_pred.permute(0, 2, 3, 1).contiguous().view(
                B, -1, self.num_classes)
            # [B, H*W, 2]
            txty_pred = txty_pred.permute(0, 2, 3,
                                          1).contiguous().view(B, -1, 2)
            # [B, H*W, 2]
            twth_pred = twth_pred.permute(0, 2, 3,
                                          1).contiguous().view(B, -1, 2)

            # compute loss
            cls_loss, txty_loss, twth_loss, total_loss = tools.loss(
                pred_cls=cls_pred,
                pred_txty=txty_pred,
                pred_twth=twth_pred,
                label=target,
                num_classes=self.num_classes)

            return cls_loss, txty_loss, twth_loss, total_loss

        # test
        else:
            with torch.no_grad():
                # batch_size = 1
                cls_pred = torch.sigmoid(cls_pred)
                # simple nms
                hmax_1 = F.max_pool2d(cls_pred,
                                      kernel_size=3,
                                      padding=1,
                                      stride=1)
                hmax_2 = F.max_pool2d(cls_pred,
                                      kernel_size=5,
                                      padding=2,
                                      stride=1)
                hmax = torch.max(hmax_1, hmax_2)
                keep = (hmax == cls_pred).float()
                cls_pred *= keep

                # decode box
                txtytwth_pred = torch.cat([txty_pred, twth_pred],
                                          dim=1).permute(0, 2, 3,
                                                         1).contiguous().view(
                                                             B, -1, 4)
                # [B, H*W, 4] -> [H*W, 4]
                bbox_pred = torch.clamp(
                    (self.decode_boxes(txtytwth_pred) / self.scale_torch)[0],
                    0., 1.)

                # topk
                topk_scores, topk_inds, topk_clses = self._topk(cls_pred)

                topk_scores = topk_scores[0].cpu().numpy()
                topk_ind = topk_clses[0].cpu().numpy()
                topk_bbox_pred = bbox_pred[topk_inds[0]].cpu().numpy()

                if self.use_nms:
                    # nms
                    keep = np.zeros(len(topk_bbox_pred), dtype=np.int32)
                    for i in range(self.num_classes):
                        inds = np.where(topk_ind == i)[0]
                        if len(inds) == 0:
                            continue
                        c_bboxes = topk_bbox_pred[inds]
                        c_scores = topk_scores[inds]
                        c_keep = self.nms(c_bboxes, c_scores)
                        keep[inds[c_keep]] = 1

                    keep = np.where(keep > 0)
                    topk_bbox_pred = topk_bbox_pred[keep]
                    topk_scores = topk_scores[keep]
                    topk_ind = topk_ind[keep]

                return topk_bbox_pred, topk_scores, topk_ind
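
The use_nms branch above runs class-wise NMS through self.nms(c_bboxes, c_scores). A standard greedy implementation of such a call, sketched under the assumption of [x1, y1, x2, y2] boxes and an IoU threshold around 0.45 (the repo's own version may differ):

import numpy as np

def nms(bboxes, scores, iou_thresh=0.45):
    # greedy NMS over [N, 4] boxes; returns indices of the kept boxes
    x1, y1, x2, y2 = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]  # indices sorted by descending score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # overlap of the current best box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter + 1e-10)
        # keep only boxes that do not overlap the chosen box too much
        inds = np.where(iou <= iou_thresh)[0]
        order = order[inds + 1]
    return keep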
Example #24
def train_running():

    with tf.Graph().as_default():

        with tf.name_scope('input'):

            mnist = input_data.read_data_sets('../MNIST_data/', one_hot=True)

        x = tf.placeholder(tf.float32, shape=[None, 784])
        x_reshape = tf.reshape(x, [-1, 28, 28, 1])
        y_ = tf.placeholder(tf.float32, [None, num_classes])
        keep_prob = tf.placeholder(tf.float32)

        model = models.Model(x_reshape, num_classes)
        model.lenet5()
        logits = model.logits

        loss = tools.loss(logits, y_)
        regular_loss = tf.add_n(tf.get_collection('loss'))
        loss = loss + 1e-4 * regular_loss
        acc = tools.accuracy(logits, y_)
        train_op = tools.optimize(loss, learning_rate)

        with tf.Session() as sess:

            saver = tf.train.Saver()
            sess.run(tf.global_variables_initializer())

            summary_op = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
            val_writer = tf.summary.FileWriter(logs_val_dir, sess.graph)

            start_time = time.time()
            print('Training Start...')
            for step in np.arange(max_step):

                tra_images, tra_labels = mnist.train.next_batch(batch_size)
                _, tra_loss, tra_acc = sess.run([train_op, loss, acc],
                                                feed_dict={
                                                    x: tra_images,
                                                    y_: tra_labels,
                                                    keep_prob: 0.5
                                                })

                if step % 50 == 0:
                    print(
                        'Step %d, train loss = %.4f, train accuracy = %.2f%%' %
                        (step, tra_loss, tra_acc))
                    summary_str = sess.run(summary_op,
                                           feed_dict={
                                               x: tra_images,
                                               y_: tra_labels,
                                               keep_prob: 0.5
                                           })
                    train_writer.add_summary(summary_str, step)


                if step % 200 == 0 or (step + 1) == max_step:
                    val_loss, val_acc = sess.run(
                        [loss, acc],
                        feed_dict={
                            x: mnist.validation.images,
                            y_: mnist.validation.labels,
                            keep_prob: 1.0
                        })
                    print(
                        '**  Step %d, val loss = %.4f, val accuracy = %.2f%%  **'
                        % (step, val_loss, val_acc))
                    summary_str = sess.run(summary_op,
                                           feed_dict={
                                               x: mnist.validation.images,
                                               y_: mnist.validation.labels,
                                               keep_prob: 1.0
                                           })
                    val_writer.add_summary(summary_str, step)
                if step % 2000 == 0 or (step + 1) == max_step:
                    checkpoint_path = os.path.join(model_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step + 1)

            end_time = time.time()
            time_dif = end_time - start_time
            print('Training end...')
            print('Time usage: ' +
                  str(timedelta(seconds=int(round(time_dif)))))

            print('Testing...')
            test_acc = sess.run(acc,
                                feed_dict={
                                    x: mnist.test.images,
                                    y_: mnist.test.labels,
                                    keep_prob: 1.0
                                })
            print('Test accuracy: %.2f%%' % test_acc)
Example #25
def train():

    #    pre_trained_weights1 = './/vgg16.npy'
    pre_trained_weights = './/vgg-face.mat'
    data_dir = '/home/hadoop/Desktop/My-TensorFlow-tutorials-master/VGG face segmentation  recognition/data/segmentation/training/'
    train_log_dir = './/logss/train_shuffle/'
    val_log_dir = './/logss/va_shuffle/'

    #    image_batch, label_batch = notMNIST_input.read_and_decode(tfrecords_file,BATCH_SIZE)
    image, label = notMNIST_input.get_file(data_dir)
    #        image_batch,label_batch=notMNIST_input.get_batch(image, label, IMG_W, IMG_H, BATCH_SIZE, capacity)
    X = np.array(image)
    Y = np.array(label)
    kf = KFold(n_splits=10, shuffle=False)
    total_acc = 0
    for train, test in kf.split(X, Y):
        tf.reset_default_graph()
        image_batch, label_batch = notMNIST_input.get_batch(X[train],
                                                            Y[train],
                                                            IMG_W,
                                                            IMG_H,
                                                            BATCH_SIZE,
                                                            capacity,
                                                            shuffle=True)
        image_batch_validate, label_batch_validate = notMNIST_input.get_batch(
            X[test],
            Y[test],
            IMG_W,
            IMG_H,
            BATCH_SIZE,
            capacity,
            shuffle=False)
        #        print("dddd")
        ##        print("train_index: , test_index:", (X[train],Y[train],X[test],Y[test]))
        print("X[train]/n", len(X[train]))
        print("Y[train]/n", len(Y[train]))
        print("X[test]", len(X[test]))
        print("Y[test]", len(Y[test]))

        # cast float32 labels such as (1.8, 3.4) to int64 such as (1, 3)

        x = tf.placeholder(tf.float32,
                           shape=[BATCH_SIZE, IMG_W, IMG_H, 3],
                           name='place_x')
        y_ = tf.placeholder(tf.int64, shape=[
            BATCH_SIZE,
        ], name='place_y')
        logits = VGG.VGG16N(x, N_CLASSES, IS_PRETRAIN)
        print("****logits shape is ", logits.shape)

        loss = tools.loss(logits, y_)

        print("label_batch is ", y_.shape)
        accuracy = tools.accuracy(logits, y_)

        my_global_step = tf.Variable(0, name='global_step', trainable=False)
        #learning_rate = tf.train.exponential_decay(starter_learning_rate, my_global_step,
        #  2200, 0.96, staircase=True)
        train_op = tools.optimize(loss, starter_learning_rate, my_global_step)
        #    train_op_vali = tools.optimize(loss_vali, learning_rate, my_global_step)

        saver = tf.train.Saver(tf.global_variables())
        summary_op = tf.summary.merge_all()

        init = tf.global_variables_initializer()

        sess = tf.Session()

        sess.run(init)

        # load the parameter file, assign the parameters, skip the specific layers
        tools.load_with_skip(pre_trained_weights, sess, ['fc6', 'fc7', 'fc8'])

        merged_summaries = tf.summary.merge_all()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        tra_summary_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
        val_summary_writer = tf.summary.FileWriter(val_log_dir)
        max_acc = 0
        total_time = 0

        try:
            for step in np.arange(MAX_STEP):
                if coord.should_stop():
                    break
                start_time = time.time()
                #        with tf.Session() as sess:

                #                 for train, test in kf.split(X,Y):
                #                     image_batch,label_batch=notMNIST_input.get_batch(X[train], Y[train], IMG_W, IMG_H, BATCH_SIZE, capacity)
                #                     image_batch_validate, label_batch_validate=notMNIST_input.get_batch(X[test], Y[test], IMG_W, IMG_H, BATCH_SIZE, capacity)
                #                     label_batch = tf.cast(label_batch,dtype=tf.int64)
                x_train_a, y_train_a = sess.run([image_batch, label_batch])
                x_test_a, y_test_a = sess.run(
                    [image_batch_validate, label_batch_validate])
                #            _, tra_loss, tra_acc = sess.run([train_op, loss, accuracy])
                #            tra_images,tra_labels = sess.run([image_batch, label_batch])
                _, tra_loss, tra_acc = sess.run([train_op, loss, accuracy],
                                                feed_dict={
                                                    x: x_train_a,
                                                    y_: y_train_a
                                                })

                if step % 10 == 0 or (step + 1) == MAX_STEP:
                    feed_dict = {x: x_train_a, y_: y_train_a}
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    tra_summary_writer.add_summary(summary_str, step)
                    time_elapsed = time.time() - start_time
                    print(
                        'Step:%d , loss: %.2f, accuracy: %.2f%%(%.2f sec/step)'
                        % (step, tra_loss, tra_acc * 100, time_elapsed))

                    total_time = total_time + time_elapsed
                    if step % 50 == 0:
                        print('total time is :%.2f' % (total_time))

                if step % 200 == 0 or (step + 1) == MAX_STEP:

                    val_loss, val_acc = sess.run([loss, accuracy],
                                                 feed_dict={
                                                     x: x_test_a,
                                                     y_: y_test_a
                                                 })
                    feed_dict = {x: x_test_a, y_: y_test_a}
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    val_summary_writer.add_summary(summary_str, step)

                    #                if cur_val_loss > max_acc:
                    #                         max_acc = cur_val_loss
                    #                         best_step = step
                    #                         checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                    #                         saver.save(sess, checkpoint_path, global_step=step)
                    #                val_summary_writer.add_summary(summary, step)
                    #                print("Model updated and saved in file: %s" % checkpoint_path)
                    #                print ('*************step %5d: loss %.5f, acc %.5f --- loss val %0.5f, acc val %.5f************'%(best_step,tra_loss, tra_acc, cur_val_loss, cur_val_eval))

                    #

                    print(
                        '************validate result:Step:%d , loss: %.2f, accuracy: %.2f%%(%.2f sec/step)'
                        % (step, val_loss, val_acc * 100, time_elapsed))
                    if val_acc > max_acc:
                        max_acc = val_acc
                        checkpoint_path = os.path.join(train_log_dir,
                                                       'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=step)
            if max_acc > total_acc:
                total_acc = max_acc
                checkpoint_path = os.path.join(val_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()

        coord.join(threads)
        sess.close()
Example #26
def train():
    pre_trained_weights = './/vgg16_pretrain//vgg16.npy'
    data_dir = './/data//cifar-10-batches-bin//'
    train_log_dir = './/logs//train//'
    val_log_dir = './/logs//val//'

    with tf.name_scope('input'):
        tra_image_batch, tra_label_batch = input_data.read_cifar10(
            data_dir=data_dir,
            is_train=True,
            batch_size=BATCH_SIZE,
            shuffle=True)
        val_image_batch, val_label_batch = input_data.read_cifar10(
            data_dir=data_dir,
            is_train=False,
            batch_size=BATCH_SIZE,
            shuffle=False)

    x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    y_ = tf.placeholder(tf.int16, shape=[BATCH_SIZE, N_CLASSES])

    # build the graph on the placeholders so the feed_dicts below actually
    # drive loss and accuracy; built directly on the input batch tensors,
    # feeding x and y_ had no effect and validation silently evaluated
    # training batches
    logits = VGG.VGG16N(x, N_CLASSES, IS_PRETRAIN)
    loss = tools.loss(logits, y_)
    accuracy = tools.accuracy(logits, y_)
    my_global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = tools.optimize(loss, learning_rate, my_global_step)

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    # load the parameter file, assign the parameters, skip the specific layers
    tools.load_with_skip(pre_trained_weights, sess, ['fc6', 'fc7', 'fc8'])

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    tra_summary_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
    val_summary_writer = tf.summary.FileWriter(val_log_dir, sess.graph)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break

            tra_images, tra_labels = sess.run(
                [tra_image_batch, tra_label_batch])
            _, tra_loss, tra_acc = sess.run([train_op, loss, accuracy],
                                            feed_dict={
                                                x: tra_images,
                                                y_: tra_labels
                                            })
            if step % 50 == 0 or (step + 1) == MAX_STEP:
                print('Step: %d, loss: %.4f, accuracy: %.4f%%' %
                      (step, tra_loss, tra_acc))
                summary_str = sess.run(summary_op,
                                       feed_dict={x: tra_images, y_: tra_labels})
                tra_summary_writer.add_summary(summary_str, step)

            if step % 200 == 0 or (step + 1) == MAX_STEP:
                val_images, val_labels = sess.run(
                    [val_image_batch, val_label_batch])
                val_loss, val_acc = sess.run([loss, accuracy],
                                             feed_dict={
                                                 x: val_images,
                                                 y_: val_labels
                                             })
                print(
                    '**  Step %d, val loss = %.2f, val accuracy = %.2f%%  **' %
                    (step, val_loss, val_acc))

                summary_str = sess.run(summary_op,
                                       feed_dict={x: val_images, y_: val_labels})
                val_summary_writer.add_summary(summary_str, step)

            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()

    coord.join(threads)
    sess.close()
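
tools.accuracy in this example is printed directly with a %% format, which suggests the helper returns a percentage. A hypothetical TF1 helper consistent with that usage (assumed; each repo ships its own tools.py):

import tensorflow as tf

def accuracy(logits, labels):
    # hypothetical tools.accuracy for one-hot labels: fraction of
    # correct argmax predictions, scaled to a percentage and logged
    with tf.name_scope('accuracy'):
        correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
        acc = tf.reduce_mean(tf.cast(correct, tf.float32)) * 100.0
        tf.summary.scalar('accuracy', acc)
    return acc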
def train():
    pre_trained_weights = './vgg16_pretrain/vgg16.npy'
    train_data_dir = './data/train/scene_train_images_20170904/'
    train_label_json = './data/train/scene_train_annotations_20170904.json'
    val_data_dir = './data/val/scene_validation_images_20170908/'
    val_label_json = './data/val/scene_validation_annotations_20170908.json'
    train_log_dir = './logs/train/'
    val_log_dir = './logs/val/'

    with tf.name_scope('input'):

        tra_images, tra_labels = input_data.get_files(train_label_json,
                                                      train_data_dir)

        tra_image_batch, tra_label_batch = input_data.get_batch(
            tra_images, tra_labels, IMG_W, IMG_H, BATCH_SIZE, CAPACITY,
            N_CLASSES)

        val_images, val_labels = input_data.get_files(val_label_json,
                                                      val_data_dir)
        val_image_batch, val_label_batch = input_data.get_batch(
            val_images, val_labels, IMG_W, IMG_H, BATCH_SIZE, CAPACITY,
            N_CLASSES)

    x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    y_ = tf.placeholder(tf.int16, shape=[BATCH_SIZE, N_CLASSES])
    keep_prob = tf.placeholder(tf.float32)

    # %%
    logits = VGG.VGG16N(x, N_CLASSES, keep_prob, IS_PRETRAIN)
    # #%%
    # import ResNet
    # resnet = ResNet.ResNet()
    # _, logits = resnet.build(x, N_CLASSES, last_layer_type="softmax")
    # #%%
    # import InceptionV4
    # inception = InceptionV4.InceptionModel(x, [BATCH_SIZE, IMG_W, IMG_H, 3], [BATCH_SIZE, N_CLASSES], keep_prob,
    #                                        ckpt_path='train_model/model', model_path='saved_model/model')
    # logits = inception.define_model()
    # print('shape{}'.format(logits.shape))
    loss = tools.loss(logits, y_)
    accuracy = tools.accuracy(logits, y_)
    my_global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = tools.optimize(loss, learning_rate, my_global_step)

    saver = tf.train.Saver(tf.global_variables())
    #    summary_op = tf.summary.merge_all()

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    # load the parameter file, assign the parameters, skip the specific layers
    # tools.load_with_skip(pre_trained_weights, sess, ['fc6', 'fc7', 'fc8'])

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    #    tra_summary_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
    #    val_summary_writer = tf.summary.FileWriter(val_log_dir, sess.graph)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break

            train_images, train_labels = sess.run(
                [tra_image_batch, tra_label_batch])
            # print(str(train_images.get_shape()))
            _, tra_loss, tra_acc = sess.run([train_op, loss, accuracy],
                                            feed_dict={
                                                x: train_images,
                                                y_: train_labels,
                                                keep_prob: 0.2
                                            })
            if step % 50 == 0 or (step + 1) == MAX_STEP:
                #                _, tra_loss, tra_acc = sess.run([train_op, loss, accuracy],
                #                                                feed_dict={x: train_images, y_: train_labels})
                print('Step: %d, loss: %.3f, accuracy: %.3f%%' %
                      (step, tra_loss, tra_acc))
            # summary_str = sess.run(summary_op)
            #                tra_summary_writer.add_summary(summary_str, step)

            if step % 200 == 0 or (step + 1) == MAX_STEP:
                validation_images, validation_labels = sess.run(
                    [val_image_batch, val_label_batch])
                val_loss, val_acc = sess.run([loss, accuracy],
                                             feed_dict={
                                                 x: validation_images,
                                                 y_: validation_labels,
                                                 keep_prob: 1
                                             })
                print(
                    '**  Step %d, val loss = %.2f, val accuracy = %.2f%%  **' %
                    (step, val_loss, val_acc))

            # summary_str = sess.run(summary_op)
            #                val_summary_writer.add_summary(summary_str, step)

            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()

    coord.join(threads)
    sess.close()
Example #28
def train():
    data_dir = '/home/xinlong/Tensorflow_workspace/canjian_AlexNet/JPG/trainval/'
    train_log_dir = '/home/xinlong/Tensorflow_workspace/canjian_AlexNet/log/train/'
    val_log_dir = '/home/xinlong/Tensorflow_workspace/canjian_AlexNet/log/val/'

    with tf.name_scope('input'):
        train, train_label, val, val_label = input_trainval.get_files(data_dir, 0.2)
        train_batch, train_label_batch = input_trainval.get_batch(train, train_label,
                                                                  IMG_H, IMG_W,
                                                                  BATCH_SIZE,
                                                                  CAPACITY)
        # use the same (IMG_H, IMG_W) order as the train batch and the
        # x placeholder below; the original swapped the dimensions here
        val_batch, val_label_batch = input_trainval.get_batch(val, val_label,
                                                              IMG_H, IMG_W,
                                                              BATCH_SIZE,
                                                              CAPACITY)

        x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_H, IMG_W, 3])
        y_ = tf.placeholder(tf.int32, shape=[BATCH_SIZE])

        logits = model_structure.AlexNet(x, 5)
        loss = tools.loss('loss', y_, logits)
        accuracy = tools.accuracy('accuracy', y_, logits)

        my_global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op = tools.optimize('optimize', loss, LEARNING_RATE, my_global_step) #??

        saver = tf.train.Saver(tf.global_variables())
        summary_op = tf.summary.merge_all()

        init = tf.global_variables_initializer()


        with tf.Session() as sess:
            sess.run(init)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            tra_summary_writer = tf.summary.FileWriter(train_log_dir,sess.graph)
            val_summary_writer = tf.summary.FileWriter(val_log_dir, sess.graph)

            try:
                for step in np.arange(MAX_STEP):
                    if coord.should_stop():
                        break

                    tra_images, tra_labels = sess.run([train_batch, train_label_batch])

                    _, tra_loss, tra_acc = sess.run([train_op, loss, accuracy],feed_dict={x:tra_images, y_:tra_labels})

                    if step % 10 == 0 or (step + 1) == MAX_STEP:
                        print('Step: %d, loss: %.4f, accuracy: %.4f' %(step, tra_loss, tra_acc))

                        #summary_str = sess.run(summary_op)

                        #tra_summary_writer.add_summary(summary_str, step)
                        checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=step)
                    
                    if step % 20 == 0 or (step + 1) == MAX_STEP:
                        valid_images, valid_labels = sess.run([val_batch, val_label_batch])
                        valid_loss, valid_acc = sess.run([loss, accuracy],
                                                         feed_dict={x:valid_images, y_:valid_labels})
                        print( '** step: %d,  loss: %.4f,  accuracy: %.4f' %(step, valid_loss, valid_acc))
                        #summary_str = sess.run(summary_op)
                        #val_summary_writer.add_summary(summary_str, step)


                    if step % 2000 == 0 or (step + 1) == MAX_STEP:
                        checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=step)


            except tf.errors.OutOfRangeError:
                print('Done training -- epoch limit reached')
            finally:
                coord.request_stop()

            coord.join(threads)
Example #29
def train():
    print('loading data............')

    # load the data
    with tf.name_scope('input'):
        train, train_label, test, test_label = Process.get_data(
            train_path, test_path)
        train_batch, train_label_batch = Process.get_batch(
            train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
        test_batch, test_label_batch = Process.get_batch(
            test, test_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

    print('loading batch data complete.......')

    # create placeholders for the inputs and labels
    x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    y_ = tf.placeholder(tf.int16, shape=[BATCH_SIZE, N_CLASS])

    # define the model
    logits = vgg.VGG16N(x, N_CLASS, IS_PRETRAIN)
    # define the loss
    loss = tools.loss(logits, y_)
    # compute the accuracy
    accuracy = tools.accuracy(logits, y_)
    # global step
    my_global_step = tf.Variable(0, name='global_step', trainable=False)
    # gradient descent optimizer
    train_op = tools.optimize(loss, learning_rate, my_global_step)

    # saver for training checkpoints
    saver = tf.train.Saver(tf.global_variables())
    #summary_op = tf.summary.merge_all()
    # global variable initialization op
    init = tf.global_variables_initializer()
    # create the session
    sess = tf.Session()
    # run the initializer
    sess.run(init)
    # start the coordinator
    coord = tf.train.Coordinator()
    # start the queue runners
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # some TensorBoard visualization ops; commented out for now because they caused problems
    #  tra_summary_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
    #  val_summary_writer = tf.summary.FileWriter(test_log_dir, sess.graph)

    print('all init has been done! start training')

    try:
        for step in np.arange(MAX_STEP):
            print('current step: ' + str(step))
            if coord.should_stop():
                break
            # fetch a batch from the queue
            tra_images, tra_labels = sess.run([train_batch, train_label_batch])
            # compute the loss and accuracy
            _, tra_loss, tra_acc = sess.run([train_op, loss, accuracy],
                                            feed_dict={
                                                x: tra_images,
                                                y_: tra_labels
                                            })

            # every 10 steps, print the training accuracy on the current batch
            if step % 10 == 0 or (step + 1) == MAX_STEP:
                print('Step: %d, loss: %.4f, accuracy: %.4f%%' %
                      (step, tra_loss, tra_acc))
            # summary_str = sess.run(summary_op)
            # tra_summary_writer.add_summary(summary_str, step)

            # every 200 steps, run a batch of test data to check accuracy
            if step % 200 == 0 or (step + 1) == MAX_STEP:
                val_images, val_labels = sess.run(
                    [test_batch, test_label_batch])
                val_loss, val_acc = sess.run([loss, accuracy],
                                             feed_dict={
                                                 x: val_images,
                                                 y_: val_labels
                                             })
                print(
                    '**  Step %d, val loss = %.2f, val accuracy = %.2f%%  **' %
                    (step, val_loss, val_acc))

            #  summary_str = sess.run(summary_op)
            #   val_summary_writer.add_summary(summary_str, step)

            # every 2000 steps (and at the last step), save a checkpoint
            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()

    coord.join(threads)
    sess.close()
def train():
    
    pre_trained_weights = './/vgg16_pretrain//vgg16.npy'
    data_dir = './/data//cifar-10-batches-bin//'
    train_log_dir = './/logs//train//'
    val_log_dir = './/logs//val//'
    
    with tf.name_scope('input'):
        tra_image_batch, tra_label_batch = input_data.read_cifar10(data_dir=data_dir,
                                                 is_train=True,
                                                 batch_size= BATCH_SIZE,
                                                 shuffle=True)
        val_image_batch, val_label_batch = input_data.read_cifar10(data_dir=data_dir,
                                                 is_train=False,
                                                 batch_size= BATCH_SIZE,
                                                 shuffle=False)
    
    x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    y_ = tf.placeholder(tf.int16, shape=[BATCH_SIZE, N_CLASSES])

    # build the graph on the placeholders so the feed_dicts below actually
    # drive loss and accuracy (same fix as in the earlier cifar-10 train())
    logits = VGG.VGG16N(x, N_CLASSES, IS_PRETRAIN)
    loss = tools.loss(logits, y_)
    accuracy = tools.accuracy(logits, y_)
    my_global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = tools.optimize(loss, learning_rate, my_global_step)
    
    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()   
       
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    
    # load the parameter file, assign the parameters, skip the specific layers
    tools.load_with_skip(pre_trained_weights, sess, ['fc6','fc7','fc8'])   


    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)    
    tra_summary_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
    val_summary_writer = tf.summary.FileWriter(val_log_dir, sess.graph)
    
    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                    break
                
            tra_images,tra_labels = sess.run([tra_image_batch, tra_label_batch])
            _, tra_loss, tra_acc = sess.run([train_op, loss, accuracy],
                                            feed_dict={x: tra_images, y_: tra_labels})
            if step % 50 == 0 or (step + 1) == MAX_STEP:
                print('Step: %d, loss: %.4f, accuracy: %.4f%%' % (step, tra_loss, tra_acc))
                summary_str = sess.run(summary_op, feed_dict={x: tra_images, y_: tra_labels})
                tra_summary_writer.add_summary(summary_str, step)
                
            if step % 200 == 0 or (step + 1) == MAX_STEP:
                val_images, val_labels = sess.run([val_image_batch, val_label_batch])
                val_loss, val_acc = sess.run([loss, accuracy],
                                             feed_dict={x: val_images, y_: val_labels})
                print('**  Step %d, val loss = %.2f, val accuracy = %.2f%%  **' % (step, val_loss, val_acc))

                summary_str = sess.run(summary_op, feed_dict={x: val_images, y_: val_labels})
                val_summary_writer.add_summary(summary_str, step)
                    
            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()
        
    coord.join(threads)
    sess.close()
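
Both training scripts above lean on a tools module that is not shown. For orientation, tools.loss and tools.accuracy are presumably thin wrappers along the following lines; this is a sketch with inferred signatures (the one-hot-label variant used by train(); the earlier script feeds sparse int labels and would use the sparse cross-entropy instead), not the confirmed implementation:

import tensorflow as tf

def loss(logits, labels):
    """Softmax cross-entropy; labels are assumed one-hot, as fed by train()."""
    with tf.name_scope('loss'):
        xent = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=tf.cast(labels, tf.float32))
        loss_op = tf.reduce_mean(xent, name='loss')
        tf.summary.scalar('loss', loss_op)
    return loss_op

def accuracy(logits, labels):
    """Percentage of predictions that match the one-hot labels."""
    with tf.name_scope('accuracy'):
        correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
        acc = tf.reduce_mean(tf.cast(correct, tf.float32)) * 100.0
        tf.summary.scalar('accuracy', acc)
    return acc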
Beispiel #31
0
    def forward(self, x, target=None):
        # backbone
        C_4, C_5 = self.backbone(x)

        # detection head
        # multi scale feature map fusion
        C_5 = self.conv_set_2(C_5)
        C_5_up = F.interpolate(self.conv_1x1_2(C_5),
                               scale_factor=2.0,
                               mode='bilinear',
                               align_corners=True)

        C_4 = torch.cat([C_4, C_5_up], dim=1)
        C_4 = self.conv_set_1(C_4)

        # head
        # s = 32
        C_5 = self.extra_conv_2(C_5)
        pred_2 = self.pred_2(C_5)

        # s = 16
        pred_1 = self.pred_1(C_4)

        preds = [pred_1, pred_2]
        total_conf_pred = []
        total_cls_pred = []
        total_txtytwth_pred = []
        B = HW = 0
        for pred in preds:
            B_, abC_, H_, W_ = pred.size()

            # [B, anchor_n * C, H, W] -> [B, H, W, anchor_n * C] -> [B, H*W, anchor_n*C]
            pred = pred.permute(0, 2, 3, 1).contiguous().view(B_, H_ * W_, abC_)

            # Divide prediction to obj_pred, xywh_pred and cls_pred
            # [B, H*W*anchor_n, 1]
            conf_pred = pred[:, :, :1 * self.anchor_number].contiguous().view(
                B_, H_ * W_ * self.anchor_number, 1)
            # [B, H*W*anchor_n, num_cls]
            cls_pred = pred[:, :,
                            1 * self.anchor_number:(1 + self.num_classes) *
                            self.anchor_number].contiguous().view(
                                B_, H_ * W_ * self.anchor_number,
                                self.num_classes)
            # [B, H*W*anchor_n, 4]
            txtytwth_pred = pred[:, :, (1 + self.num_classes) *
                                 self.anchor_number:].contiguous()

            total_conf_pred.append(conf_pred)
            total_cls_pred.append(cls_pred)
            total_txtytwth_pred.append(txtytwth_pred)
            B = B_
            HW += H_ * W_

        conf_pred = torch.cat(total_conf_pred, 1)
        cls_pred = torch.cat(total_cls_pred, 1)
        txtytwth_pred = torch.cat(total_txtytwth_pred, 1)

        # test
        if not self.trainable:
            txtytwth_pred = txtytwth_pred.view(B, HW, self.anchor_number, 4)
            with torch.no_grad():
                # batch size = 1
                all_obj = torch.sigmoid(conf_pred)[0]  # index 0: batch size is 1 at test time
                all_bbox = torch.clamp(
                    (self.decode_boxes(txtytwth_pred) / self.scale_torch)[0],
                    0., 1.)
                all_class = (torch.softmax(cls_pred[0, :, :], dim=1) * all_obj)
                # separate box pred and class conf
                all_obj = all_obj.to('cpu').numpy()
                all_class = all_class.to('cpu').numpy()
                all_bbox = all_bbox.to('cpu').numpy()

                bboxes, scores, cls_inds = self.postprocess(
                    all_bbox, all_class)

                # print(len(all_boxes))
                return bboxes, scores, cls_inds

        else:
            txtytwth_pred = txtytwth_pred.view(B, HW, self.anchor_number, 4)
            # decode bboxes, detached from the graph: the IoU computed below is
            # used as the objectness label, so no gradient should flow through it.
            with torch.no_grad():
                x1y1x2y2_pred = (self.decode_boxes(txtytwth_pred) /
                                 self.scale_torch).view(-1, 4)

            txtytwth_pred = txtytwth_pred.view(B, -1, 4)

            x1y1x2y2_gt = target[:, :, 7:].view(-1, 4)

            # compute iou
            iou = tools.iou_score(x1y1x2y2_pred, x1y1x2y2_gt).view(B, -1, 1)
            # print(iou.min(), iou.max())

            # use the IoU between the predicted and gt boxes as the conf label.
            # [obj, cls, txtytwth, x1y1x2y2] -> [conf, obj, cls, txtytwth]
            target = torch.cat([iou, target[:, :, :7]], dim=2)

            conf_loss, cls_loss, txtytwth_loss, total_loss = tools.loss(
                pred_conf=conf_pred,
                pred_cls=cls_pred,
                pred_txtytwth=txtytwth_pred,
                label=target,
                num_classes=self.num_classes,
                obj_loss_f='mse')

            return conf_loss, cls_loss, txtytwth_loss, total_loss
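
tools.iou_score above is again external to the snippet. As a rough sketch of what the call sites imply — an element-wise IoU over corner-format boxes — the following is illustrative only; the real helper may differ in details such as the epsilon:

import torch

def iou_score(boxes_a, boxes_b):
    """Element-wise IoU between two [N, 4] tensors of x1y1x2y2 boxes."""
    tl = torch.max(boxes_a[:, :2], boxes_b[:, :2])  # top-left of the intersection
    br = torch.min(boxes_a[:, 2:], boxes_b[:, 2:])  # bottom-right of the intersection
    wh = (br - tl).clamp(min=0)                     # zero width/height if disjoint
    inter = wh[:, 0] * wh[:, 1]
    area_a = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
    area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])
    return inter / (area_a + area_b - inter + 1e-14)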
Beispiel #32
0
def run_training():
    num_classes = 1329
    IMG_W = 448
    IMG_H = 448
    CAPACITY = 1000
    train_dir = 'tfrecords'
    BATCH_SIZE = FLAGS.batch_size
    train_all = FLAGS.train_all
    learning_rate = FLAGS.learning_rate
    momentum = FLAGS.momentum
    num_epoch = FLAGS.num_epoch
    logger.info('learning_rate ' + str(learning_rate))
    logger.info('num_epoch ' + str(num_epoch))
    total_train_count, total_val_count, total_test_count = input_data.get_total_count(
        'total_count.txt')
    train_batch, train_label_batch = input_data.get_batch(
        train_dir, 'train', IMG_W, IMG_H, BATCH_SIZE, CAPACITY, True)
    val_batch, val_label_batch = input_data.get_batch(train_dir, 'validataion',
                                                      IMG_W, IMG_H, BATCH_SIZE,
                                                      CAPACITY, False)
    test_batch, test_label_batch = input_data.get_batch(
        train_dir, 'test', IMG_W, IMG_H, BATCH_SIZE, CAPACITY, False)

    imgs = tf.placeholder(tf.float32, [BATCH_SIZE, IMG_W, IMG_H, 3])
    labels = tf.placeholder(tf.int32, [BATCH_SIZE])
    keep_pro = tf.placeholder(tf.float32)
    vgg = bilinear_vgg(imgs, num_classes, train_all, keep_pro)
    loss = tools.loss(vgg.logits, labels)
    accuracy, num_correct_preds = tools.evaluation(vgg.logits, labels)
    optimizer = tools.optimize(loss, learning_rate, momentum)

    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(gpu_options=gpu_options)
    with tf.Session(config=config) as sess:
        if not os.path.exists(checkpoint):
            os.makedirs(checkpoint)

        sess.run(tf.global_variables_initializer())

        weight_files = ['vgg19.npy']
        if train_all:
            weight_files.append('last_layers.npz')
        tools.load_initial_weights(weight_files, sess, train_all)

        saver = tf.train.Saver()
        '''
        logger.info("Reading checkpoints...")
        ckpt = tf.train.get_checkpoint_state(checkpoint)
        if ckpt and ckpt.model_checkpoint_path:
           global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
           saver.restore(sess, ckpt.model_checkpoint_path)
           logger.info('Loading success, global_step is ' +  global_step)
        else:
           print('No checkpoint file found') 
        '''
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        summary_op = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
        val_writer = tf.summary.FileWriter(logs_val_dir, sess.graph)

        total_batch = total_train_count // BATCH_SIZE
        total_val_batch = total_val_count // BATCH_SIZE
        for epoch in range(0, num_epoch):
            for i in range(total_batch):
                try:
                    batch_xs, batch_ys = sess.run(
                        [train_batch, train_label_batch])  # keep the output names distinct from the tensor names
                    _ = sess.run(optimizer,
                                 feed_dict={
                                     imgs: batch_xs,
                                     labels: batch_ys,
                                     keep_pro: 0.7
                                 })
                    if i % 50 == 0:
                        train_loss, train_accuracy, summary_str = sess.run(
                            [loss, accuracy, summary_op],
                            feed_dict={
                                imgs: batch_xs,
                                labels: batch_ys,
                                keep_pro: 0.7
                            })
                        train_writer.add_summary(summary_str,
                                                 epoch * total_batch + i)
                        logger.info("Epoch: " + str(epoch) + " Step: " +
                                    str(i) + " Loss: " + str(train_loss))
                        logger.info("Training Accuracy --> " +
                                    str(train_accuracy))

                        batch_val_x, batch_val_y = sess.run(
                            [val_batch, val_label_batch])
                        val_loss, val_accuracy, val_summary_str = sess.run(
                            [loss, accuracy, summary_op],
                            feed_dict={
                                imgs: batch_val_x,
                                labels: batch_val_y,
                                keep_pro: 1.0
                            })
                        val_writer.add_summary(val_summary_str,
                                               epoch * total_batch + i)
                        logger.info("val Loss: " + str(train_loss))
                        logger.info("val Accuracy --> " + str(train_accuracy))
                except tf.errors.OutOfRangeError:
                    logger.info('batch out of range')
                    break
            checkpoint_path = os.path.join(checkpoint, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=epoch)
            if not train_all:
                tools.save_last_layers_weights(sess, vgg)
        # correct_val_count = 0
        # val_loss_total = 0.0
        # for i in range(total_val_batch):
        #     try:
        #         batch_val_x, batch_val_y = sess.run([val_batch, val_label_batch])
        #         val_loss, preds = sess.run([loss, num_correct_preds],
        #                                    feed_dict={imgs: batch_val_x, labels: batch_val_y})
        #         val_loss_total += val_loss
        #         correct_val_count += preds
        #         val_writer.add_summary(summary_str, epoch * total_batch + i)
        #     except tf.errors.OutOfRangeError:
        #         logger.info('val batch out of range')
        #         break
        # logger.info("------------")
        # logger.info("Epoch: " + str(epoch + 1) + " correct_val_count, total_val_count "
        #             + str(correct_val_count) + " , " + str(total_val_count))
        # logger.info("Epoch: " + str(epoch + 1) + " Step: " + str(i)
        #             + " Loss: " + str(val_loss_total / total_val_batch))
        # logger.info("Validation Data Accuracy --> "
        #             + str(100.0 * correct_val_count / (1.0 * total_val_count)))
        # logger.info("------------")
        correct_test_count = 0
        total_test_batch = total_test_count // BATCH_SIZE
        for i in range(total_test_batch):
            try:
                batch_test_x, batch_test_y = sess.run(
                    [test_batch, test_label_batch])
                preds = sess.run(num_correct_preds,
                                 feed_dict={
                                     imgs: batch_test_x,
                                     labels: batch_test_y,
                                     keep_pro: 1.0
                                 })
                correct_test_count += preds
            except tf.errors.OutOfRangeError:
                logger.info('test batch out of range')
                break
        logger.info("correct_test_count, total_test_count " +
                    str(correct_test_count) + " , " + str(total_test_count))
        logger.info("Test Data Accuracy --> " +
                    str(100.0 * correct_test_count / (1.0 * total_test_count)))

        coord.request_stop()
        coord.join(threads)
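
The bilinear_vgg model itself is not shown. The usual reading of "bilinear" in this context is bilinear pooling (Lin et al., B-CNN): take the outer product of the conv features at each spatial location, average over locations, then apply a signed square root and L2 normalization before the classifier. A minimal sketch under that assumption; the function name and details here are illustrative, not the repository's actual code:

import tensorflow as tf

def bilinear_pool(conv_feat):
    """conv_feat: [B, H, W, C] feature map with a static shape."""
    _, h, w, c = conv_feat.get_shape().as_list()
    flat = tf.reshape(conv_feat, [-1, h * w, c])                # [B, H*W, C]
    phi = tf.matmul(flat, flat, transpose_a=True) / (h * w)    # [B, C, C] averaged outer product
    phi = tf.reshape(phi, [-1, c * c])
    phi = tf.sign(phi) * tf.sqrt(tf.abs(phi) + 1e-12)          # signed square root
    return tf.nn.l2_normalize(phi, axis=1)                     # L2-normalized descriptor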