def test(opt):
    logger = Logger(opt)
    dataset = VISTDataset(opt)
    opt.vocab_size = dataset.get_vocab_size()
    opt.seq_length = dataset.get_story_length()

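    # Switch the dataset to the test split and run the trained model over it.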
    dataset.test()
    test_loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.workers)
    evaluator = Evaluator(opt, 'test')
    model = models.setup(opt)
    model.cuda()
    predictions, metrics = evaluator.test_story(model, dataset, test_loader, opt)
    def test_model(self):
        self.output_dir = self.create_output_dir()

        save_root = os.path.join('results', self.cfg.EXP_DIR)
        save_dir = os.path.join(save_root, self.cfg.DATASET.DATASET)
        self.writer = SummaryWriter(log_dir=self.output_dir)
        previous = self.find_previous()
        iteration = 0

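        # Arguments handed to the Evaluator; munchify() turns the dict into attribute-style access.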
        eval_args = {
            'checkpoints_dir': save_root,
            'K': self.cfg.MODEL.K,
            'dataset': self.cfg.DATASET.DATASET,
            'path': self.cfg.DATASET.DATASET_DIR,
            'split': self.test_loader.dataset.split,
            'redo': False,
            'eval_mode': self.cfg.DATASET.INPUT_TYPE
        }

        if previous:
            for iteration, resume_checkpoint in zip(previous[0], previous[1]):
                if self.cfg.TEST.TEST_SCOPE[
                        0] <= iteration <= self.cfg.TEST.TEST_SCOPE[1]:
                    self.resume_checkpoint(resume_checkpoint)
                    self.validation_loop(iteration,
                                         phase=self.cfg.PHASE,
                                         save=save_dir)

                    ## Call evaluator object
                    eval_args['eval_iter'] = iteration
                    # eval_args['checkpoints_dir'] =  os.path.join(save_dir,
                    #                       self.test_loader.dataset.input_type + '-' + str(self.test_loader.dataset.split).zfill(2) + '-' + str(
                    #                           iteration).zfill(6))
                    evaluator_ = Evaluator(munchify(eval_args))
                    evaluator_.normal_summarize()

        else:
            print("Loading pretrained checkpoint")
            if len(self.cfg.RESUME_CHECKPOINT) == 2:
                self.resume_checkpoint_separate(self.cfg.RESUME_CHECKPOINT,
                                                self.cfg.TRAIN.RESUME_SCOPE)
            elif len(self.cfg.RESUME_CHECKPOINT) == 1:
                self.resume_checkpoint_pretrained(
                    self.model, self.cfg.RESUME_CHECKPOINT[0],
                    self.cfg.TRAIN.RESUME_SCOPE)
            self.validation_loop(iteration,
                                 phase=self.cfg.PHASE,
                                 save=save_dir)

            ## Call evaluator object
            eval_args['eval_iter'] = iteration
            # eval_args['checkpoints_dir'] =  os.path.join(save_dir,
            #                               self.test_loader.dataset.input_type + '-' + str(self.test_loader.dataset.split).zfill(2) + '-' + str(
            #                                   iteration).zfill(6))
            evaluator_ = Evaluator(munchify(eval_args))
            evaluator_.normal_summarize()
Example #3
def train(opt):
    logger = Logger(opt)
    flag = Flag(D_iters=opt.D_iter, G_iters=opt.G_iter, always=opt.always)
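    # Flag alternates the training target between the discriminator (D_iters steps) and the generator (G_iters steps).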
    ################### set up dataset and dataloader ########################
    dataset = VISTDataset(opt)
    opt.vocab_size = dataset.get_vocab_size()
    opt.seq_length = dataset.get_story_length()

    dataset.set_option(data_type={
        'whole_story': False,
        'split_story': True,
        'caption': False
    })

    dataset.train()
    train_loader = DataLoader(dataset,
                              batch_size=opt.batch_size,
                              shuffle=opt.shuffle,
                              num_workers=opt.workers)
    dataset.val()
    val_loader = DataLoader(dataset,
                            batch_size=opt.batch_size,
                            shuffle=False,
                            num_workers=opt.workers)

    ##################### set up model, criterion and optimizer ######
    bad_valid = 0

    # set up evaluator
    evaluator = Evaluator(opt, 'val')

    # set up criterion
    crit = criterion.LanguageModelCriterion()
    rl_crit = criterion.ReinforceCriterion(opt, dataset)

    # set up model
    model = models.setup(opt)
    model.cuda()
    disc_opt = copy.copy(opt)
    disc_opt.model = 'RewardModel'
    disc = models.setup(disc_opt)
    if os.path.exists(os.path.join(logger.log_dir, 'disc-model.pth')):
        logging.info("loading pretrained RewardModel")
        disc.load_state_dict(
            torch.load(os.path.join(logger.log_dir, 'disc-model.pth')))
    disc.cuda()

    # set up optimizer
    optimizer = setup_optimizer(opt, model)
    disc_optimizer = setup_optimizer(opt, disc)

    dataset.train()
    model.train()
    disc.train()
    ############################## training ##################################
    for epoch in range(logger.epoch_start, opt.max_epochs):
        # Assign the scheduled sampling prob

        start = time.time()
        for iter, batch in enumerate(train_loader):
            logger.iteration += 1
            torch.cuda.synchronize()

            feature_fc = Variable(batch['feature_fc']).cuda()
            target = Variable(batch['split_story']).cuda()
            index = batch['index']

            optimizer.zero_grad()
            disc_optimizer.zero_grad()

            if flag.flag == "Disc":
                model.eval()
                disc.train()
                if opt.decoding_method_DISC == 'sample':
                    seq, seq_log_probs, baseline = model.sample(
                        feature_fc,
                        sample_max=False,
                        rl_training=True,
                        pad=True)
                elif opt.decoding_method_DISC == 'greedy':
                    seq, seq_log_probs, baseline = model.sample(
                        feature_fc,
                        sample_max=True,
                        rl_training=True,
                        pad=True)
            else:
                model.train()
                disc.eval()
                seq, seq_log_probs, baseline = model.sample(feature_fc,
                                                            sample_max=False,
                                                            rl_training=True,
                                                            pad=True)

            seq = Variable(seq).cuda()
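            # Mask valid positions; the right-shift keeps position 0 and the step that emits the end token in the sum.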
            mask = (seq > 0).float()
            mask = to_contiguous(
                torch.cat([
                    Variable(
                        mask.data.new(mask.size(0), mask.size(1), 1).fill_(1)),
                    mask[:, :, :-1]
                ], 2))
            normed_seq_log_probs = (seq_log_probs *
                                    mask).sum(-1) / mask.sum(-1)

            gen_score = disc(seq.view(-1, seq.size(2)),
                             feature_fc.view(-1, feature_fc.size(2)))

            if flag.flag == "Disc":
                gt_score = disc(target.view(-1, target.size(2)),
                                feature_fc.view(-1, feature_fc.size(2)))
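                # Discriminator objective: raise the reward of ground-truth stories and lower that of sampled ones.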
                loss = -torch.sum(gt_score) + torch.sum(gen_score)

                avg_pos_score = torch.mean(gt_score)
                avg_neg_score = torch.mean(gen_score)

                if logger.iteration % 5 == 0:
                    logging.info("pos reward {} neg reward {}".format(
                        avg_pos_score.data[0], avg_neg_score.data[0]))
                    print(
                        "PREDICTION: ",
                        utils.decode_story(dataset.get_vocab(),
                                           seq[:1].data)[0])
                    print(
                        "GROUND TRUTH: ",
                        utils.decode_story(dataset.get_vocab(),
                                           target[:1].data)[0])
            else:
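                # Policy reward: discriminator score minus a small penalty on the length-normalized log-probability of the sample.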
                rewards = Variable(gen_score.data -
                                   0.001 * normed_seq_log_probs.data)
                #with open("/tmp/reward.txt", "a") as f:
                #    print(" ".join(map(str, rewards.data.cpu().numpy())), file=f)
                loss, avg_score = rl_crit(seq.data, seq_log_probs, baseline,
                                          index, rewards)
                # if logger.iteration % opt.losses_log_every == 0:
                avg_pos_score = torch.mean(gen_score)
                logging.info("average reward: {} average IRL score: {}".format(
                    avg_score.data[0], avg_pos_score.data[0]))

            if flag.flag == "Disc":
                loss.backward()
                nn.utils.clip_grad_norm(disc.parameters(),
                                        opt.grad_clip,
                                        norm_type=2)
                disc_optimizer.step()
            else:
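                # Mix the policy-gradient loss with a teacher-forcing cross-entropy loss, weighted by opt.rl_weight.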
                tf_loss = crit(model(feature_fc, target), target)
                print("rl_loss / tf_loss = ", loss.data[0] / tf_loss.data[0])
                loss = opt.rl_weight * loss + (1 - opt.rl_weight) * tf_loss
                loss.backward()
                nn.utils.clip_grad_norm(model.parameters(),
                                        opt.grad_clip,
                                        norm_type=2)
                optimizer.step()

            train_loss = loss.data[0]
            torch.cuda.synchronize()

            # Write the training loss summary
            if logger.iteration % opt.losses_log_every == 0:
                logger.log_training(epoch, iter, train_loss, opt.learning_rate,
                                    model.ss_prob)
                logging.info(
                    "Epoch {} Train {} - Iter {} / {}, loss = {:.5f}, time used = {:.3f}s"
                    .format(epoch, flag.flag, iter, len(train_loader),
                            train_loss,
                            time.time() - start))
                start = time.time()

            if logger.iteration % opt.save_checkpoint_every == 0:
                if opt.always is None:
                    # Evaluate on validation dataset and save model for every epoch
                    val_loss, predictions, metrics = evaluator.eval_story(
                        model, crit, dataset, val_loader, opt)
                    if opt.metric == 'XE':
                        score = -val_loss
                    else:
                        score = metrics[opt.metric]
                    logger.log_checkpoint(epoch, val_loss, metrics,
                                          predictions, opt, model, dataset,
                                          optimizer)
                    # halve the learning rate if not improving for a long time
                    if logger.best_val_score > score:
                        bad_valid += 1
                        if bad_valid >= 10:
                            opt.learning_rate = opt.learning_rate / 2.0
                            logging.info("halve learning rate to {}".format(
                                opt.learning_rate))
                            checkpoint_path = os.path.join(
                                logger.log_dir, 'model-best.pth')
                            model.load_state_dict(torch.load(checkpoint_path))
                            utils.set_lr(
                                optimizer,
                                opt.learning_rate)  # set the decayed rate
                            bad_valid = 0
                            logging.info("bad valid : {}".format(bad_valid))
                    else:
                        logging.info("achieving best {} score: {}".format(
                            opt.metric, score))
                        bad_valid = 0
                else:
                    torch.save(disc.state_dict(),
                               os.path.join(logger.log_dir, 'disc-model.pth'))
            flag.inc()
Example #4
def train(opt):
    logger = Logger(opt)  # create the logger
    flag = Flag(D_iters=opt.D_iter, G_iters=opt.G_iter,
                always=opt.always)  # initialize the D/G training flag

    dataset = VISTDataset(opt)  # load the data
    opt.vocab_size = dataset.get_vocab_size()
    opt.seq_length = dataset.get_story_length()
    dataset.set_option(data_type={
        'whole_story': False,
        'split_story': True,
        'caption': False
    })
    dataset.train()
    train_loader = DataLoader(dataset,
                              batch_size=opt.batch_size,
                              shuffle=opt.shuffle)
    dataset.val()
    val_loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=False)
    bad_valid = 0

    evaluator = Evaluator(opt, 'val')
    crit = criterion.LanguageModelCriterion()
    rl_crit = criterion.ReinforceCriterion(opt, dataset)  # reinforcement-learning loss

    # set up model
    model = models.setup(opt)
    model.cuda()
    disc_opt = copy.copy(opt)
    disc_opt.model = 'RewardModel'  # switch the model type to the discriminator
    disc = models.setup(disc_opt)  # instantiate the discriminator (RewardModel)
    if os.path.exists(os.path.join('./data/save/',
                                   'disc-model.pth')):  # if it exists, load the saved parameters
        logging.info("loading pretrained RewardModel")
        disc.load_state_dict(
            torch.load(os.path.join(logger.log_dir, 'disc-model.pth')))
    disc.cuda()
    # two optimizers for two completely independent models
    optimizer = setup_optimizer(opt, model)
    disc_optimizer = setup_optimizer(disc_opt, disc)  # fix

    dataset.train()
    model.train()
    disc.train()
    ############################## training ##################################
    for epoch in range(logger.epoch_start, opt.max_epochs):  # max_epochs defaults to 50
        start = time.time()
        for iter, batch in enumerate(train_loader):  # iterate over batches
            logger.iteration += 1  # track the iteration count
            torch.cuda.synchronize()
            # fetch the batch data
            feature_fc = Variable(batch['feature_fc']).cuda()
            target = Variable(batch['split_story']).cuda()
            index = batch['index']

            optimizer.zero_grad()
            disc_optimizer.zero_grad()

            if flag.flag == "Disc":
                model.eval()  # freeze the policy model's parameters
                disc.train()  # update the discriminator's parameters
                if opt.decoding_method_DISC == 'sample':  # sample sequences from the model's probability distribution
                    seq, seq_log_probs, baseline = model.sample(
                        feature_fc,
                        sample_max=False,
                        rl_training=True,
                        pad=True)
                elif opt.decoding_method_DISC == 'greedy':
                    seq, seq_log_probs, baseline = model.sample(
                        feature_fc,
                        sample_max=True,
                        rl_training=True,
                        pad=True)
            else:
                model.train()  # update the generator
                disc.eval()  # freeze the discriminator
                seq, seq_log_probs, baseline = model.sample(feature_fc,
                                                            sample_max=False,
                                                            rl_training=True,
                                                            pad=True)

            seq = Variable(seq).cuda()
            mask = (seq > 0).float()  # shape: (64, 5, 30)
            mask = to_contiguous(
                torch.cat([
                    Variable(
                        mask.data.new(mask.size(0), mask.size(1), 1).fill_(1)),
                    mask[:, :, :-1]
                ], 2))
            normed_seq_log_probs = (seq_log_probs * mask).sum(-1) / mask.sum(
                -1)  # shape: (64, 5); length-normalized log-probability of each sequence
            gen_score = disc(
                seq.view(-1, seq.size(2)),
                feature_fc.view(-1, feature_fc.size(2)))  # reward score for the sampled sequences

            if flag.flag == "Disc":  # train the discriminator first (the generator is pretrained) so it learns to score ground-truth vs. generated stories
                gt_score = disc(target.view(-1, target.size(2)),
                                feature_fc.view(
                                    -1, feature_fc.size(2)))  # reward for the ground-truth sequences
                loss = -torch.sum(gt_score) + torch.sum(
                    gen_score)  # discriminator loss; a negative value is normal
                # average rewards; training pushes the ground-truth (positive) score as high as possible
                avg_pos_score = torch.mean(gt_score)
                avg_neg_score = torch.mean(gen_score)

                if logger.iteration % 5 == 0:
                    logging.info("pos reward {} neg reward {}".format(
                        avg_pos_score.item(), avg_neg_score.item()))
                    # print("PREDICTION: ", utils.decode_story(dataset.get_vocab(), seq[:1].data)[0])
                    # print("GROUND TRUTH: ", utils.decode_story(dataset.get_vocab(), target[:1].data)[0])
            else:
                rewards = Variable(gen_score.data -
                                   0 * normed_seq_log_probs.view(-1).data)
                #with open("/tmp/reward.txt", "a") as f:
                #    print(" ".join(map(str, rewards.data.cpu().numpy())), file=f)
                loss, avg_score = rl_crit(seq.data, seq_log_probs, baseline,
                                          index, rewards.view(-1, seq.size(1)))
                # if logger.iteration % opt.losses_log_every == 0:
                avg_pos_score = torch.mean(gen_score)
                # logging.info("average reward: {} average IRL score: {}".format(avg_score.item(), avg_pos_score.item()))

            if flag.flag == "Disc":
                loss.backward()
                nn.utils.clip_grad_norm(disc.parameters(),
                                        opt.grad_clip,
                                        norm_type=2)
                disc_optimizer.step()
            else:
                tf_loss = crit(model(feature_fc, target), target)
                # print("rl_loss / tf_loss = ", loss.item() / tf_loss.item())
                loss = opt.rl_weight * loss + (1 - opt.rl_weight) * tf_loss
                loss.backward()
                nn.utils.clip_grad_norm(model.parameters(),
                                        opt.grad_clip,
                                        norm_type=2)
                optimizer.step()

            train_loss = loss.item()
            torch.cuda.synchronize()

            # Write the training loss summary
            if logger.iteration % opt.losses_log_every == 0:
                logger.log_training(epoch, iter, train_loss, opt.learning_rate,
                                    model.ss_prob)
                logging.info(
                    "Epoch {} Train {} - Iter {} / {}, loss = {:.5f}, time used = {:.3f}s"
                    .format(epoch, flag.flag, iter, len(train_loader),
                            train_loss,
                            time.time() - start))
                start = time.time()

            if logger.iteration % opt.save_checkpoint_every == 0:
                if opt.always is None:
                    # Evaluate on validation dataset and save model for every epoch
                    val_loss, predictions, metrics = evaluator.eval_story(
                        model, crit, dataset, val_loader, opt)
                    if opt.metric == 'XE':
                        score = -val_loss
                    else:
                        score = metrics[opt.metric]
                    logger.log_checkpoint(epoch, val_loss, metrics,
                                          predictions, opt, model, dataset,
                                          optimizer)
                    # halve the learning rate if not improving for a long time
                    if logger.best_val_score > score:
                        bad_valid += 1
                        if bad_valid >= 10:
                            opt.learning_rate = opt.learning_rate / 2.0
                            logging.info("halve learning rate to {}".format(
                                opt.learning_rate))
                            checkpoint_path = os.path.join(
                                logger.log_dir, 'model-best.pth')
                            model.load_state_dict(torch.load(checkpoint_path))
                            utils.set_lr(
                                optimizer,
                                opt.learning_rate)  # set the decayed rate
                            bad_valid = 0
                            logging.info("bad valid : {}".format(bad_valid))
                    else:
                        logging.info("achieving best {} score: {}".format(
                            opt.metric, score))
                        bad_valid = 0
                else:
                    torch.save(disc.state_dict(),
                               os.path.join(logger.log_dir, 'disc-model.pth'))
            flag.inc()
Example #5
def main(args):
    tf.config.experimental.list_physical_devices('GPU')
    # tf.device(f'/gpu:{args.gpu_num}')

    train_path = args.train_dataset
    valid_path = args.valid_dataset
    weights_path = args.weights
    # Path to a text file containing all classes, one per line
    classes_file = args.classes
    # Usually fit
    # mode = 'fit'  # Can be 'fit', 'eager_fit', 'eager_tf', 'valid'
    mode = args.mode
    '''
    fit: model.fit
    eager_fit: model.fit(run_eagerly=True)
    eager_tf: custom GradientTape
    '''

    # Usually darknet
    transfer = args.transfer
    '''
    none: Training from scratch
    darknet: Transfer darknet
    no_output: Transfer all but output
    frozen: Transfer and freeze all
    fine_tune: Transfer all and freeze darknet only
    pre: Use a pre-trained model for validation
    '''
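    # A hypothetical argparse sketch (not part of this script) of how these choices are usually exposed:
    #   parser.add_argument('--mode', choices=['fit', 'eager_fit', 'eager_tf', 'valid'], default='fit')
    #   parser.add_argument('--transfer', choices=['none', 'darknet', 'no_output', 'frozen', 'fine_tune', 'pre'], default='none')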
    image_size = cfg.IMAGE_SIZE

    num_epochs = args.epochs
    batch_size = args.batch_size
    learning_rate = cfg.LEARNING_RATE
    num_classes = args.num_classes
    # number of classes in the `weights` file if different; useful for transfer learning with a different number of classes
    weight_num_classes = args.num_weight_class

    # saved_weights_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/weights/'
    saved_weights_path = '/home/justin/ml_models/yolov3-tf2/weights/trained_{}.tf'.format(num_epochs)
    saved_weights_path = args.saved_weights

    # Original Anchors below
    anchors = np.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                             (59, 119), (116, 90), (156, 198), (373, 326)],
                            np.float32) / 608

    anchors = cfg.YOLO_ANCHORS

    anchor_masks = cfg.YOLO_ANCHOR_MASKS

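    # Enable memory growth so TensorFlow allocates GPU memory on demand instead of reserving it all up front.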
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if args.no_train:
        print('Skipping training...')
    else:
        start_time = time.time()
        model = YoloV3(image_size, training=True, classes=num_classes)

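        # tf.data input pipeline: shuffle, batch, resize images, and encode targets for each anchor scale.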
        train_dataset = dataset.load_tfrecord_dataset(train_path,
                                                      classes_file,
                                                      image_size)
        train_dataset = train_dataset.shuffle(buffer_size=512)
        train_dataset = train_dataset.batch(batch_size)
        train_dataset = train_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))
        train_dataset = train_dataset.prefetch(
            buffer_size=tf.data.experimental.AUTOTUNE)

        val_dataset = dataset.load_tfrecord_dataset(valid_path,
                                                    classes_file,
                                                    image_size)
        val_dataset = val_dataset.batch(batch_size)
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))

        # Configure the model for transfer learning
        if transfer == 'none':
            pass  # Nothing to do
        elif transfer in ['darknet', 'no_output']:
            # Darknet transfer is a special case that works
            # with incompatible number of classes
            # reset top layers
            model_pretrained = YoloV3(image_size,
                                      training=True,
                                      classes=weight_num_classes or num_classes)
            model_pretrained.load_weights(weights_path)

            if transfer == 'darknet':
                model.get_layer('yolo_darknet').set_weights(
                    model_pretrained.get_layer('yolo_darknet').get_weights())
                freeze_all(model.get_layer('yolo_darknet'))

            elif transfer == 'no_output':
                for layer in model.layers:
                    if not layer.name.startswith('yolo_output'):
                        layer.set_weights(model_pretrained.get_layer(
                            layer.name).get_weights())
                        freeze_all(layer)
        elif transfer == 'pre':
            model = YoloV3(image_size,
                           training=False,
                           classes=num_classes)
            model.load_weights(weights_path)

        else:
            # All other transfer require matching classes
            model.load_weights(weights_path)
            if transfer == 'fine_tune':
                # freeze darknet and fine tune other layers
                darknet = model.get_layer('yolo_darknet')
                freeze_all(darknet)
            elif transfer == 'frozen':
                # freeze everything
                freeze_all(model)
        optimizer = tf.keras.optimizers.Adam(lr=learning_rate)
        loss = [YoloLoss(anchors[mask], classes=num_classes)
                for mask in anchor_masks]  # Passing loss as a list might sometimes fail? dict might be better?

        if mode == 'eager_tf':
            # Eager mode is great for debugging
            # Non eager graph mode is recommended for real training
            avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
            avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)
            for epoch in range(1, num_epochs + 1):
                for batch, (images, labels) in enumerate(train_dataset):
                    with tf.GradientTape() as tape:
                        outputs = model(images, training=True)
                        regularization_loss = tf.reduce_sum(model.losses)
                        pred_loss = []
                        for output, label, loss_fn in zip(outputs, labels, loss):
                            pred_loss.append(loss_fn(label, output))
                        total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                    grads = tape.gradient(total_loss, model.trainable_variables)
                    optimizer.apply_gradients(
                        zip(grads, model.trainable_variables))
                    print("{}_train_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                    avg_loss.update_state(total_loss)
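                # Evaluate the validation split without gradient updates and accumulate its mean loss.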
                for batch, (images, labels) in enumerate(val_dataset):
                    outputs = model(images)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                    print("{}_val_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                    avg_val_loss.update_state(total_loss)
                print("{}, train: {}, val: {}".format(
                    epoch,
                    avg_loss.result().numpy(),
                    avg_val_loss.result().numpy()))
                avg_loss.reset_states()
                avg_val_loss.reset_states()

                model.save_weights(
                    'checkpoints/yolov3_train_{}.tf'.format(epoch))
        elif mode == 'valid':
            pass  # Pass this step for validation only
        else:
            model.compile(optimizer=optimizer, loss=loss,
                          run_eagerly=(mode == 'eager_fit'))
            callbacks = [
                ReduceLROnPlateau(verbose=1, min_lr=1e-4, patience=50),
                # EarlyStopping(patience=3, verbose=1),
                ModelCheckpoint('checkpoints/midpoints/yolov3_train_{epoch}.tf',
                                verbose=1, save_weights_only=True),
                TensorBoard(log_dir=f'logs/{saved_weights_path[:-3]}')
            ]

            history = model.fit(train_dataset,
                                epochs=num_epochs,
                                callbacks=callbacks,
                                validation_data=val_dataset)
            print(f'Saving weights to: {saved_weights_path}')
            model.save_weights(saved_weights_path)
        finish_time = time.time()
        train_time = finish_time - start_time
        print('Training time elapsed: {}'.format(train_time))

    # Calculate mAP
    if args.validate:
        print('Validating...')
        model = YoloV3(image_size, training=False, classes=num_classes)
        model.load_weights(saved_weights_path).expect_partial()

        batch_size = 1

        val_dataset = dataset.load_tfrecord_dataset(valid_path,
                                                    classes_file,
                                                    image_size)
        val_dataset = val_dataset.batch(batch_size)

        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))

        images = []
        for img, labs in val_dataset:
            img = np.squeeze(img)
            images.append(img)

        predictions = []

        evaluator = Evaluator(iou_thresh=args.iou)

        # labels - (N, grid, grid, anchors, [x, y, w, h, obj, class])
        boxes, scores, classes, num_detections = model.predict(val_dataset)
        print(boxes.shape)
        print(boxes[0])
        # boxes -> (num_imgs, num_detections, box coords)

        filtered_labels = []
        for _, label in val_dataset:
            filt_labels = flatten_labels(label)
            filtered_labels.append(filt_labels)

        # img indexes the images; keep only detections with a positive score
        for img in range(len(num_detections)):
            row = []
            for sc in range(len(scores[img])):
                if scores[img][sc] > 0:
                    row.append(np.hstack([boxes[img][sc] * image_size, scores[img][sc], classes[img][sc]]))
            predictions.append(np.asarray(row))

        predictions = np.asarray(predictions)  # numpy array of shape [num_imgs x num_preds x 6]

        if len(predictions) == 0:  # No predictions made
            print('No predictions made - exiting.')
            exit()

        # predictions[:, :, 0:4] = predictions[:, :, 0:4] * image_size
        # Predictions format - [num_imgs x num_preds x [box coords x4, score, classes]]
        # Box coords should be in format x1 y1 x2 y2

        evaluator(predictions, filtered_labels, images, roc=False)  # Check gts box coords

    if args.valid_imgs:  # Predictions
        print('Valid Images...')
        # yolo = YoloV3(classes=num_classes)
        yolo = YoloV3(image_size, training=False, classes=num_classes)
        yolo.load_weights(saved_weights_path).expect_partial()
        print('weights loaded')

        print('Validation Image...')
        # Find better way to do this so not requiring manual changes
        class_dict = cfg.CLASS_DICT

        class_names = list(class_dict.values())
        print('classes loaded')

        val_dataset = dataset.load_tfrecord_dataset(valid_path,
                                                    classes_file,
                                                    image_size)
        val_dataset = val_dataset.batch(1)
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))


        # boxes, scores, classes, num_detections
        index = 0
        for img_raw, _label in val_dataset.take(5):
            print(f'Index {index}')

            #img = tf.expand_dims(img_raw, 0)
            img = transform_images(img_raw, image_size)
            img = img * 255

            boxes, scores, classes, nums = yolo(img)

            filt_labels = flatten_labels(_label)

            boxes = tf.expand_dims(filt_labels[:, 0:4], 0)
            scores = tf.expand_dims(filt_labels[:, 4], 0)
            classes = tf.expand_dims(filt_labels[:, 5], 0)
            nums = tf.expand_dims(filt_labels.shape[0], 0)
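            # Note: the yolo() outputs above are replaced by the ground-truth boxes, so the drawn images show labels rather than detections.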

            img = cv2.cvtColor(img_raw[0].numpy(), cv2.COLOR_RGB2BGR)
            img = draw_outputs(img, (boxes, scores, classes, nums), class_names, thresh=0)
            # img = img * 255

            output = 'test_images/test_{}.jpg'.format(index)
            # output = '/Users/justinbutler/Desktop/test/test_images/test_{}.jpg'.format(index)

            # print('detections:')
            # for i in range(nums[index]):
            #     print('\t{}, {}, {}'.format(class_names[int(classes[index][i])],
            #                               np.array(scores[index][i]),
            #                               np.array(boxes[index][i])))
            #     if i > 10:
            #         continue

            img = cv2.cvtColor(img_raw[0].numpy(), cv2.COLOR_RGB2BGR)
            img = draw_outputs(img, (boxes, scores, classes, nums), class_names, thresh=0)
            img = img * 255
            cv2.imwrite(output, img)

            index = index + 1

    if args.visual_data:
        print('Visual Data...')
        val_dataset = dataset.load_tfrecord_dataset(valid_path,
                                                    classes_file,
                                                    image_size)
        val_dataset = val_dataset.batch(1)
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))

        index = 0
        for img_raw, _label in val_dataset.take(5):
            print(f'Index {index}')
            # img = tf.expand_dims(img_raw, 0)
            img = transform_images(img_raw, image_size)

            output = 'test_images/test_labels_{}.jpg'.format(index)
            # output = '/Users/justinbutler/Desktop/test/test_images/test_labels_{}.jpg'.format(index)

            filt_labels = flatten_labels(_label)

            boxes = tf.expand_dims(filt_labels[:, 0:4], 0)
            scores = tf.expand_dims(filt_labels[:, 4], 0)
            classes = tf.expand_dims(filt_labels[:, 5], 0)
            nums = tf.expand_dims(filt_labels.shape[0], 0)

            img = cv2.cvtColor(img_raw[0].numpy(), cv2.COLOR_RGB2BGR)
            img = draw_outputs(img, (boxes, scores, classes, nums), class_names, thresh=0)
            img = img * 255

            cv2.imwrite(output, img)

            index = index + 1

        return
Example #6
def init_module(morph):
    global ev
    ev = Evaluator(morph=morph)
Example #7
def train(opt):
    """
    Model training function.
    """
    # custom logging class
    logger = Logger(opt)

    # load the data
    dataset = VISTDataset(opt)
    opt.vocab_size = dataset.get_vocab_size()
    opt.seq_length = dataset.get_story_length()
    # print(dataset.get_word2id()['the'])
    dataset.set_option(data_type={
        'whole_story': False,
        'split_story': True,
        'caption': True
    })  # set 'caption' to False if caption data is not used
    dataset.train()
    train_loader = DataLoader(dataset,
                              batch_size=opt.batch_size,
                              shuffle=opt.shuffle)
    dataset.test()  # change to the valid split
    val_loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=False)
    # m = dataset.word2id

    # counts how many times the validation loss has risen
    bad_valid = 0

    # create the Evaluator
    evaluator = Evaluator(opt, 'val')
    # language-model (cross-entropy) loss
    crit = criterion.LanguageModelCriterion()
    # whether to use reinforcement learning; the default of -1 disables it
    if opt.start_rl >= 0:
        rl_crit = criterion.ReinforceCriterion(opt, dataset)

    # set up model; the function lives in the package __init__ and loads saved parameters if a previous model exists
    model = models.setup(opt)
    model.cuda()
    optimizer = setup_optimizer(opt, model)
    dataset.train()
    model.train()
    for epoch in range(logger.epoch_start, opt.max_epochs):  # defaults to epochs 0-20
        # scheduled_sampling_start is the epoch at which the probability of feeding ground truth starts to decay (up to 0.25); it stays 0 within the first 5 epochs
        if epoch > opt.scheduled_sampling_start and opt.scheduled_sampling_start >= 0:
            frac = (
                epoch - opt.scheduled_sampling_start
            ) // opt.scheduled_sampling_increase_every  # the latter defaults to 5; // is floor division
            opt.ss_prob = min(opt.scheduled_sampling_increase_prob * frac,
                              opt.scheduled_sampling_max_prob)  # 0.05 and 0.25 by default
            model.ss_prob = opt.ss_prob
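            # e.g. with scheduled_sampling_start=0 and increase_every=5: epochs 1-4 keep ss_prob=0,
            # epochs 5-9 use 0.05, epochs 10-14 use 0.10, and so on up to the 0.25 cap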
        # iterate over the data one batch at a time
        for iter, batch in enumerate(train_loader):
            start = time.time()
            logger.iteration += 1
            torch.cuda.synchronize()

            # unpack the batch: image features, captions, and targets
            features = Variable(batch['feature_fc']).cuda()  # shape: 64 x 5 x 2048
            caption = None
            if opt.caption:
                caption = Variable(batch['caption']).cuda()  # shape: 64 x 5 x 20
            target = Variable(batch['split_story']).cuda()  # shape: 64 x 5 x 30
            index = batch['index']

            optimizer.zero_grad()

            # run the model to get a probability distribution, then compute the cross-entropy loss
            output = model(features, target, caption)
            loss = crit(output, target)

            if opt.start_rl >= 0 and epoch >= opt.start_rl:  # reinforcement learning
                # draw sampled sequences and the baseline
                seq, seq_log_probs, baseline = model.sample(features,
                                                            caption=caption,
                                                            sample_max=False,
                                                            rl_training=True)
                rl_loss, avg_score = rl_crit(seq, seq_log_probs, baseline,
                                             index)
                print(rl_loss.item() / loss.item())
                loss = opt.rl_weight * rl_loss + (1 - opt.rl_weight) * loss
                logging.info("average {} score: {}".format(
                    opt.reward_type, avg_score))
            # backpropagate
            loss.backward()
            train_loss = loss.item()
            # gradient clipping; the second argument is the maximum norm, above which gradients are clipped
            nn.utils.clip_grad_norm(model.parameters(),
                                    opt.grad_clip,
                                    norm_type=2)
            optimizer.step()
            torch.cuda.synchronize()
            # log the elapsed time and the loss
            logging.info(
                "Epoch {} - Iter {} / {}, loss = {:.5f}, time used = {:.3f}s".
                format(epoch, iter, len(train_loader), train_loss,
                       time.time() - start))
            # Write the training loss summary to TensorBoard
            if logger.iteration % opt.losses_log_every == 0:
                logger.log_training(epoch, iter, train_loss, opt.learning_rate,
                                    model.ss_prob)
            # validation: evaluate once every save_checkpoint_every iterations
            if logger.iteration % opt.save_checkpoint_every == 0:
                val_loss, predictions, metrics = evaluator.eval_story(
                    model, crit, dataset, val_loader, opt)
                if opt.metric == 'XE':
                    score = -val_loss
                else:
                    score = metrics[opt.metric]
                logger.log_checkpoint(epoch, val_loss, metrics, predictions,
                                      opt, model, dataset, optimizer)
                # halve the learning rate if not improving for a long time
                if logger.best_val_score > score:
                    bad_valid += 1
                    if bad_valid >= 4:
                        opt.learning_rate = opt.learning_rate / 2.0
                        logging.info("halve learning rate to {}".format(
                            opt.learning_rate))
                        checkpoint_path = os.path.join(logger.log_dir,
                                                       'model-best.pth')
                        model.load_state_dict(torch.load(checkpoint_path))
                        utils.set_lr(optimizer,
                                     opt.learning_rate)  # set the decayed rate
                        bad_valid = 0
                        logging.info("bad valid : {}".format(bad_valid))
                else:
                    logging.info("achieving best {} score: {}".format(
                        opt.metric, score))
                    bad_valid = 0
Example #8
def train(opt):
    # utils.setup_seed()
    logger = Logger(opt, save_code=opt.save_code)

    ################### set up dataset and dataloader ########################
    dataset = VISTDataset(opt)
    opt.vocab_size = dataset.get_vocab_size()
    opt.seq_length = dataset.get_story_length()

    dataset.set_option(data_type={'whole_story': False, 'split_story': True, 'caption': False, 'prefix_story': True})

    dataset.train()
    train_loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=opt.shuffle, num_workers=opt.workers)
    dataset.val()
    val_loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.workers)

    ##################### set up model, criterion and optimizer ######
    bad_valid = 0

    # set up evaluator
    evaluator = Evaluator(opt, 'val')

    # set up criterion
    crit = criterion.LanguageModelCriterion()

    # set up model
    model = models.setup(opt)
    model.cuda()

    # set up optimizer
    optimizer = setup_optimizer(opt, model)

    dataset.train()
    model.train()
    initial_lr = opt.learning_rate
    logging.info(model)
    ############################## training ##################################
    for epoch in range(logger.epoch_start, opt.max_epochs):
        # Assign the scheduled sampling prob
        if epoch > opt.scheduled_sampling_start and opt.scheduled_sampling_start >= 0:
            frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
            opt.ss_prob = min(opt.scheduled_sampling_increase_prob *
                              frac, opt.scheduled_sampling_max_prob)
            model.ss_prob = opt.ss_prob

        for iter, batch in enumerate(train_loader):
            start = time.time()
            logger.iteration += 1
            torch.cuda.synchronize()

            feature_fc = batch['feature_fc'].cuda()
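            # Optional region-level object features plus spatial, class, and attribute side information, each gated by its own flag.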
            if opt.use_obj:
                feature_obj = batch['feature_obj'].cuda()
                if opt.use_spatial:
                    feature_obj_spatial = batch['feature_obj_spatial'].cuda()
                else:
                    feature_obj_spatial = None
                if opt.use_classes:
                    feature_obj_classes = batch['feature_obj_classes'].cuda()
                else:
                    feature_obj_classes = None
                if opt.use_attrs:
                    feature_obj_attrs = batch['feature_obj_attrs'].cuda()
                else:
                    feature_obj_attrs = None
            target = batch['split_story'].cuda()
            prefix = batch['prefix_story'].cuda()
            history_count = batch['history_counter'].cuda()
            index = batch['index']

            optimizer.zero_grad()

            # cross entropy loss
            output = model(feature_fc, feature_obj, target, history_count, spatial=feature_obj_spatial,
                               clss=feature_obj_classes, attrs=feature_obj_attrs)
            loss = crit(output, target)

            loss.backward()
            train_loss = loss.item()

            nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip, norm_type=2)
            optimizer.step()
            torch.cuda.synchronize()

            if iter % opt.log_step == 0:
                logging.info("Epoch {} - Iter {} / {}, loss = {:.5f}, time used = {:.3f}s".format(epoch, iter,
                                                                                                  len(train_loader),
                                                                                                  train_loss,
                                                                                                  time.time() - start))
            # Write the training loss summary
            if logger.iteration % opt.losses_log_every == 0:
                logger.log_training(epoch, iter, train_loss, opt.learning_rate, model.ss_prob)

            if logger.iteration % opt.save_checkpoint_every == 0:
                # Evaluate on validation dataset and save model for every epoch
                val_loss, predictions, metrics = evaluator.eval_story(model, crit, dataset, val_loader, opt)
                if opt.metric == 'XE':
                    score = -val_loss
                else:
                    score = metrics[opt.metric]
                logger.log_checkpoint(epoch, val_loss, metrics, predictions, opt, model, dataset, optimizer)
                # halve the learning rate if not improving for a long time
                if logger.best_val_score > score:
                    bad_valid += 1
                    if bad_valid >= opt.bad_valid_threshold:
                        opt.learning_rate = opt.learning_rate * opt.learning_rate_decay_rate
                        logging.info("halve learning rate to {}".format(opt.learning_rate))
                        checkpoint_path = os.path.join(logger.log_dir, 'model-best.pth')
                        model.load_state_dict(torch.load(checkpoint_path))
                        utils.set_lr(optimizer, opt.learning_rate)  # set the decayed rate
                        bad_valid = 0
                        logging.info("bad valid : {}".format(bad_valid))
                else:
                    opt.learning_rate = initial_lr
                    logging.info("achieving best {} score: {}".format(opt.metric, score))
                    bad_valid = 0
def train(opt):
    setup_seed()
    logger = Logger(opt)

    ################### set up dataset and dataloader ########################
    dataset = VISTDataset(opt)
    opt.vocab_size = dataset.get_vocab_size()
    opt.seq_length = dataset.get_story_length()

    dataset.set_option(data_type={'whole_story': False, 'split_story': True, 'caption': False})

    dataset.train()
    train_loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=opt.shuffle, num_workers=opt.workers)
    dataset.val()
    val_loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.workers)

    ##################### set up model, criterion and optimizer ######
    bad_valid = 0

    # set up evaluator
    evaluator = Evaluator(opt, 'val')

    # set up criterion
    crit = criterion.LanguageModelCriterion()
    if opt.start_rl >= 0:
        rl_crit = criterion.ReinforceCriterion(opt, dataset)

    # set up model
    model = models.setup(opt)
    model.cuda()

    # set up optimizer
    optimizer = setup_optimizer(opt, model)

    dataset.train()
    model.train()
    ############################## training ##################################
    for epoch in range(logger.epoch_start, opt.max_epochs):
        # Assign the scheduled sampling prob
        if epoch > opt.scheduled_sampling_start and opt.scheduled_sampling_start >= 0:
            frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
            opt.ss_prob = min(opt.scheduled_sampling_increase_prob *
                              frac, opt.scheduled_sampling_max_prob)
            model.ss_prob = opt.ss_prob

        for iter, batch in enumerate(train_loader):
            start = time.time()
            logger.iteration += 1
            torch.cuda.synchronize()

            feature_fc = Variable(batch['feature_fc']).cuda()
            target = Variable(batch['split_story']).cuda()
            index = batch['index']
            semantic = batch['semantic']

            optimizer.zero_grad()

            # cross entropy loss
            output = model(feature_fc, target, semantic)
            loss = crit(output, target)

            if opt.start_rl >= 0 and epoch >= opt.start_rl:  # reinforcement learning
                seq, seq_log_probs, baseline = model.sample(feature_fc, sample_max=False, rl_training=True)
                rl_loss, avg_score = rl_crit(seq, seq_log_probs, baseline, index)
                print(rl_loss.data[0] / loss.data[0])
                loss = opt.rl_weight * rl_loss + (1 - opt.rl_weight) * loss
                logging.info("average {} score: {}".format(opt.reward_type, avg_score))

            loss.backward()
            train_loss = loss.data[0]

            nn.utils.clip_grad_norm(model.parameters(), opt.grad_clip, norm_type=2)
            optimizer.step()
            torch.cuda.synchronize()

            logging.info("Epoch {} - Iter {} / {}, loss = {:.5f}, time used = {:.3f}s".format(epoch, iter,
                                                                                              len(train_loader),
                                                                                              train_loss,
                                                                                              time.time() - start))
            # Write the training loss summary
            if logger.iteration % opt.losses_log_every == 0:
                logger.log_training(epoch, iter, train_loss, opt.learning_rate, model.ss_prob)

            if logger.iteration % opt.save_checkpoint_every == 0:
                # Evaluate on validation dataset and save model for every epoch
                val_loss, predictions, metrics = evaluator.eval_story(model, crit, dataset, val_loader, opt)
                if opt.metric == 'XE':
                    score = -val_loss
                else:
                    score = metrics[opt.metric]
                logger.log_checkpoint(epoch, val_loss, metrics, predictions, opt, model, dataset, optimizer)
                # halve the learning rate if not improving for a long time
                if logger.best_val_score > score:
                    bad_valid += 1
                    if bad_valid >= 4:
                        opt.learning_rate = opt.learning_rate / 2.0
                        logging.info("halve learning rate to {}".format(opt.learning_rate))
                        checkpoint_path = os.path.join(logger.log_dir, 'model-best.pth')
                        model.load_state_dict(torch.load(checkpoint_path))
                        utils.set_lr(optimizer, opt.learning_rate)  # set the decayed rate
                        bad_valid = 0
                        logging.info("bad valid : {}".format(bad_valid))
                else:
                    logging.info("achieving best {} score: {}".format(opt.metric, score))
                    bad_valid = 0
Example #10
def main():

    train_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/Datasets/Shapes/tfrecord_single/coco_train.record-00000-of-00001'
    valid_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/Datasets/Shapes/tfrecord_single/coco_val.record-00000-of-00001'
    weights_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/checkpoints/yolov3.tf'
    # Path to a text file containing all classes, one per line
    classes = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/shapes/shapes.names'
    # Usually fit
    # mode = 'fit'  # Can be 'fit', 'eager_fit', 'eager_tf', 'valid'
    mode = 'fit'
    '''
    fit: model.fit
    eager_fit: model.fit(run_eagerly=True)
    eager_tf: custom GradientTape
    '''

    # Usually darknet
    transfer = 'none'
    '''
    none: Training from scratch
    darknet: Transfer darknet
    no_output: Transfer all but output
    frozen: Transfer and freeze all
    fine_tune: Transfer all and freeze darknet only
    pre: Use a pre-trained model for validation
    '''
    image_size = 416
    num_epochs = 1
    batch_size = 8
    learning_rate = 1e-3
    num_classes = 4
    # number of classes in the `weights` file if different; useful for transfer learning with a different number of classes
    weight_num_classes = 80
    iou_threshold = 0.5

    # saved_weights_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/weights/'
    saved_weights_path = '/home/justin/ml_models/yolov3-tf2/weights/shapes_{}.tf'.format(
        num_epochs)
    anchors = yolo_anchors
    anchor_masks = yolo_anchor_masks

    # Training dataset
    #dataset_train = tf.data.TFRecordDataset(train_path)
    #dataset_val = tf.data.TFRecordDataset(valid_path)

    dataset_train = load_tfrecord_dataset(train_path, classes, image_size)
    dataset_train = dataset_train.shuffle(buffer_size=512)
    dataset_train = dataset_train.batch(batch_size)
    #dataset_train = dataset_train.map(lambda x, y: (
    #    transform_images(x, image_size),
    #    transform_targets(y, anchors, anchor_masks, image_size)))
    #dataset_train = dataset_train.prefetch(
    #    buffer_size=tf.data.experimental.AUTOTUNE)

    dataset_val = load_tfrecord_dataset(valid_path, classes, image_size)
    dataset_val = dataset_val.shuffle(buffer_size=512)
    dataset_val = dataset_val.batch(batch_size)
    #dataset_val = dataset_val.map(lambda x, y: (
    #    transform_images(x, image_size),
    #    transform_targets(y, anchors, anchor_masks, image_size)))

    # Create model in training mode
    yolo = models.YoloV3(image_size, training=True, classes=num_classes)

    model_pretrained = YoloV3(image_size,
                              training=True,
                              classes=weight_num_classes or num_classes)
    model_pretrained.load_weights(weights_path)

    # Which weights to start with?
    print('Loading Weights...')
    #yolo.load_weights(weights_path)

    yolo.get_layer('yolo_darknet').set_weights(
        model_pretrained.get_layer('yolo_darknet').get_weights())
    freeze_all(yolo.get_layer('yolo_darknet'))

    optimizer = tf.keras.optimizers.Adam(lr=learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=num_classes) for mask in anchor_masks
    ]  # Passing loss as a list might sometimes fail? dict might be better?

    yolo.compile(optimizer=optimizer,
                 loss=loss,
                 run_eagerly=(mode == 'eager_fit'))
    callbacks = [
        ReduceLROnPlateau(verbose=1),
        EarlyStopping(patience=3, verbose=1),
        ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                        verbose=1,
                        save_weights_only=True),
        TensorBoard(log_dir='logs')
    ]

    history = yolo.fit(dataset_train,
                       epochs=num_epochs,
                       callbacks=callbacks,
                       validation_data=dataset_val)
    yolo.save_weights(saved_weights_path)

    # Detect/ROC
    model = YoloV3(image_size, training=False, classes=num_classes)
    model.load_weights(saved_weights_path).expect_partial()

    batch_size = 1

    val_dataset = load_tfrecord_dataset(valid_path, classes, image_size)
    val_dataset = val_dataset.batch(batch_size)

    val_dataset = val_dataset.map(
        lambda x, y: (transform_images(x, image_size),
                      transform_targets(y, anchors, anchor_masks, image_size)))

    images = []
    for img, labs in val_dataset:
        img = np.squeeze(img)
        images.append(img)

    predictions = []

    evaluator = Evaluator(iou_thresh=iou_threshold)

    # labels - (N, grid, grid, anchors, [x, y, w, h, obj, class])
    boxes, scores, classes, num_detections = model.predict(val_dataset)
    # boxes -> (num_imgs, num_detections (200), box coords (4))
    # scores -> (num_imgs, num_detections)
    # classes -> (num_imgs, num_detections)
    # num_detections -> num_imgs

    # Aim for labels shape (per batch): [num_imgs, 3x[num_boxes x [x1,y1,x2,y2,score,class]]
    # full_labels = [label for _, label in val_dataset]

    # Shape : [Num images, 3 scales, grid, grid, anchor, 6 ]

    filtered_labels = []

    for _, label in val_dataset:
        img_labels = []
        # Label has shape [3 scales x[1, grid, grid, 3, 6]]
        for scale in label:
            # Shape [1, grid, grid, 3, 6]
            scale = np.asarray(scale)
            grid = scale.shape[1]

            scale2 = np.reshape(scale, (3, grid * grid, 6))
            # Shape: [3, grid*grid, 6]

            filtered_anchors = []
            for anchor in scale2:
                for box in anchor:
                    # Keep only boxes with a positive objectness score
                    if box[4] > 0:
                        filtered_anchors.append(np.asarray(box))
            img_labels.append(filtered_anchors)

        img_labels = np.asarray(img_labels)
        filtered_labels.append(img_labels)

    # Debug output: number of images, scales per image, and boxes in the third scale of the first image
    print(len(filtered_labels))
    print(len(filtered_labels[0]))
    print(len(filtered_labels[0][2]))
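    # The nested filtering loops above can be expressed more compactly with
    # boolean masking; an equivalent sketch (not the original code):
    #
    #   for _, label in val_dataset:
    #       img_labels = []
    #       for scale in label:
    #           flat = np.asarray(scale).reshape(-1, 6)     # flatten grid/anchor dims
    #           img_labels.append(flat[flat[:, 4] > 0])     # keep boxes with obj > 0
    #       filtered_labels.append(img_labels)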

    # i is the num_images index
    # predictions = [np.hstack([boxes[i][x], scores[i][x], classes[i][x]]) for i in range(len(num_detections)) for x in range(len(scores[i])) if scores[i][x] > 0]
    for img in range(len(num_detections)):
        row = []
        for sc in range(len(scores[img])):
            if scores[img][sc] > 0:
                row.append(
                    np.hstack([
                        boxes[img][sc] * image_size, scores[img][sc],
                        classes[img][sc]
                    ]))
        predictions.append(np.asarray(row))

    predictions = np.asarray(
        predictions)  # numpy array of shape [num_imgs x num_preds x 6]

    if len(predictions) == 0:  # No predictions made
        print('No predictions made - exiting.')
        exit()

    # Predictions shape: [num_imgs x num_preds x[box coords(4), conf, classes]]
    # Box coords should be in format x1 y1 x2 y2

    # Labels shape: [num_imgs, 3 x [num_boxes x [x1, y1, x2, y2, score, class]]]
    evaluator(predictions, filtered_labels, images)  # Check gts box coords
    '''
Exemple #11
0
def main(args):

    image_size = 416
    num_epochs = args.epochs
    batch_size = args.batch_size
    learning_rate = 1e-3
    num_classes = args.num_classes
    # Number of classes in the `weights` checkpoint, if it differs; useful for
    # transfer learning onto a different number of classes
    weight_num_classes = args.num_weight_class
    valid_path = args.valid_dataset
    weights_path = args.weights
    # Path to a text file containing all class names, one per line
    classes = args.classes

    anchors = yolo_anchors
    anchor_masks = yolo_anchor_masks

    val_dataset = dataset.load_tfrecord_dataset(valid_path, classes,
                                                image_size)
    val_dataset = val_dataset.batch(batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, image_size),
        dataset.transform_targets(y, anchors, anchor_masks, image_size)))

    model = YoloV3(image_size, training=True, classes=num_classes)
    # Darknet transfer is a special case that works with an incompatible number
    # of classes: the backbone weights are copied and the top layers are reset.
    model_pretrained = YoloV3(image_size,
                              training=True,
                              classes=weight_num_classes or num_classes)
    model_pretrained.load_weights(weights_path)

    if transfer == 'darknet':  # NOTE: `transfer` is not defined in this snippet (presumably an args option)
        model.get_layer('yolo_darknet').set_weights(
            model_pretrained.get_layer('yolo_darknet').get_weights())
        freeze_all(model.get_layer('yolo_darknet'))

    predictions = []

    evaluator = Evaluator(iou_thresh=args.iou)

    # labels - (N, grid, grid, anchors, [x, y, w, h, obj, class])
    boxes, scores, classes, num_detections = model.predict(val_dataset)  # note: reassigns `classes`, shadowing the class-file path above
    # boxes -> (num_imgs, num_detections, box coords)

    # Full labels shape -> [num_batches, grid scale, imgs]
    # Full labels shape -> [num_batches, [grid, grid, anchors, [x,y,w,h,obj,class]]]
    full_labels = np.asarray([label for _, label in val_dataset])

    # Shape -> [num_batches, num_imgs_in_batch, 3]
    # Shape -> [num_batches, num_imgs, 3x[grid,grid,anchors,[x,y,w,h,score,class]]]
    full_labels_trans = full_labels.transpose(0, 2, 1)

    full_labels_flat = []

    for batch in full_labels_trans:
        for img in batch:
            row = []
            for scale in img:
                row.append(scale)
            full_labels_flat.append(row)

    # Shape -> [num_imgs x 3]
    full_labels_flat = np.asarray(full_labels_flat)

    # Remove any labels consisting of all 0's
    filt_labels = []
    # for img in range(len(full_labels_flat)):
    for img in full_labels_flat:
        test = []
        # for scale in full_labels_flat[img]:
        for scale in img:
            lab_list = []
            for g1 in scale:
                for g2 in g1:
                    for anchor in g2:
                        if anchor[0] > 0:
                            temp = [
                                anchor[0] * image_size, anchor[1] * image_size,
                                anchor[2] * image_size, anchor[3] * image_size,
                                anchor[4], anchor[5]
                            ]
                            temp = [float(x) for x in temp]
                            lab_list.append(np.asarray(temp))
            test.append(np.asarray(lab_list))
        filt_labels.append(np.asarray(test))
    filt_labels = np.asarray(
        filt_labels
    )  # Numpy array of shape [num_imgs, 3 x [num_boxes x [x1, y1, x2, y2, score, class]]]
    # filt_labels = filt_labels[:, :4] * image_size

    # i is the num_images index
    # predictions = [np.hstack([boxes[i][x], scores[i][x], classes[i][x]]) for i in range(len(num_detections)) for x in range(len(scores[i])) if scores[i][x] > 0]
    for img in range(len(num_detections)):
        row = []
        for sc in range(len(scores[img])):
            if scores[img][sc] > 0:
                row.append(
                    np.hstack([
                        boxes[img][sc] * image_size, scores[img][sc],
                        classes[img][sc]
                    ]))
        predictions.append(np.asarray(row))

    predictions = np.asarray(
        predictions)  # numpy array of shape [num_imgs x num_preds x 6]

    if len(predictions) == 0:  # No predictions made
        print('No predictions made - exiting.')
        exit()

    # predictions[:, :, 0:4] = predictions[:, :, 0:4] * image_size
    # Predictions format - [num_imgs x num_preds x [box coords x4, score, classes]]
    # Box coords should be in format x1 y1 x2 y2

    # NOTE: `images` is not built in this function (the previous example collects
    # them from val_dataset); check that ground-truth boxes are in x1, y1, x2, y2 format.
    evaluator(predictions, filt_labels, images)

    confidence_thresholds = np.linspace(0.1, 1, 15)
    confidence_thresholds = [0.5]  # overrides the sweep above; only a single threshold is evaluated
    all_tp_rates = []
    all_fp_rates = []

    # Compute ROCs for above range of thresholds
    # Compute one for each class vs. the other classes
    for index, conf in enumerate(confidence_thresholds):
        tp_of_img = []
        fp_of_img = []
        all_classes = []

        tp_rates = {}
        fp_rates = {}

        boxes, scores, classes, num_detections = model.predict(val_dataset)

        # Full labels shape -> [num_batches, grid scale, imgs]
        # Full labels shape -> [num_batches, [grid, grid, anchors, [x,y,w,h,obj,class]]]
        full_labels = np.asarray([label for _, label in val_dataset])

        # Shape -> [num_batches, num_imgs_in_batch, 3]
        # Shape -> [num_batches, num_imgs, 3x[grid,grid,anchors,[x,y,w,h,score,class]]]
        full_labels_trans = full_labels.transpose(0, 2, 1)

        full_labels_flat = []

        for batch in full_labels_trans:
            for img in batch:
                row = []
                for scale in img:
                    row.append(scale)
                full_labels_flat.append(row)

        # Shape -> [num_imgs x 3]
        full_labels_flat = np.asarray(full_labels_flat)

        # Remove any labels consisting of all 0's
        filt_labels = []
        # for img in range(len(full_labels_flat)):
        for img in full_labels_flat:
            test = []
            # for scale in full_labels_flat[img]:
            for scale in img:
                lab_list = []
                for g1 in scale:
                    for g2 in g1:
                        for anchor in g2:
                            if anchor[0] > 0:
                                temp = [
                                    anchor[0] * image_size,
                                    anchor[1] * image_size,
                                    anchor[2] * image_size,
                                    anchor[3] * image_size, anchor[4],
                                    anchor[5]
                                ]
                                temp = [float(x) for x in temp]
                                lab_list.append(np.asarray(temp))
                test.append(np.asarray(lab_list))
            filt_labels.append(np.asarray(test))
        filt_labels = np.asarray(
            filt_labels
        )  # Numpy array of shape [num_imgs, 3 x [num_boxes x [x1, y1, x2, y2, score, class]]]
        # filt_labels = filt_labels[:, :4] * image_size

        # i is the num_images index
        # predictions = [np.hstack([boxes[i][x], scores[i][x], classes[i][x]]) for i in range(len(num_detections)) for x in range(len(scores[i])) if scores[i][x] > 0]
        predictions = []  # reset per threshold; otherwise .append fails on the ndarray created above
        for img in range(len(num_detections)):
            row = []
            for sc in range(len(scores[img])):
                if scores[img][sc] > 0:
                    row.append(
                        np.hstack([
                            boxes[img][sc] * image_size, scores[img][sc],
                            classes[img][sc]
                        ]))
            predictions.append(np.asarray(row))

        predictions = np.asarray(
            predictions)  # numpy array of shape [num_imgs x num_preds x 6]

        if len(predictions) == 0:  # No predictions made
            print('No predictions made - exiting.')
            exit()

        # predictions[:, :, 0:4] = predictions[:, :, 0:4] * image_size
        # Predictions format - [num_imgs x num_preds x [box coords x4, score, classes]]
        # Box coords should be in format x1 y1 x2 y2

        evaluator(predictions, filt_labels, images)  # Check ground-truth box coords (note: `images` is not defined in this function)

        # NOTE: the block below appears adapted from a Mask R-CNN style per-class
        # evaluation; `r`, `gt_bbox`, `gt_class_id`, `gt_mask`, `dataset_val`,
        # `image_ids`, and `utils` are not defined in this snippet.
        classes = list(set(r['class_ids']))  # All unique class ids
        for c in classes:
            if c not in all_classes:
                all_classes.append(c)
        complete_classes = dataset_val.class_ids[1:]
        # Need TPR and FPR rates for each class versus the other classes
        # Recall == TPR
        tpr = utils.compute_ap_indiv_class(gt_bbox, gt_class_id, gt_mask,
                                           r["rois"], r["class_ids"],
                                           r["scores"], r['masks'],
                                           complete_classes)
        total_fpr = utils.compute_fpr_indiv_class(gt_bbox, gt_class_id,
                                                  gt_mask, r["rois"],
                                                  r["class_ids"], r["scores"],
                                                  r['masks'], complete_classes)
        # print(f'For Image: TPR: {tpr} -- FPR: {total_fpr}')
        tp_of_img.append(tpr)
        fp_of_img.append(total_fpr)

        all_classes = dataset_val.class_ids[1:]

        # Need to get average TPR and FPR for number of images used
        for c in all_classes:
            tp_s = 0
            for item in tp_of_img:
                if c in item.keys():
                    tp_s += item[c]
                else:
                    tp_s += 0

            tp_rates[c] = tp_s / len(image_ids)
            # tp_rates[c] = tp_s

        # print(tp_rates)

        for c in all_classes:
            fp_s = 0
            for item in fp_of_img:
                if c in item.keys():
                    fp_s += item[c]
                else:
                    fp_s += 0
            fp_rates[c] = fp_s / len(image_ids)
            # fp_rates[c] = fp_s

        all_fp_rates.append(fp_rates)
        all_tp_rates.append(tp_rates)

    print(f'TP Rates: {all_tp_rates}')
    print(f'FP Rates: {all_fp_rates}')
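
    # A sketch (assuming matplotlib is available; not part of the original script)
    # of plotting the collected per-class TP/FP rates as ROC-style curves:
    import matplotlib.pyplot as plt

    class_ids = sorted(all_tp_rates[0].keys()) if all_tp_rates else []
    for c in class_ids:
        tpr_curve = [rates[c] for rates in all_tp_rates]
        fpr_curve = [rates[c] for rates in all_fp_rates]
        plt.plot(fpr_curve, tpr_curve, marker='o', label='class {}'.format(c))
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate (recall)')
    plt.legend()
    plt.savefig('roc_curves.png')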