def beam_search(FLAGS):
    """Evaluate the beam-search inference model on the test dataset.

    Builds a ``Seq2SeqAttInferModel`` from the sizes given in ``FLAGS``,
    loads the weights stored at ``FLAGS.init_model`` and runs
    ``model.evaluate`` over ``data.test()`` with ``SeqBeamAccuracy`` as the
    metric.

    Args:
        FLAGS: Parsed options object. This function reads ``static``,
            ``use_gpu``, ``encoder_size``, ``decoder_size``,
            ``embedding_dim``, ``num_classes``, ``beam_size``,
            ``init_model`` and ``batch_size``.
    """
    if FLAGS.static:
        paddle.enable_static()
    device = paddle.set_device("gpu" if FLAGS.use_gpu else "cpu")

    # yapf: disable
    inputs = [
        Input([None, 1, 48, 384], "float32", name="pixel"),
        Input([None, None], "int64", name="label_in")
    ]
    labels = [
        Input([None, None], "int64", name="label_out"),
        Input([None, None], "float32", name="mask")
    ]
    # yapf: enable

    network = Seq2SeqAttInferModel(
        encoder_size=FLAGS.encoder_size,
        decoder_size=FLAGS.decoder_size,
        emb_dim=FLAGS.embedding_dim,
        num_classes=FLAGS.num_classes,
        beam_size=FLAGS.beam_size)
    model = paddle.Model(network, inputs=inputs, labels=labels)
    model.prepare(metrics=SeqBeamAccuracy())
    model.load(FLAGS.init_model)

    # Evaluation data: fixed order, no dropped tail batch.
    eval_dataset = data.test()
    eval_collate = BatchCompose(
        [data.Resize(), data.Normalize(), data.PadTarget()])
    eval_sampler = data.BatchSampler(
        eval_dataset,
        batch_size=FLAGS.batch_size,
        drop_last=False,
        shuffle=False)
    eval_loader = paddle.io.DataLoader(
        eval_dataset,
        batch_sampler=eval_sampler,
        places=device,
        num_workers=0,
        return_list=True,
        collate_fn=eval_collate)

    logger = LoggerCallBack(10, 2, FLAGS.batch_size)
    model.evaluate(eval_data=eval_loader, callbacks=[logger])
def main(FLAGS):
    """Run beam-search prediction on a folder of images and print results.

    Loads a ``Seq2SeqAttInferModel`` from ``FLAGS.init_model``, reads every
    image under ``FLAGS.image_path`` converted to mode ``'L'``, and for each
    image prints its 1-based index and sample entry followed by one line per
    beam containing the decoded sequence.

    Args:
        FLAGS: Parsed options object. This function reads ``use_gpu``,
            ``dynamic``, ``encoder_size``, ``decoder_size``,
            ``embedding_dim``, ``num_classes``, ``beam_size``,
            ``init_model`` and ``image_path``.
    """
    device = paddle.set_device("gpu" if FLAGS.use_gpu else "cpu")
    if FLAGS.dynamic:
        fluid.enable_dygraph(device)

    inputs = [Input([None, 1, 48, 384], "float32", name="pixel")]
    network = Seq2SeqAttInferModel(
        encoder_size=FLAGS.encoder_size,
        decoder_size=FLAGS.decoder_size,
        emb_dim=FLAGS.embedding_dim,
        num_classes=FLAGS.num_classes,
        beam_size=FLAGS.beam_size)
    model = paddle.Model(network, inputs)
    model.prepare()
    model.load(FLAGS.init_model)

    def load_image(path):
        # Open the file at `path` and convert it to PIL mode 'L'.
        return Image.open(path).convert('L')

    test_dataset = ImageFolder(FLAGS.image_path, loader=load_image)
    collate = BatchCompose([data.Resize(), data.Normalize()])
    test_loader = fluid.io.DataLoader(
        test_dataset,
        places=device,
        num_workers=0,
        return_list=True,
        collate_fn=collate)

    samples = test_dataset.samples
    seen = 0  # number of images reported so far
    for batch in test_loader:
        # Each batch must hold exactly one field.
        (image,) = batch
        if not FLAGS.dynamic:
            image = image[0]

        pred = model.test_batch([image])[0]
        if len(pred.shape) == 2:
            # Add a trailing axis so 2-D output is handled like 3-D output.
            pred = pred[:, :, np.newaxis]
        # Swap the last two axes so iterating one instance yields beams.
        pred = np.transpose(pred, [0, 2, 1])

        for beams in pred:
            image_path = samples[seen]
            seen += 1
            print('Image {}: {}'.format(seen, image_path))
            for beam_idx, beam in enumerate(beams):
                words = index2word(postprocess(beam))
                print('{}: {}'.format(beam_idx, "".join(words)))
def main():
    """Train or evaluate YOLOv3 on COCO, driven by the global ``FLAGS``.

    With ``FLAGS.eval_only`` set, the val2017 split is run through
    ``model.predict`` and the predictions are fed to ``COCOMetric``.
    Otherwise the model is trained in two phases: first with mixup for
    ``FLAGS.epoch - FLAGS.no_mixup_epoch`` epochs, then without mixup for
    ``FLAGS.no_mixup_epoch`` epochs. Checkpoints go to the ``mixup`` and
    ``no_mixup`` subdirectories of the save directory.
    """
    device = paddle.set_device(FLAGS.device)
    if FLAGS.dynamic:
        paddle.disable_static(device)

    if FLAGS.eval_only:
        # Evaluation mode.
        transform = Compose([
            ResizeImage(target_size=608),
            NormalizeBox(),
            PadBox(),
            BboxXYXY2XYWH(),
        ])
        collate_fn = BatchCompose([NormalizeImage()])
        dataset = COCODataset(
            dataset_dir=FLAGS.data,
            anno_path='annotations/instances_val2017.json',
            image_dir='val2017',
            with_background=False,
            transform=transform)
        # batch_size can only be 1 in evaluation for YOLOv3:
        # the predicted bbox is a LoDTensor.
        batch_sampler = DistributedBatchSampler(
            dataset, batch_size=1, shuffle=False, drop_last=False)
    else:
        # Training mode.
        transform = Compose([
            ColorDistort(),
            RandomExpand(),
            RandomCrop(),
            RandomFlip(),
            NormalizeBox(),
            PadBox(),
            BboxXYXY2XYWH(),
        ])
        collate_fn = BatchCompose([RandomShape(), NormalizeImage()])
        dataset = COCODataset(
            dataset_dir=FLAGS.data,
            anno_path='annotations/instances_train2017.json',
            image_dir='train2017',
            with_background=False,
            mixup=True,
            transform=transform)
        batch_sampler = DistributedBatchSampler(
            dataset,
            batch_size=FLAGS.batch_size,
            shuffle=True,
            drop_last=True)

    # The loader is built the same way in both modes; only the dataset,
    # sampler and collate function differ.
    loader = DataLoader(
        dataset,
        batch_sampler=batch_sampler,
        places=device,
        num_workers=FLAGS.num_workers,
        return_list=True,
        collate_fn=collate_fn)

    # Ask for pretrained weights only when evaluating without explicit ones.
    pretrained = FLAGS.eval_only and FLAGS.weights is None
    model_mode = 'eval' if FLAGS.eval_only else 'train'
    model = yolov3_darknet53(
        num_classes=dataset.num_classes,
        num_max_boxes=NUM_MAX_BOXES,
        model_mode=model_mode,
        pretrained=pretrained)

    if FLAGS.pretrain_weights and not FLAGS.eval_only:
        model.load(
            FLAGS.pretrain_weights, skip_mismatch=True, reset_optimizer=True)

    optim = make_optimizer(
        len(batch_sampler), parameter_list=model.parameters())
    model.prepare(
        optimizer=optim, loss=YoloLoss(num_classes=dataset.num_classes))

    # NOTE: the COCO metric of the YOLOv3 model is implemented here,
    # separately from the 'prepare' and 'fit' framework, for the following
    # reasons:
    # 1. The YOLOv3 network structure differs between 'train' and 'eval'
    #    mode; in 'eval' mode the output prediction bbox is not the feature
    #    map used for calculating YoloLoss.
    # 2. COCO metric behavior also differs from a defined Metric: it should
    #    not accumulate in each iteration, only at the end of an epoch.
    if FLAGS.eval_only:
        if FLAGS.weights is not None:
            model.load(FLAGS.weights, reset_optimizer=True)
        _, _, _, img_ids, bboxes = model.predict(loader, stack_outputs=False)

        coco_metric = COCOMetric(
            anno_path=os.path.join(FLAGS.data,
                                   'annotations/instances_val2017.json'),
            with_background=False)
        for img_id, bbox in zip(img_ids, bboxes):
            coco_metric.update(img_id, bbox)
        coco_metric.accumulate()
        coco_metric.reset()
        return

    if FLAGS.resume is not None:
        model.load(FLAGS.resume)

    save_dir = FLAGS.save_dir or 'yolo_checkpoint'

    # Phase 1: train with image mixup enabled.
    model.fit(
        train_data=loader,
        epochs=FLAGS.epoch - FLAGS.no_mixup_epoch,
        save_dir=os.path.join(save_dir, "mixup"),
        save_freq=10)

    # Phase 2: do not use the image mixup transform in the last
    # FLAGS.no_mixup_epoch epochs.
    dataset.mixup = False
    model.fit(
        train_data=loader,
        epochs=FLAGS.no_mixup_epoch,
        save_dir=os.path.join(save_dir, "no_mixup"),
        save_freq=5)
def main(FLAGS):
    """Train the attention seq2seq model, evaluating on the test dataset.

    Builds ``Seq2SeqAttModel``, an Adam optimizer with global-norm gradient
    clipping and an optional piecewise learning-rate schedule, then calls
    ``model.fit`` with ``data.train()`` as training data and ``data.test()``
    as evaluation data. Checkpoints are saved under
    ``FLAGS.checkpoint_path``.

    Args:
        FLAGS: Parsed options object. This function reads ``static``,
            ``use_gpu``, ``encoder_size``, ``decoder_size``,
            ``embedding_dim``, ``num_classes``, ``lr``,
            ``lr_decay_strategy``, ``gradient_clip``, ``batch_size``,
            ``num_workers``, ``epoch`` and ``checkpoint_path``.
    """
    if FLAGS.static:
        paddle.enable_static()
    device = paddle.set_device("gpu" if FLAGS.use_gpu else "cpu")

    # yapf: disable
    inputs = [
        Input([None, 1, 48, 384], "float32", name="pixel"),
        Input([None, None], "int64", name="label_in"),
    ]
    labels = [
        Input([None, None], "int64", name="label_out"),
        Input([None, None], "float32", name="mask"),
    ]
    # yapf: enable

    network = Seq2SeqAttModel(
        encoder_size=FLAGS.encoder_size,
        decoder_size=FLAGS.decoder_size,
        emb_dim=FLAGS.embedding_dim,
        num_classes=FLAGS.num_classes)
    model = paddle.Model(network, inputs, labels)

    # Constant learning rate unless the piecewise schedule is requested.
    base_lr = FLAGS.lr
    learning_rate = base_lr
    if FLAGS.lr_decay_strategy == "piecewise_decay":
        learning_rate = fluid.layers.piecewise_decay(
            [200000, 250000], [base_lr, base_lr * 0.1, base_lr * 0.01])

    clip = fluid.clip.GradientClipByGlobalNorm(FLAGS.gradient_clip)
    optimizer = fluid.optimizer.Adam(
        learning_rate=learning_rate,
        parameter_list=model.parameters(),
        grad_clip=clip)

    model.prepare(optimizer, WeightCrossEntropy(), SeqAccuracy())

    # Training data: shuffled batches, loaded with FLAGS.num_workers workers.
    train_dataset = data.train()
    train_collate = BatchCompose(
        [data.Resize(), data.Normalize(), data.PadTarget()])
    train_sampler = data.BatchSampler(
        train_dataset, batch_size=FLAGS.batch_size, shuffle=True)
    train_loader = paddle.io.DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        places=device,
        num_workers=FLAGS.num_workers,
        return_list=True,
        collate_fn=train_collate)

    # Evaluation data: fixed order, no dropped tail batch, no extra workers.
    eval_dataset = data.test()
    eval_collate = BatchCompose(
        [data.Resize(), data.Normalize(), data.PadTarget()])
    eval_sampler = data.BatchSampler(
        eval_dataset,
        batch_size=FLAGS.batch_size,
        drop_last=False,
        shuffle=False)
    eval_loader = paddle.io.DataLoader(
        eval_dataset,
        batch_sampler=eval_sampler,
        places=device,
        num_workers=0,
        return_list=True,
        collate_fn=eval_collate)

    logger = LoggerCallBack(10, 2, FLAGS.batch_size)
    model.fit(
        train_data=train_loader,
        eval_data=eval_loader,
        epochs=FLAGS.epoch,
        save_dir=FLAGS.checkpoint_path,
        callbacks=[logger])