Esempio n. 1
0
class VisualDLWriter(Callback):
    """Callback that logs training losses, eval mAP and test images to VisualDL.

    Args:
        model: trainer object carrying ``cfg`` (reads ``vdl_log_dir``,
            default ``'vdl_log_dir/scalar'``) and ``_metrics``.
    """

    def __init__(self, model):
        super(VisualDLWriter, self).__init__(model)

        assert six.PY3, "VisualDL requires Python >= 3.5"
        try:
            from visualdl import LogWriter
        except Exception as e:
            logger.error('visualdl not found, please install visualdl. '
                         'for example: `pip install visualdl`.')
            raise e
        self.vdl_writer = LogWriter(
            model.cfg.get('vdl_log_dir', 'vdl_log_dir/scalar'))
        # Independent step counters for each kind of record.
        self.vdl_loss_step = 0
        self.vdl_mAP_step = 0
        self.vdl_image_step = 0
        self.vdl_image_frame = 0

    def on_step_end(self, status):
        """Log loss scalars in train mode and images in test mode.

        Only runs on rank 0 (or in single-process runs) so distributed
        training does not produce duplicate records.
        """
        mode = status['mode']
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            if mode == 'train':
                training_staus = status['training_staus']
                # NOTE(review): the step counter advances once per loss key,
                # not once per training step — confirm this is intended.
                for loss_name, loss_value in training_staus.get().items():
                    self.vdl_writer.add_scalar(loss_name, loss_value,
                                               self.vdl_loss_step)
                    self.vdl_loss_step += 1
            elif mode == 'test':
                ori_image = status['original_image']
                result_image = status['result_image']
                self.vdl_writer.add_image(
                    "original/frame_{}".format(self.vdl_image_frame),
                    ori_image, self.vdl_image_step)
                self.vdl_writer.add_image(
                    "result/frame_{}".format(self.vdl_image_frame),
                    result_image, self.vdl_image_step)
                self.vdl_image_step += 1
                # Each frame can display ten pictures at most, so roll the
                # step counter over and advance to the next frame after ten.
                if self.vdl_image_step % 10 == 0:
                    self.vdl_image_step = 0
                    self.vdl_image_frame += 1

    def on_epoch_end(self, status):
        """Log per-metric mAP scalars after each eval epoch (rank 0 only)."""
        mode = status['mode']
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            if mode == 'eval':
                for metric in self.model._metrics:
                    for key, map_value in metric.get_results().items():
                        self.vdl_writer.add_scalar("{}-mAP".format(key),
                                                   map_value[0],
                                                   self.vdl_mAP_step)
                self.vdl_mAP_step += 1
Esempio n. 2
0
def main():
    """Run detection inference over the configured test images.

    Builds the static-graph infer program, runs it image by image,
    optionally logs original/annotated images to VisualDL, saves the
    visualized images under ``FLAGS.output_dir`` and finally writes one
    ``<image> <score> <x1> <y1> <x2> <y2>`` line per kept detection to a
    flat text file.  All inputs come from module-level ``FLAGS`` and the
    YAML config.
    """
    cfg = load_config(FLAGS.config)

    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()

    main_arch = cfg.architecture

    dataset = cfg.TestReader['dataset']

    test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
    dataset.set_images(test_images)

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    model = create(main_arch)

    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['TestReader']['inputs_def']
            inputs_def['iterable'] = True
            feed_vars, loader = model.build_inputs(**inputs_def)
            test_fetches = model.test(feed_vars)
    infer_prog = infer_prog.clone(True)

    reader = create_reader(cfg.TestReader, devices_num=1)
    loader.set_sample_list_generator(reader, place)

    exe.run(startup_prog)
    if cfg.weights:
        checkpoint.load_params(exe, infer_prog, cfg.weights)

    # parse infer fetches
    assert cfg.metric in ['COCO', 'VOC', 'OID', 'WIDERFACE'], \
            "unknown metric type {}".format(cfg.metric)
    extra_keys = []
    if cfg['metric'] in ['COCO', 'OID']:
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg['metric'] == 'VOC' or cfg['metric'] == 'WIDERFACE':
        extra_keys = ['im_id', 'im_shape']
    keys, values, _ = parse_fetches(test_fetches, infer_prog, extra_keys)

    # parse dataset category: each metric backend provides its own
    # bbox2out / get_category_info implementations
    if cfg.metric == 'COCO':
        from ppdet.utils.coco_eval import bbox2out, mask2out, segm2out, get_category_info
    if cfg.metric == 'OID':
        from ppdet.utils.oid_eval import bbox2out, get_category_info
    if cfg.metric == "VOC":
        from ppdet.utils.voc_eval import bbox2out, get_category_info
    if cfg.metric == "WIDERFACE":
        from ppdet.utils.widerface_eval_utils import bbox2out, lmk2out, get_category_info

    anno_file = dataset.get_anno()
    with_background = dataset.with_background
    use_default_label = dataset.use_default_label

    clsid2catid, catid2name = get_category_info(anno_file, with_background,
                                                use_default_label)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    # use VisualDL to log image
    if FLAGS.use_vdl:
        assert six.PY3, "VisualDL requires Python >= 3.5"
        from visualdl import LogWriter
        vdl_writer = LogWriter(FLAGS.vdl_log_dir)
        vdl_image_step = 0
        vdl_image_frame = 0  # each frame can display ten pictures at most.

    imid2path = dataset.get_imid2path()
    resultBBox = []
    for iter_id, data in enumerate(loader()):
        outs = exe.run(infer_prog,
                       feed=data,
                       fetch_list=values,
                       return_numpy=False)
        res = {
            k: (np.array(v), v.recursive_sequence_lengths())
            for k, v in zip(keys, outs)
        }
        logger.info('Infer iter {}'.format(iter_id))
        if 'TTFNet' in cfg.architecture:
            res['bbox'][1].append([len(res['bbox'][0])])
        if 'CornerNet' in cfg.architecture:
            from ppdet.utils.post_process import corner_post_process
            post_config = getattr(cfg, 'PostProcess', None)
            corner_post_process(res, post_config, cfg.num_classes)

        bbox_results = None
        mask_results = None
        segm_results = None
        lmk_results = None
        if 'bbox' in res:
            bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized)
        if 'mask' in res:
            mask_results = mask2out([res], clsid2catid,
                                    model.mask_head.resolution)
        if 'segm' in res:
            segm_results = segm2out([res], clsid2catid)
        if 'landmark' in res:
            lmk_results = lmk2out([res], is_bbox_normalized)

        # bbox holds four values: top-left corner + width + height, e.g.
        # {'image_id': 0, 'category_id': 0, 'bbox': [695.04443359375, 723.8153686523438, 128.288818359375, 61.5987548828125], 'score': 0.9990022778511047}
        im_ids = res['im_id'][0]
        image_path = imid2path[int(im_ids[0])]
        prefix = image_path.split('/')[-1]
        imageName = prefix.split('.')[0]
        # bbox_results may stay None when the model emits no 'bbox' key.
        for result in bbox_results or []:
            score = result["score"]
            bbox = result["bbox"]
            # Convert (x, y, w, h) to integer corner coordinates.
            x1 = str(int(bbox[0]))
            y1 = str(int(bbox[1]))
            x2 = str(int(bbox[2] + bbox[0]))
            y2 = str(int(bbox[3] + bbox[1]))
            if score > 0.01:
                resStr = imageName + ' ' + str(round(
                    score,
                    3)) + ' ' + x1 + ' ' + y1 + ' ' + x2 + ' ' + y2 + '\n'
                resultBBox.append(resStr)

        # visualize result
        for im_id in im_ids:
            image_path = imid2path[int(im_id)]
            image = Image.open(image_path).convert('RGB')
            image = ImageOps.exif_transpose(image)

            # use VisualDL to log original image
            if FLAGS.use_vdl:
                original_image_np = np.array(image)
                vdl_writer.add_image(
                    "original/frame_{}".format(vdl_image_frame),
                    original_image_np, vdl_image_step)

            image = visualize_results(image, int(im_id), catid2name,
                                      FLAGS.draw_threshold, bbox_results,
                                      mask_results, segm_results, lmk_results)

            # use VisualDL to log image with bbox
            if FLAGS.use_vdl:
                infer_image_np = np.array(image)
                vdl_writer.add_image("bbox/frame_{}".format(vdl_image_frame),
                                     infer_image_np, vdl_image_step)
                vdl_image_step += 1
                if vdl_image_step % 10 == 0:
                    vdl_image_step = 0
                    vdl_image_frame += 1

            save_name = get_save_image_name(FLAGS.output_dir, image_path)
            logger.info("Detection bbox results save in {}".format(save_name))
            image.save(save_name, quality=95)
    resulttxtPath = "/home/aistudio/work/PaddleDetection-release-2.0-beta/output/test_result.txt"
    # Context manager guarantees the file is flushed and closed even if
    # writing fails part-way through.
    with open(resulttxtPath, 'w', encoding='utf-8') as f:
        f.writelines(resultBBox)
Esempio n. 3
0
def main():
    """Run detection inference on the configured test images and save
    visualized results, optionally logging them to VisualDL.

    All inputs come from the module-level ``FLAGS`` and the YAML config;
    annotated images are written under ``FLAGS.output_dir``.
    """
    cfg = load_config(FLAGS.config)

    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()

    main_arch = cfg.architecture

    dataset = cfg.TestReader['dataset']

    test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
    dataset.set_images(test_images)

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    model = create(main_arch)

    # Build the inference program inside program/unique-name guards so its
    # ops and variables are isolated from any other program in the process.
    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['TestReader']['inputs_def']
            inputs_def['iterable'] = True
            feed_vars, loader = model.build_inputs(**inputs_def)
            test_fetches = model.test(feed_vars)
    # clone(True) prunes the program for inference (test mode).
    infer_prog = infer_prog.clone(True)

    reader = create_reader(cfg.TestReader, devices_num=1)
    loader.set_sample_list_generator(reader, place)

    exe.run(startup_prog)
    if cfg.weights:
        checkpoint.load_params(exe, infer_prog, cfg.weights)

    # parse infer fetches
    assert cfg.metric in ['COCO', 'VOC', 'OID', 'WIDERFACE'], \
            "unknown metric type {}".format(cfg.metric)
    extra_keys = []
    if cfg['metric'] in ['COCO', 'OID']:
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg['metric'] == 'VOC' or cfg['metric'] == 'WIDERFACE':
        extra_keys = ['im_id', 'im_shape']
    keys, values, _ = parse_fetches(test_fetches, infer_prog, extra_keys)

    # parse dataset category: each metric backend supplies its own
    # bbox2out / get_category_info implementations
    if cfg.metric == 'COCO':
        from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info
    if cfg.metric == 'OID':
        from ppdet.utils.oid_eval import bbox2out, get_category_info
    if cfg.metric == "VOC":
        from ppdet.utils.voc_eval import bbox2out, get_category_info
    if cfg.metric == "WIDERFACE":
        from ppdet.utils.widerface_eval_utils import bbox2out, lmk2out, get_category_info

    anno_file = dataset.get_anno()
    with_background = dataset.with_background
    use_default_label = dataset.use_default_label

    clsid2catid, catid2name = get_category_info(anno_file, with_background,
                                                use_default_label)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    # use VisualDL to log image
    if FLAGS.use_vdl:
        assert six.PY3, "VisualDL requires Python >= 3.5"
        from visualdl import LogWriter
        vdl_writer = LogWriter(FLAGS.vdl_log_dir)
        vdl_image_step = 0
        vdl_image_frame = 0  # each frame can display ten pictures at most.

    imid2path = dataset.get_imid2path()
    for iter_id, data in enumerate(loader()):
        outs = exe.run(infer_prog,
                       feed=data,
                       fetch_list=values,
                       return_numpy=False)
        # Pair each fetched LoD tensor with its sequence lengths, keyed by
        # the fetch name.
        res = {
            k: (np.array(v), v.recursive_sequence_lengths())
            for k, v in zip(keys, outs)
        }
        logger.info('Infer iter {}'.format(iter_id))
        if 'TTFNet' in cfg.architecture:
            # TTFNet emits no LoD info; synthesize it from the bbox count.
            res['bbox'][1].append([len(res['bbox'][0])])

        bbox_results = None
        mask_results = None
        lmk_results = None
        if 'bbox' in res:
            bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized)
        if 'mask' in res:
            mask_results = mask2out([res], clsid2catid,
                                    model.mask_head.resolution)
        if 'landmark' in res:
            lmk_results = lmk2out([res], is_bbox_normalized)

        # visualize result
        im_ids = res['im_id'][0]
        for im_id in im_ids:
            image_path = imid2path[int(im_id)]
            image = Image.open(image_path).convert('RGB')

            # use VisualDL to log original image
            if FLAGS.use_vdl:
                original_image_np = np.array(image)
                vdl_writer.add_image(
                    "original/frame_{}".format(vdl_image_frame),
                    original_image_np, vdl_image_step)

            image = visualize_results(image,
                                      int(im_id), catid2name,
                                      FLAGS.draw_threshold, bbox_results,
                                      mask_results, lmk_results)

            # use VisualDL to log image with bbox
            if FLAGS.use_vdl:
                infer_image_np = np.array(image)
                vdl_writer.add_image("bbox/frame_{}".format(vdl_image_frame),
                                     infer_image_np, vdl_image_step)
                vdl_image_step += 1
                # Ten images per frame; then roll over to the next frame.
                if vdl_image_step % 10 == 0:
                    vdl_image_step = 0
                    vdl_image_frame += 1

            save_name = get_save_image_name(FLAGS.output_dir, image_path)
            logger.info("Detection bbox results save in {}".format(save_name))
            image.save(save_name, quality=95)
Esempio n. 4
0
    def train(self):
        """Run the U-GAT-IT training loop.

        Alternately updates the discriminators and the generators each
        step, logs loss scalars and sample images to VisualDL (only in
        single-process runs), periodically dumps A2B/B2A sample grids and
        model checkpoints.  Resumes from the newest ``*.pt`` snapshot
        under ``<result_dir>/<dataset>/model`` when ``self.resume`` is set.
        """
        if not self.is_parallel:
            writer = LogWriter(logdir=self.result_dir + "/log/")
        self.genA2B.train(), self.genB2A.train(), self.disGA.train(
        ), self.disGB.train(), self.disLA.train(), self.disLB.train()

        start_iter = 1
        if self.resume:
            print(self.result_dir, self.dataset,
                  os.path.join(self.result_dir, self.dataset, 'model', '*.pt'))
            model_list = glob(
                os.path.join(self.result_dir, self.dataset, 'model', '*.pt'))
            print("resuming, model_list", model_list)
            if not len(model_list) == 0:
                model_list.sort()
                # Checkpoint names end in "_<step>.pt"; recover the step.
                start_iter = int(model_list[-1].split('_')[-1].split('.')[0])
                print("resuming, start_iter", start_iter)
                self.load(os.path.join(self.result_dir, self.dataset, 'model'),
                          start_iter)
                print(" [*] Load SUCCESS")
                # Re-apply the linear LR decay the resumed run had already
                # accumulated past the half-way point.
                if self.decay_flag and start_iter > (self.iteration // 2):
                    self.G_optim._learning_rate -= (self.lr /
                                                    (self.iteration // 2)) * (
                                                        start_iter -
                                                        self.iteration // 2)
                    self.D_optim._learning_rate -= (self.lr /
                                                    (self.iteration // 2)) * (
                                                        start_iter -
                                                        self.iteration // 2)

        # training loop
        print('training start !')
        start_time = time.time()
        for step in range(start_iter, self.iteration + 1):
            # Linearly decay both LRs over the second half of training.
            if self.decay_flag and step > (self.iteration // 2):
                self.G_optim._learning_rate -= (self.lr /
                                                (self.iteration // 2))
                self.D_optim._learning_rate -= (self.lr /
                                                (self.iteration // 2))

            # (Re)create the iterator lazily: NameError on first use,
            # StopIteration once an epoch is exhausted.
            try:
                real_A, _ = trainA_iter.next()
            except (NameError, StopIteration):
                trainA_iter = iter(self.trainA_loader)
                real_A, _ = trainA_iter.next()

            try:
                real_B, _ = trainB_iter.next()
            except (NameError, StopIteration):
                trainB_iter = iter(self.trainB_loader)
                real_B, _ = trainB_iter.next()

            real_A = real_A[0]
            real_B = real_B[0]  ##some handling needed using paddle dataloader

            # Update D
            if hasattr(self.D_optim, "_optimizer"):  # support meta optimizer
                self.D_optim._optimizer.clear_gradients()
            else:
                self.D_optim.clear_gradients()

            fake_A2B, _, _ = self.genA2B(real_A)
            fake_B2A, _, _ = self.genB2A(real_B)

            real_GA_logit, real_GA_cam_logit, _ = self.disGA(real_A)
            real_LA_logit, real_LA_cam_logit, _ = self.disLA(real_A)
            real_GB_logit, real_GB_cam_logit, _ = self.disGB(real_B)
            real_LB_logit, real_LB_cam_logit, _ = self.disLB(real_B)

            fake_GA_logit, fake_GA_cam_logit, _ = self.disGA(fake_B2A)
            fake_LA_logit, fake_LA_cam_logit, _ = self.disLA(fake_B2A)
            fake_GB_logit, fake_GB_cam_logit, _ = self.disGB(fake_A2B)
            fake_LB_logit, fake_LB_cam_logit, _ = self.disLB(fake_A2B)

            # LSGAN-style discriminator losses: real -> 1, fake -> 0.
            D_ad_loss_GA = self.MSE_loss(
                real_GA_logit,
                torch.ones_like(real_GA_logit).to(
                    self.device)) + self.MSE_loss(
                        fake_GA_logit,
                        torch.zeros_like(fake_GA_logit).to(self.device))
            D_ad_cam_loss_GA = self.MSE_loss(
                real_GA_cam_logit,
                torch.ones_like(real_GA_cam_logit).to(
                    self.device)) + self.MSE_loss(
                        fake_GA_cam_logit,
                        torch.zeros_like(fake_GA_cam_logit).to(self.device))
            D_ad_loss_LA = self.MSE_loss(
                real_LA_logit,
                torch.ones_like(real_LA_logit).to(
                    self.device)) + self.MSE_loss(
                        fake_LA_logit,
                        torch.zeros_like(fake_LA_logit).to(self.device))
            D_ad_cam_loss_LA = self.MSE_loss(
                real_LA_cam_logit,
                torch.ones_like(real_LA_cam_logit).to(
                    self.device)) + self.MSE_loss(
                        fake_LA_cam_logit,
                        torch.zeros_like(fake_LA_cam_logit).to(self.device))
            D_ad_loss_GB = self.MSE_loss(
                real_GB_logit,
                torch.ones_like(real_GB_logit).to(
                    self.device)) + self.MSE_loss(
                        fake_GB_logit,
                        torch.zeros_like(fake_GB_logit).to(self.device))
            D_ad_cam_loss_GB = self.MSE_loss(
                real_GB_cam_logit,
                torch.ones_like(real_GB_cam_logit).to(
                    self.device)) + self.MSE_loss(
                        fake_GB_cam_logit,
                        torch.zeros_like(fake_GB_cam_logit).to(self.device))
            D_ad_loss_LB = self.MSE_loss(
                real_LB_logit,
                torch.ones_like(real_LB_logit).to(
                    self.device)) + self.MSE_loss(
                        fake_LB_logit,
                        torch.zeros_like(fake_LB_logit).to(self.device))
            D_ad_cam_loss_LB = self.MSE_loss(
                real_LB_cam_logit,
                torch.ones_like(real_LB_cam_logit).to(
                    self.device)) + self.MSE_loss(
                        fake_LB_cam_logit,
                        torch.zeros_like(fake_LB_cam_logit).to(self.device))

            # Divide by n_gpu so gradients average correctly across devices.
            D_loss_A = self.adv_weight * (D_ad_loss_GA + D_ad_cam_loss_GA +
                                          D_ad_loss_LA +
                                          D_ad_cam_loss_LA) / self.n_gpu
            D_loss_B = self.adv_weight * (D_ad_loss_GB + D_ad_cam_loss_GB +
                                          D_ad_loss_LB +
                                          D_ad_cam_loss_LB) / self.n_gpu

            Discriminator_loss = D_loss_A + D_loss_B
            Discriminator_loss.backward()
            if self.is_parallel:
                self.disGA.apply_collective_grads()
                self.disGB.apply_collective_grads()
                self.disLA.apply_collective_grads()
                self.disLB.apply_collective_grads()
                self.genA2B.apply_collective_grads()
                self.genB2A.apply_collective_grads()
            self.D_optim.minimize(Discriminator_loss)

            # Update G
            if hasattr(self.G_optim, "_optimizer"):  # support meta optimizer
                self.G_optim._optimizer.clear_gradients()
            else:
                self.G_optim.clear_gradients()

            fake_A2B, fake_A2B_cam_logit, _ = self.genA2B(real_A)
            fake_B2A, fake_B2A_cam_logit, _ = self.genB2A(real_B)

            # Cycle reconstructions (A -> B -> A, B -> A -> B).
            fake_A2B2A, _, _ = self.genB2A(fake_A2B)
            fake_B2A2B, _, _ = self.genA2B(fake_B2A)

            # Identity mappings (feed a generator its own target domain).
            fake_A2A, fake_A2A_cam_logit, _ = self.genB2A(real_A)
            fake_B2B, fake_B2B_cam_logit, _ = self.genA2B(real_B)

            fake_GA_logit, fake_GA_cam_logit, _ = self.disGA(fake_B2A)
            fake_LA_logit, fake_LA_cam_logit, _ = self.disLA(fake_B2A)
            fake_GB_logit, fake_GB_cam_logit, _ = self.disGB(fake_A2B)
            fake_LB_logit, fake_LB_cam_logit, _ = self.disLB(fake_A2B)

            # Generator adversarial losses: fakes should fool D (-> 1).
            G_ad_loss_GA = self.MSE_loss(
                fake_GA_logit,
                torch.ones_like(fake_GA_logit).to(self.device))
            G_ad_cam_loss_GA = self.MSE_loss(
                fake_GA_cam_logit,
                torch.ones_like(fake_GA_cam_logit).to(self.device))
            G_ad_loss_LA = self.MSE_loss(
                fake_LA_logit,
                torch.ones_like(fake_LA_logit).to(self.device))
            G_ad_cam_loss_LA = self.MSE_loss(
                fake_LA_cam_logit,
                torch.ones_like(fake_LA_cam_logit).to(self.device))
            G_ad_loss_GB = self.MSE_loss(
                fake_GB_logit,
                torch.ones_like(fake_GB_logit).to(self.device))
            G_ad_cam_loss_GB = self.MSE_loss(
                fake_GB_cam_logit,
                torch.ones_like(fake_GB_cam_logit).to(self.device))
            G_ad_loss_LB = self.MSE_loss(
                fake_LB_logit,
                torch.ones_like(fake_LB_logit).to(self.device))
            G_ad_cam_loss_LB = self.MSE_loss(
                fake_LB_cam_logit,
                torch.ones_like(fake_LB_cam_logit).to(self.device))

            G_recon_loss_A = self.L1_loss(fake_A2B2A, real_A)
            G_recon_loss_B = self.L1_loss(fake_B2A2B, real_B)

            G_identity_loss_A = self.L1_loss(fake_A2A, real_A)
            G_identity_loss_B = self.L1_loss(fake_B2B, real_B)

            G_cam_loss_A = self.BCE_loss(
                fake_B2A_cam_logit,
                torch.ones_like(fake_B2A_cam_logit).to(
                    self.device)) + self.BCE_loss(
                        fake_A2A_cam_logit,
                        torch.zeros_like(fake_A2A_cam_logit).to(self.device))
            G_cam_loss_B = self.BCE_loss(
                fake_A2B_cam_logit,
                torch.ones_like(fake_A2B_cam_logit).to(
                    self.device)) + self.BCE_loss(
                        fake_B2B_cam_logit,
                        torch.zeros_like(fake_B2B_cam_logit).to(self.device))

            G_loss_A = (self.adv_weight *
                        (G_ad_loss_GA + G_ad_cam_loss_GA + G_ad_loss_LA +
                         G_ad_cam_loss_LA) + self.cycle_weight * G_recon_loss_A
                        + self.identity_weight * G_identity_loss_A +
                        self.cam_weight * G_cam_loss_A) / self.n_gpu
            G_loss_B = (self.adv_weight *
                        (G_ad_loss_GB + G_ad_cam_loss_GB + G_ad_loss_LB +
                         G_ad_cam_loss_LB) + self.cycle_weight * G_recon_loss_B
                        + self.identity_weight * G_identity_loss_B +
                        self.cam_weight * G_cam_loss_B) / self.n_gpu

            Generator_loss = G_loss_A + G_loss_B
            Generator_loss.backward()
            if self.is_parallel:
                self.disGA.apply_collective_grads()
                self.disGB.apply_collective_grads()
                self.disLA.apply_collective_grads()
                self.disLB.apply_collective_grads()
                self.genA2B.apply_collective_grads()
                self.genB2A.apply_collective_grads()
            self.G_optim.minimize(Generator_loss)

            # clip parameter of AdaILN and ILN, applied after optimizer step

            self.Rho_clipper(self.genA2B)
            self.Rho_clipper(self.genB2A)
            if not self.is_parallel:
                writer.add_scalar(tag="G/G_loss_A",
                                  step=step,
                                  value=G_loss_A.numpy())
                writer.add_scalar(tag="G/G_loss_B",
                                  step=step,
                                  value=G_loss_B.numpy())
                writer.add_scalar(tag="D/D_loss_A",
                                  step=step,
                                  value=D_loss_A.numpy())
                writer.add_scalar(tag="D/D_loss_B",
                                  step=step,
                                  value=D_loss_B.numpy())
                writer.add_scalar(tag="D/Discriminator_loss",
                                  step=step,
                                  value=Discriminator_loss.numpy())
                # NOTE(review): tag says "D/" but this is the generator
                # loss — probably meant "G/Generator_loss"; tag kept so
                # existing dashboards keep working.
                writer.add_scalar(tag="D/Generator_loss",
                                  step=step,
                                  value=Generator_loss.numpy())

                if step % 10 == 9:

                    writer.add_image("fake_A2B",
                                     (porch.Tensor(fake_A2B[0] * 255)).clamp_(
                                         0, 255).numpy().transpose(
                                             [1, 2, 0]).astype(np.uint8), step)
                    writer.add_image("fake_B2A",
                                     (porch.Tensor(fake_B2A[0] * 255)).clamp_(
                                         0, 255).numpy().transpose(
                                             [1, 2, 0]).astype(np.uint8), step)
                    # Fixed: these two previously re-logged fake_A2B/fake_B2A
                    # instead of the cycle reconstructions they are named for.
                    writer.add_image("fake_A2B2A",
                                     (porch.Tensor(
                                         fake_A2B2A[0] * 255)).clamp_(
                                             0, 255).numpy().transpose(
                                                 [1, 2,
                                                  0]).astype(np.uint8), step)
                    writer.add_image("fake_B2A2B",
                                     (porch.Tensor(
                                         fake_B2A2B[0] * 255)).clamp_(
                                             0, 255).numpy().transpose(
                                                 [1, 2,
                                                  0]).astype(np.uint8), step)

            print("[%5d/%5d] time: %4.4f d_loss: %.8f, g_loss: %.8f" %
                  (step, self.iteration, time.time() - start_time,
                   Discriminator_loss, Generator_loss))
            if step % self.print_freq == 0:
                train_sample_num = 5
                test_sample_num = 5
                # Each column stacks 7 panels (real, heatmaps, fakes);
                # columns are concatenated horizontally per sample.
                A2B = np.zeros((self.img_size * 7, 0, 3))
                B2A = np.zeros((self.img_size * 7, 0, 3))

                self.genA2B.eval(), self.genB2A.eval(), self.disGA.eval(
                ), self.disGB.eval(), self.disLA.eval(), self.disLB.eval()
                for _ in range(train_sample_num):
                    try:
                        real_A, _ = trainA_iter.next()
                    except (NameError, StopIteration):
                        trainA_iter = iter(self.trainA_loader)
                        real_A, _ = trainA_iter.next()

                    try:
                        real_B, _ = trainB_iter.next()
                    except (NameError, StopIteration):
                        trainB_iter = iter(self.trainB_loader)
                        real_B, _ = trainB_iter.next()

                    real_A, real_B = real_A[0], real_B[0]
                    fake_A2B, _, fake_A2B_heatmap = self.genA2B(real_A)
                    fake_B2A, _, fake_B2A_heatmap = self.genB2A(real_B)

                    fake_A2B2A, _, fake_A2B2A_heatmap = self.genB2A(fake_A2B)
                    fake_B2A2B, _, fake_B2A2B_heatmap = self.genA2B(fake_B2A)

                    fake_A2A, _, fake_A2A_heatmap = self.genB2A(real_A)
                    fake_B2B, _, fake_B2B_heatmap = self.genA2B(real_B)

                    A2B = np.concatenate(
                        (A2B,
                         np.concatenate(
                             (RGB2BGR(tensor2numpy(denorm(real_A[0]))),
                              cam(tensor2numpy(fake_A2A_heatmap[0]),
                                  self.img_size),
                              RGB2BGR(tensor2numpy(denorm(fake_A2A[0]))),
                              cam(tensor2numpy(fake_A2B_heatmap[0]),
                                  self.img_size),
                              RGB2BGR(tensor2numpy(denorm(fake_A2B[0]))),
                              cam(tensor2numpy(fake_A2B2A_heatmap[0]),
                                  self.img_size),
                              RGB2BGR(tensor2numpy(denorm(fake_A2B2A[0])))),
                             0)), 1)

                    B2A = np.concatenate(
                        (B2A,
                         np.concatenate(
                             (RGB2BGR(tensor2numpy(denorm(real_B[0]))),
                              cam(tensor2numpy(fake_B2B_heatmap[0]),
                                  self.img_size),
                              RGB2BGR(tensor2numpy(denorm(fake_B2B[0]))),
                              cam(tensor2numpy(fake_B2A_heatmap[0]),
                                  self.img_size),
                              RGB2BGR(tensor2numpy(denorm(fake_B2A[0]))),
                              cam(tensor2numpy(fake_B2A2B_heatmap[0]),
                                  self.img_size),
                              RGB2BGR(tensor2numpy(denorm(fake_B2A2B[0])))),
                             0)), 1)

                for _ in range(test_sample_num):
                    try:
                        real_A, _ = testA_iter.next()
                    except (NameError, StopIteration):
                        testA_iter = iter(self.testA_loader)
                        real_A, _ = testA_iter.next()

                    try:
                        real_B, _ = testB_iter.next()
                    except (NameError, StopIteration):
                        testB_iter = iter(self.testB_loader)
                        real_B, _ = testB_iter.next()
                    real_A, real_B = real_A[0], real_B[0]

                    fake_A2B, _, fake_A2B_heatmap = self.genA2B(real_A)
                    fake_B2A, _, fake_B2A_heatmap = self.genB2A(real_B)

                    fake_A2B2A, _, fake_A2B2A_heatmap = self.genB2A(fake_A2B)
                    fake_B2A2B, _, fake_B2A2B_heatmap = self.genA2B(fake_B2A)

                    fake_A2A, _, fake_A2A_heatmap = self.genB2A(real_A)
                    fake_B2B, _, fake_B2B_heatmap = self.genA2B(real_B)

                    A2B = np.concatenate(
                        (A2B,
                         np.concatenate(
                             (RGB2BGR(tensor2numpy(denorm(real_A[0]))),
                              cam(tensor2numpy(fake_A2A_heatmap[0]),
                                  self.img_size),
                              RGB2BGR(tensor2numpy(denorm(fake_A2A[0]))),
                              cam(tensor2numpy(fake_A2B_heatmap[0]),
                                  self.img_size),
                              RGB2BGR(tensor2numpy(denorm(fake_A2B[0]))),
                              cam(tensor2numpy(fake_A2B2A_heatmap[0]),
                                  self.img_size),
                              RGB2BGR(tensor2numpy(denorm(fake_A2B2A[0])))),
                             0)), 1)

                    B2A = np.concatenate(
                        (B2A,
                         np.concatenate(
                             (RGB2BGR(tensor2numpy(denorm(real_B[0]))),
                              cam(tensor2numpy(fake_B2B_heatmap[0]),
                                  self.img_size),
                              RGB2BGR(tensor2numpy(denorm(fake_B2B[0]))),
                              cam(tensor2numpy(fake_B2A_heatmap[0]),
                                  self.img_size),
                              RGB2BGR(tensor2numpy(denorm(fake_B2A[0]))),
                              cam(tensor2numpy(fake_B2A2B_heatmap[0]),
                                  self.img_size),
                              RGB2BGR(tensor2numpy(denorm(fake_B2A2B[0])))),
                             0)), 1)
                # Only rank 0 writes images in parallel runs.
                if (not self.is_parallel
                    ) or fluid.dygraph.parallel.Env().local_rank == 0:
                    cv2.imwrite(
                        os.path.join(self.result_dir, self.dataset, 'img',
                                     'A2B_%07d.png' % step), A2B * 255.0)
                    cv2.imwrite(
                        os.path.join(self.result_dir, self.dataset, 'img',
                                     'B2A_%07d.png' % step), B2A * 255.0)
                self.genA2B.train(), self.genB2A.train(), self.disGA.train(
                ), self.disGB.train(), self.disLA.train(), self.disLB.train()

            if step % self.save_freq == 0:
                if (not self.is_parallel
                    ) or fluid.dygraph.parallel.Env().local_rank == 0:
                    self.save(
                        os.path.join(self.result_dir, self.dataset, 'model'),
                        step)

            if step % 1000 == 0:
                # Also keep a rolling "latest" snapshot of every network.
                params = {}
                params['genA2B'] = self.genA2B.state_dict()
                params['genB2A'] = self.genB2A.state_dict()
                params['disGA'] = self.disGA.state_dict()
                params['disGB'] = self.disGB.state_dict()
                params['disLA'] = self.disLA.state_dict()
                params['disLB'] = self.disLB.state_dict()
                if (not self.is_parallel
                    ) or fluid.dygraph.parallel.Env().local_rank == 0:
                    torch.save(
                        params,
                        os.path.join(self.result_dir,
                                     self.dataset + '_params_latest.pt'))
Esempio n. 5
0
def main():
    """Three-phase training driver for a GAN-based fabric-defect model.

    Phases (all inside one dygraph guard, sharing one ``iteration`` counter):
      Step 4: "slight" warm-up of D and G, which stops as soon as the mean
              discriminator score on real images reaches 0.98;
      Step 5: full adversarial training of D and G at 10x learning rate;
      Step 6: training of the inverter E so that G(E(x)) reconstructs x.

    Scalars and sample images are logged to VisualDL under ./log/scalar;
    checkpoints are written to ``args.checkpoint_folder``.  Relies on the
    module-level ``args`` namespace and project helpers (Discriminator,
    Generator, Invertor, DataLoader, Transform, Basic_Loss, AverageMeter).
    """
    # Step 0: preparation
    writer = LogWriter(logdir="./log/scalar")
    place = paddle.fluid.CUDAPlace(0)  # NOTE(review): hard-codes GPU 0
    with fluid.dygraph.guard(place):
        # Step 1: Define training dataloader
        image_folder = ""
        image_list_file = "dummy_data/fabric_list.txt"
        transform = Transform()  #Normalize2()  # [0,255]-->[0,1]
        x_data = DataLoader(image_folder, image_list_file, transform=transform)
        # NOTE(review): no `places` argument is passed to from_generator;
        # presumably the dygraph default is intended -- confirm.
        x_dataloader = fluid.io.DataLoader.from_generator(capacity=2,
                                                          return_list=True)
        x_dataloader.set_sample_generator(x_data, args.batch_size)

        total_batch = len(x_data) // args.batch_size

        # Step 2: Create model
        if args.net == "basic":
            D = Discriminator()
            G = Generator()
            E = Invertor()
        else:
            raise NotImplementedError(
                f"args.net: {args.net} is not Supported!")

        # Step 3: Define criterion and optimizer
        criterion = Basic_Loss
        D_optim = AdamOptimizer(learning_rate=args.lr,
                                parameter_list=D.parameters())
        G_optim = AdamOptimizer(learning_rate=args.lr,
                                parameter_list=G.parameters())
        E_optim = AdamOptimizer(learning_rate=args.lr,
                                parameter_list=E.parameters())

        G_loss_meter = AverageMeter()
        D_loss_meter = AverageMeter()
        E_loss_meter = AverageMeter()

        D.train()
        G.train()
        E.train()

        # Step 4: Slight Training
        # `iteration` is the global step counter shared by all three phases.
        iteration = -1
        is_slight_Train = True
        for epoch in range(1, args.epoch_num + 1):
            # optimize the discriminator, then the generator, per batch
            for (x, x_labels) in x_dataloader():
                n = x.shape[0]
                if is_slight_Train:
                    iteration += 1
                    # images arrive NHWC; cast and convert to NCHW for D
                    x = fluid.layers.cast(x, dtype="float32")
                    x = fluid.layers.transpose(x, perm=[0, 3, 1, 2])
                    preds_x = D(x)
                    preds_x_array = preds_x.numpy()
                    #print("D(x),1",preds_array.shape, np.mean(preds_array))
                    writer.add_scalar(tag="D(x)=1",
                                      step=iteration,
                                      value=np.mean(preds_x_array))
                    # warm-up converged: D confidently recognizes real samples
                    if np.mean(preds_x_array) >= 0.98:
                        is_slight_Train = False

                    # fake batch: latent z in [0,1)^64, labelled 0 (int64)
                    z = np.random.rand(n, 64)
                    zeros = np.zeros((n, 1))
                    z = to_variable(z)
                    zeros = to_variable(zeros)
                    z = fluid.layers.cast(z, dtype="float32")
                    zeros = fluid.layers.cast(zeros, dtype="int64")
                    preds_fx = D(G(z))
                    preds_fx_array = preds_fx.numpy()
                    writer.add_scalar(tag="D(G(z))=0",
                                      step=iteration,
                                      value=np.mean(preds_fx_array))
                    # D loss = real-vs-label loss + fake-vs-zero loss
                    D_loss = criterion(preds_x, x_labels) + criterion(
                        preds_fx, zeros)
                    D_loss.backward()
                    D_optim.minimize(D_loss)
                    D.clear_gradients()
                    # .numpy()[0] extracts the scalar loss; n weights the avg
                    D_loss_meter.update(D_loss.numpy()[0], n)
                    writer.add_scalar(tag="D_loss",
                                      step=iteration,
                                      value=D_loss_meter.avg)
                    print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                          f"STEP{iteration}, " +
                          f"Average D Loss: {D_loss_meter.avg:4f}, ")

                    # generator update: fresh z, target label 1 (fool D)
                    z = np.random.rand(n, 64)
                    ones = np.ones((n, 1))
                    z = to_variable(z)
                    ones = to_variable(ones)
                    z = fluid.layers.cast(z, dtype="float32")
                    ones = fluid.layers.cast(ones, dtype="int64")
                    preds = D(G(z))
                    preds_array = preds.numpy()
                    writer.add_scalar(tag="D(G(z))=1",
                                      step=iteration,
                                      value=np.mean(preds_array))
                    G_loss = criterion(preds, ones)
                    G_loss.backward()
                    G_optim.minimize(G_loss)
                    G.clear_gradients()
                    G_loss_meter.update(G_loss.numpy()[0], n)
                    writer.add_scalar(tag="G_loss",
                                      step=iteration,
                                      value=G_loss_meter.avg)
                    print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                          f"STEP{iteration}, " +
                          f"Average G Loss: {G_loss_meter.avg:4f}")

            # checkpoint on schedule, on the last epoch, or as soon as the
            # warm-up criterion is met (in which case we also break below)
            if epoch % args.save_freq == 0 or epoch == args.epoch_num or not is_slight_Train:
                D_model_path = os.path.join(args.checkpoint_folder,
                                            f"D_{args.net}-Epoch-{epoch}")
                G_model_path = os.path.join(args.checkpoint_folder,
                                            f"G_{args.net}-Epoch-{epoch}")

                # save model and optmizer states
                # (save_dygraph writes .pdparams for dicts of parameters and
                # .pdopt for optimizer state, so the shared path is safe)
                model_dict = D.state_dict()
                fluid.save_dygraph(model_dict, D_model_path)
                optim_dict = D_optim.state_dict()
                fluid.save_dygraph(optim_dict, D_model_path)

                model_dict = G.state_dict()
                fluid.save_dygraph(model_dict, G_model_path)
                optim_dict = G_optim.state_dict()
                fluid.save_dygraph(optim_dict, G_model_path)

                print(
                    f'----- Save model: {D_model_path}.pdparams, {G_model_path}.pdparams'
                )
                if not is_slight_Train:
                    break

        # Step 5:  full training for Generator and Discriminator
        # fresh optimizers at 10x learning rate; loss meters reset
        D_optim = AdamOptimizer(learning_rate=args.lr * 10,
                                parameter_list=D.parameters())
        G_optim = AdamOptimizer(learning_rate=args.lr * 10,
                                parameter_list=G.parameters())
        G_loss_meter = AverageMeter()
        D_loss_meter = AverageMeter()

        for epoch in range(1, args.epoch_num + 1):
            for (x, x_labels) in x_dataloader():
                n = x.shape[0]
                iteration += 1
                x = fluid.layers.cast(x, dtype="float32")
                x = fluid.layers.transpose(x, perm=[0, 3, 1, 2])
                preds1 = D(x)
                preds_array = preds1.numpy()
                writer.add_scalar(tag="D(x)=1",
                                  step=iteration,
                                  value=np.mean(preds_array))
                z = np.random.rand(n, 64)
                zeros = np.zeros((n, 1))
                z = to_variable(z)
                zeros = to_variable(zeros)
                z = fluid.layers.cast(z, dtype="float32")
                zeros = fluid.layers.cast(zeros, dtype="int64")
                preds2 = D(G(z))
                preds_array = preds2.numpy()
                #print("DG(z),0:",preds_array.shape, np.mean(preds_array))
                writer.add_scalar(tag="D(G(z))=0",
                                  step=iteration,
                                  value=np.mean(preds_array))
                D_loss = criterion(preds1, x_labels) + criterion(preds2, zeros)
                D_loss.backward()
                D_optim.minimize(D_loss)
                D.clear_gradients()
                D_loss_meter.update(D_loss.numpy()[0], n)
                writer.add_scalar(tag="D_loss",
                                  step=iteration,
                                  value=D_loss_meter.avg)
                print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                      f"STEP{iteration}, " +
                      f"Average D Loss: {D_loss_meter.avg:4f} ")
                # generator step: new z, label 1
                z = np.random.rand(n, 64)
                ones = np.ones((n, 1))
                z = to_variable(z)
                ones = to_variable(ones)
                z = fluid.layers.cast(z, dtype="float32")
                ones = fluid.layers.cast(ones, dtype="int64")
                preds = D(G(z))
                preds_array = preds.numpy()
                #print("DG(z),1:",preds_array.shape, np.mean(preds_array))
                writer.add_scalar(tag="D(G(z))=1",
                                  step=iteration,
                                  value=np.mean(preds_array))
                G_loss = criterion(preds, ones)
                G_loss.backward()
                G_optim.minimize(G_loss)
                G.clear_gradients()
                G_loss_meter.update(G_loss.numpy()[0], n)
                writer.add_scalar(tag="G_loss",
                                  step=iteration,
                                  value=G_loss_meter.avg)
                print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                      f"STEP{iteration}, " +
                      f"Average G Loss: {G_loss_meter.avg:4f}")

            if epoch % args.save_freq == 0 or epoch == args.epoch_num:
                D_model_path = os.path.join(args.checkpoint_folder,
                                            f"D_{args.net}-Epoch-{epoch}")
                G_model_path = os.path.join(args.checkpoint_folder,
                                            f"G_{args.net}-Epoch-{epoch}")

                # save model and optmizer states
                model_dict = D.state_dict()
                fluid.save_dygraph(model_dict, D_model_path)
                optim_dict = D_optim.state_dict()
                fluid.save_dygraph(optim_dict, D_model_path)

                model_dict = G.state_dict()
                fluid.save_dygraph(model_dict, G_model_path)
                optim_dict = G_optim.state_dict()
                fluid.save_dygraph(optim_dict, G_model_path)
                print(
                    f'----- Save model: {D_model_path}.pdparams, {G_model_path}.pdparams'
                )

        # Step 6: full training for Inverter
        # E maps images to latent space; trained so G(E(x)) ~= x (MSE)
        E_optim = AdamOptimizer(learning_rate=args.lr * 10,
                                parameter_list=E.parameters())
        E_loss_meter = AverageMeter()

        for epoch in range(1, args.epoch_num + 1):
            for (x, x_labels) in x_dataloader():
                n = x.shape[0]
                iteration += 1
                x = fluid.layers.cast(x, dtype="float32")
                # log the first image of the batch (still NHWC here)
                image = x.numpy()[0] * 255
                writer.add_image(tag="x", step=iteration, img=image)
                x = fluid.layers.transpose(x, perm=[0, 3, 1, 2])
                invert_x = G(E(x))
                # back to NHWC for logging the reconstruction
                invert_image = fluid.layers.transpose(invert_x,
                                                      perm=[0, 2, 3, 1])
                invert_image = invert_image.numpy()[0] * 255
                #print("D(x),1",preds_array.shape, np.mean(preds_array))
                writer.add_image(tag="invert_x",
                                 step=iteration,
                                 img=invert_image)
                print(np.max(invert_image), np.min(invert_image))
                # NOTE(review): invert_x is NCHW while x was transposed above,
                # so shapes match; the loss is a plain reconstruction MSE.
                E_loss = fluid.layers.mse_loss(invert_x, x)
                print("E_loss shape:", E_loss.numpy().shape)
                E_loss.backward()
                E_optim.minimize(E_loss)
                E.clear_gradients()
                E_loss_meter.update(E_loss.numpy()[0], n)
                writer.add_scalar(tag="E_loss",
                                  step=iteration,
                                  value=E_loss_meter.avg)
                print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                      f"STEP{iteration}, " +
                      f"Average E Loss: {E_loss_meter.avg:4f}, ")

            if epoch % args.save_freq == 0 or epoch == args.epoch_num:
                E_model_path = os.path.join(args.checkpoint_folder,
                                            f"E_{args.net}-Epoch-{epoch}")
                # save model and optmizer states
                model_dict = E.state_dict()
                fluid.save_dygraph(model_dict, E_model_path)
                optim_dict = E_optim.state_dict()
                fluid.save_dygraph(optim_dict, E_model_path)
                print(
                    f'----- Save model: {E_model_path}.pdparams, {E_model_path}.pdparams'
                )
# ---- Example 6 ----
    def train(self, loaders):
        """Run the full adversarial training loop (StarGAN-v2 style).

        For each iteration: two discriminator updates (latent-guided and
        reference-guided), then -- after a 100-iteration D-only warm-up --
        two generator updates, EMA of G/mapping/style-encoder weights,
        decay of the diversity-sensitive loss weight, and periodic
        logging / sampling / checkpointing / metric evaluation.

        Args:
            loaders: namespace with ``src``, ``ref`` and ``val`` data
                loaders consumed through InputFetcher.
        """
        args = self.args
        nets = self.nets
        nets_ema = self.nets_ema
        optims = self.optims
        writer = LogWriter(logdir=self.args.checkpoint_dir + "/log/")

        # fetch random validation images for debugging
        fetcher = InputFetcher(loaders.src, loaders.ref, args.latent_dim,
                               'train')
        fetcher_val = InputFetcher(loaders.val, None, args.latent_dim, 'val')
        inputs_val = next(fetcher_val)

        # resume training if necessary
        if args.resume_iter > 0:
            self._load_checkpoint(args.resume_iter)

        # remember the initial value of ds weight (linearly decayed below)
        initial_lambda_ds = args.lambda_ds

        print('Start training...')
        import tqdm
        start_time = time.time()
        tqdm_descriptor = tqdm.trange(args.resume_iter, args.total_iters)
        for i in tqdm_descriptor:
            # fetch images and labels
            inputs = next(fetcher)
            x_real, y_org = inputs.x_src, inputs.y_src
            x_ref, x_ref2, y_trg = inputs.x_ref, inputs.x_ref2, inputs.y_ref
            z_trg, z_trg2 = inputs.z_trg, inputs.z_trg2

            # facial-landmark heatmaps, only when the highpass filter is on
            masks = nets.fan.get_heatmap(x_real) if args.w_hpf > 0 else None

            # train the discriminator with a latent-code-generated fake
            d_loss, d_losses_latent = compute_d_loss(nets,
                                                     args,
                                                     x_real,
                                                     y_org,
                                                     y_trg,
                                                     z_trg=z_trg,
                                                     masks=masks)
            self._reset_grad()
            d_loss.backward()
            optims.discriminator.minimize(d_loss)

            # ...and again with a reference-image-generated fake
            d_loss, d_losses_ref = compute_d_loss(nets,
                                                  args,
                                                  x_real,
                                                  y_org,
                                                  y_trg,
                                                  x_ref=x_ref,
                                                  masks=masks)
            self._reset_grad()
            d_loss.backward()
            optims.discriminator.minimize(d_loss)

            # train the generator
            # (only the discriminator is trained for the first 100
            # iterations after a resume)
            if i - args.resume_iter > 100:
                g_loss, g_losses_latent, sample_1 = compute_g_loss(
                    nets,
                    args,
                    x_real,
                    y_org,
                    y_trg,
                    z_trgs=[z_trg, z_trg2],
                    masks=masks)
                self._reset_grad()
                g_loss.backward()
                # the latent path trains G, the mapping network and the
                # style encoder together
                optims.generator.minimize(g_loss)
                optims.mapping_network.minimize(g_loss)
                optims.style_encoder.minimize(g_loss)

                g_loss, g_losses_ref, sample_2 = compute_g_loss(
                    nets,
                    args,
                    x_real,
                    y_org,
                    y_trg,
                    x_refs=[x_ref, x_ref2],
                    masks=masks)
                self._reset_grad()
                g_loss.backward()
                # the reference path updates only G
                optims.generator.minimize(g_loss)

                # compute moving average of network parameters
                moving_average(nets.generator, nets_ema.generator, beta=0.999)
                moving_average(nets.mapping_network,
                               nets_ema.mapping_network,
                               beta=0.999)
                moving_average(nets.style_encoder,
                               nets_ema.style_encoder,
                               beta=0.999)

                # decay weight for diversity sensitive loss
                if args.lambda_ds > 0:
                    args.lambda_ds -= (initial_lambda_ds / args.ds_iter)

                # print out log info
                if (i + 1) % args.print_every == 0:
                    elapsed = time.time() - start_time
                    elapsed = str(datetime.timedelta(seconds=elapsed))[:-7]
                    log = "Elapsed time [%s], Iteration [%i/%i], " % (
                        elapsed, i + 1, args.total_iters)
                    all_losses = dict()
                    for loss, prefix in zip([
                            d_losses_latent, d_losses_ref, g_losses_latent,
                            g_losses_ref
                    ], ['D/latent_', 'D/ref_', 'G/latent_', 'G/ref_']):
                        for key, value in loss.items():
                            all_losses[prefix + key] = value
                            writer.add_scalar(tag=prefix + key,
                                              step=i + 1,
                                              value=value)
                    all_losses['G/lambda_ds'] = args.lambda_ds
                    log += ' '.join([
                        '%s: [%.4f]' % (key, value)
                        for key, value in all_losses.items()
                    ])
                    tqdm_descriptor.set_description(log)
                    # log one latent-path sample (CHW -> HWC uint8)
                    writer.add_image("x_fake",
                                     (utils.denormalize(sample_1) *
                                      255).numpy().transpose([1, 2, 0]).astype(
                                          np.uint8), i + 1)

                # generate images for debugging
                if (i + 1) % args.sample_every == 0:
                    os.makedirs(args.sample_dir, exist_ok=True)
                    utils.debug_image(nets_ema,
                                      args,
                                      inputs=inputs_val,
                                      step=i + 1)

                # save model checkpoints
                if (i + 1) % args.save_every == 0:
                    self._save_checkpoint(step=i + 1)

                # compute FID and LPIPS if necessary
                if (i + 1) % args.eval_every == 0:
                    calculate_metrics(nets_ema, args, i + 1, mode='latent')
                    calculate_metrics(nets_ema, args, i + 1, mode='reference')
            else:
                # warm-up phase: only discriminator losses exist to log
                if (i + 1) % args.print_every == 0:
                    elapsed = time.time() - start_time
                    elapsed = str(datetime.timedelta(seconds=elapsed))[:-7]
                    log = "Elapsed time [%s], Iteration [%i/%i], " % (
                        elapsed, i + 1, args.total_iters)
                    all_losses = dict()
                    for loss, prefix in zip([d_losses_latent, d_losses_ref],
                                            ['D/latent_', 'D/ref_']):
                        for key, value in loss.items():
                            all_losses[prefix + key] = value
                            writer.add_scalar(tag=prefix + key,
                                              step=i + 1,
                                              value=value)
                    log += ' '.join([
                        '%s: [%.4f]' % (key, value)
                        for key, value in all_losses.items()
                    ])
                    tqdm_descriptor.set_description(log)

        writer.close()
# ---- Example 7 ----
def synthesis(text_input, args):
    """Synthesize speech for ``text_input`` with a TransformerTTS model.

    Runs the acoustic model autoregressively until the stop token fires
    (or ``args.max_len`` frames), logs attention maps and the audio to
    VisualDL, converts the mel spectrogram to a waveform with the chosen
    vocoder, and writes a .wav under ``<args.output>/samples``.

    Args:
        text_input (str): text to synthesize.
        args: parsed CLI namespace (config, output, checkpoint paths,
            use_gpu, max_len, stop_threshold, vocoder, ...).

    Raises:
        ValueError: if ``args.vocoder`` is neither 'griffin-lim' nor
            'waveflow'.
    """
    local_rank = dg.parallel.Env().local_rank
    place = (fluid.CUDAPlace(local_rank) if args.use_gpu else fluid.CPUPlace())

    with open(args.config) as f:
        cfg = yaml.load(f, Loader=yaml.Loader)

    # VisualDL log directory
    if not os.path.exists(args.output):
        os.mkdir(args.output)

    writer = LogWriter(os.path.join(args.output, 'log'))

    fluid.enable_dygraph(place)
    with fluid.unique_name.guard():
        network_cfg = cfg['network']
        model = TransformerTTS(
            network_cfg['embedding_size'], network_cfg['hidden_size'],
            network_cfg['encoder_num_head'], network_cfg['encoder_n_layers'],
            cfg['audio']['num_mels'], network_cfg['outputs_per_step'],
            network_cfg['decoder_num_head'], network_cfg['decoder_n_layers'])
        # Load parameters.
        global_step = io.load_parameters(
            model=model, checkpoint_path=args.checkpoint_transformer)
        model.eval()

    # init input: token ids plus a single all-zero mel frame as the seed
    text = np.asarray(text_to_sequence(text_input))
    text = fluid.layers.unsqueeze(dg.to_variable(text).astype(np.int64), [0])
    mel_input = dg.to_variable(np.zeros([1, 1, 80])).astype(np.float32)
    pos_text = np.arange(1, text.shape[1] + 1)
    pos_text = fluid.layers.unsqueeze(
        dg.to_variable(pos_text).astype(np.int64), [0])

    # autoregressive decoding: append one predicted frame per step until
    # the stop token exceeds the threshold or max_len is reached
    for i in range(args.max_len):
        pos_mel = np.arange(1, mel_input.shape[1] + 1)
        pos_mel = fluid.layers.unsqueeze(
            dg.to_variable(pos_mel).astype(np.int64), [0])
        mel_pred, postnet_pred, attn_probs, stop_preds, attn_enc, attn_dec = model(
            text, mel_input, pos_text, pos_mel)
        if stop_preds.numpy()[0, -1] > args.stop_threshold:
            break
        mel_input = fluid.layers.concat([mel_input, postnet_pred[:, -1:, :]],
                                        axis=1)
    # log the first 4 decoder attention heads as colormapped images
    global_step = 0
    for i, prob in enumerate(attn_probs):
        for j in range(4):
            x = np.uint8(cm.viridis(prob.numpy()[j]) * 255)
            writer.add_image('Attention_%d_0' % global_step, x, i * 4 + j)

    if args.vocoder == 'griffin-lim':
        #synthesis use griffin-lim
        wav = synthesis_with_griffinlim(postnet_pred, cfg['audio'])
    elif args.vocoder == 'waveflow':
        # synthesis use waveflow
        wav = synthesis_with_waveflow(postnet_pred, args,
                                      args.checkpoint_vocoder, place)
    else:
        # Fix: this branch used to only print an error and then crash with
        # a NameError on the unbound `wav` below; fail fast instead.
        raise ValueError(
            'vocoder error, we only support griffinlim and waveflow, '
            'but received %s.' % args.vocoder)

    writer.add_audio(text_input + '(' + args.vocoder + ')', wav, 0,
                     cfg['audio']['sr'])
    if not os.path.exists(os.path.join(args.output, 'samples')):
        os.mkdir(os.path.join(args.output, 'samples'))
    write(
        os.path.join(os.path.join(args.output, 'samples'),
                     args.vocoder + '.wav'), cfg['audio']['sr'], wav)
    print("Synthesis completed !!!")
    writer.close()
# ---- Example 8 ----
def main(args):
    """Train TransformerTTS on LJSpeech, optionally data-parallel.

    Builds the model and a Noam-decay Adam optimizer, resumes from a
    checkpoint when available, and loops over batches computing mel,
    post-mel and stop-token losses.  Rank 0 logs scalars, attention-map
    images and the learning rate to VisualDL and saves checkpoints.

    Args:
        args: parsed CLI namespace (config path, output dir, data dir,
            use_gpu, iteration/checkpoint resume options).
    """
    local_rank = dg.parallel.Env().local_rank
    nranks = dg.parallel.Env().nranks
    parallel = nranks > 1

    with open(args.config) as f:
        cfg = yaml.load(f, Loader=yaml.Loader)

    global_step = 0
    place = fluid.CUDAPlace(local_rank) if args.use_gpu else fluid.CPUPlace()

    if not os.path.exists(args.output):
        os.mkdir(args.output)

    # only rank 0 writes logs; all writer calls below are rank-guarded
    writer = LogWriter(os.path.join(args.output,
                                    'log')) if local_rank == 0 else None

    fluid.enable_dygraph(place)
    network_cfg = cfg['network']
    model = TransformerTTS(
        network_cfg['embedding_size'], network_cfg['hidden_size'],
        network_cfg['encoder_num_head'], network_cfg['encoder_n_layers'],
        cfg['audio']['num_mels'], network_cfg['outputs_per_step'],
        network_cfg['decoder_num_head'], network_cfg['decoder_n_layers'])

    model.train()
    # Noam schedule: warm up then decay; global-norm gradient clipping
    optimizer = fluid.optimizer.AdamOptimizer(
        learning_rate=dg.NoamDecay(1 / (cfg['train']['warm_up_step'] *
                                        (cfg['train']['learning_rate']**2)),
                                   cfg['train']['warm_up_step']),
        parameter_list=model.parameters(),
        grad_clip=fluid.clip.GradientClipByGlobalNorm(cfg['train'][
            'grad_clip_thresh']))

    # Load parameters.
    global_step = io.load_parameters(
        model=model,
        optimizer=optimizer,
        checkpoint_dir=os.path.join(args.output, 'checkpoints'),
        iteration=args.iteration,
        checkpoint_path=args.checkpoint)
    print("Rank {}: checkpoint loaded.".format(local_rank))

    if parallel:
        strategy = dg.parallel.prepare_context()
        model = fluid.dygraph.parallel.DataParallel(model, strategy)

    reader = LJSpeechLoader(
        cfg['audio'],
        place,
        args.data,
        cfg['train']['batch_size'],
        nranks,
        local_rank,
        shuffle=True).reader

    iterator = iter(tqdm(reader))

    global_step += 1

    while global_step <= cfg['train']['max_iteration']:
        # restart the reader when an epoch is exhausted
        try:
            batch = next(iterator)
        except StopIteration as e:
            iterator = iter(tqdm(reader))
            batch = next(iterator)

        character, mel, mel_input, pos_text, pos_mel, stop_tokens = batch

        mel_pred, postnet_pred, attn_probs, stop_preds, attn_enc, attn_dec = model(
            character, mel_input, pos_text, pos_mel)

        # L1 losses on the raw and postnet-refined mel predictions
        mel_loss = layers.mean(
            layers.abs(layers.elementwise_sub(mel_pred, mel)))
        post_mel_loss = layers.mean(
            layers.abs(layers.elementwise_sub(postnet_pred, mel)))
        loss = mel_loss + post_mel_loss

        # weighted cross-entropy on the stop token
        stop_loss = cross_entropy(
            stop_preds, stop_tokens, weight=cfg['network']['stop_loss_weight'])
        loss = loss + stop_loss

        if local_rank == 0:
            writer.add_scalar('training_loss/mel_loss',
                              mel_loss.numpy(),
                              global_step)
            writer.add_scalar('training_loss/post_mel_loss',
                              post_mel_loss.numpy(),
                              global_step)
            writer.add_scalar('stop_loss', stop_loss.numpy(), global_step)

            # DataParallel wraps the model, so alphas live under _layers
            if parallel:
                writer.add_scalar('alphas/encoder_alpha',
                                   model._layers.encoder.alpha.numpy(),
                                   global_step)
                writer.add_scalar('alphas/decoder_alpha',
                                   model._layers.decoder.alpha.numpy(),
                                   global_step)
            else:
                writer.add_scalar('alphas/encoder_alpha',
                                   model.encoder.alpha.numpy(),
                                   global_step)
                writer.add_scalar('alphas/decoder_alpha',
                                   model.decoder.alpha.numpy(),
                                   global_step)

            writer.add_scalar('learning_rate',
                              optimizer._learning_rate.step().numpy(),
                              global_step)

            if global_step % cfg['train']['image_interval'] == 1:
                # NOTE(review): the image step is `i * 4 + j` even though the
                # head count comes from the config -- confirm heads == 4.
                for i, prob in enumerate(attn_probs):
                    for j in range(cfg['network']['decoder_num_head']):
                        x = np.uint8(
                            cm.viridis(prob.numpy()[j * cfg['train'][
                                'batch_size'] // nranks]) * 255)
                        writer.add_image(
                            'Attention_%d_0' % global_step,
                            x,
                            i * 4 + j)

                for i, prob in enumerate(attn_enc):
                    for j in range(cfg['network']['encoder_num_head']):
                        x = np.uint8(
                            cm.viridis(prob.numpy()[j * cfg['train'][
                                'batch_size'] // nranks]) * 255)
                        writer.add_image(
                            'Attention_enc_%d_0' % global_step,
                            x,
                            i * 4 + j)

                for i, prob in enumerate(attn_dec):
                    for j in range(cfg['network']['decoder_num_head']):
                        x = np.uint8(
                            cm.viridis(prob.numpy()[j * cfg['train'][
                                'batch_size'] // nranks]) * 255)
                        writer.add_image(
                            'Attention_dec_%d_0' % global_step,
                            x,
                            i * 4 + j)

        # data-parallel training scales the loss and syncs gradients
        if parallel:
            loss = model.scale_loss(loss)
            loss.backward()
            model.apply_collective_grads()
        else:
            loss.backward()
        optimizer.minimize(loss)
        model.clear_gradients()

        # save checkpoint
        if local_rank == 0 and global_step % cfg['train'][
                'checkpoint_interval'] == 0:
            io.save_parameters(
                os.path.join(args.output, 'checkpoints'), global_step, model,
                optimizer)
        global_step += 1

    if local_rank == 0:
        writer.close()
# ---- Example 9 ----
def main(args):
    """Run tiled inference with a (scale-weighted) ensemble of segmentation
    models, then save .tif label maps, gray/color .png previews, and log the
    preview images to VisualDL.

    Args:
        args: parsed CLI namespace; must provide ``config`` (path to the
            config file consumed by ``Config``) and ``save_dir`` (output root
            that also holds the per-model checkpoints).

    Side effects:
        Creates output directories under ``args.save_dir`` and writes image
        files plus a VisualDL log; no value is returned.
    """
    config = Config(args.config)
    cfg = config(vars(args), mode=['infer', 'init'])
    scale = cfg['infer']['scale']
    mdname = cfg['infer']['model']
    # Ensemble tag: concatenation of all model names (e.g. ['a','b'] -> 'ab').
    imgname = ''.join(mdname)
    sz = cfg['infer']['sz']
    infer_size = cfg['infer']['infer_size']
    # Output layout: <save_dir>/<result>/<models>/<scale>/{lab,color,gray}
    save_path = create_path(args.save_dir, cfg['init']['result'])
    save_path = create_path(save_path, imgname)
    save_path = create_path(save_path, str(scale))

    tif_path = create_path(save_path, cfg['infer']['lab'])
    color_path = create_path(save_path, cfg['infer']['color'])
    gray_path = create_path(save_path, cfg['infer']['gray'])
    vdl_dir = os.path.join(args.save_dir, cfg['init']['vdl_dir'])
    palette = np.array(cfg['infer']['palette'], dtype=np.uint8)
    num_class = cfg['init']['num_classes']
    batchsz = cfg['infer']['batchsz']
    infer_path = os.path.join(cfg['infer']['root_path'], cfg['infer']['path'])
    tagname = imgname + '/' + str(scale)
    vdl_dir = os.path.join(vdl_dir, 'infer')
    writer = LogWriter(logdir=vdl_dir)

    infer_ds = TeDataset(path=cfg['infer']['root_path'],
                         fl=cfg['infer']['path'],
                         sz=sz)
    total = len(infer_ds)
    # Accumulator for the ensemble: each pass re-weights the previous sum by
    # `scale` before adding the new model's stacked predictions.
    addresult = np.zeros((total, num_class, sz, sz))
    for mnet in mdname:
        net = modelset(mode=mnet, num_classes=num_class)
        # Load model weights saved as <save_dir>/<mnet>/<mnet>.
        # NOTE: renamed from `input`/`label` to avoid shadowing the builtin.
        x_spec = InputSpec([None, 3, 64, 64], 'float32', 'x')
        y_spec = InputSpec([None, 1, 64, 64], 'int64', 'label')
        model = paddle.Model(net, x_spec, y_spec)
        model.load(path=os.path.join(args.save_dir, mnet, mnet))
        model.prepare()
        result = model.predict(
            infer_ds,
            batch_size=batchsz,
            num_workers=cfg['infer']['num_workers'],
            stack_outputs=True  # stacks per-batch outputs, e.g. [160,2,64,64]
        )

        addresult = result[0] + scale * addresult

    # Stitch tile predictions back into full-size label maps.
    pred = construct(addresult, infer_size, sz=sz)

    file_list = os.listdir(infer_path)
    # NOTE(review): orders files by the single character just before a 4-char
    # extension (e.g. "img_3.tiff" -> 3); assumes names end in one digit +
    # ".xxx"/".xxxx" — confirm against the dataset naming scheme.
    file_list.sort(key=lambda x: int(x[-5:-4]))
    step = 0
    for i, fl in enumerate(file_list):
        # BUGFIX: was `fl.split(".")`, which raises ValueError when the file
        # name contains more than one dot; splitext handles that correctly.
        name, _ = os.path.splitext(fl)
        # Save raw class-index prediction as a .tif label map.
        lab_img = Image.fromarray(pred[i].astype(np.uint8)).convert("L")
        saveimg(lab_img, tif_path, name=name, type='.tif')

        # Gray (palettized) label: save and log to VisualDL.
        label = colorize(pred[i], palette)
        writer.add_image(tag=tagname,
                         img=saveimg(label,
                                     gray_path,
                                     name=name,
                                     type='.png',
                                     re_out=True),
                         step=step,
                         dataformats='HW')
        step += 1

        # Color overlay: blend the prediction onto the source image.
        file = os.path.join(infer_path, fl)
        out = blend_image(file, label, alpha=0.25)
        writer.add_image(tag=tagname,
                         img=saveimg(out,
                                     color_path,
                                     name=name,
                                     type='.png',
                                     re_out=True),
                         step=step,
                         dataformats='HWC')
        step += 1
    writer.close()