def __init__(self, batch_size):
    super().__init__()
    self.batch_size = batch_size
    self.test_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    # random crop, color jitter etc.
    self.train_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
        transforms.RandomApply(
            [
                transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)  # not strengthened
            ], p=0.8),
        transforms.RandomGrayscale(p=0.2),
        transforms.RandomApply([transforms.GaussianBlur([1, 1])], p=0.5),  # perhaps this blur is too much
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.491, 0.482, 0.446],
                             std=[0.247, 0.243, 0.261]),
    ])
    self.train_dataset = MSCOCO(train=True, image_transforms=self.train_transform)
    self.test_dataset = MSCOCO(train=False, image_transforms=self.test_transform)
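A minimal usage sketch, assuming this __init__ belongs to a data-module class (e.g. a PyTorch Lightning DataModule) that exposes the two datasets built above; the class name COCODataModule and the num_workers value are placeholders, not taken from the original code.

# Hypothetical usage; COCODataModule and num_workers=4 are assumptions.
from torch.utils.data import DataLoader

dm = COCODataModule(batch_size=64)
train_loader = DataLoader(dm.train_dataset, batch_size=dm.batch_size,
                          shuffle=True, num_workers=4, drop_last=True)
test_loader = DataLoader(dm.test_dataset, batch_size=dm.batch_size,
                         shuffle=False, num_workers=4)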
def main():
    conf = get_args()
    if not conf.silent:
        save_path = os.path.abspath(script_path + conf.save_path)
        if not os.path.exists(save_path):
            os.mkdir(save_path)
        preview_path = os.path.abspath(save_path + "/preview")
        if not os.path.exists(preview_path):
            os.mkdir(preview_path)
    dataset = MSCOCO(conf)
    netG, netD = build_models(conf)
    optimizerG, optimizerD = build_optimizer(netG, netD, conf.adam_lr,
                                             conf.adam_beta1, conf.adam_beta2)
    pprog = print_progress(conf.max_epoch, conf.batch_size,
                           dataset.train_data_len, use_epoch=True)
    updater = Updater(netG, netD, optimizerG, optimizerD, conf)

    print("==========================================")
    print("Info:start train")
    val_times = dataset.val_data_len // dataset.batch_size
    if dataset.val_data_len % dataset.batch_size != 0:
        val_times += 1
    for i in range(conf.max_epoch):
        train_loss = np.array([0., 0.], dtype="float32")
        start = time.time()
        for data in dataset.get_data():
            data = toGPU(data, conf.gpu_num)
            updater.update(data, i)
            if dataset.now_iter % conf.display_interval == 0:
                elapsed = time.time() - start
                pprog(elapsed, dataset.get_state)
                start = time.time()
        if i % conf.snapshot_interval == 0 and not conf.silent:
            data = dataset.sampling(conf.sample_size)
            sample = sample_generate(netG, data, conf)
            Image.fromarray(sample).save(preview_path + f"/image_{i:04d}.png")
    print("\n\n\n\n==========================================")
    print("Info:finish train")
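The build_optimizer helper used above (and in the later scripts) is not shown in this section. A minimal sketch of what it might look like, assuming it simply wraps torch.optim.Adam for both networks with the learning rate and betas from the config; this is an assumption, not the repo's actual implementation.

import torch.optim as optim

def build_optimizer(netG, netD, lr, beta1, beta2):
    # Assumed implementation: plain Adam for generator and discriminator
    # with shared hyperparameters taken from the config.
    optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, beta2))
    optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, beta2))
    return optimizerG, optimizerD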
def create(cfg):
    dataset_type = cfg.dataset_type
    if dataset_type == "mpii":
        from dataset.mpii import MPII
        data = MPII(cfg)
    elif dataset_type == "coco":
        from dataset.mscoco import MSCOCO
        data = MSCOCO(cfg)
    elif dataset_type == "penn_action":
        from dataset.penn_action import PennAction
        data = PennAction(cfg)
    elif dataset_type == "default":
        data = PoseDataset(cfg)
    else:
        raise Exception("Unsupported dataset_type: \"{}\"".format(dataset_type))
    return data
def create(cfg):
    # Instantiate the matching dataset reader based on the dataset type
    # specified in the configuration file.
    dataset_type = cfg.dataset_type
    if dataset_type == "mpii":
        from dataset.mpii import MPII
        data = MPII(cfg)  # MPII inherits from PoseDataset
    elif dataset_type == "coco":
        from dataset.mscoco import MSCOCO
        data = MSCOCO(cfg)
    elif dataset_type == "penn_action":
        from dataset.penn_action import PennAction
        data = PennAction(cfg)
    elif dataset_type == "default":
        data = PoseDataset(cfg)  # PoseDataset is the base class for all datasets
    else:
        raise Exception("Unsupported dataset_type: \"{}\"".format(dataset_type))
    return data
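A small usage sketch for the factory above; the SimpleNamespace stand-in for cfg and the dataset_type value are illustrative assumptions, since the real config object and its remaining fields are not shown here.

# Hypothetical usage; the cfg object is a stand-in for the real config,
# which would carry whatever extra fields MSCOCO(cfg) expects.
from types import SimpleNamespace

cfg = SimpleNamespace(dataset_type="coco")
data = create(cfg)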
def main():
    conf = get_args()
    dataset = MSCOCO(conf)
    VOC_SIZE = dataset.jp_voc_size if conf.use_lang == "jp" else dataset.en_voc_size
    SAMPLE_SIZE = conf.sample_size // conf.gpu_num if conf.gpu_num > 1 else conf.sample_size
    SEQ_LEN = conf.seq_len_jp if conf.use_lang == "jp" else conf.seq_len_en
    index2tok = dataset.jp_index2tok if conf.use_lang == "jp" else dataset.en_index2tok
    if not conf.silent:
        save_path = os.path.abspath(script_path + conf.save_path)
        if not os.path.exists(save_path):
            os.mkdir(save_path)
        save_path = os.path.abspath(save_path + f"/{conf.use_lang}")
        if not os.path.exists(save_path):
            os.mkdir(save_path)
        preview_path = os.path.abspath(save_path + "/preview")
        if not os.path.exists(preview_path):
            os.mkdir(preview_path)
    netG, netD = build_models(conf, VOC_SIZE, SEQ_LEN)
    optimizerG, optimizerD = build_optimizer(netG, netD, conf.adam_lr,
                                             conf.adam_beta1, conf.adam_beta2)
    pprog = print_progress(conf.pre_gen_max_epoch, conf.batch_size, dataset.train_data_len)
    updater = Updater(netG, netD, optimizerG, optimizerD, conf)

    def pretrain_generator():
        print("==========================================")
        print("Info:start generator pre train")
        pre_gen_loss_hist = np.zeros((1, conf.pre_gen_max_epoch), dtype="float32")
        for i in range(conf.pre_gen_max_epoch):
            count = 0
            total_loss = 0
            start = time.time()
            for data in dataset.get_data():
                data = toGPU(data, conf.gpu_num)
                loss = updater.update_pre_gen(data)
                total_loss += loss.data.cpu().numpy()
                count += 1
                if dataset.now_iter % conf.display_interval == 0:
                    elapsed = time.time() - start
                    pprog(elapsed, dataset.get_state)
                    start = time.time()
            pre_gen_loss_hist[0, i] = total_loss / count
        if not conf.silent:
            data = dataset.sample(conf.sample_size)
            sample_generate(netG, data, SAMPLE_SIZE, index2tok, conf.gpu_num,
                            conf.noise_dim, preview_path + "/sample_text_pretrain.txt")
            np.save(save_path + "/pre_gen_loss_hist", pre_gen_loss_hist)
            torch.save(netG.state_dict(), save_path + "/pretrain_gen_params")
        print("\n\n\n\n==========================================")

    def pretrain_discriminator():
        print("==========================================")
        print("Info:start discriminator pre train")
        dataset.clear_state()
        pprog.max_iter = conf.pre_dis_max_epoch
        pre_dis_hist = np.zeros((4, conf.pre_dis_max_epoch), dtype="float32")
        for i in range(conf.pre_dis_max_epoch):
            count = 0
            total_loss = 0
            total_real_acc = 0
            total_fake_acc = 0
            total_wrong_acc = 0
            start = time.time()
            for data in dataset.get_data():
                data = toGPU(data, conf.gpu_num)
                loss, real_acc, fake_acc, wrong_acc = updater.update_dis(data)
                total_loss += loss.data.cpu().numpy()
                total_real_acc += real_acc.data.cpu().numpy()
                total_fake_acc += fake_acc.data.cpu().numpy()
                total_wrong_acc += wrong_acc.data.cpu().numpy()
                count += 1
                if dataset.now_iter % conf.display_interval == 0:
                    elapsed = time.time() - start
                    pprog(elapsed, dataset.get_state)
                    start = time.time()
            pre_dis_hist[0, i] = total_loss / count
            pre_dis_hist[1, i] = total_real_acc / count
            pre_dis_hist[2, i] = total_fake_acc / count
            pre_dis_hist[3, i] = total_wrong_acc / count
        if not conf.silent:
            np.save(save_path + "/pre_dis_hist", pre_dis_hist)
            torch.save(netD.state_dict(), save_path + "/pretrain_dis_params")
        print("\n\n\n\n==========================================")

    if os.path.exists(save_path + "/pretrain_gen_params"):
        netG.load_state_dict(torch.load(save_path + "/pretrain_gen_params"))
    else:
        pretrain_generator()
    if os.path.exists(save_path + "/pretrain_dis_params"):
        netD.load_state_dict(torch.load(save_path + "/pretrain_dis_params"))
    else:
        pretrain_discriminator()
print("==========================================") print("Info:start main train") dataset.clear_state() pprog.max_iter = conf.max_epoch train_loss_hist = np.zeros((5, conf.max_epoch), dtype="float32") val_loss_hist = np.zeros((5, conf.max_epoch), dtype="float32") val_count = dataset.val_data_len // conf.batch_size if dataset.val_data_len % conf.batch_size != 1: val_count += 1 for i in range(conf.max_epoch): #train loop count = 1 total_g_loss = 0 total_d_loss = 0 total_real_acc = 0 total_fake_acc = 0 total_wrong_acc = 0 start = time.time() for p in netG.parameters(): p.requires_grad = True for p in netD.parameters(): p.requires_grad = True for data in dataset.get_data(): data = toGPU(data, conf.gpu_num) if count % conf.n_dis == 0: loss = updater.update_PG(data) total_g_loss += loss.data.cpu().numpy() loss, real_acc, fake_acc, wrong_acc = updater.update_dis(data) total_d_loss += loss.data.cpu().numpy() total_real_acc += real_acc.data.cpu().numpy() total_fake_acc += fake_acc.data.cpu().numpy() total_wrong_acc += wrong_acc.data.cpu().numpy() count += 1 if dataset.now_iter % conf.display_interval == 0: elapsed = time.time() - start pprog(elapsed, dataset.get_state) start = time.time() train_loss_hist[0, i] = total_d_loss / count train_loss_hist[1, i] = total_real_acc / count train_loss_hist[2, i] = total_fake_acc / count train_loss_hist[3, i] = total_wrong_acc / count train_loss_hist[4, i] = total_g_loss / (count // 5) print("\n\n\n") #val loop print(f"Validation {i+1} / {conf.max_epoch}") count = 0 total_g_loss = 0 total_d_loss = 0 total_real_acc = 0 total_fake_acc = 0 total_wrong_acc = 0 start = time.time() for p in netG.parameters(): p.requires_grad = False for p in netD.parameters(): p.requires_grad = False for data in dataset.get_data(is_val=True): data = toGPU(data, conf.gpu_num) g_loss, d_loss, real_acc, fake_acc, wrong_acc = updater.evaluate( data) count += 1 if dataset.now_iter % conf.display_interval == 0: elapsed = time.time() - start progress(count + 1, val_count, elapsed) progress(count, val_count, elapsed) val_loss_hist[0, i] = total_d_loss / count val_loss_hist[1, i] = total_real_acc / count val_loss_hist[2, i] = total_fake_acc / count val_loss_hist[3, i] = total_wrong_acc / count val_loss_hist[4, i] = total_g_loss / (count // 5) print("\u001B[5A", end="") if (i + 1) % conf.snapshot_interval == 0 and not conf.silent: data = dataset.sample(conf.sample_size) sample_generate(netG, data, SAMPLE_SIZE, index2tok, conf.gpu_num,\ conf.noise_dim, preview_path + f"/sample_text_{i+1:04d}.txt") np.save(save_path + "/train_loss_hist", train_loss_hist) np.save(save_path + "/val_loss_hist", val_loss_hist) torch.save(netG.state_dict(), save_path + f"/gen_params_{i+1:04d}.pth") torch.save(netD.state_dict(), save_path + f"/dis_params_{i+1:04d}.pth") if not conf.silent: np.save(save_path + "/train_loss_hist", train_loss_hist) np.save(save_path + "/val_loss_hist", val_loss_hist) data = dataset.sample(conf.sample_size) sample_generate(netG, data, SAMPLE_SIZE, index2tok, conf.gpu_num,\ conf.noise_dim, preview_path + "/sample_text.txt") torch.save(netG.state_dict(), save_path + "/gen_params.pth") torch.save(netD.state_dict(), save_path + "/dis_params.pth") print("\n\n\n\n==========================================") print("Info:finish train")
def main():
    conf = get_args()
    dataset = MSCOCO(conf)
    VOC_SIZE = dataset.jp_voc_size if conf.use_lang == "jp" else dataset.en_voc_size
    SAMPLE_SIZE = conf.sample_size // conf.gpu_num if conf.gpu_num > 1 else conf.sample_size
    SEQ_LEN = conf.seq_len_jp if conf.use_lang == "jp" else conf.seq_len_en
    index2tok = dataset.jp_index2tok if conf.use_lang == "jp" else dataset.en_index2tok
    if not conf.silent:
        save_path = os.path.abspath(script_path + conf.save_path)
        if not os.path.exists(save_path):
            os.mkdir(save_path)
        save_path = os.path.abspath(save_path + f"/{conf.use_lang}")
        if not os.path.exists(save_path):
            os.mkdir(save_path)
        preview_path = os.path.abspath(save_path + "/preview")
        if not os.path.exists(preview_path):
            os.mkdir(preview_path)
    netG, netD = build_models(conf, VOC_SIZE, SEQ_LEN)
    optimizerG, optimizerD = build_optimizer(netG, netD, conf.adam_lr,
                                             conf.adam_beta1, conf.adam_beta2)
    pprog = print_progress(conf.pre_gen_max_epoch, conf.batch_size, dataset.train_data_len)
    updater = Updater(netG, netD, optimizerG, optimizerD, conf)

    # pre-training blocks kept commented out, as in the original
    """
    print("==========================================")
    print("Info:start generator pre train")
    pre_gen_loss_hist = np.zeros((1, conf.pre_gen_max_epoch), dtype="float32")
    for i in range(conf.pre_gen_max_epoch):
        count = 0
        total_loss = np.array([0.], dtype="float32")
        start = time.time()
        for data in dataset.get_data():
            break
            data = toGPU(data, conf.gpu_num)
            loss = updater.update_pre_gen(data)
            total_loss += loss.data.cpu().numpy()
            count += 1
            if dataset.now_iter % conf.display_interval == 0:
                elapsed = time.time() - start
                pprog(elapsed, dataset.get_state)
                start = time.time()
        pre_gen_loss_hist[0, i] = total_loss / count
    if not conf.silent:
        sample_generate(netG, SAMPLE_SIZE, index2tok, preview_path + "/sample_text_pretrain.txt")
        np.save(save_path + "/pre_gen_loss_hist", pre_gen_loss_hist)
        torch.save(netG.state_dict(), save_path + "/pretrain_gen_params")
    print("\n\n\n\n==========================================")

    print("==========================================")
    print("Info:start discriminator pre train")
    dataset.clear_state()
    pprog.max_iter = conf.pre_dis_max_epoch
    pre_dis_hist = np.zeros((2, conf.pre_dis_max_epoch), dtype="float32")
    for i in range(conf.pre_dis_max_epoch):
        count = 0
        total_loss = np.array([0.], dtype="float32")
        total_acc = np.array([0.], dtype="float32")
        start = time.time()
        for data in dataset.get_data():
            data = toGPU(data, conf.gpu_num)
            loss, acc = updater.update_dis(data)
            total_loss += loss.data.cpu().numpy()
            total_acc += acc.data.cpu().numpy()
            count += 1
            if dataset.now_iter % conf.display_interval == 0:
                elapsed = time.time() - start
                pprog(elapsed, dataset.get_state)
                start = time.time()
        pre_dis_hist[0, i] = total_loss / count
        pre_dis_hist[1, i] = total_acc / count
    if not conf.silent:
        np.save(save_path + "/pre_dis_hist", pre_dis_hist)
        torch.save(netD.state_dict(), save_path + "/pretrain_dis_params")
    print("\n\n\n\n==========================================")
    """

    print("==========================================")
    print("Info:start main train")
    dataset.clear_state()
    pprog.max_iter = conf.max_epoch
    loss_hist = np.zeros((3, conf.max_epoch), dtype="float32")
    for i in range(conf.max_epoch):
        count = 0
        total_g_loss = np.array([0.], dtype="float32")
        total_d_loss = np.array([0.], dtype="float32")
        total_acc = np.array([0.], dtype="float32")
        start = time.time()
        for data in dataset.get_data():
            data = toGPU(data, conf.gpu_num)
            if count % conf.n_dis == 0:
                loss = updater.update_PG(data)
                total_g_loss += loss.data.cpu().numpy()
            loss, acc = updater.update_dis(data)
            total_d_loss += loss.data.cpu().numpy()
            total_acc += acc.data.cpu().numpy()
            count += 1
            if dataset.now_iter % conf.display_interval == 0:
                elapsed = time.time() - start
                pprog(elapsed, dataset.get_state)
                start = time.time()
        loss_hist[0, i] = total_d_loss / count
        loss_hist[1, i] = total_acc / count
        loss_hist[2, i] = total_g_loss / (count // 5)
        if i % conf.snapshot_interval == 0 and not conf.silent:
            sample_generate(netG, SAMPLE_SIZE, index2tok, preview_path + f"/sample_text_{i:04d}.txt")
            np.save(save_path + "/loss_hist", loss_hist)
            torch.save(netG.state_dict(), save_path + f"/gen_params_{i:04d}.pth")
            torch.save(netD.state_dict(), save_path + f"/dis_params_{i:04d}.pth")
    if not conf.silent:
        np.save(save_path + "/loss_hist", loss_hist)
        sample_generate(netG, SAMPLE_SIZE, index2tok, preview_path + "/sample_text.txt")
        torch.save(netG.state_dict(), save_path + "/gen_params.pth")
        torch.save(netD.state_dict(), save_path + "/dis_params.pth")
    print("\n\n\n\n==========================================")
    print("Info:finish train")
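The toGPU helper used throughout these scripts is not shown in this section. Below is a minimal sketch of what it might look like, assuming each batch is a dict of tensors and that a single CUDA device is targeted; both the dict assumption and the fallback behavior are guesses, not the repo's actual code.

import torch

def toGPU(data, gpu_num):
    # Assumed helper: move every tensor in a dict-like batch onto the GPU
    # when at least one GPU is requested; otherwise return the batch unchanged.
    if gpu_num == 0 or not torch.cuda.is_available():
        return data
    return {k: v.cuda(non_blocking=True) if torch.is_tensor(v) else v
            for k, v in data.items()}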