import logging
import os

import torch
import torch.optim as optim
from torch.utils.data import DataLoader

# Project-local helpers assumed to be importable from the surrounding repo
# (not shown here): build_dataset, ConvModel, batch_tch_divide_image,
# gumbel_sinkhorn_ops.


def train(cfg):
    logger = logging.getLogger("JigsawPuzzle")

    if torch.cuda.is_available():
        device = "cuda"
        torch.backends.cudnn.benchmark = True
    else:
        device = "cpu"

    # MNIST is grayscale (1 channel); the other datasets are RGB (3 channels).
    in_c = 1 if cfg.dataset == "MNIST" else 3

    train_data = build_dataset(cfg, "train")
    model = ConvModel(in_c, cfg.pieces, cfg.image_size, cfg.hid_c,
                      cfg.stride, cfg.kernel_size).to(device)
    optimizer = optim.Adam(model.parameters(), cfg.lr, eps=1e-8)
    train_loader = DataLoader(train_data, cfg.batch_size, shuffle=True,
                              num_workers=cfg.num_workers, drop_last=True)

    logger.info("start training")
    for epoch in range(1, cfg.epochs + 1):
        sum_loss = 0
        for i, data in enumerate(train_loader):
            inputs, _ = data
            # Cut each image into a grid of pieces and a shuffled copy.
            pieces, random_pieces, _ = batch_tch_divide_image(inputs, cfg.pieces)
            pieces, random_pieces = pieces.to(device), random_pieces.to(device)

            # Predict unnormalized assignment scores from the shuffled pieces,
            # then draw several Gumbel-Sinkhorn soft-permutation samples.
            log_alpha = model(random_pieces)
            gumbel_sinkhorn_mat = [
                gumbel_sinkhorn_ops.gumbel_sinkhorn(log_alpha, cfg.tau, cfg.n_sink_iter)
                for _ in range(cfg.n_samples)
            ]
            # Un-shuffle the pieces with each sampled permutation and penalize
            # the reconstruction error against the ordered pieces.
            est_ordered_pieces = [
                gumbel_sinkhorn_ops.inverse_permutation_for_image(random_pieces, gs_mat)
                for gs_mat in gumbel_sinkhorn_mat
            ]
            loss = sum(
                torch.nn.functional.mse_loss(X, pieces)
                for X in est_ordered_pieces
            )

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            sum_loss += loss.item()
            if cfg.display > 0 and (i + 1) % cfg.display == 0:
                logger.info("epoch %i [%i/%i] loss %f",
                            epoch, i + 1, len(train_loader), loss.item())

        logger.info("epoch %i | mean loss %f", epoch, sum_loss / len(train_loader))
        torch.save(model.state_dict(), os.path.join(cfg.out_dir, "model.pth"))
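# A minimal sketch of what `gumbel_sinkhorn_ops.gumbel_sinkhorn` is assumed to
# compute (the module itself is not shown above): perturb the score matrix
# with Gumbel noise, temper by tau, and run a few Sinkhorn iterations,
# alternating row and column normalization in log space, to get an
# approximately doubly-stochastic "soft permutation". The function name below
# is hypothetical.
def gumbel_sinkhorn_sketch(log_alpha, tau, n_iter):
    # log_alpha: (batch, n, n) unnormalized assignment scores.
    gumbel = -torch.log(-torch.log(torch.rand_like(log_alpha) + 1e-20) + 1e-20)
    log_p = (log_alpha + gumbel) / tau
    for _ in range(n_iter):
        log_p = log_p - torch.logsumexp(log_p, dim=2, keepdim=True)  # rows
        log_p = log_p - torch.logsumexp(log_p, dim=1, keepdim=True)  # columns
    return log_p.exp()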
# Fragment: the matching `if` branch of this model selection is not shown.
else:
    model = MLPModel(46, args.num_subgoals, use_rnn=False).to(device)

start_itr = 0
c = []
if args.one_class:
    if args.pretrained_ckpt is not None:
        # Resume weights, the iteration counter, and the one-class center c.
        model.load_state_dict(
            torch.load('./ckpt/' + args.pretrained_ckpt + '.pkl'))
        start_itr = np.load('./iter_num/' + args.pretrained_ckpt + '.npy')
        c = torch.from_numpy(
            np.load('./c/' + args.pretrained_ckpt + '.npy')).float().to(device)
    # Compute the initial c for one-class out-of-set estimation.
    if len(c) == 0:
        c = get_c(dataset, model, args)

optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0005)

for itr in range(start_itr, args.max_iter):
    train(itr, dataset, args, model, optimizer, logger, device, c)
    if itr % 500 == 0:
        # Periodically checkpoint the model, iteration count, and labels.
        torch.save(model.state_dict(), './ckpt/' + args.model_name + '.pkl')
        np.save('./iter_num/' + args.model_name + '.npy', itr)
        np.save('./labels/' + args.model_name + '.npy', dataset.labels)
    if itr in change_itr:
        gen_new_labels(dataset, model, args, device)
    # Refresh the center c early in training, then keep it fixed.
    if args.one_class and itr % 50 == 0 and itr <= 500:
        c = get_c(dataset, model, args)
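# A minimal sketch of what `get_c` is assumed to compute (the helper is not
# shown above). In the usual one-class / Deep SVDD recipe, c is the mean of
# the model's embeddings over the dataset, with coordinates too close to zero
# clamped away from 0 to rule out the trivial all-zero solution. The loader
# setup, `args.batch_size`, and the (x, label) batch format are assumptions.
@torch.no_grad()
def get_c_sketch(dataset, model, args, eps=0.1):
    model.eval()
    loader = DataLoader(dataset, batch_size=args.batch_size)
    feats = torch.cat([model(x.to(device)) for x, *_ in loader], dim=0)
    c = feats.mean(dim=0)
    c[(c.abs() < eps) & (c < 0)] = -eps
    c[(c.abs() < eps) & (c >= 0)] = eps
    return c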
                    default=100,
                    help='number of samples per color, shape combination')
args = parser.parse_args()

images_dict = load_images_dict(args.data_n_samples)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Two agents that take turns as speaker and listener.
agent1 = ConvModel(vocab_size=args.vocab_size).to(device)
agent2 = ConvModel(vocab_size=args.vocab_size).to(device)
loss_fn = NLLLoss()
optimizer1 = torch.optim.Adam(
    [p for p in agent1.parameters() if p.requires_grad], args.lr)
optimizer2 = torch.optim.Adam(
    [p for p in agent2.parameters() if p.requires_grad], args.lr)


def get_message(s):
    # Render a sampled token sequence as a readable string: each id below
    # vocab_size maps to a lowercase letter; ids >= vocab_size are dropped.
    return ''.join(
        [chr(97 + int(v.cpu().data)) for v in s if v < args.vocab_size])


def train_round(speaker, listener, batches, optimizer,
                max_sentence_len, vocab_size):
    # Freeze the speaker and update only the listener during this round.
    speaker.train(False)
    listener.train(True)
    round_total = 0
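# Illustration of `get_message` on a made-up sample (assuming
# args.vocab_size == 5): ids 0, 3, 1 map to 'a', 'd', 'b', and 7 is dropped
# as an out-of-vocabulary / end-of-sentence token.
msg = torch.tensor([0, 3, 1, 7])
print(get_message(msg))  # -> 'adb'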
dataloader = DataLoader(train_set, batch_size=batch_size, num_workers=n_cpu,
                        sampler=RandomSampler(train_split))
valid_dataloader = DataLoader(train_set, batch_size=batch_size, num_workers=n_cpu,
                              sampler=RandomSampler(val_split))

if load_model:
    # Restore a saved network via the project's `log` helper.
    with open(load_model_path, 'rb') as f:
        net = log.load_model().to(device)
else:
    net = ConvModel(1024, 2).to(device)

optimizer = torch.optim.Adam(net.parameters())
loss_func = F.binary_cross_entropy

print('start training ......')
for e in range(epochs):
    net.train()
    for i, batch in enumerate(dataloader):
        sig, other, label = (batch['sig'].to(device),
                             batch['other'].to(device),
                             batch['label'].to(device))
        out = net(sig, other)
        loss = loss_func(out, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
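    # valid_dataloader is built above but never used in this fragment; a
    # per-epoch evaluation pass might look like this sketch (the averaging
    # scheme is an assumption, not the project's own code).
    net.eval()
    with torch.no_grad():
        val_loss = 0.0
        for batch in valid_dataloader:
            sig, other, label = (batch['sig'].to(device),
                                 batch['other'].to(device),
                                 batch['label'].to(device))
            val_loss += loss_func(net(sig, other), label).item()
    print('epoch %d | val loss %.4f' % (e, val_loss / len(valid_dataloader)))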