Code Example #1
 def __init__(self,
              model,
              dataset,
              ctx=-1,
              batch_size=128,
              optimizer='sgd',
              lambdas=[0.1, 0.1],
              print_freq=32):
     self.model = model
     self.dataset = dataset
     self.batch_size = batch_size
     self.optbb = optim.SGD(chain(self.model.age_classifier.parameters(),
                                  self.model.RFM.parameters(),
                                  self.model.margin_fc.parameters(),
                                  self.model.backbone.parameters()),
                            lr=0.01,
                            momentum=0.9)
     self.optDAL = optim.SGD(self.model.DAL.parameters(),
                             lr=0.01,
                             momentum=0.9)
     self.lambdas = lambdas
     self.print_freq = print_freq
     self.id_recorder = Recorder()
     self.age_recorder = Recorder()
     self.trainingDAL = False
     if ctx < 0:
         self.ctx = torch.device('cpu')
     else:
         self.ctx = torch.device(f'cuda:{ctx}')
Code Example #2
 def __init__(self, arg):
     self.arg = arg
     self.save_arg()
     if self.arg.random_fix:
         self.rng = RandomState(seed=self.arg.random_seed)
     self.device = GpuDataParallel()
     self.recoder = Recorder(self.arg.work_dir, self.arg.print_log)
     self.data_loader = {}
     self.topk = (1, 5)
     self.stat = Stat(self.arg.model_args['num_classes'], self.topk)
     self.model, self.optimizer = self.Loading()
     self.loss = self.criterion()
Code Example #3
    def __init__(self, worker_id, num_env, game_name, n_stack, child_conn,
                 args):

        super(worker, self).__init__()

        self.daemon = True

        self.worker_id = worker_id
        self.num_env = num_env
        self.n_stack = n_stack

        self.child_conn = child_conn
        self.args = args

        self.envs = []
        self.index_base = worker_id * num_env
        self.episode_length = [0] * num_env

        for i in range(num_env):
            time.sleep(0.1)
            access_index = self.index_base + i
            env = atari(game_name, n_stack)
            env.reset()
            self.envs.append(env)

        if args.record == True:
            self.recorder = []
            for i in range(num_env):
                self.recorder.append(
                    Recorder(int(worker_id * num_env + i), game_name))
Code Example #4
 def __init__(
     self, model, dataset, ctx=-1, batch_size=128, optimizer='sgd', 
     grad_accu=1, lambdas=[0.05, 0.1], print_freq=32, train_head_only=True
 ):
     self.model = model
     self.dataset = dataset
     self.batch_size = batch_size
     self.finetune_layers = (
         # self.model.backbone.repeat_3[-1:], 
         self.model.backbone.last_bn, 
         self.model.backbone.last_linear, self.model.backbone.block8
     )        
     first_group = [
         {
             "params": chain(
                 self.model.age_classifier.parameters(),
                 self.model.RFM.parameters(),
                 self.model.margin_fc.parameters(),
             ),
             "lr": 5e-4
         }
     ]
     if not train_head_only:
         # first_group[0]["lr"] = 1e-4
         first_group.append(
             {
                 "params": chain(
                     *(x.parameters() for x in self.finetune_layers)
                 ),
                 "lr": 5e-5
             }
         )
     self.optbb = RAdam(first_group)
     self.optDAL = RAdam(self.model.DAL.parameters(), lr=5e-4)
     self.lambdas = lambdas
     self.print_freq = print_freq
     self.id_recorder = Recorder()
     self.age_recorder = Recorder()
     self.trainingDAL = False
     if ctx < 0:
         self.ctx = torch.device('cpu')
     else:
         self.ctx = torch.device(f'cuda:{ctx}')
     self.scaler1 = GradScaler()
     self.scaler2 = GradScaler()
     self.grad_accu = grad_accu
     self.train_head_only = train_head_only
Code Example #5
File: main.py Project: thexl74/FlappyBirdReinforce
def test(args):
    model_path = sorted(glob(os.path.join('ckpt', args.tag, '*.pth')))[-1]
    model = torch.load(model_path, map_location='cpu').eval()
    print('Loaded model: {}'.format(model_path))
    model_name = os.path.basename(os.path.splitext(model_path)[0])
    # initialize video writer
    video_filename = 'output_{}_{}.avi'.format(args.tag, model_name)

    dict_screen_shape = {"flappy": (288, 512), "pixelcopter": (48, 48)}
    out = Recorder(video_filename=video_filename,
                   fps=30,
                   width=dict_screen_shape[args.game][0],
                   height=dict_screen_shape[args.game][1])
    score_list = []
    time_list = []

    game = Game(game=args.game)
    for trials in range(10):

        elapsed_Time = 0
        action = torch.zeros([model.number_of_actions], dtype=torch.float32)
        terminal = game.game_over()
        start = time.time()
        score = 0

        image_data = game.get_torch_image()
        state = torch.cat(
            (image_data, image_data, image_data, image_data)).unsqueeze(0)
        while not terminal:
            output = model(state)[0]
            action = torch.zeros([model.number_of_actions],
                                 dtype=torch.float32)
            action_index = torch.argmax(output)
            score += game.act(action_index)
            terminal = game.game_over()
            image_data_1 = game.get_torch_image()
            state = torch.cat(
                (state.squeeze(0)[1:, :, :], image_data_1)).unsqueeze(0)

            out.write(game.get_image())

        game.reset_game()
        score_list.append(score)
        time_list.append(time.time() - start)
        print('Game Ended!')
        print('Score: {} !'.format(score))

    # Add summary
    out.write_score(sum(score_list), sum(time_list))
    out.save()
    print('Total Score: {}'.format(sum(score_list)))
    print('Total Run Time: {:.3f}'.format(sum(time_list)))
    print('Saved video: {}'.format(video_filename))
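
The Recorder in this example is a thin video writer: it is constructed with a filename, frame rate and frame size, receives frames through write(), gets a score summary through write_score(), and is finalized with save(). The class itself is not shown on this page; the sketch below is one possible OpenCV-backed implementation that matches those calls (only the method names and constructor arguments are taken from the example, the body is an assumption).

import cv2
import numpy as np


class Recorder:
    """Minimal video recorder wrapping cv2.VideoWriter."""

    def __init__(self, video_filename, fps, width, height):
        self.size = (width, height)
        self.fps = fps
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        self.writer = cv2.VideoWriter(video_filename, fourcc, fps, self.size)

    def write(self, frame):
        # frame: HxWx3 uint8 image; resize to the target frame size
        self.writer.write(cv2.resize(frame, self.size))

    def write_score(self, total_score, total_time):
        # append a short summary end card for about one second
        card = np.zeros((self.size[1], self.size[0], 3), dtype=np.uint8)
        text = 'score {} in {:.1f}s'.format(total_score, total_time)
        cv2.putText(card, text, (5, self.size[1] // 2),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)
        for _ in range(int(self.fps)):
            self.writer.write(card)

    def save(self):
        self.writer.release()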
Code Example #6
def train():
    model, recorder = mdl.Classifier(), Recorder()
    optimizer = torch.optim.Adam(model.parameters(),
                                 weight_decay=constants.WEIGHT_DECAY)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, constants.EPOCHS)
    trainer = Trainer(model, optimizer, scheduler, recorder)

    trainer.fit(constants.EPOCHS)
    trainer.save_model()
    recorder.plot()
Code Example #7
File: main.py Project: JaSchnedler/RecorderFun
def main():
	global ssn, folder
	runbool = True
	while runbool:
		mkdir()
		ssn = None
		print("Program started")
		while not ssnreceived():
			print('waiting for ssn')
		rec = Recorder(ssn,folder)
		rec.run()
		stop = None
		while stop is None:
			stop = input('Enter something to stop the recording')
			print(' ')
			if str(stop.strip()) == 'stop':
				runbool = False
			time.sleep(0.2)
			print(stop)
		if rec is not None:
			rec.stop()
		filename = ssn
		if os.path.isfile(folder + filename + '.wav'):
			db = Database(folder, ssn, filename)
			db.upload()
			db.adduser()
			db.addfile()
			db.addfiletouser()

		else:
			print('Not added to database')
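
Here Recorder(ssn, folder) captures audio in the background: run() returns immediately so the loop can wait for user input, and stop() finalizes a <folder><ssn>.wav file that the Database code then uploads. The implementation is not part of this page; below is a minimal non-blocking sketch built on PyAudio's callback stream, assuming 16-bit mono capture (everything other than the constructor arguments and the run()/stop() names is an assumption).

import wave
import pyaudio


class Recorder:
    """Non-blocking microphone recorder; saves <folder><ssn>.wav on stop()."""

    def __init__(self, ssn, folder, rate=44100, channels=1, chunk=1024):
        self.path = folder + str(ssn) + '.wav'
        self.rate = rate
        self.channels = channels
        self.chunk = chunk
        self.frames = []
        self.audio = pyaudio.PyAudio()
        self.stream = None

    def _callback(self, in_data, frame_count, time_info, status):
        # called from PyAudio's internal thread for every captured chunk
        self.frames.append(in_data)
        return (None, pyaudio.paContinue)

    def run(self):
        # open a callback-driven input stream and return immediately
        self.stream = self.audio.open(format=pyaudio.paInt16,
                                      channels=self.channels,
                                      rate=self.rate,
                                      input=True,
                                      frames_per_buffer=self.chunk,
                                      stream_callback=self._callback)
        self.stream.start_stream()

    def stop(self):
        self.stream.stop_stream()
        self.stream.close()
        with wave.open(self.path, 'wb') as wf:
            wf.setnchannels(self.channels)
            wf.setsampwidth(self.audio.get_sample_size(pyaudio.paInt16))
            wf.setframerate(self.rate)
            wf.writeframes(b''.join(self.frames))
        self.audio.terminate()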
Code Example #8
File: train.py Project: leibo-cmu/MatSeg
def train(args):
    Arguments.save_args(args, args.args_path)
    train_loader, val_loader, _ = get_dataloaders(args)
    model = UNetVgg16(n_classes=args.n_classes).to(args.device)
    optimizer = get_optimizer(args.optimizer, model)
    lr_scheduler = LRScheduler(args.lr_scheduler, optimizer)
    criterion = get_loss_fn(args.loss_type, args.ignore_index).to(args.device)
    model_saver = ModelSaver(args.model_path)
    recorder = Recorder(['train_miou', 'train_acc', 'train_loss',
                         'val_miou', 'val_acc', 'val_loss'])
    for epoch in range(args.n_epochs):
        print(f"{args.experim_name} Epoch {epoch+1}:")
        train_loss, train_acc, train_miou, train_ious = train_epoch(
            model=model,
            dataloader=train_loader,
            n_classes=args.n_classes,
            optimizer=optimizer,
            lr_scheduler=lr_scheduler,
            criterion=criterion,
            device=args.device,
        )
        print(f"train | mIoU: {train_miou:.3f} | accuracy: {train_acc:.3f} | loss: {train_loss:.3f}")
        val_loss, val_scores = eval_epoch(
            model=model,
            dataloader=val_loader,
            n_classes=args.n_classes,
            criterion=criterion,
            device=args.device,
        )
        val_miou, val_ious, val_acc = val_scores['mIoU'], val_scores['IoUs'], val_scores['accuracy']
        print(f"valid | mIoU: {val_miou:.3f} | accuracy: {val_acc:.3f} | loss: {val_loss:.3f}")
        recorder.update([train_miou, train_acc, train_loss, val_miou, val_acc, val_loss])
        recorder.save(args.record_path)
        if args.metric.startswith("IoU"):
            metric = val_ious[int(args.metric.split('_')[1])]
        else: metric = val_miou
        model_saver.save_models(metric, epoch+1, model,
                                ious={'train': train_ious, 'val': val_ious})

    print(f"best model at epoch {model_saver.best_epoch} with miou {model_saver.best_score:.5f}")
Code Example #9
def run(rank, args):
    base_setting(args)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)
    gpuid = args.gpuid[rank]
    is_master = rank == 0
    is_mp = len(args.gpuid) > 1
    world_size = len(args.gpuid)
    if is_master:
        recorder = Recorder(args.log)
    tok = BertTokenizer.from_pretrained(args.model_type)
    if args.use_ids:
        collate_fn = partial(collate_mp_ids,
                             pad_token_id=tok.pad_token_id,
                             is_test=False)
        collate_fn_val = partial(collate_mp_ids,
                                 pad_token_id=tok.pad_token_id,
                                 is_test=True)
        train_set = RefactoringIDsDataset(
            f"./{args.dataset}/{args.datatype}/train",
            args.model_type,
            maxlen=args.max_len,
            max_num=args.max_num)
        val_set = RefactoringIDsDataset(
            f"./{args.dataset}/{args.datatype}/val",
            args.model_type,
            is_test=True,
            maxlen=512,
            is_sorted=False)
    else:
        collate_fn = partial(collate_mp,
                             pad_token_id=tok.pad_token_id,
                             is_test=False)
        collate_fn_val = partial(collate_mp,
                                 pad_token_id=tok.pad_token_id,
                                 is_test=True)
        train_set = RefactoringDataset(
            f"./{args.dataset}/{args.datatype}/train",
            args.model_type,
            maxlen=args.max_len,
            maxnum=args.max_num)
        val_set = RefactoringDataset(f"./{args.dataset}/{args.datatype}/val",
                                     args.model_type,
                                     is_test=True,
                                     maxlen=512,
                                     is_sorted=False,
                                     maxnum=args.max_num)
    if is_mp:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_set, num_replicas=world_size, rank=rank, shuffle=True)
        dataloader = DataLoader(train_set,
                                batch_size=args.batch_size,
                                shuffle=False,
                                num_workers=4,
                                collate_fn=collate_fn,
                                sampler=train_sampler)
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            val_set, num_replicas=world_size, rank=rank)
        val_dataloader = DataLoader(val_set,
                                    batch_size=8,
                                    shuffle=False,
                                    num_workers=4,
                                    collate_fn=collate_fn_val,
                                    sampler=val_sampler)
    else:
        dataloader = DataLoader(train_set,
                                batch_size=args.batch_size,
                                shuffle=True,
                                num_workers=4,
                                collate_fn=collate_fn)
        val_dataloader = DataLoader(val_set,
                                    batch_size=8,
                                    shuffle=False,
                                    num_workers=4,
                                    collate_fn=collate_fn_val)
    # build models
    model_path = args.pretrained if args.pretrained is not None else args.model_type
    model = Refactor(model_path, num_layers=args.num_layers)

    if args.model_pt is not None:
        model.load_state_dict(
            torch.load(args.model_pt, map_location=f'cuda:{gpuid}'))
    if args.cuda:
        if len(args.gpuid) == 1:
            model = model.cuda()
        else:
            dist.init_process_group("nccl", rank=rank, world_size=world_size)
            model = nn.parallel.DistributedDataParallel(
                model.to(gpuid), [gpuid], find_unused_parameters=True)
    model.train()
    init_lr = args.max_lr / args.warmup_steps
    optimizer = optim.Adam(model.parameters(), lr=init_lr)
    if is_master:
        recorder.write_config(args, [model], __file__)
    minimum_loss = 100
    all_step_cnt = 0
    # start training
    for epoch in range(args.epoch):
        optimizer.zero_grad()
        step_cnt = 0
        steps = 0
        avg_loss = 0
        for (i, batch) in enumerate(dataloader):
            if args.cuda:
                to_cuda(batch, gpuid)
            step_cnt += 1
            output = model(batch["src_input_ids"], batch["candidate_ids"],
                           batch["tgt_input_ids"])
            similarity, gold_similarity = output['score'], output[
                'summary_score']
            loss = args.scale * RankingLoss(similarity,
                                            gold_similarity,
                                            args.margin,
                                            args.gold_margin,
                                            args.gold_weight,
                                            no_gold=args.no_gold)
            loss = loss / args.accumulate_step
            avg_loss += loss.item()
            loss.backward()
            if step_cnt == args.accumulate_step:
                if args.grad_norm > 0:
                    nn.utils.clip_grad_norm_(model.parameters(),
                                             args.grad_norm)
                step_cnt = 0
                steps += 1
                all_step_cnt += 1
                lr = args.max_lr * min(
                    all_step_cnt**(-0.5),
                    all_step_cnt * (args.warmup_steps**(-1.5)))
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                optimizer.step()
                optimizer.zero_grad()
            if steps % args.report_freq == 0 and step_cnt == 0 and is_master:
                recorder.print("epoch: %d, batch: %d, avg loss: %.6f" %
                               (epoch + 1, steps, avg_loss / args.report_freq))
                recorder.print(f"learning rate: {lr:.6f}")
                recorder.plot("loss", {"loss": avg_loss / args.report_freq},
                              all_step_cnt)
                recorder.print()
                avg_loss = 0
            del similarity, gold_similarity, loss

            if all_step_cnt % args.test_freq == 0 and all_step_cnt != 0 and step_cnt == 0:
                loss = test(val_dataloader, model, args, gpuid)
                if loss < minimum_loss and is_master:
                    minimum_loss = loss
                    if is_mp:
                        recorder.save(model.module, "model.bin")
                    else:
                        recorder.save(model, "model.bin")
                    recorder.save(optimizer, "optimizer.bin")
                    recorder.print("best - epoch: %d, batch: %d" %
                                   (epoch + 1, i / args.accumulate_step + 1))
                if is_master:
                    if is_mp:
                        recorder.save(model.module, "model_cur.bin")
                    else:
                        recorder.save(model, "model_cur.bin")
                    recorder.save(optimizer, "optimizer_cur.bin")
                    recorder.print("val score: %.6f" % (1 - loss))
Code Example #10
    cfg['saveto'] = './model_200/'
    cfg['report_interval'] = args.report

    print(cfg)
    train_data = batchify(corpus.train, cfg['batch_size'])
    val_data = batchify(corpus.valid, cfg['batch_size'])
    test_data = batchify(corpus.test, cfg['batch_size'])

    with open(cfg['init'], 'rb') as f:
        policy = torch.load(f)
        print(policy)

    reinforce_model = Reinforce(policy=policy,
                                sigma=cfg['sigma'],
                                gamma=cfg['gamma'])
    recorder = Recorder(output_path=cfg['output_file'])

    valid_loss = []
    loss = evaluate(val_data, reinforce_model.policy, cfg)
    print('start from valid loss = ', loss)
    valid_loss.append(loss)

    ntokens = cfg['dict_size']

    optimizer = optim.Adam(reinforce_model.parameters(), lr=cfg['lr'])
    start_time = time.time()
    for epoch in range(cfg['epochs']):
        total_loss = 0.0
        total_LM_loss = 0.0

        for i in range(0, train_data.size(0) - 1, cfg['max_len']):
Code Example #11
class Trainer():
    def __init__(
        self, model, dataset, ctx=-1, batch_size=128, optimizer='sgd', 
        grad_accu=1, lambdas=[0.05, 0.1], print_freq=32, train_head_only=True
    ):
        self.model = model
        self.dataset = dataset
        self.batch_size = batch_size
        self.finetune_layers = (
            # self.model.backbone.repeat_3[-1:], 
            self.model.backbone.last_bn, 
            self.model.backbone.last_linear, self.model.backbone.block8
        )        
        first_group = [
            {
                "params": chain(
                    self.model.age_classifier.parameters(),
                    self.model.RFM.parameters(),
                    self.model.margin_fc.parameters(),
                ),
                "lr": 5e-4
            }
        ]
        if not train_head_only:
            # first_group[0]["lr"] = 1e-4
            first_group.append(
                {
                    "params": chain(
                        *(x.parameters() for x in self.finetune_layers)
                    ),
                    "lr": 5e-5
                }
            )
        self.optbb = RAdam(first_group)
        self.optDAL = RAdam(self.model.DAL.parameters(), lr=5e-4)
        self.lambdas = lambdas
        self.print_freq = print_freq
        self.id_recorder = Recorder()
        self.age_recorder = Recorder()
        self.trainingDAL = False
        if ctx < 0:
            self.ctx = torch.device('cpu')
        else:
            self.ctx = torch.device(f'cuda:{ctx}')
        self.scaler1 = GradScaler()
        self.scaler2 = GradScaler()
        self.grad_accu = grad_accu
        self.train_head_only = train_head_only

    def train(self, epochs, start_epoch, save_path=None):
        self.train_ds = ImageFolderWithAges(
            self.dataset['pat'], self.dataset['pos'],
            transforms=Compose(
                [
                    HorizontalFlip(p=0.5),
                    OneOf([
                        IAAAdditiveGaussianNoise(),
                        GaussNoise(),
                    ], p=0.25),
                    Resize(200, 200, cv2.INTER_AREA),
                    ToTensor(normalize=dict(
                        mean=[0.5, 0.5, 0.5], std=[0.50196, 0.50196, 0.50196])
                    )
                ]
            ),
            root=self.dataset['train_root'],
        )
        self.train_ld = DataLoader(
            self.train_ds, shuffle=True, batch_size=self.batch_size, num_workers=2, drop_last=True, pin_memory=True
        )
        print("# Batches:", len(self.train_ld))
        if self.dataset['val_root'] is not None:
            self.val_ds = ImageFolderWithAges(
                self.dataset['pat'], self.dataset['pos'],
                root=self.dataset['val_root'],
                transforms=Compose([
                    Resize(200, 200, cv2.INTER_AREA),
                    ToTensor(normalize=dict(
                        mean=[0.5, 0.5, 0.5], std=[0.50196, 0.50196, 0.50196])
                    )
                ])
            )
            self.val_ld = DataLoader(self.val_ds, shuffle=False, batch_size=self.batch_size,
                                     pin_memory=True, num_workers=1)
        self.model = self.model.to(self.ctx)
        total_steps = len(self.train_ld) * epochs
        lr_durations = [
            int(total_steps*0.05),
            int(np.ceil(total_steps*0.95))
        ]
        break_points = [0] + list(np.cumsum(lr_durations))[:-1]
        self.schedulers = [
            MultiStageScheduler(
                [
                    LinearLR(self.optbb, 0.01, lr_durations[0]),
                    CosineAnnealingLR(self.optbb, lr_durations[1], eta_min=1e-6)
                ],
                start_at_epochs=break_points
            ),
            MultiStageScheduler(
                [
                    LinearLR(self.optDAL, 0.01, lr_durations[0]),
                    CosineAnnealingLR(self.optDAL, lr_durations[1], eta_min=1e-6)
                ],
                start_at_epochs=break_points
            )
        ]
        if self.train_head_only:
            set_trainable(self.model.backbone, False)
            for module in self.model.backbone.modules():
                if isinstance(module, (nn.BatchNorm2d, nn.BatchNorm1d)):
                    module.track_running_stats = False
        else:
            set_trainable(self.model.backbone, False)
            # for module in self.model.backbone.modules():
            #     if isinstance(module, (nn.BatchNorm2d, nn.BatchNorm1d)):
            #         module.track_running_stats = False
            for module in self.finetune_layers:
                set_trainable(module, True)
                # for submodule in chain([module], module.modules()):
                #     if isinstance(submodule, (nn.BatchNorm2d, nn.BatchNorm1d)):
                #         submodule.track_running_stats = True
        count_model_parameters(self.model)                
        # print(self.optbb.param_groups[-1]["lr"])
        # print(self.optDAL.param_groups[-1]["lr"])
        for epoch in range(epochs):
            print(f'---- epoch {epoch} ----')
            self.update()
            if self.dataset['val_root'] is not None:
                acc = self.validate()
            else:
                acc = -1.
            if save_path is not None:
                torch.save(self.model.state_dict(), os.path.join(save_path, f'{start_epoch+epoch}_{acc:.4f}.pth'))

    def update(self):
        print('    -- Training --')
        self.model.train()
        self.model.backbone.eval()
        if not self.train_head_only:
            for module in self.finetune_layers:
                module.train()
                # for submodule in chain([module], module.modules()):
                #     if isinstance(submodule, (nn.BatchNorm2d, nn.BatchNorm1d)):
                #         submodule.eval()
        self.id_recorder.reset()
        self.age_recorder.reset()
        for i, (xs, ys, agegrps) in enumerate(self.train_ld):
            if i % 80 == 0:  # canonical maximization procedure
                self.set_train_mode(False)
            elif i % 80 == 28:  # RFM optimization procedure
                self.set_train_mode(True)
            xs, ys, agegrps = xs.to(self.ctx), ys.to(self.ctx), agegrps.to(self.ctx)
            # single forward pass under autocast for mixed-precision training
            with autocast():
                idLoss, id_acc, ageLoss, age_acc, cc = self.model(xs, ys, agegrps=agegrps)
            #print(f'        ---\n{idLoss}\n{id_acc}\n{ageLoss}\n{age_acc}\n{cc}')
            total_loss = idLoss + ageLoss*self.lambdas[0] + cc*self.lambdas[1]
            total_loss /= self.grad_accu
            self.id_recorder.gulp(len(agegrps), idLoss.detach().item(), id_acc.detach().item())
            self.age_recorder.gulp(len(agegrps), ageLoss.detach().item(), age_acc.detach().item())
            if i % self.print_freq == 0:
                print(
                    f'        iter: {i} {i%70} total loss: {total_loss.item():.4f} ({idLoss.item():.4f}, {id_acc.item():.4f}, {ageLoss.item():.4f}, {age_acc.item():.4f}, {cc.item():.8f}) {self.optbb.param_groups[-1]["lr"]:.6f}')
            if self.trainingDAL:
                self.scaler1.scale(-1 * cc*self.lambdas[1]).backward()
                # total_loss.backward()
                # Trainer.flip_grads(self.model.DAL)
                if (i + 1) % self.grad_accu == 0:
                    # self.optDAL.step()
                    self.scaler1.step(self.optDAL)
                    self.scaler1.update()
                    self.optDAL.zero_grad()
            else:
                self.scaler2.scale(total_loss).backward()
                # total_loss.backward()
                # self.optbb.step()
                if (i + 1) % self.grad_accu == 0:
                    self.scaler2.step(self.optbb)
                    self.scaler2.update()
                    self.optbb.zero_grad()                    
            for scheduler in self.schedulers:
                scheduler.step()
        # show average training meta after epoch
        print(f'        {self.id_recorder.excrete().result_as_string()}')
        print(f'        {self.age_recorder.excrete().result_as_string()}')

    def validate(self):
        print('    -- Validating --')
        self.model.eval()
        self.id_recorder.reset()
        self.age_recorder.reset()
        for i, (xs, ys, agegrps) in enumerate(self.val_ld):
            xs, ys, agegrps = xs.to(self.ctx), ys.to(self.ctx), agegrps.to(self.ctx)
            with torch.no_grad():
                with autocast():
                    idLoss, id_acc, ageLoss, age_acc, cc = self.model(xs, ys, agegrps)
                # total_loss = idLoss + ageLoss*self.lambdas[0] + cc*self.lambdas[1]
                self.id_recorder.gulp(len(agegrps), idLoss.item(), id_acc.item())
                self.age_recorder.gulp(len(agegrps), ageLoss.item(), age_acc.item())
        # show average validation meta after epoch
        print(f'        {self.id_recorder.excrete().result_as_string()}')
        print(f'        {self.age_recorder.excrete().result_as_string()}')
        return self.id_recorder.acc

    def set_train_mode(self, state):
        self.trainingDAL = not state
    #     Trainer.set_grads(self.model.RFM, state)
    #     # Trainer.set_grads(self.model.backbone, state)
    #     Trainer.set_grads(self.model.margin_fc, state)
    #     Trainer.set_grads(self.model.age_classifier, state)
    #     Trainer.set_grads(self.model.DAL, not state)

    @staticmethod
    def set_grads(mod, state):
        for para in mod.parameters():
            para.requires_grad = state

    @staticmethod
    def flip_grads(mod):
        for para in mod.parameters():
            if para.requires_grad:
                para.grad = - para.grad
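
The Trainer above touches only a small Recorder surface: reset() at the start of an epoch, gulp(batch_size, loss, acc) per batch, excrete().result_as_string() for the epoch summary, and the acc attribute for the validation score. A minimal running-average sketch that satisfies those calls might be the following (the method names come from the example, the implementation is an assumption).

class Recorder:
    """Running averages of loss and accuracy, weighted by sample count."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.count = 0
        self.loss_sum = 0.0
        self.acc_sum = 0.0
        self.loss = 0.0
        self.acc = 0.0

    def gulp(self, n, loss, acc):
        # accumulate batch statistics weighted by batch size n
        self.count += n
        self.loss_sum += loss * n
        self.acc_sum += acc * n

    def excrete(self):
        # finalize the running averages; return self for chaining
        if self.count > 0:
            self.loss = self.loss_sum / self.count
            self.acc = self.acc_sum / self.count
        return self

    def result_as_string(self):
        return 'avg loss: {:.4f}  avg acc: {:.4f}'.format(self.loss, self.acc)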
Code Example #12
class Processor():
    def __init__(self, arg):
        self.arg = arg
        self.save_arg()
        if self.arg.random_fix:
            self.rng = RandomState(seed=self.arg.random_seed)
        self.device = GpuDataParallel()
        self.recoder = Recorder(self.arg.work_dir, self.arg.print_log)
        self.data_loader = {}
        self.topk = (1, 5)
        self.stat = Stat(self.arg.model_args['num_classes'], self.topk)
        self.model, self.optimizer = self.Loading()
        self.loss = self.criterion()

    def criterion(self):
        loss = nn.CrossEntropyLoss(reduction="none")
        return self.device.criterion_to_device(loss)

    def train(self, epoch):
        self.model.train()
        self.recoder.print_log('Training epoch: {}'.format(epoch + 1))
        loader = self.data_loader['train']
        loss_value = []
        self.recoder.timer_reset()
        current_learning_rate = [
            group['lr'] for group in self.optimizer.optimizer.param_groups
        ]
        for batch_idx, data in enumerate(loader):
            self.recoder.record_timer("dataloader")
            image = self.device.data_to_device(data[0])
            label = self.device.data_to_device(data[1])
            self.recoder.record_timer("device")
            output = self.model(image)
            self.recoder.record_timer("forward")
            loss = torch.mean(self.loss(output, label))
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            self.recoder.record_timer("backward")
            loss_value.append(loss.item())
            if batch_idx % self.arg.log_interval == 0:
                # self.viz.append_loss(epoch * len(loader) + batch_idx, loss.item())
                self.recoder.print_log(
                    '\tEpoch: {}, Batch({}/{}) done. Loss: {:.8f}  lr:{:.6f}'.
                    format(epoch, batch_idx, len(loader), loss.item(),
                           current_learning_rate[0]))
                self.recoder.print_time_statistics()
        self.optimizer.scheduler.step()
        self.recoder.print_log('\tMean training loss: {:.10f}.'.format(
            np.mean(loss_value)))

    def eval(self, loader_name):
        self.model.eval()
        for l_name in loader_name:
            loader = self.data_loader[l_name]
            loss_mean = []
            for batch_idx, data in enumerate(loader):
                image = self.device.data_to_device(data[0])
                label = self.device.data_to_device(data[1])
                # Cal = CalculateParasAndFLOPs()
                # Cal.reset()
                # Cal.calculate_all(self.model, image)
                with torch.no_grad():
                    output = self.model(image)
                # loss = torch.mean(self.loss(output, label))
                loss_mean += self.loss(output,
                                       label).cpu().detach().numpy().tolist()
                self.stat.update_accuracy(output.data.cpu(),
                                          label.cpu(),
                                          topk=self.topk)
            self.recoder.print_log('mean loss: ' + str(np.mean(loss_mean)))

    def Loading(self):
        self.device.set_device(self.arg.device)
        print("Loading model")
        if self.arg.model:
            model_class = import_class(self.arg.model)
            model = self.device.model_to_device(
                model_class(**self.arg.model_args))
            if self.arg.weights:
                try:
                    print("Loading pretrained model...")
                    state_dict = torch.load(self.arg.weights)
                    for w in self.arg.ignore_weights:
                        if state_dict.pop(w, None) is not None:
                            print('Sucessfully Remove Weights: {}.'.format(w))
                        else:
                            print('Can Not Remove Weights: {}.'.format(w))
                    model.load_state_dict(state_dict, strict=True)
                    optimizer = Optimizer(model, self.arg.optimizer_args)
                except RuntimeError:
                    print("Loading from checkpoint...")
                    state_dict = torch.load(self.arg.weights)
                    self.rng.set_rng_state(state_dict['rng_state'])
                    self.arg.optimizer_args[
                        'start_epoch'] = state_dict["epoch"] + 1
                    self.recoder.print_log(
                        "Resuming from checkpoint: epoch {}".format(
                            self.arg.optimizer_args['start_epoch']))
                    model = self.device.load_weights(model, self.arg.weights,
                                                     self.arg.ignore_weights)
                    optimizer = Optimizer(model, self.arg.optimizer_args)
                    optimizer.optimizer.load_state_dict(
                        state_dict["optimizer_state_dict"])
                    optimizer.scheduler.load_state_dict(
                        state_dict["scheduler_state_dict"])
            else:
                optimizer = Optimizer(model, self.arg.optimizer_args)
        else:
            raise ValueError("No Models.")
        print("Loading model finished.")
        self.load_data()
        return model, optimizer

    def load_data(self):
        print("Loading data")
        Feeder = import_class(self.arg.dataloader)
        self.data_loader = dict()
        if self.arg.train_loader_args != {}:
            self.data_loader['train'] = torch.utils.data.DataLoader(
                dataset=Feeder(**self.arg.train_loader_args),
                batch_size=self.arg.batch_size,
                shuffle=True,
                drop_last=True,
                num_workers=self.arg.num_worker,
            )
        if self.arg.valid_loader_args != {}:
            self.data_loader['valid'] = torch.utils.data.DataLoader(
                dataset=Feeder(**self.arg.valid_loader_args),
                batch_size=self.arg.test_batch_size,
                shuffle=False,
                drop_last=False,
                num_workers=self.arg.num_worker,
            )
        if self.arg.test_loader_args != {}:
            test_dataset = Feeder(**self.arg.test_loader_args)
            self.stat.test_size = len(test_dataset)
            self.data_loader['test'] = torch.utils.data.DataLoader(
                dataset=test_dataset,
                batch_size=self.arg.test_batch_size,
                shuffle=False,
                drop_last=False,
                num_workers=self.arg.num_worker,
            )
        print("Loading data finished.")

    def start(self):
        if self.arg.phase == 'train':
            self.recoder.print_log('Parameters:\n{}\n'.format(
                str(vars(self.arg))))
            for epoch in range(self.arg.optimizer_args['start_epoch'],
                               self.arg.num_epoch):
                save_model = ((epoch + 1) % self.arg.save_interval == 0) or \
                             (epoch + 1 == self.arg.num_epoch)
                eval_model = ((epoch + 1) % self.arg.eval_interval == 0) or \
                             (epoch + 1 == self.arg.num_epoch)
                self.train(epoch)
                if save_model:
                    model_path = '{}/epoch{}_model.pt'.format(
                        self.arg.work_dir, epoch + 1)
                    self.save_model(epoch, self.model, self.optimizer,
                                    model_path)
                if eval_model:
                    if self.arg.valid_loader_args != {}:
                        self.stat.reset_statistic()
                        self.eval(loader_name=['valid'])
                        self.print_inf_log(epoch + 1, "Valid")
                    if self.arg.test_loader_args != {}:
                        self.stat.reset_statistic()
                        self.eval(loader_name=['test'])
                        self.print_inf_log(epoch + 1, "Test")
        elif self.arg.phase == 'test':
            if self.arg.weights is None:
                raise ValueError('Please appoint --weights.')
            self.recoder.print_log('Model:   {}.'.format(self.arg.model))
            self.recoder.print_log('Weights: {}.'.format(self.arg.weights))
            if self.arg.valid_loader_args != {}:
                self.stat.reset_statistic()
                self.eval(loader_name=['valid'])
                self.print_inf_log(self.arg.optimizer_args['start_epoch'],
                                   "Valid")
            if self.arg.test_loader_args != {}:
                self.stat.reset_statistic()
                self.eval(loader_name=['test'])
                self.print_inf_log(self.arg.optimizer_args['start_epoch'],
                                   "Test")
            self.recoder.print_log('Evaluation Done.\n')

    def print_inf_log(self, epoch, mode):
        static = self.stat.show_accuracy('{}/{}_confusion_mat'.format(
            self.arg.work_dir, mode))
        prec1 = static[str(self.topk[0])] / self.stat.test_size * 100
        prec5 = static[str(self.topk[1])] / self.stat.test_size * 100
        self.recoder.print_log(
            "Epoch {}, {}, Evaluation: prec1 {:.4f}, prec5 {:.4f}".format(
                epoch, mode, prec1, prec5),
            '{}/{}.txt'.format(self.arg.work_dir, self.arg.phase))

    def save_model(self, epoch, model, optimizer, save_path):
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.optimizer.state_dict(),
                'scheduler_state_dict': optimizer.scheduler.state_dict(),
                'rng_state': self.rng.save_rng_state()
            }, save_path)

    def save_arg(self):
        arg_dict = vars(self.arg)
        if not os.path.exists(self.arg.work_dir):
            os.makedirs(self.arg.work_dir)
        with open('{}/config.yaml'.format(self.arg.work_dir), 'w') as f:
            yaml.dump(arg_dict, f)
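
In this Processor the Recorder (spelled "recoder") combines a timestamped logger with a coarse phase timer: print_log() writes to the console and a file under work_dir, while timer_reset(), record_timer(name) and print_time_statistics() measure how long the dataloader, device transfer, forward and backward phases take. One possible sketch compatible with that usage follows (the file layout and the percentage formatting are assumptions).

import os
import time


class Recorder:
    """Console/file logger with a simple named-phase timer."""

    def __init__(self, work_dir, print_log=True):
        self.work_dir = work_dir
        self.print_log_flag = print_log
        self.cur_time = time.time()
        self.timer = {}

    def print_log(self, message, path=None):
        message = time.strftime('[ %Y-%m-%d %H:%M:%S ] ') + message
        print(message)
        if self.print_log_flag:
            path = path or os.path.join(self.work_dir, 'log.txt')
            with open(path, 'a') as f:
                f.write(message + '\n')

    def timer_reset(self):
        self.cur_time = time.time()
        self.timer = {}

    def record_timer(self, name):
        # accumulate the time elapsed since the last mark under `name`
        now = time.time()
        self.timer[name] = self.timer.get(name, 0.0) + (now - self.cur_time)
        self.cur_time = now

    def print_time_statistics(self):
        total = sum(self.timer.values()) or 1.0
        stats = ', '.join('{}: {:.2%}'.format(k, v / total)
                          for k, v in self.timer.items())
        self.print_log('\tTime consumption: ' + stats)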
Code Example #13
File: main.py Project: KovenYu/ReID_IDE_pytorch
def main():
    opts = BaseOptions()
    args = opts.parse()
    logger = Logger(args.save_path)
    opts.print_options(logger)

    mean = np.array([0.485, 0.406, 0.456])
    std = np.array([0.229, 0.224, 0.225])

    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop((224, 224), padding=7),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
    test_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(mean, std)])

    train_data = Market('data/{}.mat'.format(args.dataset),
                        state='train',
                        transform=train_transform)
    gallery_data = Market('data/{}.mat'.format(args.dataset),
                          state='gallery',
                          transform=test_transform)
    probe_data = Market('data/{}.mat'.format(args.dataset),
                        state='probe',
                        transform=test_transform)
    num_classes = train_data.return_num_class()

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=2,
                                               pin_memory=True,
                                               drop_last=True)
    gallery_loader = torch.utils.data.DataLoader(gallery_data,
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=2,
                                                 pin_memory=True)
    probe_loader = torch.utils.data.DataLoader(probe_data,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               num_workers=2,
                                               pin_memory=True)

    net = resnet.resnet50(pretrained=False, num_classes=num_classes).cuda()
    checkpoint = torch.load(args.pretrain_path)
    fixed_layers = ('fc', )
    state_dict = reset_state_dict(checkpoint, net, *fixed_layers)
    net.load_state_dict(state_dict)
    logger.print_log('loaded pre-trained feature net')

    criterion_CE = nn.CrossEntropyLoss().cuda()

    bn_params, conv_params = partition_params(net, 'bn')

    optimizer = torch.optim.SGD([{
        'params': bn_params,
        'weight_decay': 0
    }, {
        'params': conv_params
    }],
                                lr=args.lr,
                                momentum=0.9,
                                weight_decay=args.wd)

    train_stats = ('acc', 'loss')
    val_stats = ('acc', )
    recorder = Recorder(args.epochs, val_stats[0], train_stats, val_stats)
    logger.print_log(
        'observing training stats: {} \nvalidation stats: {}'.format(
            train_stats, val_stats))

    start_epoch = 0
    if args.resume:
        if os.path.isfile(args.resume):
            logger.print_log("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            recorder = checkpoint['recorder']
            start_epoch = checkpoint['epoch']
            net.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.print_log("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            logger.print_log("=> no checkpoint found at '{}'".format(
                args.resume))

    # Main loop
    start_time = time.time()
    epoch_time = AverageMeter()

    for epoch in range(start_epoch, args.epochs):

        need_hour, need_mins, need_secs = convert_secs2time(
            epoch_time.avg * (args.epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(
            need_hour, need_mins, need_secs)

        logger.print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s}'.format(
            time_string(), epoch, args.epochs, need_time))

        lr, _ = adjust_learning_rate(optimizer, (args.lr, args.lr), epoch,
                                     args.epochs, args.lr_strategy)
        print("   lr:{}".format(lr))

        train(train_loader, net, criterion_CE, optimizer, epoch, recorder,
              logger, args)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': net.state_dict(),
                'recorder': recorder,
                'optimizer': optimizer.state_dict(),
            }, False, args.save_path, 'checkpoint.pth.tar')
        recorder.plot_curve(os.path.join(args.save_path, 'curve.png'))

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()

    evaluate(gallery_loader, probe_loader, net, args.epochs - 1, recorder,
             logger)
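
Here the Recorder tracks the listed training and validation statistics across all epochs, is stored inside the checkpoint so a resumed run keeps its history, and renders a curve with plot_curve(). The train() and evaluate() helpers that feed it are not shown, so the update() method in the sketch below is purely a guess; only the constructor arguments and plot_curve() appear in the example above.

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt


class Recorder:
    """Keeps per-epoch training/validation statistics and plots them."""

    def __init__(self, total_epochs, key_metric, train_stats, val_stats):
        self.total_epochs = total_epochs
        self.key_metric = key_metric
        self.stats = {'train_' + s: [] for s in train_stats}
        self.stats.update({'val_' + s: [] for s in val_stats})

    def update(self, phase, name, value):
        # hypothetical feeding interface, e.g. update('train', 'acc', 0.91)
        self.stats['{}_{}'.format(phase, name)].append(float(value))

    def plot_curve(self, save_path):
        fig, ax = plt.subplots(figsize=(8, 5))
        for name, values in self.stats.items():
            if values:
                ax.plot(range(len(values)), values, label=name)
        ax.set_xlabel('epoch')
        ax.legend()
        fig.savefig(save_path)
        plt.close(fig)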
Code Example #14
File: main.py Project: Liwb5/pytorch-template
def main(config, resume):
    set_seed(config['seed'])

    train_recorder = Recorder()

    # setup data_loader instances
    train_data = getattr(module_data, config['dataloader']['type'])(
        data_path=config['dataloader']['args']['train_data'],
        data_quota=config['dataloader']['args']['data_quota'])
    logging.info('using %d examples to train. ' % len(train_data))
    data_loader = DataLoader(
        dataset=train_data,
        batch_size=config['dataloader']['args']['batch_size'])

    #  val_data = getattr(module_data, config['dataloader']['type'])(
    #                      data_path = config['dataloader']['args']['val_data'],
    #                      data_quota = config['dataloader']['args']['data_quota']
    #                      )
    #  logging.info('using %d examples to val. ' % len(val_data))
    #  valid_data_loader = DataLoader(dataset = val_data,
    #                          batch_size = config['data_loader']['batch_size'])

    # build model architecture
    model = getattr(models, config['model']['type'])(config['model']['args'],
                                                     device=config['device'])

    logging.info(['my PID is: ', os.getpid()])
    # get function handles of loss and metrics
    loss = getattr(module_loss, config['loss'])()

    #  metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer, learning rate scheduler. delete every lines containing lr_scheduler for disabling scheduler
    g_trainable_params = filter(lambda p: p.requires_grad,
                                model.G.parameters())
    g_optimizer = getattr(torch.optim,
                          config['optimizer']['generator']['type'])(
                              g_trainable_params,
                              **config['optimizer']['generator']['args'])

    d_trainable_params = filter(lambda p: p.requires_grad,
                                model.D.parameters())
    d_optimizer = getattr(torch.optim,
                          config['optimizer']['discriminator']['type'])(
                              d_trainable_params,
                              **config['optimizer']['discriminator']['args'])

    trainer = Trainer(model,
                      loss,
                      g_optimizer,
                      d_optimizer,
                      resume=resume,
                      config=config,
                      data_loader=data_loader,
                      valid_data_loader=None,
                      metrics=None,
                      lr_scheduler=None,
                      train_recorder=train_recorder)

    logging.info('begin training. ')
    trainer.train()
Code Example #15
    model = getattr(model_def, args.arch)()

    model.cuda()
    train_loader, test_loader = data_loader(batch_size=args.batch_size,
                                            n_workers=args.workers,
                                            dataset=args.dataset)

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum, nesterov=True)

    prev_state = None
    if args.resume:
        prev_state = torch.load('references/{}_checkpoint.th'.format(args.arch))

    epoch_time = AverageMeter()
    rec = Recorder()
    all_start_time = time.time()
    start_epoch = 0
    if prev_state:
        print()
        model.load_state_dict(prev_state['model_state'])
        optimizer.load_state_dict(prev_state['optimizer_state'])
        epoch_time = prev_state['epoch_time']
        rec = prev_state['records']
        all_start_time -= prev_state['training_time']
        print('Overriding provided arg with prev_state args: ', prev_state['args'])
        args = prev_state['args']
        start_epoch = prev_state['epoch']

    scheduler = None
    if args.scheduler == 'exponential':
Code Example #16
File: rbqe.py Project: RyanXingQL/PowerQE
    def test(self, data_fetcher, num_samples, if_baseline=False, if_return_each=False, img_save_folder=None,
             if_train=True):
        """
        val (in training): idx_out=0/1/2/3/4
        test: idx_out=-2, record time wo. iqa
        """
        if if_baseline or if_train:
            assert self.crit_lst is not None, 'NO METRICS!'

        if self.crit_lst is not None:
            if_tar_only = False
            msg = 'dst vs. src | ' if if_baseline else 'tar vs. src | '
        else:
            if_tar_only = True
            msg = 'only get dst | '

        report_dict = None

        recorder_dict = dict()
        for crit_name in self.crit_lst:
            recorder_dict[crit_name] = Recorder()

        write_dict_lst = []
        timer = CUDATimer()

        # validation baseline: no iqa, no parse name
        # validation, not baseline: no iqa, parse name
        # test baseline: no iqa, no parse name
        # test, no baseline, iqa, no parse name
        if_iqa = (not if_train) and (not if_baseline)
        if if_iqa:
            timer_wo_iqam = Recorder()
            idx_out = -2  # testing; judge by IQAM
        if_parse_name = if_train and (not if_baseline)

        self.set_eval_mode()

        data_fetcher.reset()
        test_data = data_fetcher.next()
        assert len(test_data['name']) == 1, 'ONLY SUPPORT bs==1!'

        pbar = tqdm(total=num_samples, ncols=100)

        while test_data is not None:
            im_lq = test_data['lq'].cuda(non_blocking=True)  # assume bs=1
            im_name = test_data['name'][0]  # assume bs=1

            if if_parse_name:
                im_type = im_name.split('_')[-1].split('.')[0]
                if im_type in ['qf50', 'qp22']:
                    idx_out = 0
                elif im_type in ['qf40', 'qp27']:
                    idx_out = 1
                elif im_type in ['qf30', 'qp32']:
                    idx_out = 2
                elif im_type in ['qf20', 'qp37']:
                    idx_out = 3
                elif im_type in ['qf10', 'qp42']:
                    idx_out = 4
                else:
                    raise Exception(f"im_type IS {im_type}, NO MATCHING TYPE!")

            timer.start_record()
            if if_tar_only:
                if if_iqa:
                    # unpack first, then clamp the image tensor
                    time_wo_iqa, im_out = self.model.net[self.model.infer_subnet](inp_t=im_lq, idx_out=idx_out)
                    im_out = im_out.clamp_(0., 1.)
                else:
                    im_out = self.model.net[self.model.infer_subnet](inp_t=im_lq, idx_out=idx_out).clamp_(0., 1.)
                timer.record_inter()
            else:
                im_gt = test_data['gt'].cuda(non_blocking=True)  # assume bs=1
                if if_baseline:
                    im_out = im_lq
                else:
                    if if_iqa:
                        time_wo_iqa, im_out = self.model.net[self.model.infer_subnet](inp_t=im_lq, idx_out=idx_out)
                        im_out = im_out.clamp_(0., 1.)
                    else:
                        im_out = self.model.net[self.model.infer_subnet](inp_t=im_lq, idx_out=idx_out).clamp_(0., 1.)
                timer.record_inter()

                _msg = f'{im_name} | '

                for crit_name in self.crit_lst:
                    crit_fn = self.crit_lst[crit_name]['fn']
                    crit_unit = self.crit_lst[crit_name]['unit']

                    perfm = crit_fn(torch.squeeze(im_out, 0), torch.squeeze(im_gt, 0))
                    recorder_dict[crit_name].record(perfm)

                    _msg += f'[{perfm:.3e}] {crit_unit:s} | '

                _msg = _msg[:-3]
                if if_return_each:
                    msg += _msg + '\n'
                pbar.set_description(_msg)

            if if_iqa:
                timer_wo_iqam.record(time_wo_iqa)

            if img_save_folder is not None:  # save im
                im = tensor2im(torch.squeeze(im_out, 0))
                save_path = img_save_folder / (str(im_name) + '.png')
                cv2.imwrite(str(save_path), im)

            pbar.update()
            test_data = data_fetcher.next()
        pbar.close()

        if not if_tar_only:
            for crit_name in self.crit_lst:
                crit_unit = self.crit_lst[crit_name]['unit']
                crit_if_focus = self.crit_lst[crit_name]['if_focus']

                ave_perfm = recorder_dict[crit_name].get_ave()
                msg += f'{crit_name} | [{ave_perfm:.3e}] {crit_unit} | '

                write_dict_lst.append(dict(tag=f'{crit_name} (val)', scalar=ave_perfm))

                if crit_if_focus:
                    report_dict = dict(ave_perfm=ave_perfm, lsb=self.crit_lst[crit_name]['fn'].lsb)

        ave_fps = 1. / timer.get_ave_inter()
        msg += f'ave. fps | [{ave_fps:.1f}]'

        if if_iqa:
            ave_time_wo_iqam = timer_wo_iqam.get_ave()
            fps_wo_iqam = 1. / ave_time_wo_iqam
            msg += f' | ave. fps wo. IQAM | [{fps_wo_iqam:.1f}]'

        if if_train:
            assert report_dict is not None
            return msg.rstrip(), write_dict_lst, report_dict
        else:
            return msg.rstrip()
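
In this test loop each Recorder simply accumulates scalar observations, per-image metric values or per-image inference times, via record(), and reports their mean with get_ave(). A minimal sketch consistent with those two calls (the implementation is an assumption):

class Recorder:
    """Accumulates scalar observations and reports their average."""

    def __init__(self):
        self.values = []

    def record(self, value):
        # accepts plain floats or zero-dimensional tensors
        self.values.append(float(value))

    def get_ave(self):
        return sum(self.values) / len(self.values) if self.values else 0.0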
Code Example #17
def main():
    # fetch arguments
    args = parse_args()
    # initialize logger
    logger = SysLogger(LOGFILE)
    recorder = Recorder(RECORDER_FILE)
    logger.info('starting...')
    rl_knobs = knobs.get_rl_knobs(args.scenario)
    pso_knobs = knobs.get_pso_knobs(args.scenario)
    bm = benchmark.get_benchmark_instance(args.benchmark)
    env = DB_Env(db_info=args.db_info, benchmark=bm, recorder=recorder)

    if len(rl_knobs) == 0 and args.is_train:
        print(SysLogger)
        logger.print(
            'current mode is training, so you must set reinforcement learning knobs.',
            fd=SysLogger.stderr)
        return -1

    # reinforcement learning
    if len(rl_knobs) > 0:
        env.set_tuning_knobs(rl_knobs)
        # lazy loading. Because loading tensorflow has to cost too much time.
        from algorithms.rl_agent import RLAgent
        rl = RLAgent(env, agent='ddpg')

        if args.is_train:
            rl.fit(STEPS, nb_max_episode_steps=NB_MAX_EPISODE_STEPS)
            rl.save(args.model_path)
            logger.print('saved model at %s' % args.model_path)
            return 0  # training mode stop here.
        if not args.model_path:
            from sys import stderr
            print('have no model path, you can use --model-path argument.',
                  file=stderr,
                  flush=True)
            exit(-1)
        rl.load(args.model_path)
        rl.test(TEST_EPISODES, nb_max_episode_steps=NB_MAX_EPISODE_STEPS)

        recorder.write_best_val('reward')
    # heuristic algorithm
    if len(pso_knobs) > 0:
        env.set_tuning_knobs(pso_knobs)

        def heuristic_callback(v):
            s, r, d, _ = env.step(v, False)
            return -r  # - reward

        pso = Pso(func=heuristic_callback,
                  dim=len(pso_knobs),
                  particle_nums=3,
                  max_iteration=100,
                  x_min=0,
                  x_max=1,
                  max_vel=0.5)
        pso.update()

        # if you have other approaches, you can code here.

        recorder.write_best_val('reward')
    logger.print('please see result at logfile: %s.' % RECORDER_FILE)
Code Example #18
class Trainer():
    def __init__(self,
                 model,
                 dataset,
                 ctx=-1,
                 batch_size=128,
                 optimizer='sgd',
                 lambdas=[0.1, 0.1],
                 print_freq=32):
        self.model = model
        self.dataset = dataset
        self.batch_size = batch_size
        self.optbb = optim.SGD(chain(self.model.age_classifier.parameters(),
                                     self.model.RFM.parameters(),
                                     self.model.margin_fc.parameters(),
                                     self.model.backbone.parameters()),
                               lr=0.01,
                               momentum=0.9)
        self.optDAL = optim.SGD(self.model.DAL.parameters(),
                                lr=0.01,
                                momentum=0.9)
        self.lambdas = lambdas
        self.print_freq = print_freq
        self.id_recorder = Recorder()
        self.age_recorder = Recorder()
        self.trainingDAL = False
        if ctx < 0:
            self.ctx = torch.device('cpu')
        else:
            self.ctx = torch.device(f'cuda:{ctx}')

    def train(self, epochs, start_epoch, save_path=None):
        self.train_ds = ImageFolderWithAgeGroup(
            self.dataset['pat'], self.dataset['pos'], age_cutoffs,
            self.dataset['train_root'],
            transform=transforms.Compose([
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ]))
        self.train_ld = DataLoader(self.train_ds,
                                   shuffle=True,
                                   batch_size=self.batch_size)
        if self.dataset['val_root'] is not None:
            self.val_ds = ImageFolderWithAgeGroup(
                self.dataset['pat'], self.dataset['pos'], age_cutoffs,
                self.dataset['val_root'],
                transform=transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                ]))
            self.val_ld = DataLoader(self.val_ds,
                                     shuffle=True,
                                     batch_size=self.batch_size)
        self.model = self.model.to(self.ctx)
        for epoch in range(epochs):
            print(f'---- epoch {epoch} ----')
            self.update()
            if self.dataset['val_root'] is not None:
                acc = self.validate()
            else:
                acc = -1.
            if save_path is not None:
                torch.save(
                    self.model.state_dict(),
                    os.path.join(save_path,
                                 f'{start_epoch+epoch}_{acc:.4f}.state'))

    def update(self):
        print('    -- Training --')
        self.model.train()
        self.id_recorder.reset()
        self.age_recorder.reset()
        for i, (xs, ys, agegrps) in enumerate(self.train_ld):
            if i % 70 == 0:  # canonical maximization procedure
                self.set_train_mode(False)
            elif i % 70 == 20:  # RFM optimization procedure
                self.set_train_mode(True)
            xs, ys, agegrps = xs.to(self.ctx), ys.to(self.ctx), agegrps.to(
                self.ctx)
            idLoss, id_acc, ageLoss, age_acc, cc = self.model(xs, ys, agegrps)
            #print(f'        ---\n{idLoss}\n{id_acc}\n{ageLoss}\n{age_acc}\n{cc}')
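            # Combined objective: identity loss, plus the age loss weighted by
            # lambdas[0] and the canonical correlation term weighted by lambdas[1].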
            total_loss = idLoss + ageLoss * self.lambdas[
                0] + cc * self.lambdas[1]
            self.id_recorder.gulp(len(agegrps), idLoss.item(), id_acc.item())
            self.age_recorder.gulp(len(agegrps), ageLoss.item(),
                                   age_acc.item())
            if i % self.print_freq == 0:
                print(
                    f'        iter: {i} {i%70} total loss: {total_loss.item():.4f} ({idLoss.item():.4f}, {id_acc.item():.4f}, {ageLoss.item():.4f}, {age_acc.item():.4f}, {cc.item():.8f})'
                )
            if self.trainingDAL:
                self.optDAL.zero_grad()
                total_loss.backward()
                Trainer.flip_grads(self.model.DAL)
                self.optDAL.step()
            else:
                self.optbb.zero_grad()
                total_loss.backward()
                self.optbb.step()
        # show average training meta after epoch
        print(f'        {self.id_recorder.excrete().result_as_string()}')
        print(f'        {self.age_recorder.excrete().result_as_string()}')

    def validate(self):
        print('    -- Validating --')
        self.model.eval()
        self.id_recorder.reset()
        self.age_recorder.reset()
        for i, (xs, ys, agegrps) in enumerate(self.val_ld):
            xs, ys, agegrps = xs.to(self.ctx), ys.to(self.ctx), agegrps.to(
                self.ctx)
            with torch.no_grad():
                idLoss, id_acc, ageLoss, age_acc, cc = self.model(
                    xs, ys, agegrps)
                total_loss = idLoss + ageLoss * self.lambdas[
                    0] + cc * self.lambdas[1]
                self.id_recorder.gulp(len(agegrps), idLoss.item(),
                                      id_acc.item())
                self.age_recorder.gulp(len(agegrps), ageLoss.item(),
                                       age_acc.item())
        # show average validation meta after epoch
        print(f'        {self.id_recorder.excrete().result_as_string()}')
        print(f'        {self.age_recorder.excrete().result_as_string()}')
        return self.id_recorder.acc

    def set_train_mode(self, state):
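        # state=True: train the backbone, RFM, margin_fc and age classifier (DAL frozen);
        # state=False: freeze them and train only DAL, whose gradients are negated
        # in update() for the maximization step.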
        self.trainingDAL = not state
        Trainer.set_grads(self.model.RFM, state)
        Trainer.set_grads(self.model.backbone, state)
        Trainer.set_grads(self.model.margin_fc, state)
        Trainer.set_grads(self.model.age_classifier, state)
        Trainer.set_grads(self.model.DAL, not state)

    @staticmethod
    def set_grads(mod, state):
        for para in mod.parameters():
            para.requires_grad = state

    @staticmethod
    def flip_grads(mod):
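        # Negate the accumulated gradients so the following optimizer step
        # performs gradient ascent on this module's parameters.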
        for para in mod.parameters():
            if para.requires_grad:
                para.grad = -para.grad
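
A hedged usage sketch of the Trainer above (the dataset dict keys follow the train()
method; the concrete values, device index and epoch counts are illustrative assumptions):

dataset = {'pat': ..., 'pos': ..., 'train_root': 'data/train',
           'val_root': 'data/val'}   # set 'val_root' to None to skip validation
trainer = Trainer(model, dataset, ctx=0, batch_size=128)   # ctx=0 -> cuda:0, ctx=-1 -> CPU
trainer.train(epochs=10, start_epoch=0, save_path='checkpoints')
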
Code Example #19
File: main.py  Project: arkitahara/MatSeg
def main(args):
    if args.seed:
        np.random.seed(int(args.seed))
        torch.backends.cudnn.deterministic = True
        torch.manual_seed(0)
    config = get_config(args.dataset, args.version)
    method = config['model']
    criterion = nn.CrossEntropyLoss().cuda()
    try:
        model = model_mappings[method](K=config['n_class']).cuda()
    except KeyError:
        print('%s model does not exist' % method)
        sys.exit(1)

    model_dir = './saved/%s_%s.pth' % (config['name'], method)
    if args.mode == 'train':
        log_dir = './log/%s_%s.log' % (config['name'], method)
        train_loader, validation_loader = get_dataloader(config)
        if config['optimizer'] == 'Adam':
            optimizer = optim.Adam(model.parameters(),
                                   lr=config['lr'],
                                   weight_decay=5e-4)
        elif config['optimizer'] == 'SGD':
            optimizer = optim.SGD(model.parameters(),
                                  lr=config['lr'],
                                  momentum=0.9,
                                  weight_decay=5e-4)
        else:
            print('cannot find %s optimizer' % config['optimizer'])
            sys.exit(1)

        scheduler = ReduceLROnPlateau(optimizer, patience=3)
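        # ReduceLROnPlateau uses its default factor of 0.1 here, so each plateau
        # in training loss cuts the learning rate to a tenth.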
        recorder = Recorder(('loss_train', 'acc_train', 'loss_val', 'acc_val'))
        iou_val_max = 0
        for epoch in range(1, config['epoch'] + 1):
            print('Epoch %s:' % epoch)
            loss_train, acc_train = train(config,
                                          model,
                                          criterion,
                                          optimizer,
                                          train_loader,
                                          method=method)
            loss_val, acc_val, iou_val = evaluate(config,
                                                  model,
                                                  criterion,
                                                  validation_loader,
                                                  method=method)
            scheduler.step(loss_train)

            # update loss and accuracy per epoch
            recorder.update((loss_train, acc_train, loss_val, acc_val))

            # save model with higher iou
            if iou_val > iou_val_max and args.save:
                torch.save(recorder.record, log_dir)
                torch.save(
                    {
                        'epoch': epoch,
                        'version': args.version,
                        'model_state_dict': model.state_dict(),
                    }, model_dir)
                print(
                    'validation iou improved from %.5f to %.5f. Model Saved.' %
                    (iou_val_max, iou_val))
                iou_val_max = iou_val

    elif args.mode == 'evaluate':
        test_dir = '%s/%s' % (config['root'], args.test_folder)
        test_set = Dataset(test_dir, config['size'],
                           *get_transform(config, is_train=False))
        test_loader = DataLoader(test_set,
                                 batch_size=1,
                                 shuffle=False,
                                 num_workers=0,
                                 drop_last=False)
        model.load_state_dict(torch.load(model_dir)['model_state_dict'])

        # Save prediction results; create the directories if they do not exist
        save_dir = '%s/predictions/%s_%s' % (test_dir, args.version, method)
        if not os.path.isdir('%s/predictions' % test_dir):
            os.mkdir('%s/predictions' % test_dir)
        if not os.path.isdir(save_dir):
            os.mkdir(save_dir)
        evaluate(config,
                 model,
                 criterion,
                 test_loader,
                 method=method,
                 test_flag=True,
                 save_dir=save_dir)

    else:
        print('%s mode does not exist' % args.mode)
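
The Recorder class itself is not shown in this example; a minimal sketch consistent
with the calls above (a tuple of metric names at construction, update() appending one
value per metric, and a .record attribute saved with torch.save) might look as follows.
This is an assumption inferred from usage, not the project's actual implementation:

class SimpleRecorder:
    """Keeps one growing list of values per named metric."""

    def __init__(self, names):
        self.names = names
        self.record = {name: [] for name in names}

    def update(self, values):
        # Append this epoch's values in the same order as `names`.
        for name, value in zip(self.names, values):
            self.record[name].append(value)

    def __getitem__(self, name):
        return self.record[name]
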
Code Example #20
def main(args):
    # Defines configuration dictionary and network architecture to use
    config = get_config(args.dataset, args.version)
    method = config['model']

    # Defines the loss function. Class-balancing weights (a tensor computed by the
    # balance script) are passed when config['balance'] is set.
    if config['balance'] and args.gpu and torch.cuda.is_available():
        criterion = nn.CrossEntropyLoss(weight=balance(config)).cuda()
    elif config['balance']:
        criterion = nn.CrossEntropyLoss(weight=balance(config))
    elif args.gpu and torch.cuda.is_available():
        criterion = nn.CrossEntropyLoss().cuda()
    else:
        criterion = nn.CrossEntropyLoss()

    # Maps configuration method to network class defined in models.py
    try:
        if args.gpu and torch.cuda.is_available():
            model = model_mappings[method](K=config['n_class']).cuda()
        else:
            model = model_mappings[method](K=config['n_class'])
    except KeyError:
        print('%s model does not exist' % method)
        sys.exit(1)

    if args.mode == 'train':
        # Start the training timer; total training time is computed at the end of this branch
        start = time.time()

        # Defines the paths for the trained network, training log, and training plot;
        # create the corresponding saved/, log/, and plots/ directories in MatSeg beforehand.
        model_dir = './saved/%s_%s.pth' % (config['name'], method)
        log_dir = './log/%s_%s.log' % (config['name'], method)
        plot_dir = './plots/%s_%s.png' % (config['name'], method)

        # Obtains iterable data sets from function above
        train_loader, validation_loader = get_dataloader(config)

        # Conditional outlining choice of optimizer; includes hard-coded hyperparameters
        if config['optimizer'] == 'Adam':
            optimizer = optim.Adam(model.parameters(),
                                   lr=config['lr'],
                                   weight_decay=5e-4)
        elif config['optimizer'] == 'SGD':
            optimizer = optim.SGD(model.parameters(),
                                  lr=config['lr'],
                                  momentum=0.9,
                                  weight_decay=5e-4)
        else:
            print('cannot find %s optimizer' % config['optimizer'])
            sys.exit(1)

        # Defines dynamic learning-rate reduction: `patience` is the number of epochs
        # without a decrease in training loss after which the LR is reduced.
        scheduler = ReduceLROnPlateau(optimizer, patience=config['patience'])

        # Names the metrics tracked by the Recorder object; values come from the train and evaluate functions
        recorder = Recorder(('loss_train', 'acc_train', 'loss_val', 'acc_val',
                             'mean_iou', 'class_precision', 'class_iou'))
        iou_val_max = 0

        # Iterate through number of epochs
        for epoch in range(1, config['epoch'] + 1):
            gc.collect()
            print('Epoch %s:' % epoch)
            loss_train, acc_train = train(config,
                                          model,
                                          criterion,
                                          optimizer,
                                          train_loader,
                                          method=method,
                                          gpu=args.gpu)
            loss_val, acc_val, iou_val, class_precision, class_iou = evaluate(
                config,
                model,
                criterion,
                validation_loader,
                gpu=args.gpu,
                method=method)

            # Update learning rate scheduler based on training loss
            scheduler.step(loss_train)

            # Update metrics in Recorder object for each epoch
            recorder.update((loss_train, acc_train, loss_val, acc_val, iou_val,
                             class_precision, class_iou))

            # Save model with higher mean IoU
            if iou_val > iou_val_max and args.save:
                torch.save(recorder.record, log_dir)
                torch.save(
                    {
                        'epoch': epoch,
                        'version': args.version,
                        'model_state_dict': model.state_dict(),
                    }, model_dir)
                print(
                    'validation iou improved from %.5f to %.5f. Model Saved.' %
                    (iou_val_max, iou_val))
                iou_val_max = iou_val

            # Stop training if the learning rate has been reduced three times or (commented out)
            # if the validation loss does not decrease for 20 epochs. Otherwise, continue training.
            if (optimizer.param_groups[0]['lr'] / config['lr']) <= 1e-3:
                print('Learning Rate Reduced to 1e-3 of Original Value',
                      'Training Stopped',
                      sep='\n')
                epochs = epoch
                break
            # elif all(recorder['loss_val'][-20:][i] <= recorder['loss_val'][-20:][i+1] for i in range(19)):
            #     print('Loss has not decreased for previous 20 epochs', 'Training Stopped', sep='\n')
            #     epochs = epoch
            #     break
            else:
                epochs = epoch
                continue

        # Obtain time after all epochs, compute total training time, print and plot results
        end = time.time()
        time_taken = end - start
        print(recorder.record)
        plotting(recorder.record, config, start, time_taken, plot_dir, epochs)

    elif args.mode == 'evaluate':
        # Load test data into an iterable dataset with no augmentation and verbose metrics
        test_dir = '%s/%s' % (config['root'], args.test_folder)
        test_set = Dataset(test_dir, config['size'],
                           *get_transform(config, is_train=False))
        test_loader = DataLoader(test_set,
                                 batch_size=1,
                                 shuffle=False,
                                 num_workers=0,
                                 drop_last=False)

        # Load desired trained network from saved directory
        model_dir = './saved/%s_%s.pth' % (config['name'], method)
        model.load_state_dict(torch.load(model_dir)['model_state_dict'])

        # Define directories to which to save predictions and overlays respectively, and create them if necessary
        save_dir = '%s/predictions/%s_%s' % (test_dir, args.version, method)
        overlay_dir = '%s/overlays/%s_%s' % (test_dir, args.version, method)
        labels_dir = os.path.join(test_dir, 'labels_npy')
        if not os.path.isdir('%s/predictions' % test_dir):
            os.mkdir('%s/predictions' % test_dir)
        if not os.path.isdir(save_dir):
            os.mkdir(save_dir)
        evaluate(config,
                 model,
                 criterion,
                 test_loader,
                 gpu=args.gpu,
                 method=method,
                 test_flag=True,
                 save_dir=save_dir)

        # Creates overlays if this is specified in the command line
        if os.path.isdir(labels_dir) and args.overlay:
            if not os.path.isdir(overlay_dir):
                os.makedirs(overlay_dir)
            overlay(labels_dir, save_dir, overlay_dir, config['n_class'])

    else:
        print('%s mode does not exist' % args.mode)
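
Because the training history is persisted with torch.save(recorder.record, log_dir),
it can be reloaded later for inspection. A hedged sketch, assuming the record behaves
like the name-to-values mapping sketched after Code Example #19 and using a hypothetical
file name:

import torch

history = torch.load('./log/example_unet.log')   # hypothetical '<name>_<method>.log' file
print(history['loss_val'])    # per-epoch validation loss
print(history['mean_iou'])    # per-epoch mean IoU
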
Code Example #21
File: main.py  Project: minhoe/hyundai-destpred
def train_eval_save(car_id_list, dest_term, model_id, n_save_viz=0):
  """
  TRAIN and EVAL for given car and experimental settings
  """
  # Load datasets
  path_trn, meta_trn, dest_trn, dt_trn, full_path_trn, \
  path_tst, meta_tst, dest_tst, dt_tst, full_path_tst = \
      unified_latest_seqdata(car_id_list, proportion_list, dest_term,
                             train_ratio=0.8,
                             seq_len=FLAGS.seq_len, 
                             data_dir=DATA_DIR)

  print('trn_data:', path_trn.shape, dest_trn.shape)
  print('tst_data:', path_tst.shape, dest_tst.shape)

  # Define model dir
  model_dir = os.path.join(MODEL_DIR, 
                           'dest_type_%d' % dest_term, 
                           'minibatch',
                           model_id)
  model = Model(model_dir)
  FLAGS.train = FLAGS.train or model.latest_checkpoint is None
  
  # Build graph and initialize all variables
  model.build_graph()
  model.init_or_restore_all_variables(restart=FLAGS.restart)

  # TRAIN PART
  if FLAGS.train:
    # model.print_all_trainable_variables()
    model.train(path_trn, meta_trn, dest_trn)

  # TEST EVALUATION PART
  # FOR TARGETING CARS
  for car_id in car_id_list:
    # LOAD DATA
    path_trn, meta_trn, dest_trn, dt_trn, full_path_trn, \
    path_tst, meta_tst, dest_tst, dt_tst, full_path_tst = \
        unified_latest_seqdata([car_id], proportion_list, dest_term,
                                train_ratio=0.8,
                                seq_len=FLAGS.seq_len, 
                                data_dir=DATA_DIR)

    # dist_tst = model.eval_dist(path_tst, meta_tst, dest_tst)
    # recorder = Recorder('PATHWISE_' + RECORD_FNAME)
    # for i in tqdm(range(len(dist_tst))):
    #     recorder.append_values(
    #         ['car{:03}'.format(car_id) if isinstance(car_id, int) else 'car' + car_id,
    #          dt_tst[i], *meta_tst[i], dist_tst[i]])
    #     recorder.next_line()

    if FLAGS.record:
      log.info('save the results to %s', RECORD_FNAME)
      global_step = model.latest_step
      loss_trn = model.eval_mean_distance(path_trn, meta_trn, dest_trn)
      loss_tst = model.eval_mean_distance(path_tst, meta_tst, dest_tst)
      print('car_id:', car_id, 'trn_data:', path_trn.shape, dest_trn.shape, end='--')
      print(loss_trn, loss_tst)

      # SAVE THE RESULT INTO CSV
      recorder = Recorder(RECORD_FNAME)
      recorder.append_values(['car{:03}'.format(car_id) if isinstance(car_id, int) else 'car' + car_id,
                              model_id, 
                              len(path_trn), 
                              len(path_tst), 
                              global_step,
                              loss_trn, 
                              loss_tst])
      recorder.next_line()

    if n_save_viz > 0:
      # DEFINE PLOT AND GET PRED POINTS
      pred_tst = model.predict(path_tst, meta_tst)
      myplot = ResultPlot()
      myplot.add_point(
            path_trn, label=None,
            color='lightgray', marker='.', s=10, alpha=1, must_contain=False)
      myplot.add_point(
            dest_trn, label=None,
            color='gray', marker='.', s=10, alpha=1, must_contain=False)

      # PLOT ALL TEST ERRORS
      for i in range(pred_tst.shape[0]):
        difference = np.stack([dest_tst[i], pred_tst[i]], axis=0)
        myplot.add_tmp_path(
            difference, label=None, 
            color='lightblue', marker=None, must_contain=True)
        myplot.add_tmp_point(
            dest_tst[i], label=None,
            color='mediumblue', marker='*', s=100, alpha=1, must_contain=True)
        myplot.add_tmp_point(
            pred_tst[i], label=None,
            color='crimson', marker='*', s=100, alpha=1, must_contain=True)
      dist_km = dist(dest_tst, pred_tst, to_km=True)

      # Define details to save plot
      save_dir = os.path.join(VIZ_DIR, 
                              'path_and_prediction', 
                              'dest_term_%d' % dest_term, 
                              'car_%03d' % car_id)
      fname = model_id + '.png'
      title = '{fname}\ndist={dist_km}km'
      title = title.format(fname=fname,
                          dist_km='N/A' if dist_km is None else '%.1f' % dist_km)
      myplot.draw_and_save(title, save_dir, fname)

      # FOR EACH TRIP
      for i in range(n_save_viz):
        myplot.add_tmp_path(
              full_path_tst[i], label=None,
              color='lightblue', marker='.', must_contain=True)
        myplot.add_tmp_path(
            path_tst[i], label='input_path', 
            color='mediumblue', marker='.', must_contain=True)

        dest_true, dest_pred = dest_tst[i], pred_tst[i]
        myplot.add_tmp_point(
            dest_true, label='true_destination',
            color='mediumblue', marker='*', s=100, alpha=1, must_contain=True)
        myplot.add_tmp_point(
            dest_pred, label='pred_destination',
            color='crimson', marker='*', s=100, alpha=1, must_contain=True)

        start_time = convert_time_for_fname(dt_tst[i])
        dist_km = dist(dest_pred, dest_true, to_km=True)

        # Define details to save plot
        save_dir = os.path.join(VIZ_DIR, 
                                'path_and_prediction', 
                                'dest_term_%d' % dest_term, 
                                'car_%03d' % car_id, 
                                'start_%s' % start_time)
        fname = model_id + '.png'
        title = '{datetime}\n{fname}\ndist={dist_km}km'
        title = title.format(fname=fname,
                            datetime=start_time,
                            dist_km='N/A' if dist_km is None else '%.1f' % dist_km)
        myplot.draw_and_save(title, save_dir, fname)


  # Close tf session to release GPU memory
  model.close_session()
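
For reference, the Recorder used here follows a row-oriented interface
(append_values() buffers cells for the current row, next_line() starts the next one).
A minimal CSV-backed sketch of that interface, as an assumption inferred from the
calls above rather than the project's actual class:

import csv

class SimpleCsvRecorder:
    def __init__(self, fname):
        self.fname = fname
        self._row = []

    def append_values(self, values):
        # Buffer cells for the current row.
        self._row.extend(values)

    def next_line(self):
        # Flush the buffered cells as one CSV row and begin a new row.
        with open(self.fname, 'a', newline='') as f:
            csv.writer(f).writerow(self._row)
        self._row = []
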