Example #1
def upload_experiment():
    # COMET_ML_KEY (a mapping of Experiment kwargs) and RESULT_FILE are
    # defined elsewhere in the source file.
    experiment = Experiment(**COMET_ML_KEY)
    experiment.log_asset_folder('./datasets')
    experiment.log_asset_folder('./models')
    experiment.log_asset_folder('./knapsack')
    experiment.log_asset(RESULT_FILE)
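For reference, log_asset_folder uploads every file in a directory to the run's Assets tab, while log_asset uploads a single file. A minimal, self-contained sketch assuming placeholder credentials and paths:

from comet_ml import Experiment

# Placeholder credentials -- substitute your own key, project, and workspace.
experiment = Experiment(api_key="YOUR_API_KEY",
                        project_name="demo-project",
                        workspace="demo-workspace")

experiment.log_asset_folder('./datasets')  # upload every file in the folder
experiment.log_asset('./results.json')     # upload a single file
experiment.end()                           # flush and close the run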
Example #2
                break

        # Save training history
        history_file = os.path.join(output_dir,
                                    experiment_name + "_history.npz")
        save_history(history_file, history)
        experiment.log_asset(history_file)

        end_time = time.time()
        print("Training took " + str(('%.3f' % (end_time - start_time))) +
              " seconds for " + str(num_epochs) + " epochs")

        print("------------------------------------")
        print("Saving model...")
        checkpointer.save(global_step)
        experiment.log_asset_folder(checkpoint_dir)

    if testing:
        # Test the model
        print("------------------------------------")
        print("Testing model...")

        # Load if best weights exists
        best_weights_file = checkpointer.get_best_weights()
        if load_model and best_weights_file and os.path.exists(
                best_weights_file):
            model.load_weights(best_weights_file)
            print("Loaded model weights from: " + best_weights_file)

        start_time = time.time()
        print("Testing started: " +
Example #3
                    best_sel_acc = val_acc[1][1]
                    best_sel_idx = i + 1
                    torch.save(
                        model.sel_pred.state_dict(),
                        'saved_model/epoch%d.sel_model%s' %
                        (i + 1, args.suffix))
                    torch.save(model.sel_pred.state_dict(), sel_m)
                    if args.train_emb:
                        torch.save(
                            model.sel_embed_layer.state_dict(),
                            'saved_model/epoch%d.sel_embed%s' %
                            (i + 1, args.suffix))
                        torch.save(model.sel_embed_layer.state_dict(), sel_e)
            if TRAIN_COND:
                if val_acc[1][2] > best_cond_acc:
                    best_cond_acc = val_acc[1][2]
                    best_cond_idx = i + 1
                    torch.save(
                        model.cond_pred.state_dict(),
                        'saved_model/epoch%d.cond_model%s' %
                        (i + 1, args.suffix))
                    torch.save(model.cond_pred.state_dict(), cond_m)
                    if args.train_emb:
                        torch.save(
                            model.cond_embed_layer.state_dict(),
                            'saved_model/epoch%d.cond_embed%s' %
                            (i + 1, args.suffix))
                        torch.save(model.cond_embed_layer.state_dict(), cond_e)

            experiment.log_asset_folder('saved_model')
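The same save-if-improved pattern repeats for each sub-module: write an epoch-stamped checkpoint plus a fixed best-so-far copy, then upload the folder. A condensed, illustrative restatement (the helper name and paths are made up, not the original code):

import os

import torch


def checkpoint_best(module, epoch, tag, suffix, fixed_path, experiment):
    # Save both an epoch-stamped file and a stable "best so far" file,
    # then sync the whole checkpoint folder to Comet.
    os.makedirs('saved_model', exist_ok=True)
    stamped = 'saved_model/epoch%d.%s%s' % (epoch, tag, suffix)
    torch.save(module.state_dict(), stamped)
    torch.save(module.state_dict(), fixed_path)
    experiment.log_asset_folder('saved_model')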
Example #4
                            parse_args=False,
                            disabled=True)
else:
    experiment = Experiment(api_key="jYkp7GiEE17RfR1iGGvF2rMTB",
                            project_name="mvbchallenge",
                            workspace="johnzhang1999",
                            parse_args=False)

name = args.arch + '_' + args.sources[0] + '_' + str(args.lr) + '_' + str(
    args.batch_size)
experiment.set_name(name)
if args.resume:
    experiment.add_tag('resume')
experiment.log_parameters(args.__dict__)
# NOTE: asset logging is buggy!
experiment.log_asset_folder(osp.expanduser('log/'))
experiment.log_asset_folder(osp.expanduser('runs/'))


def build_datamanager(args):
    if args.app == 'image':
        return torchreid.data.ImageDataManager(**imagedata_kwargs(args))
    else:
        return torchreid.data.VideoDataManager(**videodata_kwargs(args))


def build_engine(args,
                 datamanager,
                 model,
                 optimizer,
                 scheduler,
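This example derives the run name from hyperparameters before logging anything. A self-contained sketch of the same naming and tagging pattern with made-up argument values (parse_args=False stops Comet from auto-logging command-line flags):

import argparse

from comet_ml import Experiment

# Made-up values standing in for the script's argparse namespace.
args = argparse.Namespace(arch='resnet50', lr=0.0003, batch_size=64,
                          resume=False)

experiment = Experiment(api_key="YOUR_API_KEY",
                        project_name="demo-project",
                        workspace="demo-workspace",
                        parse_args=False)

experiment.set_name('%s_%s_%s' % (args.arch, args.lr, args.batch_size))
if args.resume:
    experiment.add_tag('resume')
experiment.log_parameters(vars(args))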
Example #5
"""
Experiment 2
"""
if '2' in args.experiments:
    exp2_result_df = model.robustness_experiments(experiment,
                                                  args.data_name,
                                                  training_rates_list=[0.03])
    exp2_result_df.to_csv(result_dir / 'exp2.csv')
"""
Experiment 3
"""
if '3' in args.experiments:
    exp3_result_df = model.robustness_experiments(
        experiment,
        args.data_name,
        training_rates_list=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
    exp3_result_df.to_csv(result_dir / 'exp3.csv')
"""
Experiment 4
"""
if '4' in args.experiments:
    exp4_result_df = model.inductive_learning_eval(
        args.exp4_select,
        experiment,
        args.data_name,
        rate_list=args.exp4_rate_list,
        iter_num=30)
    exp4_result_df.to_csv(result_dir / 'exp4.csv')

# Save results to comet-ml
experiment.log_asset_folder(result_dir)
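Each sub-experiment writes its DataFrame into one result_dir, and the directory is uploaded once at the end. A minimal sketch of that flow (credentials, paths, and the toy DataFrame are placeholders):

from pathlib import Path

import pandas as pd
from comet_ml import Experiment

experiment = Experiment(api_key="YOUR_API_KEY")

result_dir = Path('results/run1')  # placeholder output directory
result_dir.mkdir(parents=True, exist_ok=True)

# Toy stand-in for one of the robustness-experiment result tables.
pd.DataFrame({'training_rate': [0.1, 0.2],
              'accuracy': [0.81, 0.84]}).to_csv(result_dir / 'exp3.csv')

experiment.log_asset_folder(str(result_dir))  # upload every CSV in one call
experiment.end()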
Example #6
class ModelTrainer:
    def __init__(self, model, dataloader, args):
        self.model = model
        self.args = args
        self.data = dataloader
        self.metric = args.metric

        if dataloader is not None:
            self.frq_log = len(dataloader['train']) // args.frq_log

        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        model.to(self.device)

        if args.optimizer == 'sgd':
            self.optimizer = optim.SGD(model.parameters(),
                                       lr=args.lr,
                                       momentum=args.momentum,
                                       weight_decay=args.weight_decay)
        elif args.optimizer == 'adam':
            self.optimizer = optim.Adam(model.parameters(),
                                        lr=args.lr,
                                        betas=(args.beta1, 0.999),
                                        weight_decay=args.weight_decay)
        else:
            raise Exception('--optimizer should be one of {sgd, adam}')

        if args.scheduler == 'set':
            self.scheduler = optim.lr_scheduler.LambdaLR(
                self.optimizer,
                lambda epoch: 10**(epoch / args.scheduler_factor))
        elif args.scheduler == 'auto':
            self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                self.optimizer,
                mode='min',
                factor=args.scheduler_factor,
                patience=5,
                verbose=True,
                threshold=0.0001,
                threshold_mode='rel',
                cooldown=0,
                min_lr=0,
                eps=1e-08)

        self.experiment = Experiment(api_key=args.comet_key,
                                     project_name=args.comet_project,
                                     workspace=args.comet_workspace,
                                     auto_weight_logging=True,
                                     auto_metric_logging=False,
                                     auto_param_logging=False)

        self.experiment.set_name(args.name)
        self.experiment.log_parameters(vars(args))
        self.experiment.set_model_graph(str(self.model))

    def train_one_epoch(self, epoch):

        self.model.train()
        train_loader = self.data['train']
        train_loss = 0
        correct = 0

        comet_offset = epoch * len(train_loader)

        for batch_idx, (data, target) in tqdm(enumerate(train_loader),
                                              leave=True,
                                              total=len(train_loader)):
            data, target = data.to(self.device), target.to(self.device)

            self.optimizer.zero_grad()
            output = self.model(data)
            loss = F.cross_entropy(output, target, reduction='sum')
            loss.backward()
            self.optimizer.step()

            pred = output.argmax(dim=1, keepdim=True)
            acc = pred.eq(target.view_as(pred)).sum().item()
            train_loss += loss.item()
            correct += acc

            loss = loss.item() / len(data)
            acc = 100. * acc / len(data)

            comet_step = comet_offset + batch_idx
            self.experiment.log_metric('batch_loss', loss, comet_step, epoch)
            self.experiment.log_metric('batch_acc', acc, comet_step, epoch)

            if (batch_idx + 1) % self.frq_log == 0:
                self.experiment.log_metric('log_loss', loss, comet_step, epoch)
                self.experiment.log_metric('log_acc', acc, comet_step, epoch)
                print('Epoch: {} [{}/{}]\tLoss: {:.6f}\tAcc: {:.2f}%'.format(
                    epoch + 1, (batch_idx + 1) * len(data),
                    len(train_loader.dataset), loss, acc))

        train_loss /= len(train_loader.dataset)
        acc = 100. * correct / len(train_loader.dataset)

        comet_step = comet_offset + len(train_loader) - 1
        self.experiment.log_metric('loss', train_loss, comet_step, epoch)
        self.experiment.log_metric('acc', acc, comet_step, epoch)

        print(
            'Epoch: {} [Done]\tLoss: {:.4f}\tAccuracy: {}/{} ({:.2f}%)'.format(
                epoch + 1, train_loss, correct, len(train_loader.dataset),
                acc))

        return {'loss': train_loss, 'acc': acc}

    def train(self):

        self.log_cmd()
        best = -1
        history = {'lr': [], 'train_loss': []}

        try:
            print(">> Training %s" % self.model.name)
            for epoch in range(self.args.nepoch):
                with self.experiment.train():
                    train_res = self.train_one_epoch(epoch)

                with self.experiment.validate():
                    print("\nvalidation...")
                    comet_offset = (epoch + 1) * len(self.data['train']) - 1
                    res = self.val(self.data['val'], comet_offset, epoch)

                if res[self.metric] > best:
                    best = res[self.metric]
                    self.save_weights(epoch)

                if self.args.scheduler == 'set':
                    lr = self.optimizer.param_groups[0]['lr']
                    history['lr'].append(lr)
                    history['train_loss'].append(train_res['loss'])

                    self.scheduler.step(epoch + 1)
                    lr = self.optimizer.param_groups[0]['lr']
                    print('learning rate changed to: %.10f' % lr)

                elif self.args.scheduler == 'auto':
                    self.scheduler.step(train_res['loss'])
        finally:
            print(">> Training model %s. [Stopped]" % self.model.name)
            self.experiment.log_asset_folder(os.path.join(
                self.args.outf, self.args.name, 'weights'),
                                             step=None,
                                             log_file_name=False,
                                             recursive=False)
            if self.args.scheduler == 'set':
                plt.semilogx(history['lr'], history['train_loss'])
                plt.grid(True)
                self.experiment.log_figure(figure=plt)
                plt.show()

    def val(self, val_loader, comet_offset=-1, epoch=-1):
        self.model.eval()
        test_loss = 0
        correct = 0

        labels = list(range(self.args.nclass))
        cm = np.zeros((len(labels), len(labels)))

        with torch.no_grad():
            for data, target in tqdm(val_loader,
                                     leave=True,
                                     total=len(val_loader)):
                data, target = data.to(self.device), target.to(self.device)
                output = self.model(data)
                test_loss += F.cross_entropy(output, target,
                                             reduction='sum').item()
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

                pred = pred.view_as(target).data.cpu().numpy()
                target = target.data.cpu().numpy()
                cm += confusion_matrix(target, pred, labels=labels)

        test_loss /= len(val_loader.dataset)
        accuracy = 100. * correct / len(val_loader.dataset)

        print('Evaluation: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.
              format(test_loss, correct, len(val_loader.dataset), accuracy))

        res = {'loss': test_loss, 'acc': accuracy}

        self.experiment.log_metrics(res, step=comet_offset, epoch=epoch)
        self.experiment.log_confusion_matrix(
            matrix=cm,
            labels=[ClassDict.getName(x) for x in labels],
            title='confusion matrix after epoch %03d' % epoch,
            file_name="confusion_matrix_%03d.json" % epoch)

        return res

    def test(self):
        self.load_weights()
        with self.experiment.test():
            print('\ntesting....')
            res = self.val(self.data['test'])

    def log_cmd(self):
        d = vars(self.args)
        cmd = '!python main.py \\\n'
        tab = '    '

        for k, v in d.items():
            if v is None or v == '' or (isinstance(v, bool) and v is False):
                continue

            if isinstance(v, bool):
                arg = '--{} \\\n'.format(k)
            else:
                arg = '--{} {} \\\n'.format(k, v)

            cmd = cmd + tab + arg

        # print(cmd)
        self.experiment.log_text(cmd)

    def save_weights(self, epoch: int):

        weight_dir = os.path.join(self.args.outf, self.args.name, 'weights')
        if not os.path.exists(weight_dir):
            os.makedirs(weight_dir)

        torch.save({
            'epoch': epoch,
            'state_dict': self.model.state_dict()
        }, os.path.join(weight_dir, 'model.pth'))

    def load_weights(self):

        path_g = self.args.weights_path

        if path_g is None:
            weight_dir = os.path.join(self.args.outf, self.args.name,
                                      'weights')
            path_g = os.path.join(weight_dir, 'model.pth')

        print('>> Loading weights...')
        weights_g = torch.load(path_g, map_location=self.device)['state_dict']
        self.model.load_state_dict(weights_g)
        print('   Done.')

    def predict(self, x):
        x = x / 2**15  # presumably scales raw 16-bit integer input to [-1, 1]
        self.model.eval()
        with torch.no_grad():
            x = torch.from_numpy(x).float()
            # self.transform is expected to be assigned on the instance elsewhere
            x = self.transform(x)
            x = x.unsqueeze(0)
            x = self.model(x)
            x = F.softmax(x, dim=1)
            x = x.numpy()
        return x
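A distinctive detail of this trainer is its Comet step bookkeeping: the global step is epoch * batches_per_epoch + batch_idx, so training and validation curves share one x-axis, while the train()/validate() context managers prefix metric names automatically. A stripped-down sketch with placeholder values:

from comet_ml import Experiment

experiment = Experiment(api_key="YOUR_API_KEY")

batches_per_epoch = 100  # placeholder loader length
for epoch in range(2):
    with experiment.train():  # metrics recorded with a train_ prefix
        for batch_idx in range(batches_per_epoch):
            step = epoch * batches_per_epoch + batch_idx
            experiment.log_metric('batch_loss', 1.0 / (step + 1), step, epoch)

    with experiment.validate():  # metrics recorded with a validate_ prefix
        step = (epoch + 1) * batches_per_epoch - 1
        experiment.log_metric('loss', 0.5 / (epoch + 1), step, epoch)

experiment.end()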
Example #7
        opt)  # regular setup: load and print networks; create schedulers
    # create a website
    web_dir = os.path.join(
        opt.results_dir, opt.name,
        '%s_%s' % (opt.phase, opt.epoch))  # define the website directory
    webpage = html.HTML(
        web_dir, 'Experiment = %s, Phase = %s, Epoch = %s' %
        (opt.name, opt.phase, opt.epoch))
    # test with eval mode. This only affects layers like batchnorm and dropout.
    # For [pix2pix]: we use batchnorm and dropout in the original pix2pix. You can experiment it with and without eval() mode.
    # For [CycleGAN]: It should not affect CycleGAN as CycleGAN uses instancenorm without dropout.
    if opt.eval:
        model.eval()
    for i, data in enumerate(dataset):
        if i >= opt.num_test:  # only apply our model to opt.num_test images.
            break
        model.set_input(data)  # unpack data from data loader
        model.test()  # run inference
        visuals = model.get_current_visuals()  # get image results
        img_path = model.get_image_paths()  # get image paths
        comet_exp.log_image(img_path[0])  # log the first result image to Comet
        if i % 5 == 0:  # save images to an HTML file
            print('processing (%04d)-th image... %s' % (i, img_path))
        save_images(webpage,
                    visuals,
                    img_path,
                    aspect_ratio=opt.aspect_ratio,
                    width=opt.display_winsize)
    webpage.save()  # save the HTML
    comet_exp.log_asset_folder(webpage.get_image_dir())
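Note that log_asset_folder only uploads the top level of a directory by default. Passing recursive=True descends into sub-directories, and log_file_name=True records each asset under its file name and relative path rather than as an unnamed blob. A short sketch (the path is a placeholder):

from comet_ml import Experiment

experiment = Experiment(api_key="YOUR_API_KEY")

# recursive=True walks sub-directories; log_file_name=True keeps each
# asset's file name and relative folder in the Assets tab.
experiment.log_asset_folder('./results/images',
                            log_file_name=True,
                            recursive=True)
experiment.end()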