Code example #1
def main():
    global args
    args = parser.parse_args()

    cc = CrayonClient(port=8089)

    for name in args.name.split(','):
        shutil.rmtree(f'weights/{name}/', ignore_errors=True)
        shutil.rmtree(f'output/{name}/', ignore_errors=True)
        os.makedirs(f'weights/{name}')

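        # k-fold cross-validation: train and evaluate a fresh model on each fold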
        for fold in range(NUM_SPLITS):
            print(f'=> Targeting {name} fold {fold+1}/{NUM_SPLITS}')
            os.makedirs(f'output/{name}/fold{fold}/train')
            os.makedirs(f'output/{name}/fold{fold}/valid')

            arch = name.split('_')[0]
            model = models[arch](1)
            model = nn.DataParallel(model)
            model.cuda()

            train_loader, valid_loader, _ = get_loaders(args.batch_size, NUM_SPLITS, fold)

            train_eval(model, name, train_loader, valid_loader,
                       fold, make_experiment(cc, name, fold),
                       init_lr=args.lr, epochs=args.epochs,
                       num_epochs_per_decay=args.num_epochs_per_decay)

            del model
Code example #2
File: utils.py Project: HayeonLee/mlai-fewshot
def __init__(self, args, logger, num=3):
    self.args = args
    self.logger = logger
    self.loaders = get_loaders(args, debug=True)
    self.model = None
    self.epoch = None
    self.num = num
    self.fname = args.fname
    # episode labels: class indices 0..way-1 repeated once per query sample
    label = torch.arange(args.way).repeat(args.query)
    self.label = label.type(torch.cuda.LongTensor)
    self.p = args.shot * args.way  # number of support examples per episode
Code example #3
def main(rank, world_size, arg):

    logger = Logger(arg.save_dir)
       
    setup(rank, world_size)
    print(rank)

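    # linear LR scaling rule: scale the base LR with the global batch size,
    # then split the batch size and loader workers across the DDP processes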
    scaled_lr = arg.lr * arg.batch_size / 256
    arg.batch_size = int(arg.batch_size / world_size)
    num_workers = int(arg.num_workers / world_size)

    net, res = get_model(arg, classes=arg.num_classes) 
    logger.will_write(str(arg) + "\n")
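    # move the model to this rank's GPU and wrap it for distributed data-parallel training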
    net.to(rank)
    net = nn.parallel.DistributedDataParallel(net, device_ids=[rank])
    
    if not arg.dali:
        train_loader, val_loader = get_loaders(arg.root, arg.batch_size, res, num_workers, arg.val_batch_size, color_jitter=arg.color_jitter, pca=arg.pca, crop_pct=arg.crop_pct)
    else:
        train_loader, val_loader = get_loaders_dali(arg.root, arg.batch_size, res, rank, world_size, num_workers)
    
    # net = nn.DataParallel(net).to(torch_device)
    loss = nn.CrossEntropyLoss()

    if not arg.no_filter_bias:
        parameters = add_weight_decay(net, weight_decay=arg.decay)
        weight_decay = 0
        print('filter out bias, bn and other 1d params from weight decay')
    else:
        parameters = net.parameters()
        weight_decay = arg.decay
    
    optim = {
        # "adam" : lambda : torch.optim.Adam(net.parameters(), lr=arg.lr, betas=arg.beta, weight_decay=arg.decay),
        "sgd": lambda : torch.optim.SGD(parameters, lr=scaled_lr, momentum=arg.momentum, nesterov=True, weight_decay=weight_decay),
        "rmsproptf": lambda : RMSpropTF(parameters, lr=scaled_lr, momentum=arg.momentum, eps=arg.eps, weight_decay=weight_decay),
        "rmsprop" : lambda : torch.optim.RMSprop(parameters, lr=scaled_lr, momentum=arg.momentum, eps=arg.eps, weight_decay=weight_decay)
    }[arg.optim]()

    scheduler = get_scheduler(optim, arg.scheduler, int(1.0 * len(train_loader)), arg.epoch * len(train_loader), warmup_t=int(arg.warmup * len(train_loader)), warmup_lr_init=0.1 * scaled_lr)

    arg.epoch = arg.epoch + arg.cool_down if arg.cool_down > 0 else arg.epoch
    model = Runner(arg, net, optim, rank, loss, logger, scheduler, world_size)

    if arg.profiler:
        model.profiler(train_loader, val_loader, train_loader.sampler)

    elif arg.test is False:
        if not arg.dali:
            model.train(train_loader, val_loader, train_loader.sampler)
        else:
            model.train(train_loader, val_loader)
        cleanup()
Code example #4
File: main.py Project: feifeiobama/Seq-Match
def __init__(self, path=None):
    self.net = model.CompareAggregate()
    if config.use_cuda:
        self.net = self.net.cuda()
    if path is not None:
        self.net.load_weight(path)
    self.train_loader, self.validation_loader, self.test_loader = loader.get_loaders()
    self.summary = Summary(len(self.train_loader))
    self.optim = torch.optim.Adam(self.net.parameters(), lr=config.lr)
    self.epoch = self.step = 0
    self.last_map = 0
    print('Initialize done')
Code example #5

if __name__ == "__main__":
    arg = arg_parse()

    arg.save_dir = "%s/outs/%s" % (os.getcwd(), arg.save_dir)
    if os.path.exists(arg.save_dir) is False:
        os.mkdir(arg.save_dir)

    logger = Logger(arg.save_dir)
    logger.will_write(str(arg) + "\n")

    os.environ["CUDA_VISIBLE_DEVICES"] = arg.gpus
    torch_device = torch.device("cuda")

    train_loader, val_loader = get_loaders(arg.root, arg.batch_size, 224, arg.num_workers)

    if arg.model == "mixs":
        net = mixnet_s()
    else:
        # only "mixs" is handled in this snippet; fail fast instead of leaving net undefined
        raise ValueError(f"unsupported model: {arg.model}")

    net = nn.DataParallel(net).to(torch_device)
    loss = nn.CrossEntropyLoss()

    scaled_lr = arg.lr * arg.batch_size / 256
    optim = {
        "adam" : lambda : torch.optim.Adam(net.parameters(), betas=arg.beta, weight_decay=arg.decay),
        "rmsprop" : lambda : torch.optim.RMSprop(net.parameters(), lr=scaled_lr, momentum=arg.momentum, eps=arg.eps, weight_decay=arg.decay)
    }[arg.optim]()
Code example #6
File: utils.py Project: HayeonLee/mlai-fewshot
def debug_sample(args):
    db_loaders = get_loaders(args, debug=True)
    sample = {'train': _debug_sample(db_loaders['train'], args.way * args.shot),
              'val': _debug_sample(db_loaders['test'], args.way * args.shot)}
    return sample
Code example #7
File: main.py Project: zsef123/MixNet-PyTorch

if __name__ == "__main__":
    arg = arg_parse()

    arg.save_dir = "%s/outs/%s" % (os.getcwd(), arg.save_dir)
    if os.path.exists(arg.save_dir) is False:
        os.mkdir(arg.save_dir)

    logger = Logger(arg.save_dir)
    logger.will_write(str(arg) + "\n")

    os.environ["CUDA_VISIBLE_DEVICES"] = arg.gpus
    device = torch.device("cuda")
    train_loader, val_loader = get_loaders(arg.root,
                                           arg.batch_size,
                                           arg.num_workers,
                                           dtype=arg.dtype)

    if arg.model == "mixs":
        net = mixnet_s(num_classes=len(train_loader.dataset.classes))
    elif arg.model == "rw":
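        # use the MixNet-S implementation vendored under the rwightman/ (timm) directory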
        import sys
        sys.path.append("rwightman")
        from timm.models.gen_efficientnet import mixnet_s
        net = mixnet_s(num_classes=len(train_loader.dataset.classes))
    else:
        from torchvision.models import resnet50
        net = resnet50(num_classes=len(train_loader.dataset.classes))

    net = nn.DataParallel(net)
    loss = nn.CrossEntropyLoss()
Code example #8
File: train.py Project: abtripathi/ImageClassifier
def main():

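    # restrict selectable architectures to lowercase, callable vgg*/alexnet* entries in models.__dict__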
    model_names = sorted(
        name for name in models.__dict__
        if name.islower() and not name.startswith("__")
        and callable(models.__dict__[name])
        and (name.startswith("vgg") or name.startswith("alexnet")))
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "data_dir", help="name of the directory from where to load the data")
    parser.add_argument("--save_dir",
                        help="directory to save checkpoints(default :none)",
                        metavar='save')
    parser.add_argument("--arch",
                        choices=model_names,
                        default="vgg16",
                        help="Choose Architecture (default:vgg16)")
    parser.add_argument("--gpu",
                        action="store_true",
                        help="Use GPU for training")
    parser.add_argument("--learning_rate",
                        type=float,
                        default=0.003,
                        metavar='lr',
                        dest='learning_rate',
                        help="Learning Rate(default:0.003)")
    parser.add_argument("--epochs",
                        type=int,
                        default=1,
                        dest='epochs',
                        help="Number of Epochs for training(default:1)")
    parser.add_argument(
        "--print_every",
        type=int,
        default=5,
        metavar='P',
        dest='validate_every',
        help="Number of steps after which output should be printed(default:5)")
    parser.add_argument(
        "--skip_after",
        type=int,
        dest='skip_after',
        metavar='skip',
        help=
        "Number of steps after which training module should be exited(default:None)"
    )
    parser.add_argument(
        "--hidden_unit_1",
        "-fc1",
        metavar='fc1',
        type=int,
        dest='n_fc1',
        default=4096,
        help="Number of hidden units for layer 1(default:4096)")
    parser.add_argument(
        "--hidden_unit_2",
        "-fc2",
        metavar='fc2',
        type=int,
        default=2048,
        dest='n_fc2',
        help="Number of hidden units for layer 2(default:2048)")
    parser.add_argument(
        "--hidden_unit_3",
        "-fc3",
        metavar='fc3',
        type=int,
        default=1024,
        dest='n_fc3',
        help="Number of hidden units for layer 3(default:1024)")
    args = parser.parse_args()

    if args.save_dir and not os.path.exists(args.save_dir):
        print("save directory doesn't exist. Please try again")
        sys.exit(-1)

    if os.path.exists(args.data_dir):

        traindataloaders, validationdataloaders, class_to_idx = loader.get_loaders(
            args.data_dir)

        if traindataloaders and validationdataloaders:

            device = torch.device(
                'cuda' if torch.cuda.is_available() and args.gpu else 'cpu')
            model = image_classifier.init_classifier(args.arch, device,
                                                     args.n_fc1, args.n_fc2,
                                                     args.n_fc3)
            model.classifier.class_to_idx = class_to_idx
            optimizer = optim.Adam(model.classifier.parameters(),
                                   lr=args.learning_rate)
            criterion = nn.NLLLoss()
            dataloaders = [traindataloaders, validationdataloaders]
            with active_session():
                training_loss = image_classifier.train(
                    model,
                    dataloaders,
                    criterion,
                    device,
                    optimizer,
                    epochs=args.epochs,
                    validate_every=args.validate_every,
                    skip_after=args.skip_after)
            if args.save_dir:
                image_classifier.save_checkpoint(model, optimizer, args.epochs,
                                                 training_loss, args.save_dir,
                                                 args.arch)

        else:
            print(
                "data couldn't be read or no valid train/valid directory. "
                "Please check that /train and /valid exist"
            )

    else:
        print("data directory entered doesn't exist. Please try again")
Code example #9
File: train.py Project: fenglian425/Hefei_ECG
def train_model(cfg: Config, weight_path=None, device='cuda:0'):
    now = datetime.datetime.now()
    log_dir = os.path.join(MODEL_DIR, f'{cfg.NAME.lower()}_{now:%Y%m%dT%H%M}')
    os.makedirs(log_dir, exist_ok=True)
    writer = SummaryWriter(log_dir=log_dir, flush_secs=5)

    # snapshot
    with open(os.path.join(log_dir, 'snapshot.txt'), 'w') as f:
        snapshot = cfg.get_snapshot()
        json.dump(snapshot, f, indent=4)

    model = get_model(cfg)
    model.to(device)
    if weight_path is not None:
        model.load_state_dict(torch.load(weight_path))

    if cfg.LOSS == 'ce':
        weight = None
        criterion = nn.CrossEntropyLoss(weight=weight)
    elif cfg.LOSS == 'bce':
        criterion = nn.BCEWithLogitsLoss(
            pos_weight=torch.tensor(0.5, dtype=torch.float))
    elif cfg.LOSS == 'focal_loss':
        criterion = BCEFocalLoss()

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
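    # group parameters so that biases and normalization weights are excluded from weight decay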
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]

    if cfg.OPTIMIZER == 'sgd':
        optimizer = torch.optim.SGD(optimizer_grouped_parameters,
                                    lr=cfg.BASE_LR,
                                    momentum=0.9,
                                    weight_decay=5e-4)
    elif cfg.OPTIMIZER == 'adam':
        optimizer = torch.optim.Adam(optimizer_grouped_parameters,
                                     lr=cfg.BASE_LR,
                                     weight_decay=5e-4)

    if cfg.SCHEDULER == 'step':
        scheduler = StepLR(optimizer, step_size=5, gamma=0.5)
    elif cfg.SCHEDULER == 'multstep':
        scheduler = MultiStepLR(optimizer, milestones=(20, 40), gamma=0.1)

    global_step = 0
    train_loader, val_loader = get_loaders(cfg)

    for epoch in range(1, cfg.EPOCHS + 1):
        batch_loss = []
        train_loss = []

        model.train()

        # scheduler(optimizer, epoch)

        scheduler.step(epoch)

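        # accumulate per-epoch predictions and ground-truth labels for the training metrics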
        pr, gt = [], []
        for i, (inputs, labels) in enumerate(train_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)

            loss = criterion(outputs, labels)

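            # binarize the sigmoid outputs at 0.5 to get hard class predictions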
            pr.extend(
                torch.round(
                    torch.sigmoid(outputs)).detach().cpu().numpy().squeeze())
            gt.extend(labels.cpu().numpy().squeeze())

            # print(pr)
            # print(gt)

            batch_loss.append(loss.item())
            loss = loss / cfg.ACCUMULATION_STEPS
            loss.backward()

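            # gradient accumulation: step the optimizer only every ACCUMULATION_STEPS mini-batches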
            if (i + 1) % cfg.ACCUMULATION_STEPS == 0:
                # scheduler.step()
                # adjust_learning_rate(optimizer, cfg.BASE_LR, gloabl_step, epoch,
                #                      warmup_iters=len(train_loader) // (cfg.ACCUMULATION_STEPS * cfg.IMAGE_PER_GPU) * 5)
                optimizer.step()
                optimizer.zero_grad()
                global_step += 1

                mean_batch_loss = np.mean(batch_loss)
                train_loss.append(mean_batch_loss)

                lr = optimizer.state_dict()['param_groups'][0]['lr']
                writer.add_scalar('lr', lr, global_step)

                print(
                    f'epoch {epoch:5d} batch {(i + 1) // cfg.ACCUMULATION_STEPS:5d}, '
                    f'loss:{mean_batch_loss:.4f}, lr:{lr:.4e}'
                )

                writer.add_scalar('batch_loss', mean_batch_loss, global_step)
                batch_loss = []
            # break
        train_acc = accuracy_score(gt, pr)
        train_recall = recall_score(gt, pr)
        train_precision = precision_score(gt, pr)
        print(confusion_matrix(gt, pr))
        print(
            f'epoch {epoch} mean_loss:{np.mean(train_loss):.4f} acc:{train_acc:.4f} recall:{train_recall:.4f} '
            f'precision:{train_precision:.4f} pos_num:{sum(gt)} neg_num:{len(gt) - sum(gt)}'
        )

        model.eval()
        val_loss = []

        pr, gt = [], []
        with torch.no_grad():
            for j, (inputs, labels) in enumerate(val_loader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)

                loss = criterion(outputs, labels)
                val_loss.append(loss.item())

                pr.extend(
                    torch.round(torch.sigmoid(
                        outputs)).detach().cpu().numpy().squeeze())
                gt.extend(labels.cpu().numpy().squeeze())

        val_acc = accuracy_score(gt, pr)
        val_recall = recall_score(gt, pr)
        val_precision = precision_score(gt, pr)
        print(confusion_matrix(gt, pr))

        print(
            f'epoch {epoch} val_loss:{np.mean(val_loss):.4f} acc:{val_acc:.4f} recall:{val_recall:.4f} '
            f'precision:{val_precision:.4f} pos_num:{sum(gt)} neg_num:{len(gt) - sum(gt)}'
        )

        checkpoint_path = os.path.join(
            log_dir, "{}_{:04d}_{:.4f}.pth".format(cfg.NAME.lower(), epoch,
                                                   val_acc))

        writer.add_scalars('loss', {
            'loss': np.mean(train_loss),
            'val_loss': np.mean(val_loss)
        }, epoch)

        writer.add_scalars(
            'acc', {
                'train_acc': train_acc,
                'train_precision': train_precision,
                'train_recall': train_recall
            }, epoch)
        writer.add_scalars(
            'val_acc', {
                'val_acc': val_acc,
                'val_precision': val_precision,
                'val_recall': val_recall
            }, epoch)

        torch.save(model.state_dict(), checkpoint_path)

    writer.close()