Example #1
def start(args):
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    cudnn.enabled = True
    cudnn.benchmark = True
    logging.info("args = %s", args)

    dataset = LoadData(args.data_name)
    in_dim = dataset.num_atom_type
    num_classes = 1
    criterion = nn.L1Loss()
    criterion = criterion.cuda()
    print(f"=> input dimension: {in_dim}, number classes: {num_classes}")

    genotype = ZINC_Net
    print('=> loading from genotype: \n', genotype)
    # model = Network(genotype, args.layers, in_dim, args.feature_dim, num_classes, criterion, args.data_type, args.readout, args.dropout)
    model = Network(args, genotype, num_classes, in_dim, criterion)
    model = model.cuda()
    logging.info("=> param size = %f", count_parameters_in_MB(model) * 1e6)

    train_data, val_data, test_data = dataset.train, dataset.val, dataset.test

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              pin_memory=True,
                                              num_workers=args.workers,
                                              collate_fn=dataset.collate,
                                              shuffle=True)

    valid_queue = torch.utils.data.DataLoader(val_data,
                                              batch_size=args.batch_size,
                                              pin_memory=True,
                                              num_workers=args.workers,
                                              collate_fn=dataset.collate,
                                              shuffle=False)

    test_queue = torch.utils.data.DataLoader(test_data,
                                             batch_size=args.batch_size,
                                             pin_memory=True,
                                             num_workers=args.workers,
                                             collate_fn=dataset.collate,
                                             shuffle=False)

    if args.optimizer == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    elif args.optimizer == 'ADAM':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=0.001,
                                     weight_decay=0.0)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               mode='min',
                                                               factor=0.5,
                                                               patience=10,
                                                               verbose=True)

    for epoch in range(args.epochs):
        logging.info('[EPOCH]\t%d', epoch)
        if args.optimizer == 'SGD':
            scheduler.step()
            lr = scheduler.get_lr()[0]
            logging.info('[LR]\t%f', lr)

        # training
        train_mae, train_obj = train(train_queue, model, criterion, optimizer)
        # validation
        valid_mae, valid_obj = infer(valid_queue,
                                     model,
                                     criterion,
                                     stage='validating')
        # testing
        test_mae, test_obj = infer(test_queue,
                                   model,
                                   criterion,
                                   stage='testing   ')
        desc = '[train] mae: {:.3f}, loss: {:.3f}\t[validate] mae:{:.3f}, loss: {:.3f}\t[test] mae: {:.3f}, loss: {:.3f}'.format(
            train_mae, train_obj, valid_mae, valid_obj, test_mae, test_obj)
        logging.info(desc)

        if args.optimizer == 'ADAM':
            scheduler.step(valid_obj)
            if optimizer.param_groups[0]['lr'] < 1e-5:
                print("\n!! LR SMALLER OR EQUAL TO MIN LR THRESHOLD.")
                break
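
For reference, start() only reads a handful of attributes from args, so a minimal argparse setup along the following lines would be enough to drive this snippet. The flag names mirror the attributes used above; the default values are illustrative assumptions, not settings from the original repository, and Network() may read further attributes internally.

import argparse

def parse_args():
    # Only the attributes that start() above actually reads; defaults are assumed.
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=2)
    parser.add_argument('--data_name', type=str, default='ZINC')
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--workers', type=int, default=4)
    parser.add_argument('--optimizer', type=str, default='ADAM', choices=['SGD', 'ADAM'])
    parser.add_argument('--learning_rate', type=float, default=0.025)
    parser.add_argument('--learning_rate_min', type=float, default=1e-4)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight_decay', type=float, default=3e-4)
    parser.add_argument('--epochs', type=int, default=100)
    return parser.parse_args()

if __name__ == '__main__':
    start(parse_args())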
Example #2
def main():
    args = parse_args()
    update_config(cfg, args)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # Set the random seed manually for reproducibility.
    np.random.seed(cfg.SEED)
    torch.manual_seed(cfg.SEED)
    torch.cuda.manual_seed_all(cfg.SEED)

    # Loss
    criterion = CrossEntropyLoss(cfg.MODEL.NUM_CLASSES).cuda()

    # load arch
    genotype = eval(args.text_arch)

    model = Network(cfg.MODEL.INIT_CHANNELS, cfg.MODEL.NUM_CLASSES,
                    cfg.MODEL.LAYERS, genotype)
    model = model.cuda()

    optimizer = optim.Adam(
        model.parameters(),
        lr=cfg.TRAIN.LR,
        weight_decay=cfg.TRAIN.WD,
    )

    # resume && make log dir and logger
    if args.load_path and os.path.exists(args.load_path):
        checkpoint_file = os.path.join(args.load_path, 'Model',
                                       'checkpoint_best.pth')
        assert os.path.exists(checkpoint_file)
        checkpoint = torch.load(checkpoint_file)

        # load checkpoint
        begin_epoch = checkpoint['epoch']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        best_acc1 = checkpoint['best_acc1']
        optimizer.load_state_dict(checkpoint['optimizer'])
        args.path_helper = checkpoint['path_helper']

        logger = create_logger(args.path_helper['log_path'])
        logger.info("=> loaded checkloggpoint '{}'".format(checkpoint_file))
    else:
        exp_name = args.cfg.split('/')[-1].split('.')[0]
        args.path_helper = set_path('logs_scratch', exp_name)
        logger = create_logger(args.path_helper['log_path'])
        begin_epoch = cfg.TRAIN.BEGIN_EPOCH
        best_acc1 = 0.0
        last_epoch = -1
    logger.info(args)
    logger.info(cfg)
    logger.info(f"selected architecture: {genotype}")
    logger.info("Number of parameters: {}".format(count_parameters(model)))

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(os.path.join(this_dir, './models', cfg.MODEL.NAME + '.py'),
                 args.path_helper['ckpt_path'])

    # dataloader
    train_dataset = DeepSpeakerDataset(Path(cfg.DATASET.DATA_DIR),
                                       cfg.DATASET.PARTIAL_N_FRAMES, 'train')
    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE,
        num_workers=cfg.DATASET.NUM_WORKERS,
        pin_memory=True,
        shuffle=True,
        drop_last=True,
    )
    test_dataset = DeepSpeakerDataset(Path(cfg.DATASET.DATA_DIR),
                                      cfg.DATASET.PARTIAL_N_FRAMES,
                                      'test',
                                      is_test=True)
    test_loader = torch.utils.data.DataLoader(
        dataset=test_dataset,
        batch_size=1,
        num_workers=cfg.DATASET.NUM_WORKERS,
        pin_memory=True,
        shuffle=True,
        drop_last=True,
    )

    # training setting
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'train_global_steps': begin_epoch * len(train_loader),
        'valid_global_steps': begin_epoch // cfg.VAL_FREQ,
    }

    # training loop
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer,
        cfg.TRAIN.END_EPOCH,
        cfg.TRAIN.LR_MIN,
        last_epoch=last_epoch)

    for epoch in tqdm(range(begin_epoch, cfg.TRAIN.END_EPOCH),
                      desc='train progress'):
        model.train()
        model.drop_path_prob = cfg.MODEL.DROP_PATH_PROB * epoch / cfg.TRAIN.END_EPOCH

        train_from_scratch(cfg, model, optimizer, train_loader, criterion,
                           epoch, writer_dict)

        if epoch % cfg.VAL_FREQ == 0 or epoch == cfg.TRAIN.END_EPOCH - 1:
            acc = validate_identification(cfg, model, test_loader, criterion)

            # remember best acc@1 and save checkpoint
            is_best = acc > best_acc1
            best_acc1 = max(acc, best_acc1)

            # save
            logger.info('=> saving checkpoint to {}'.format(
                args.path_helper['ckpt_path']))
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                    'path_helper': args.path_helper,
                    'genotype': genotype,
                }, is_best, args.path_helper['ckpt_path'],
                'checkpoint_{}.pth'.format(epoch))

        lr_scheduler.step(epoch)
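
save_checkpoint() is not part of this snippet. A minimal sketch that matches how it is called above (a state dict, an is_best flag, the checkpoint directory, and a filename) and the checkpoint_best.pth file reloaded earlier could look like this; the real helper in the repository may differ.

import os
import shutil
import torch

def save_checkpoint(state, is_best, output_dir, filename):
    # Write the checkpoint; keep an extra copy under a fixed name when it is
    # the best seen so far, matching the 'checkpoint_best.pth' file loaded above.
    filepath = os.path.join(output_dir, filename)
    torch.save(state, filepath)
    if is_best:
        shutil.copyfile(filepath, os.path.join(output_dir, 'checkpoint_best.pth'))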
Example #3
                       args.max_length,
                       args.embedding_type,
                       transform=data_transforms[x])
        for x in ['train', 'val']
    }

    # loss function
    if args.CMPM:
        print("import CMPM")

    compute_loss = Loss(args).cuda()
    model = Network(args).cuda()

    # compute the model size:
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # load checkpoint:
    if args.resume is not None:
        start_epoch, model = load_checkpoint(model, args.resume)
    else:
        print("Do not load checkpoint,Epoch start from 0")
        start_epoch = 0

    # optimizer:
    optimizer = optimizer_function(args, model)
    exp_lr_scheduler = lr_scheduler(optimizer, args)
    main(model, dataloaders, compute_loss, optimizer, exp_lr_scheduler,
         start_epoch, args, checkpoint_dir)
    start_test()
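
load_checkpoint() is likewise external to this snippet. A plausible counterpart, assuming the checkpoint stores an 'epoch' counter alongside a 'state_dict' (both assumptions, not confirmed by the source):

import torch

def load_checkpoint(model, resume_path):
    # Restore weights and return the epoch to resume from together with the model.
    checkpoint = torch.load(resume_path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    start_epoch = checkpoint.get('epoch', 0)
    return start_epoch, model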
Example #4
for i in range(len(HEADS)):
    train_datasets[i] = loadedDataset(train_dir, HEADS[i], args.frame_num, args.time_freq, args.patch_size)
    train_loaders[i] = torch.utils.data.DataLoader(
        train_datasets[i], batch_size=args.batch_size, shuffle=True, pin_memory=True)

model = Network(in_channels=3, out_channels=3).cuda()
rmse_loss = RMSE_Loss()
criterion = nn.MSELoss()

if os.path.exists(os.path.join(save_dir, 'checkpoint.pth.tar')):
    # load existing model
    print('==> loading existing model')
    model_info = torch.load(os.path.join(save_dir, 'checkpoint.pth.tar'))
    model.load_state_dict(model_info['state_dict'])
    optimizer = torch.optim.Adam(model.parameters())
    optimizer.load_state_dict(model_info['optimizer'])
    cur_epoch = model_info['epoch']
else:
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    # create model
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    cur_epoch = 0


for epoch in range(cur_epoch, args.epochs + 1):
    rmse = AverageMeter()
    losses = AverageMeter()

    if epoch > args.epochs // 2:
        pass  # remainder of the training loop is not included in this excerpt
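
AverageMeter is not defined in this excerpt; it is presumably the standard running-average helper from the PyTorch ImageNet example, sketched below for completeness.

class AverageMeter:
    """Tracks the current value and the running average of a metric."""

    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count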
Example #5
def start(args):
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    cudnn.benchmark = True
    cudnn.enabled = True
    logging.info("args = %s", args)

    dataset = LoadData(args.data_name)
    if args.data_name == 'SBM_PATTERN':
        in_dim = 3
        num_classes = 2
    else:
        in_dim = 7
        num_classes = 6
    print(f"=> input dimension: {in_dim}, number classes: {num_classes}")

    criterion = MyCriterion(num_classes)
    criterion = criterion.cuda()
    
    if args.data_name == 'SBM_PATTERN':
        genotype = PATTERN_Net
    elif args.data_name == 'SBM_CLUSTER':
        genotype = CLUSTER_Net
    else:
        print("Unknown dataset.")
        exit()

    print('=> loading from genotype: \n', genotype)
    model = Network(args, genotype, num_classes, in_dim, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", count_parameters_in_MB(model))

    train_data, val_data, test_data = dataset.train, dataset.val, dataset.test

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        pin_memory=True,
        num_workers=args.workers,
        collate_fn=dataset.collate,
        shuffle=True)

    valid_queue = torch.utils.data.DataLoader(
        val_data, batch_size=args.batch_size,
        pin_memory=True,
        num_workers=args.workers,
        collate_fn=dataset.collate,
        shuffle=False)

    test_queue = torch.utils.data.DataLoader(
        test_data, batch_size=args.batch_size,
        pin_memory=True,
        num_workers=args.workers,
        collate_fn=dataset.collate,
        shuffle=False)

    if args.optimizer == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    elif args.optimizer == 'ADAM':
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               mode='min',
                                                               factor=0.5,
                                                               patience=5,
                                                               verbose=True)
    
    for epoch in range(args.epochs):
        logging.info('[EPOCH]\t%d', epoch)
        if args.optimizer == 'SGD':
            scheduler.step()
            lr = scheduler.get_lr()[0]
            logging.info('[LR]\t%f', lr)

        # training
        macro_acc, micro_acc, train_obj = train(train_queue, model, criterion, optimizer)
        # validation
        macro_acc, micro_acc, valid_obj = infer(valid_queue, model, criterion, stage='validating')
        # testing
        macro_acc, micro_acc, test_obj = infer(test_queue, model, criterion, stage=' testing   ')

        if args.optimizer == 'ADAM':
            scheduler.step(valid_obj)
            if optimizer.param_groups[0]['lr'] < 1e-5:
                print("\n!! LR SMALLER OR EQUAL TO MIN LR THRESHOLD.")
                break
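
count_parameters_in_MB() is used by several of these snippets but never shown. In DARTS-derived code it usually returns the parameter count in millions while skipping auxiliary classifier heads; the following is a sketch of that convention, not necessarily the repository's exact definition.

def count_parameters_in_MB(model):
    # Total parameter count in millions, excluding auxiliary classifier heads.
    return sum(p.numel() for name, p in model.named_parameters()
               if 'auxiliary' not in name) / 1e6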
Example #6
def start(args):
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    cudnn.enabled = True
    cudnn.benchmark = True
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    dataset = LoadData(args.data_name)
    in_dim = dataset.train[0][0].ndata['feat'][0].size(0)
    num_classes = len(np.unique(np.array(dataset.train[:][1])))
    print(f"=> input dimension: {in_dim}, number classes: {num_classes}")
    
    if args.data_name == 'MNIST':
        genotype = MNIST_Net
    elif args.data_name == 'CIFAR10':
        genotype = CIFAR10_Net
    else:
        print("Unknown dataset.")
        exit()

    print('=> loading from genotype: \n', genotype)
    model = Network(args, genotype, num_classes, in_dim, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", count_parameters_in_MB(model))

    train_data, val_data, test_data = dataset.train, dataset.val, dataset.test

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        pin_memory=True,
        num_workers=args.workers,
        collate_fn=dataset.collate,
        shuffle=True)  # newly added: shuffle the training set

    valid_queue = torch.utils.data.DataLoader(
        val_data, batch_size=args.batch_size,
        pin_memory=True,
        num_workers=args.workers,
        collate_fn=dataset.collate,
        shuffle=False)

    test_queue = torch.utils.data.DataLoader(
        test_data, batch_size=args.batch_size,
        pin_memory=True,
        num_workers=args.workers,
        collate_fn=dataset.collate,
        shuffle=False)
    
    if args.optimizer == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    elif args.optimizer == 'ADAM':
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=3e-6)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               mode='min',
                                                               factor=0.5,
                                                               patience=5,
                                                               verbose=True)
        
    for epoch in range(args.epochs):
        logging.info('[EPOCH]\t%d', epoch)
        if args.optimizer == 'SGD':
            scheduler.step()
            lr = scheduler.get_lr()[0]
            logging.info('[LR]\t%f', lr)

        # training
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion, stage='validating')
        # testing
        test_acc, test_obj = infer(test_queue, model, criterion, stage='testing   ')
        desc = '[train] acc: {:.3f}, loss: {:.3f}\t[validate] acc:{:.3f}, loss: {:.3f}\t[test] acc: {:.3f}, loss: {:.3f}'.format(
            train_acc, train_obj, valid_acc, valid_obj, test_acc, test_obj
        )
        logging.info(desc)

        if args.optimizer == 'ADAM':
            scheduler.step(valid_obj)
            if optimizer.param_groups[0]['lr'] < 1e-5:
                print("\n!! LR SMALLER OR EQUAL TO MIN LR THRESHOLD.")
                break
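
train() and infer() report accuracy values but are not included in the snippet. For orientation, the top-1 accuracy computation such loops typically rely on looks like this (an illustrative helper, not code from the repository):

import torch

def top1_accuracy(logits, labels):
    # Fraction of samples whose arg-max prediction matches the label, in percent.
    preds = logits.argmax(dim=1)
    return (preds == labels).float().mean().item() * 100.0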