Example #1
 def __getitem__(self, idx):
     # count the __getitem__ calls; once num_workers calls have arrived,
     # release the barrier so every waiting worker proceeds together
     with self.count.get_lock():
         self.count.value += 1
         if self.count.value == self.num_workers:
             self.barrier.release()
     self.barrier.acquire()
     self.barrier.release()
     # each DataLoader worker reports its own RNG seed
     return torch.initial_seed()
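Not part of the original snippet, but a minimal sketch of how a dataset like this could be exercised; SeedDataset, its constructor arguments, and the loader sizes are placeholders. Each returned item is the value of torch.initial_seed() inside the worker that produced it, so the output reveals the per-worker seeds:

import torch
from torch.utils.data import DataLoader

dataset = SeedDataset(size=8, num_workers=4)  # hypothetical class owning the __getitem__ above
loader = DataLoader(dataset, batch_size=2, num_workers=4)
for batch in loader:
    print(batch)  # seeds reported by whichever workers produced this batch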
Example #2
myDataset = MyDataset()
for sampleInfo in config['samples']:
    if 'ignore' in sampleInfo and sampleInfo['ignore']: continue
    name = sampleInfo['name']
    myDataset.addSample(name, sampleInfo['path'], weight=sampleInfo['xsec']/sampleInfo['ngen'])
    myDataset.setProcessLabel(name, sampleInfo['label'])
myDataset.initialize()

procNames = myDataset.sampleInfo['procName'].unique()

import torch
from torch.utils.data import DataLoader

# 60/20/20 train/validation/test split
lengths = [int(0.6*len(myDataset)), int(0.2*len(myDataset))]
lengths.append(len(myDataset)-sum(lengths))
torch.manual_seed(config['training']['randomSeed1'])  # fixed seed so the split is reproducible
trnDataset, valDataset, testDataset = torch.utils.data.random_split(myDataset, lengths)
torch.manual_seed(torch.initial_seed())  # re-seeds with the same value, resetting the generator state after the split

kwargs = {'num_workers':config['training']['nDataLoaders']}
allLoader = DataLoader(myDataset, batch_size=args.batch, shuffle=False, **kwargs)
trnLoader = DataLoader(trnDataset, batch_size=args.batch, shuffle=False, **kwargs)
valLoader = DataLoader(valDataset, batch_size=args.batch, shuffle=False, **kwargs)
testLoader = DataLoader(testDataset, batch_size=args.batch, shuffle=False, **kwargs)

from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt

bins = [None, None, None]
# nbinsx (number of bins per histogram) is assumed to be defined earlier in the original script
imgHist_val_sig = [np.zeros(nbinsx[i]) for i in range(3)]
imgHist_val_bkg = [np.zeros(nbinsx[i]) for i in range(3)]
imgSum_val_sig, imgSum_val_bkg = None, None
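As an aside (not in the original example), recent PyTorch versions let random_split take a generator argument, which keeps the split above reproducible without re-seeding the global RNG; a minimal sketch, assuming the same myDataset and config:

g = torch.Generator().manual_seed(config['training']['randomSeed1'])
n = len(myDataset)
lengths = [int(0.6*n), int(0.2*n)]
lengths.append(n - sum(lengths))
trnDataset, valDataset, testDataset = torch.utils.data.random_split(myDataset, lengths, generator=g)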
Example #3
 def __getitem__(self, idx):
     return torch.initial_seed()
Example #4
        # the config object should only be used in this file, to keep an overview of where it is used
        config = get_config(args.config_file, args.config_overwrites)
        run_folder = prepare_experiment(args, config)

    logger = get_logger_to_file(run_folder, "main")

    logger.info("Running: %s", str(sys.argv))

    #
    # random seeds
    #
    torch.manual_seed(config["random_seed"])
    numpy.random.seed(config["random_seed"])
    random.seed(config["random_seed"])

    logger.info("Torch seed: %i ", torch.initial_seed())

    # hardcode gpu usage
    cuda_device = 0  # always take the first -> set others via cuda flag in bash
    perf_monitor = PerformanceMonitor.get()
    perf_monitor.start_block("startup")

    #
    # create (or load) model instance
    # -------------------------------
    #
    # * vocab (pre-built, to make the embedding matrix smaller, see generate_vocab.py)
    # * pre-trained embedding
    # * network
    # * optimizer & loss function
    #
Example #5
def main():
    configs = prepare()
    if configs.evaluate is not None:
        configs.evaluate.fn(configs)
        return

    import numpy as np
    import tensorboardX
    import torch
    import torch.backends.cudnn as cudnn
    from torch.utils.data import DataLoader
    from tqdm import tqdm

    ################################
    # Train / Eval Kernel Function #
    ################################

    def adjust_learning_rate(optimizer, epoch, args_lr):
        """Sets the learning rate to the initial LR decayed by half by every 5 or 10 epochs"""
        if epoch > 0:
            if epoch <= 30:
                lr = args_lr * (0.5**(epoch // 5))
            else:
                lr = args_lr * (0.5**(epoch // 10))
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            writer.add_scalar('lr_dis', lr, epoch)

    # train kernel
    def train(model, source_loader, target_loader, criterion, optimizer_g,
              optimizer_cls, optimizer_dis, scheduler_g, scheduler_cls,
              current_step, writer, cons):

        model.train()
        loss_total = 0
        loss_adv_total = 0
        loss_node_total = 0
        data_total = 0

        batch_iterator = zip(loop_iterable(source_loader),
                             loop_iterable(target_loader))

        for _ in trange(len(source_loader)):
            (inputs, targets), (inputs_t, _) = next(batch_iterator)

            if isinstance(inputs, dict):
                for k, v in inputs.items():
                    batch_size = v.size(0)
                    inputs[k] = v.to(configs.device, non_blocking=True)
            else:
                batch_size = inputs.size(0)
                inputs = inputs.to(configs.device, non_blocking=True)

            if isinstance(inputs_t, dict):
                for k, v in inputs_t.items():
                    batch_size = v.size(0)
                    inputs_t[k] = v.to(configs.device, non_blocking=True)
            else:
                batch_size = inputs_t.size(0)
                inputs_t = inputs_t.to(configs.device, non_blocking=True)

            if isinstance(targets, dict):
                for k, v in targets.items():
                    targets[k] = v.to(configs.device, non_blocking=True)
            else:
                targets = targets.to(configs.device, non_blocking=True)

            outputs = model(inputs)

            pred_t1, pred_t2 = model.module.inst_seg_net(
                {
                    'features': inputs_t['features'],
                    'one_hot_vectors': inputs_t['one_hot_vectors']
                },
                constant=cons,
                adaptation=True)

            loss_s = criterion(outputs, targets)

            # Adversarial loss
            loss_adv = -1 * discrepancy_loss(pred_t1, pred_t2)

            loss = loss_s + loss_adv
            loss.backward()
            optimizer_g.step()
            optimizer_cls.step()
            optimizer_g.zero_grad()
            optimizer_cls.zero_grad()

            # Local Alignment
            _, _, feat_node_s = model.module.inst_seg_net(
                {
                    'features': inputs['features'],
                    'one_hot_vectors': inputs['one_hot_vectors']
                },
                node_adaptation_s=True)

            _, _, feat_node_t = model.module.inst_seg_net(
                {
                    'features': inputs_t['features'],
                    'one_hot_vectors': inputs_t['one_hot_vectors']
                },
                node_adaptation_t=True)

            sigma_list = [0.01, 0.1, 1, 10, 100]
            loss_node_adv = 1 * mmd.mix_rbf_mmd2(feat_node_s, feat_node_t,
                                                 sigma_list)
            loss = loss_node_adv

            loss.backward()
            optimizer_dis.step()
            optimizer_dis.zero_grad()

            loss_total += loss_s.item() * batch_size
            loss_adv_total += loss_adv.item() * batch_size
            loss_node_total += loss_node_adv.item() * batch_size
            data_total += batch_size

            writer.add_scalar('loss_s/train', loss_total / data_total,
                              current_step)
            writer.add_scalar('loss_adv/train', loss_adv_total / data_total,
                              current_step)
            writer.add_scalar('loss_node/train', loss_node_total / data_total,
                              current_step)
            current_step += batch_size

        if scheduler_g is not None:
            scheduler_g.step()

        if scheduler_cls is not None:
            scheduler_cls.step()

    # evaluate kernel
    def evaluate(model, loader, split='test'):
        meters = {}
        for k, meter in configs.train.meters.items():
            meters[k.format(split)] = meter()
        model.eval()
        with torch.no_grad():
            for inputs, targets in tqdm(loader, desc=split, ncols=0):
                if isinstance(inputs, dict):
                    for k, v in inputs.items():
                        inputs[k] = v.to(configs.device, non_blocking=True)
                else:
                    inputs = inputs.to(configs.device, non_blocking=True)
                if isinstance(targets, dict):
                    for k, v in targets.items():
                        targets[k] = v.to(configs.device, non_blocking=True)
                else:
                    targets = targets.to(configs.device, non_blocking=True)
                outputs = model(inputs)
                for meter in meters.values():
                    meter.update(outputs, targets)
        for k, meter in meters.items():
            meters[k] = meter.compute()
        return meters

    ###########
    # Prepare #
    ###########

    if configs.device == 'cuda':
        cudnn.benchmark = True
        if configs.get('deterministic', False):
            cudnn.deterministic = True
            cudnn.benchmark = False
    if ('seed' not in configs) or (configs.seed is None):
        # fall back to the seed torch chose at startup, reduced so that
        # numpy.random.seed (which requires a 32-bit value) accepts it
        configs.seed = torch.initial_seed() % (2**32 - 1)
    seed = configs.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    print(configs)

    #####################################################################
    # Initialize DataLoaders, Model, Criterion, LRScheduler & Optimizer #
    #####################################################################

    print(f'\n==> loading source dataset "{configs.source_dataset}"')
    source_dataset = configs.source_dataset()
    source_loaders = {
        "train":
        DataLoader(
            source_dataset["train"],
            shuffle=True,
            batch_size=configs.train.batch_size,
            drop_last=True,
            num_workers=configs.data.num_workers,
            pin_memory=True,
            worker_init_fn=lambda worker_id: np.random.seed(seed + worker_id))
    }

    print(f'\n==> loading target dataset "{configs.target_dataset}"')
    target_dataset = configs.target_dataset()
    target_loaders = {}
    for split in target_dataset:
        target_loaders[split] = DataLoader(
            target_dataset[split],
            shuffle=(split == 'train'),
            batch_size=configs.train.batch_size,
            drop_last=True,
            num_workers=configs.data.num_workers,
            pin_memory=True,
            worker_init_fn=lambda worker_id: np.random.seed(seed + worker_id))

    print(f'\n==> creating model "{configs.model}"')
    model = configs.model()
    if configs.device == 'cuda':
        model = torch.nn.DataParallel(model)
    model = model.to(configs.device)
    criterion = configs.train.criterion().to(configs.device)
    #params
    gen_params = [{
        'params': v
    } for k, v in model.module.inst_seg_net.g.named_parameters()
                  if 'pred_offset' not in k]

    cls_params = [{
        'params': model.module.inst_seg_net.c1.parameters()
    }, {
        'params': model.module.inst_seg_net.c2.parameters()
    }, {
        'params': model.module.center_reg_net.parameters()
    }, {
        'params': model.module.box_est_net.parameters()
    }]

    dis_params = [{
        'params': model.module.inst_seg_net.g.parameters()
    }, {
        'params': model.module.inst_seg_net.attention_s.parameters()
    }, {
        'params': model.module.inst_seg_net.attention_t.parameters()
    }]

    optimizer_g = configs.train.optimizer_g(gen_params)
    optimizer_cls = configs.train.optimizer_cls(cls_params)
    optimizer_dis = configs.train.optimizer_dis(dis_params)

    last_epoch, best_metrics = -1, {m: None for m in configs.train.metrics}

    if os.path.exists(configs.train.checkpoint_path):

        print(f'==> loading checkpoint "{configs.train.checkpoint_path}"')
        checkpoint = torch.load(configs.train.checkpoint_path)

        print(' => loading model')
        model.load_state_dict(checkpoint.pop('model'))

        if 'optimizer_g' in checkpoint and checkpoint[
                'optimizer_g'] is not None:
            print(' => loading optimizer_g')
            optimizer_g.load_state_dict(checkpoint.pop('optimizer_g'))

        if 'optimizer_cls' in checkpoint and checkpoint[
                'optimizer_cls'] is not None:
            print(' => loading optimizer_cls')
            optimizer_cls.load_state_dict(checkpoint.pop('optimizer_cls'))

        if 'optimizer_dis' in checkpoint and checkpoint[
                'optimizer_dis'] is not None:
            print(' => loading optimizer_dis')
            optimizer_dis.load_state_dict(checkpoint.pop('optimizer_dis'))

        last_epoch = checkpoint.get('epoch', last_epoch)
        meters = checkpoint.get('meters', {})

        for m in configs.train.metrics:
            best_metrics[m] = meters.get(m + '_best', best_metrics[m])

        del checkpoint

    if 'scheduler_g' in configs.train and configs.train.scheduler_g is not None:
        configs.train.scheduler_g.last_epoch = last_epoch
        print(f'==> creating scheduler "{configs.train.scheduler_g}"')
        scheduler_g = configs.train.scheduler_g(optimizer_g)
    else:
        scheduler_g = None

    if 'scheduler_c' in configs.train and configs.train.scheduler_c is not None:
        configs.train.scheduler_c.last_epoch = last_epoch
        print(f'==> creating scheduler "{configs.train.scheduler_c}"')
        scheduler_c = configs.train.scheduler_c(optimizer_cls)
    else:
        scheduler_c = None

    ############
    # Training #
    ############

    if last_epoch >= configs.train.num_epochs:
        meters = dict()
        for split, loader in target_loaders.items():
            if split != 'train':
                meters.update(evaluate(model, loader=loader, split=split))
        for k, meter in meters.items():
            print(f'[{k}] = {meter:.2f}')
        return

    with tensorboardX.SummaryWriter(configs.train.save_path) as writer:
        step_size = min(len(source_dataset['train']),
                        len(target_dataset['train']))

        for current_epoch in range(last_epoch + 1, configs.train.num_epochs):
            current_step = current_epoch * step_size
            cons = math.sin(
                (current_epoch + 1) / configs.train.num_epochs * math.pi / 2)
            adjust_learning_rate(optimizer_dis, current_epoch,
                                 configs.train.base_lr)

            writer.add_scalar('lr_g', scheduler_g.get_lr()[0], current_epoch)
            writer.add_scalar('lr_c', scheduler_c.get_lr()[0], current_epoch)

            # train
            print(
                f'\n==> training epoch {current_epoch}/{configs.train.num_epochs}'
            )
            train(model,
                  source_loader=source_loaders['train'],
                  target_loader=target_loaders['train'],
                  criterion=criterion,
                  optimizer_g=optimizer_g,
                  optimizer_cls=optimizer_cls,
                  optimizer_dis=optimizer_dis,
                  scheduler_g=scheduler_g,
                  scheduler_cls=scheduler_c,
                  current_step=current_step,
                  writer=writer,
                  cons=cons)
            current_step += step_size

            # evaluate
            meters = dict()
            for split, loader in source_loaders.items():
                if split != 'train':
                    meters.update(evaluate(model, loader=loader, split=split))
            for k, meter in meters.items():
                print(f'Source [{k}] = {meter:.2f}')

            meters = dict()
            for split, loader in target_loaders.items():
                if split != 'train':
                    meters.update(evaluate(model, loader=loader, split=split))

            # check whether it is the best
            best = {m: False for m in configs.train.metrics}
            for m in configs.train.metrics:
                if best_metrics[m] is None or best_metrics[m] < meters[m]:
                    best_metrics[m], best[m] = meters[m], True
                meters[m + '_best'] = best_metrics[m]
            # log in tensorboard
            for k, meter in meters.items():
                print(f'Target [{k}] = {meter:.2f}')
                writer.add_scalar(k, meter, current_step)

            # save checkpoint
            torch.save(
                {
                    'epoch': current_epoch,
                    'model': model.state_dict(),
                    'optimizer_g': optimizer_g.state_dict(),
                    'optimizer_cls': optimizer_cls.state_dict(),
                    'optimizer_dis': optimizer_dis.state_dict(),
                    'meters': meters,
                    'configs': configs,
                }, configs.train.checkpoint_path)
            shutil.copyfile(
                configs.train.checkpoint_path,
                configs.train.checkpoints_path.format(current_epoch))
            for m in configs.train.metrics:
                if best[m]:
                    shutil.copyfile(configs.train.checkpoint_path,
                                    configs.train.best_checkpoint_paths[m])
            if best.get(configs.train.metric, False):
                shutil.copyfile(configs.train.checkpoint_path,
                                configs.train.best_checkpoint_path)
            print(f'[save_path] = {configs.train.save_path}')
Example #6
    x_te = vec.transform(te_sentences)
    x_te = np.array(
        lib.pad_sequence(x_te,
                         maxlen=opt.maxlen,
                         padding='post',
                         truncating='post',
                         value=0))
    n_txt_feats = int(max(x_tr.max(), x_te.max()) + 10)
    logger.info("  - txt train/test min/max: [{}|{}] [{}|{}]".format(
        x_tr.min(), x_tr.max(), x_te.min(), x_te.max()))

    tr_data = [x_tr, np.array(tr_labels)]
    te_data = [x_te, np.array(te_labels)]

    torch.manual_seed(opt.seed)
    print("Seed for random numbers: ", torch.initial_seed())

    model = VDCNN(n_classes=n_classes,
                  num_embedding=n_txt_feats,
                  embedding_dim=16,
                  depth=opt.depth,
                  n_fc_neurons=2048,
                  shortcut=opt.shortcut)

    if opt.gpu:
        model.cuda()

    if opt.class_weights:
        criterion = nn.CrossEntropyLoss(
            torch.cuda.FloatTensor(opt.class_weights))
    else:
Example #7
 def __getitem__(self, idx):
     return torch.initial_seed()
                                annotation_path=args.annotation_path_test)
    """
    train_iter = torch.utils.data.DataLoader(
        train_loader,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=8,  # 4, # change this part accordingly
        pin_memory=True)
    """
    eval_iter = torch.utils.data.DataLoader(val_loader,
                                            batch_size=args.batch_size,
                                            shuffle=True,
                                            num_workers=1,  # 4, # change this part accordingly
                                            pin_memory=True)
    """
    iter_seed = torch.initial_seed() + 100

    network = AnomalyDetector()
    net = model(
        net=network,
        criterion=RegularizedLoss(network, custom_objective).cuda(),
        model_prefix=args.model_dir,
        step_callback_freq=5,
        save_checkpoint_freq=args.save_frequency,
        opt_batch_size=args.batch_size,  # optional, 60 in the paper
    )

    if torch.cuda.is_available():
        net.net.cuda()
        torch.cuda.manual_seed(args.random_seed)
        net.net = torch.nn.DataParallel(net.net).cuda()
def get_plot_args():
    return dict(num_units_to_plot=5, seed=torch.initial_seed())
Example #10
logger = get_logger(wkdir + "train.log")

use_cuda, cuda_device, cuda_devices, multi_gpu = parse_cuda(cnfg.use_cuda, cnfg.gpuid)

set_random_seed(cnfg.seed, use_cuda)

td = h5py.File(cnfg.train_data, "r")
vd = h5py.File(cnfg.dev_data, "r")

ntrain = td["ndata"][:].item()
nvalid = vd["ndata"][:].item()
nword = td["nword"][:].tolist()
nwordi, nwordt = nword[0], nword[-1]

logger.info("Design models with seed: %d" % torch.initial_seed())
mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

fine_tune_m = cnfg.fine_tune_m

tl = [str(i) for i in range(ntrain)]

mymodel = init_model_params(mymodel)
mymodel.apply(init_fixing)
if fine_tune_m is not None:
	logger.info("Load pre-trained model from: " + fine_tune_m)
	mymodel = load_model_cpu(fine_tune_m, mymodel)

#lw = torch.ones(nwordt).float()
#lw[0] = 0.0
#lossf = nn.NLLLoss(lw, ignore_index=0, reduction='sum')
Example #11
def run():
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch Environment")

    train_parser = parser.add_argument_group("Train Parameters")
    train_parser.add_argument("--epochs",
                              type=int,
                              default=160,
                              metavar="E",
                              help="number of epochs to train (default: 10)")
    train_parser.add_argument(
        "--batch-size",
        type=int,
        default=128,
        metavar="B",
        help="input batch size for training (default: 128)")
    train_parser.add_argument(
        "--test-batch-size",
        type=int,
        default=128,
        metavar="BT",
        help="input batch size for testing (default: 128)")
    train_parser.add_argument("--lr_decay",
                              type=float,
                              default=0.1,
                              metavar="LD",
                              help="learning rate decay rate")
    train_parser.add_argument("--schedule",
                              type=int,
                              nargs="*",
                              default=[80, 120],
                              help="learning rate is decayed at these epochs")
    train_parser.add_argument("--warmup-epochs",
                              type=int,
                              default=5,
                              metavar="WE",
                              help="number of warmup epochs")
    train_parser.add_argument("--no-cuda",
                              action="store_true",
                              default=False,
                              help="disables CUDA training")
    train_parser.add_argument(
        "--seed",
        type=int,
        default=7186021514134990023,
        metavar="S",
        help="random seed (default: 7186021514134990023)")

    simulator_parser = parser.add_argument_group("Simulator Parameters")
    simulator_parser.add_argument("--sim-size",
                                  type=int,
                                  default=16,
                                  metavar="N",
                                  help="size of simulator")
    simulator_parser.add_argument("--sim-gamma-shape",
                                  type=float,
                                  default=100,
                                  metavar="GSH",
                                  help="gamma shape parameter")
    simulator_parser.add_argument("--sim-gamma-scale",
                                  type=float,
                                  default=1.28,
                                  metavar="GSC",
                                  help="gamma scale parameter")

    optimizer_parser = parser.add_argument_group("Optimizer Parameters")
    optimizer_parser.add_argument("--lr",
                                  type=float,
                                  default=0.1,
                                  metavar="LR",
                                  help="learning rate (default: 0.1)")
    optimizer_parser.add_argument("--momentum",
                                  type=float,
                                  default=0.9,
                                  metavar="M",
                                  help="SGD momentum (default: 0.9)")
    optimizer_parser.add_argument("--dc",
                                  type=float,
                                  default=2,
                                  metavar="DC",
                                  help="Delay Compensation (default: 0)")
    optimizer_parser.add_argument("--weight-decay",
                                  type=float,
                                  default=1e-4,
                                  metavar="WD",
                                  help="SGD weight decay (default: 0)")

    args = parser.parse_args()

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    random.seed(torch.initial_seed())

    print("*** Configuration ***")
    for k in vars(args):
        print(str(k), ":", str(getattr(args, k)))

    train_set, test_set = get_cifar_10_data_set(
    )  # get CIFAR-10 train and test set
    args.train_loader = data_loader(train_set, is_train=True, args=args)
    args.test_loader = data_loader(test_set, is_train=False, args=args)
    args.model = resnet20_cifar()  # get ResNet-20 Model
    if args.cuda:
        args.model = args.model.cuda()
    args.loss_fn = nn.CrossEntropyLoss()  # use cross-entropy loss

    # create optimizer
    args.optimizer = optim.SGD(args.model.parameters(),
                               lr=args.lr,
                               momentum=args.momentum,
                               weight_decay=args.weight_decay)

    assert len(args.optimizer.param_groups) == 1

    # initialize optimizer's momentum
    for p in args.model.parameters():
        args.optimizer.state[p]["momentum_buffer"] = torch.zeros_like(p.data)

    # clone weights for master
    args.master_weights = init_weights(args.model.parameters())

    # clone weights, one for each  worker
    args.worker_weights = [
        init_weights(args.model.parameters()) for _ in range(args.sim_size)
    ]

    # clone optimizer, one for each  worker
    args.worker_momentum = [
        init_momentum(args.model.parameters()) for _ in range(args.sim_size)
    ]

    # create the gamma distribution order
    args.worker_order = iter(GammaRandomWorkerSelection(args))

    # initialize dana
    args.momentum_sum = {
        id(p): torch.zeros_like(p)
        for p in args.model.parameters()
    }

    # initialize warmup
    args.warmup_lr = np.linspace(args.lr / args.sim_size, args.lr,
                                 len(args.train_loader) *
                                 args.warmup_epochs).tolist()

    print("*** Training with DANA-DC ***")

    for epoch in range(args.epochs):
        learning_rate_decay(epoch, args)
        train(epoch, args)
        evaluate(epoch, args)
Example #12
import seaborn as sns
import matplotlib
matplotlib.use("agg")
from matplotlib import pyplot as plt

from model import *
from generative_utils import *
from utils import *
from arguments import *
from data import *

seed = 12
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
torch.initial_seed()  # returns the seed just set; the return value is not used here
torch.set_printoptions(precision=2)

plt.rcParams["font.weight"] = "bold"
plt.rcParams["axes.labelweight"] = "bold"
plt.rcParams['lines.linewidth'] = 2.0


def get_df(traj_p):
    df = []
    columns = ['ix', 'x', 'y']
    #columns = ['ix', 't', 'x', 'y']
    for ix in range(traj_p.size(0)):
        #for t in range(traj_p.size(1)):
        #df.append([ix, t, traj_p[ix, t, 0].item(), traj_p[ix, t, 1].item()])
        df.append([ix, traj_p[ix, ..., 0].item(), traj_p[ix, ..., 1].item()])
Example #13
def main():
    # # check/wait GPU is free
    # allocated_ids = [int(item) for item in os.environ['CUDA_VISIBLE_DEVICES'].split(',')]
    # gpu_free = False
    # while not gpu_free:
    #     tmp_gpu_free = True
    #     for gpu_id in allocated_ids:
    #         # print(gpu_id, len(GPUtil.getGPUs()))
    #         mem_used = GPUtil.getGPUs()[gpu_id].memoryUsed
    #         if mem_used > 1000:
    #             # print('mem used', gpu_id, mem_used)
    #             tmp_gpu_free = False
    #             break
    #     gpu_free = tmp_gpu_free
    #     if not gpu_free:
    #         time.sleep(300)

    # parse args
    global args
    args = parse_args(sys.argv[1])
    args.test_size = 512

    # -------------------- default arg settings for this model --------------------
    # TODO: find better way for defining model-specific default args
    if hasattr(args, 'norm'):
        args.normD = args.norm
        args.normQ = args.norm
        args.normG = args.norm


    if hasattr(args, 'lambda_D_GAN') and args.lambda_D_GAN != 1.:
        """ process deprecated lambda_D_GAN """
        args.lambda_GAN = args.lambda_D_GAN
        assert args.lambda_D_GAN == args.lambda_G_GAN

    # add timestamp to ckpt_dir
    # if not args.debug:
    args.timestamp = time.strftime('%m%d%H%M%S', time.localtime())
    args.ckpt_dir += '_' + args.timestamp
    if args.lambda_G_recon > 0:
        args.display_ncols = 5 if args.lambda_dis > 0 else 3
        if args.lambda_dis > 0 and args.lambda_G_rand_recon > 0:
            args.display_ncols += 1
    else:
        args.display_ncols = 3 if args.lambda_dis > 0 else 2



    # !!! FINISH defining args before logging args
    # -------------------- init ckpt_dir, logging --------------------
    os.makedirs(args.ckpt_dir, mode=0o777, exist_ok=True)

    # -------------------- init visu --------------------
    visualizer = Visualizer(args)

    # logger = Logger(osp.join(args. ckpt_dir, 'log'))
    visualizer.logger.log('sys.argv:\n' + ' '.join(sys.argv))
    for arg in sorted(vars(args)):
        visualizer.logger.log('{:20s} {}'.format(arg, getattr(args, arg)))
    visualizer.logger.log('')

    # -------------------- code copy --------------------
    # TODO: find better approach
    # copy config yaml
    shutil.copyfile(sys.argv[1], osp.join(args.ckpt_dir, osp.basename(sys.argv[1])))

    repo_basename = osp.basename(osp.dirname(osp.abspath(__file__)))
    repo_path = osp.join(args.ckpt_dir, repo_basename)
    os.makedirs(repo_path, mode=0o777, exist_ok=True)

    walk_res = os.walk('.')
    useful_paths = [path for path in walk_res if
                    '.git' not in path[0] and
                    'checkpoints' not in path[0] and
                    'configs' not in path[0] and
                    '__pycache__' not in path[0] and
                    'tee_dir' not in path[0] and
                    'tmp' not in path[0]]
    # print('useful_paths', useful_paths)
    for p in useful_paths:
        for item in p[-1]:
            if not (item.endswith('.py') or item.endswith('.c') or item.endswith('.h') or item.endswith('.md')):
                continue
            old_path = osp.join(p[0], item)
            new_path = osp.join(repo_path, p[0][2:], item)
            basedir = osp.dirname(new_path)
            os.makedirs(basedir, mode=0o777, exist_ok=True)
            shutil.copyfile(old_path, new_path)

    # if args.evaluate:
    #     shutil.copyfile(args.resume, osp.join(args.ckpt_dir, 'model_used.pth.tar'))
    # If cannot find file, will raise FileNotFoundError
    # The destination location must be writable; otherwise, an OSError exception will be raised.
    #  If dst already exists, it will be replaced. Special files such as character or block devices
    #  and pipes cannot be copied with this function.

    # -------------------- dataset & loader --------------------
    train_dataset = datasets.__dict__[args.dataset](
        train=True,
        transform=transforms.Compose([
            transforms.Resize(args.imageSize, Image.BICUBIC),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5),
                                 (0.5, 0.5, 0.5))
        ]),
        args=args
    )
    visualizer.logger.log('train_dataset: ' + str(train_dataset))
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
        worker_init_fn=lambda x: np.random.seed((torch.initial_seed()) % (2 ** 32))
    )

    if not args.debug:
        args.html_iter_freq = len(train_loader) // args.html_per_train_epoch
        visualizer.logger.log('change args.html_iter_freq to %s' % args.html_iter_freq)
        args.save_iter_freq = len(train_loader) // args.html_per_train_epoch
        visualizer.logger.log('change args.save_iter_freq to %s' % args.save_iter_freq)

    test_dataset = datasets.__dict__[args.dataset](
        train=False,
        transform=transforms.Compose([
            transforms.Resize(args.imageSize, Image.BICUBIC),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5),
                                 (0.5, 0.5, 0.5))
        ]),
        args=args
    )

    visualizer.logger.log('test_dataset: ' + str(test_dataset))
    visualizer.logger.log('test img paths:')
    for anno in test_dataset.raw_annotations:
        visualizer.logger.log('%s %d %.0f %.0f %.0f %.0f %.0f %.0f %.0f %.0f %.0f %.0f' % (anno[0], anno[1], anno[2], anno[3], anno[4], anno[5], anno[6], anno[7], anno[8], anno[9], anno[10], anno[11]))
    visualizer.logger.log('')

    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        worker_init_fn=lambda x: np.random.seed((torch.initial_seed()) % (2 ** 32)),
        drop_last=True
    )
    #assert len(test_loader) == 1
    print('test_loader has {} images'.format(len(test_loader)))

    # --------------------------------------------------------------------------------
    # -------------------- create model --------------------
    # visualizer.logger.log("=>  creating model '{}'".format(args.arch))

    args.gpu_ids = list(range(len(os.environ['CUDA_VISIBLE_DEVICES'].split(','))))
    args.device = torch.device('cuda:0') if args.gpu_ids else torch.device('cpu')

    model_dict = {}
    model_dict['D_nets'] = []
    model_dict['G_nets'] = []

    # D, Q
    if args.lambda_dis > 0:
        if args.recon_pair_GAN:
            infogan_func = models.define_infoGAN_pair_D
        else:
            infogan_func = models.define_infoGAN
        model_dict['D'], model_dict['Q'] = infogan_func(
            args.output_nc,
            args.ndf,
            args.which_model_netD,
            args.n_layers_D,
            args.n_layers_Q,
            16,
            args.passwd_length // 4,
            args.normD,
            args.normQ,
            args.init_type,
            args.init_gain,
            args.gpu_ids,
            args.use_old_Q,
            args.use_minus_Q)

        model_dict['G_nets'].append(model_dict['Q'])
        if args.lambda_GAN == 0:
            del model_dict['D']
        else:
            model_dict['D_nets'].append(model_dict['D'])
    else:
        if args.lambda_GAN > 0:
            model_dict['D'] = models.define_D(args.input_nc, args.ndf, args.which_model_netD, args.n_layers_D,
                                   args.normD, args.no_lsgan,
                                   args.init_type, args.init_gain,
                                   args.gpu_ids)
            model_dict['D_nets'].append(model_dict['D'])

    # G
    if 'with_noise' in args.which_model_netG or args.lambda_dis == 0.:
        G_input_nc = args.input_nc
    else:
        G_input_nc = args.input_nc + args.passwd_length

    model_dict['G'] = models.define_G(G_input_nc, args.output_nc,
                                      args.ngf, args.which_model_netG, args.n_downsample_G,
                                      args.normG, not args.no_dropout,
                                      args.init_type, args.init_gain,
                                      args.gpu_ids,
                                      args.passwd_length,
                                      use_leaky=args.use_leakyG,
                                      use_resize_conv=args.use_resize_conv)
    model_dict['G_nets'].append(model_dict['G'])

    # D_pair
    if args.lambda_pair_GAN > 0:
        model_dict['pair_D'] = models.define_D(args.input_nc * 2, args.ndf, args.which_model_netD, args.n_layers_D,
                                               args.normD, args.no_lsgan,
                                               args.init_type, args.init_gain,
                                               args.gpu_ids)
        model_dict['D_nets'].append(model_dict['pair_D'])

    # FR
    netFR = models.sphere20a(feature=args.feature_layer)
    if len(args.gpu_ids) > 0:
        assert (torch.cuda.is_available())
        netFR.to(args.gpu_ids[0])
        netFR = torch.nn.DataParallel(netFR, args.gpu_ids)
    netFR.module.load_state_dict(torch.load('./pretrained_models/sphere20a_20171020.pth', map_location='cpu'))
    model_dict['FR'] = netFR
    model_dict['D_nets'].append(netFR)

    visualizer.logger.log('model_dict')
    for k, v in model_dict.items():
        visualizer.logger.log(k+':')
        if isinstance(v, list):
            visualizer.logger.log('list, len: ' + str(len(v)))
            for item in v:
                visualizer.logger.log(item.module.__class__.__name__, end=' ')
            visualizer.logger.log('')
        else:
            visualizer.logger.log(v)

    # -------------------- criterions --------------------
    criterion_dict = {
        'GAN': models.GANLoss(args.gan_mode).to(args.device),
        'FR': models.AngleLoss().to(args.device),
        'L1': torch.nn.L1Loss().to(args.device),
        'DIS': torch.nn.CrossEntropyLoss().to(args.device),
        'Feat': torch.nn.CosineEmbeddingLoss().to(args.device) if args.feature_loss == 'cos' else torch.nn.MSELoss().to(args.device)
    }
    # -------------------- optimizers --------------------
    # considering separate optimizer for each network?
    optimizer_G_params = [{'params': model_dict['G'].parameters(), 'lr': args.lr}]
    if args.lambda_dis > 0:
        optimizer_G_params.append({'params': model_dict['Q'].parameters(), 'lr': args.lr})

    optimizer_G = torch.optim.Adam(optimizer_G_params,
                                   lr=args.lr,
                                   betas=(args.beta1, 0.999),
                                   weight_decay=args.weight_decay)

    optimizer_D_params = []
    if args.lambda_GAN > 0:
        optimizer_D_params.append({'params': model_dict['D'].parameters(), 'lr': args.lr})
    if not args.fix_FR and args.lambda_FR > 0:
        optimizer_D_params.append({'params': netFR.parameters(), 'lr': args.lr * 0.1})
    if args.lambda_pair_GAN > 0:
        optimizer_D_params.append({'params':model_dict['pair_D'].parameters(), 'lr': args.lr})

    if len(optimizer_D_params):
        optimizer_D = torch.optim.Adam(optimizer_D_params,
                                       betas=(args.beta1, 0.999),
                                       weight_decay=args.weight_decay)
    else:
        optimizer_D = None

    optimizer_dict = {
        'G': optimizer_G,
        'D': optimizer_D
    }

    fake_pool = ImagePool(args.pool_size)
    recon_pool = ImagePool(args.pool_size)
    fake_pair_pool = ImagePool(args.pool_size)
    WR_pair_pool = ImagePool(args.pool_size)

    if args.resume:
        if osp.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location='cpu')
            args.start_epoch = checkpoint['epoch'] + 1

            for name, net in model_dict.items():
                if isinstance(net, list):
                    continue
                if hasattr(args, 'not_resume_models') and (name in args.not_resume_models):
                    continue
                if isinstance(net, torch.nn.DataParallel):
                    net = net.module
                if 'state_dict_' + name in checkpoint:
                    try:
                        net.load_state_dict(checkpoint['state_dict_' + name])
                    except Exception as e:
                        visualizer.logger.log('fail to load model '+name+' '+str(e))
                else:
                    visualizer.logger.log('model '+name+' not in checkpoints, just skip')

            if args.resume_optimizer:
                for name, optimizer in optimizer_dict.items():
                    if 'optimizer_' + name in checkpoint:
                        optimizer.load_state_dict(checkpoint['optimizer_' + name])
                    else:
                        visualizer.logger.log('optimizer ' + name + ' not in checkpoints, just skip')

            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
        gc.collect()
        # torch.cuda.empty_cache()

    torch.backends.cudnn.benchmark = True

    # -------------------- miscellaneous --------------------

    if args.lambda_dis > 0:
        fixed_z, fixed_dis_target, fixed_rand_z, fixed_rand_dis_target = generate_code(args.passwd_length, args.batch_size, args.device, inv=False)
        print(fixed_z)
    else:
        fixed_z, fixed_rand_z = None, None

    # for epoch in range(args.start_epoch, args.num_epochs):
    #     print('epoch', epoch)
    #     # train
    #     if args.lambda_dis > 0:
    #         model_dict['Q'].train()
    #     if args.lambda_GAN > 0:
    #         model_dict['D'].train()
    #     if args.lambda_pair_GAN > 0:
    #         model_dict['pair_D'].train()
    #     model_dict['G'].train()
    #     if not args.fix_FR:
    #         model_dict['FR'].train()
    #
    #     epoch_start_time = time.time()
    #     train(train_loader, model_dict, criterion_dict, optimizer_dict, fake_pool, recon_pool, fake_pair_pool, WR_pair_pool, visualizer, epoch, args, test_loader, fixed_z, fixed_rand_z)
    #     epoch_time = time.time() - epoch_start_time
    #     message = 'epoch %s total time %s\n' % (epoch, epoch_time)
    #     visualizer.logger.log(message)
    #
    #     gc.collect()
    #     # torch.cuda.empty_cache()
    #
    #     # save model
    #     if epoch % args.save_epoch_freq == 0:
    #         save_model(epoch, model_dict, optimizer_dict, args, iter=len(train_loader))
    #     # test visualization
    #     if epoch % args.html_epoch_freq == 0:
    test(test_loader, model_dict, criterion_dict, visualizer, 5, args, fixed_z, fixed_rand_z, 3069)
Example #14
def init_fn(worker_id):
    # give each DataLoader worker its own seed for Python's and NumPy's RNGs,
    # derived from the worker's torch seed
    random.seed((torch.initial_seed() + worker_id) % (2 ** 32))
    np.random.seed((torch.initial_seed() + worker_id) % (2 ** 32))
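Not part of the original snippet, but a minimal usage sketch with a placeholder dataset: worker_init_fn is called once in every worker process before loading starts, so each worker ends up with its own random/numpy seed.

import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.arange(16).float())  # placeholder data
loader = DataLoader(dataset, batch_size=4, num_workers=2, worker_init_fn=init_fn)
for batch in loader:
    pass  # by now each worker has executed init_fn(worker_id)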