Example 1
def runing(config, _log, game_name):

    # Load the args info from the config file.
    _config = args_sanity_check(config, _log)
    args = SN(**_config)
    args.device = "cuda" if args.use_cuda else "cpu"

    env_name = get_env_name(game_name)

    # Enable logging.
    logger = Logger(_log)
    unique_token = "{}__{}".format(
        args.name,
        datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
    args.unique_token = unique_token

    # Set up TensorBoard.
    if args.use_tensorboard:
        tb_logs_direc = os.path.join(dirname(dirname(abspath(__file__))),
                                     "results", "tb_logs/{}".format(game_name))
        tb_exp_direc = os.path.join(tb_logs_direc, "{}").format(unique_token)
        logger.setup_tb(tb_exp_direc)

    # Start the experiment.
    run_sequential(args, logger, env_name)
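
Nearly every snippet on this page wraps the framework's console logger in a Logger object and then calls setup_tb and setup_sacred on it. For reference, here is a minimal, hypothetical sketch of such a wrapper; it is only an illustration (built on torch.utils.tensorboard.SummaryWriter and sacred's Run.log_scalar), not the implementation used by any of these repositories.

# Hypothetical sketch of the kind of Logger wrapper these snippets assume.
import logging
from torch.utils.tensorboard import SummaryWriter

class Logger:
    def __init__(self, console_logger: logging.Logger):
        self.console_logger = console_logger
        self.tb_writer = None
        self.sacred_run = None

    def setup_tb(self, directory_name: str):
        # Write TensorBoard event files under the unique-token directory.
        self.tb_writer = SummaryWriter(log_dir=directory_name)

    def setup_sacred(self, sacred_run):
        # Keep a handle to the sacred Run so metrics can also go there.
        self.sacred_run = sacred_run

    def log_stat(self, key: str, value: float, t: int):
        self.console_logger.info("%s: %s (t=%d)", key, value, t)
        if self.tb_writer is not None:
            self.tb_writer.add_scalar(key, value, t)
        if self.sacred_run is not None:
            self.sacred_run.log_scalar(key, value, t)
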
Example 2
def my_main(_run, _config, _log):
    global mongo_client

    import datetime
    unique_token = "{}__{}".format(_config["name"], datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
    # run the framework
    # run(_run, _config, _log, mongo_client, unique_token)
    arglist = parse_args()

    logger = Logger(_log)
    # configure tensorboard logger
    unique_token = "{}__{}".format(arglist.exp_name, datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
    use_tensorboard = False
    if use_tensorboard:
        tb_logs_direc = os.path.join(dirname(dirname(abspath(__file__))), "results", "tb_logs")
        tb_exp_direc = os.path.join(tb_logs_direc, "{}").format(unique_token)
        logger.setup_tb(tb_exp_direc)
    logger.setup_sacred(_run)

    train(arglist, logger, _config)
    # arglist = convert(_config)
    #train(arglist)

    # force exit
    os._exit(0)
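Example 3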
 def __init__(self, wrapped_observer: Observer, cache_size: int):
     self.wrapped_observer: Observer = wrapped_observer
     self.__function_runner = thread_function_runner
     self.__lock: Lock = BooleanLock()
     self.__cache_size: Optional[int] = cache_size
     self.__message_cache: List = []
     self.__error_cache: List = []
     self.__logger = Logger()
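Example 4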
def main(client_num,
         model,
         C=1.0,
         lr=0.1,
         epoch=1,
         aggregator=FedAvg(),
         iid=True,
         client_eval=False):
    logger = Logger('output/{}'.format(
        datetime.now().strftime('%y%m%d%H%M%S')),
                    level='debug')

    # Load the configuration
    cfg.merge_from_file('config/{}.yaml'.format(model))
    if C == 0:
        cfg.SOLVER.MAX_EPOCH = 2000
    cfg.freeze()

    # Load the data
    transforms_train = transforms.Compose([
        transforms.Resize(cfg.MODEL.BACKBONE.INPUT_RESOLUTION),
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, ))
    ])
    train_set = torchvision.datasets.MNIST(root='data',
                                           train=True,
                                           download=True,
                                           transform=transforms_train)

    # Partition the data across clients
    if iid:
        sample_idx = iid_sampler(train_set, client_num)
    else:
        sample_idx = non_iid_sampler(train_set, client_num)

    # Initialize the federated server
    s = Server(config=cfg,
               dataset_name='MNIST',
               aggregator=aggregator,
               logger=logger)

    def flip(tensor):
        return (tensor + 2) % 10

    for i, sample in enumerate(sample_idx):
        if i < client_num - 10:
            c = Client(i, train_set, sample_idx[i], logger=logger)
        else:
            c = LabelFlippingClient(i,
                                    train_set,
                                    sample_idx[i],
                                    label_map=flip,
                                    logger=logger)
        s.append_clients(c)
    s.train(C=C, epoch_num=epoch, lr=lr, local_eval=client_eval)
    cfg.defrost()
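
The flip closure above is the label map handed to LabelFlippingClient: each MNIST label y becomes (y + 2) % 10. A quick standalone check of that mapping (not part of the original snippet):

import torch

def flip(tensor):
    return (tensor + 2) % 10

labels = torch.tensor([0, 3, 8, 9])
print(flip(labels))  # tensor([2, 5, 0, 1])
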
Example 5
def main():
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    cudnn.benchmark = True
    cudnn.enabled = True

    sys.stdout = Logger(osp.join(working_dir, args.logs_dir, 'log.txt'))
    dump_exp_inf(args)

    train_loader, val_loader = \
        get_data(args.train_data_dir, args.train_ann_file,
                args.val_data_dir, args.val_ann_file,
                args.height, args.width, args.batch_size, args.workers)

    model = models.create(args.arch, n_classes=63)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = nn.DataParallel(model).to(device)
    criterion = nn.BCEWithLogitsLoss().to(device)

    # define loss function (criterion) and optimizer

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, device)
        return

    best_prec1 = 0

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, device)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, device)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.module.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            filename=osp.join(working_dir, args.logs_dir,
                              'checkpoint.pth.tar'))
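
save_checkpoint itself is not shown in these snippets. A common pattern, and one plausible reading of the filename=/is_best call above, is to write the latest state and copy it to a model_best file whenever it beats the previous best; the helper below is a hypothetical sketch under that assumption.

import os.path as osp
import shutil
import torch

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    # Persist the latest state; keep a separate copy of the best one.
    torch.save(state, filename)
    if is_best:
        shutil.copy(filename,
                    osp.join(osp.dirname(filename), 'model_best.pth.tar'))
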
Example 6
def run(_run, _config, _log):
    """
    Run; called from the main entry point.
    :param _run:
    :type _run:
    :param _config:
    :type _config:
    :param _log:
    :type _log:
    :return:
    :rtype:
    """
    # Adjust some of the default config values (e.g. cuda, batch size)
    _config = args_sanity_check(_config, _log)
    # Convert the config dict into Namespace-style args
    args = SN(**_config)
    args.device = "cuda" if args.use_cuda else "cpu"

    # Set up logging
    logger = Logger(_log)

    _log.info("Experiment Parameters:")
    experiment_params = pprint.pformat(_config,
                                       indent=4,
                                       width=1)
    _log.info("\n\n" + experiment_params + "\n")

    # Configure the TensorBoard logger, e.g. 'qmix_env=8_adam_td_lambda__2021-04-28_09-40-29'
    unique_token = "{}__{}".format(args.name, datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
    args.unique_token = unique_token
    # If TensorBoard is enabled, set up its log directory
    if args.use_tensorboard:
        tb_logs_direc = os.path.join(dirname(dirname(abspath(__file__))), "results", "tb_logs")
        tb_exp_direc = os.path.join(tb_logs_direc, "{}").format(unique_token)
        logger.setup_tb(tb_exp_direc)

    # Sacred logging is on by default
    logger.setup_sacred(_run)

    # Run and train
    run_sequential(args=args, logger=logger)

    # Clean up after finishing
    print("Exiting Main")

    print("Stopping all threads")
    for t in threading.enumerate():
        if t.name != "MainThread":
            print("Thread {} is alive! Is daemon: {}".format(t.name, t.daemon))
            t.join(timeout=1)
            print("Thread joined")

    print("Exiting script")

    # Make sure the framework really exits
    os._exit(os.EX_OK)
Example 7
def main(config):

    # ensure directories are setup
    prepare_dirs(config)

    # logging
    if config.logs_dir and config.is_train:
        sys.stdout = Logger(osp.join(config.logs_dir, 'log.txt'))
    elif config.logs_dir and not config.is_train:
        sys.stdout = Logger(osp.join(config.logs_dir, 'log-test.txt'))

    # ensure reproducibility
    torch.manual_seed(config.random_seed)
    kwargs = {}
    if config.use_gpu:
        torch.cuda.manual_seed(config.random_seed)
        kwargs = {'num_workers': 1, 'pin_memory': True}

    dataset = Dataset('train')
    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=16,
                                               pin_memory=True)

    test_dataset = Dataset('test')
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=config.batch_size,
                                              shuffle=False,
                                              num_workers=16,
                                              pin_memory=True)

    # instantiate trainer
    trainer = Trainer(config, (train_loader, test_loader))

    # either train
    if config.is_train:
        save_config(config)
        trainer.train()

    # or load a pretrained model and test
    else:
        trainer.test()
Example 8
def run(_run, _config, _log):
    # check args sanity
    _config = args_sanity_check(_config, _log)

    args = SN(**_config)
    args.device = "cuda" if args.use_cuda else "cpu"
    set_device = os.getenv('SET_DEVICE')
    if args.use_cuda and set_device != '-1':
        if set_device is None:
            args.device = "cuda"
        else:
            args.device = f"cuda:{set_device}"
    else:
        args.device = "cpu"

    # setup loggers
    logger = Logger(_log)

    _log.info("Experiment Parameters:")
    experiment_params = pprint.pformat(_config, indent=4, width=1)
    _log.info("\n\n" + experiment_params + "\n")

    # configure tensorboard logger
    unique_token = "{}__{}".format(
        args.name,
        datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
    args.unique_token = unique_token
    if args.use_tensorboard:
        tb_logs_direc = os.path.join(dirname(dirname(abspath(__file__))),
                                     "results", "tb_logs")
        tb_exp_direc = os.path.join(tb_logs_direc, "{}").format(unique_token)
        logger.setup_tb(tb_exp_direc)

    # sacred is on by default
    logger.setup_sacred(_run)

    # Run and train
    run_sequential(args=args, logger=logger)

    # Clean up after finishing
    print("Exiting Main")

    print("Stopping all threads")
    for t in threading.enumerate():
        if t.name != "MainThread":
            print("Thread {} is alive! Is daemon: {}".format(t.name, t.daemon))
            t.join(timeout=1)
            print("Thread joined")

    print("Exiting script")

    # Making sure framework really exits
    os._exit(os.EX_OK)
Example 9
def run(_run, _config, _log):

    # check args sanity
    _config = args_sanity_check(_config, _log)

    args = SN(**_config)
    args.device = "cuda" if args.use_cuda else "cpu"
    if args.use_cuda:
        th.cuda.set_device(args.device_num)

    # setup loggers
    logger = Logger(_log)

    _log.info("Experiment Parameters:")
    experiment_params = pprint.pformat(_config, indent=4, width=1)
    _log.info("\n\n" + experiment_params + "\n")

    # configure tensorboard logger
    unique_token = "{}__{}".format(
        args.name,
        datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
    args.unique_token = unique_token
    if args.use_tensorboard:
        tb_logs_direc = os.path.join(dirname(dirname(abspath(__file__))),
                                     "results", "tb_logs")
        tb_exp_direc = os.path.join(tb_logs_direc, "{}").format(unique_token)
        args.tb_logs = tb_exp_direc
        # args.latent_role_direc = os.path.join(tb_exp_direc, "{}").format('latent_role')
        logger.setup_tb(tb_exp_direc)
        #dump config to the tb directory
        with open(os.path.join(tb_exp_direc, "config.yaml"), "w") as f:
            yaml.dump(_config, f, default_flow_style=False)

    # sacred is on by default
    logger.setup_sacred(_run)

    # Run and train
    run_sequential(args=args, logger=logger)

    # Clean up after finishing
    print("Exiting Main")

    print("Stopping all threads")
    for t in threading.enumerate():
        if t.name != "MainThread":
            print("Thread {} is alive! Is daemon: {}".format(t.name, t.daemon))
            t.join(timeout=1)
            print("Thread joined")

    print("Exiting script")

    # Making sure framework really exits
    os._exit(os.EX_OK)
Example 10
def run_simulation(**kwargs):
    kp = KwargsParser(kwargs, DEFAULTS)
    folder = Path(kp.folder).expanduser()
    folder.mkdir(exist_ok=True, parents=True)

    file_str = f'L_{kp.L}_chi_{kp.chi}_g_{kp.g}_{kp.contraction_method}'
    logger = Logger(None, True)
    opt_logger = Logger(folder.joinpath(file_str + '.opt.log'), True)
    kp.log(opt_logger)
    opt_logger.lineskip()
    outfile = folder.joinpath(file_str + '.pkl')
    statefile = folder.joinpath(file_str + '.state.pkl')
    kp.log(logger)

    def callback(tensors, k):
        if (k % kp.save_interval == 0) and (k > 0):
            with open(statefile, 'wb') as _f:
                pickle.dump(dict(kwargs=kp.kwargs(), k=k, tensors=tensors), _f)

    opt_opts = dict(display_fun=get_display_fun(opt_logger),
                    line_search_fn='strong_wolfe',
                    max_iter=kp.max_iter,
                    callback=callback,
                    dtype=np.complex128)
    cont_opts = dict(contraction_method=kp.contraction_method)

    model = TFIM(kp.g, 'obc', lx=kp.L, ly=kp.L, dtype_hamiltonian=np.float64)
    gs, gs_energy = model.groundstate(kp.chi, (kp.L, kp.L), kp.initial_state, kp.initial_noise,
                                      cont_opts, opt_opts)

    results = dict(kwargs=kp.kwargs(),
                   gs_energy=gs_energy,
                   gs_tensors=gs.get_tensors(),
                   logfile=str(logger.logfile))

    print(f'saving results to {outfile}')
    # noinspection PyTypeChecker
    with open(outfile, 'wb') as f:
        pickle.dump(results, f)
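
The results dict (kwargs, gs_energy, gs_tensors, logfile) is pickled to outfile; reading it back is symmetric. A small usage sketch, with a placeholder path standing in for the real outfile:

import pickle

# Placeholder path; in practice use the outfile printed by run_simulation.
with open('results/L_4_chi_2_g_3.0_brute.pkl', 'rb') as f:
    results = pickle.load(f)

print(results['gs_energy'])
print(results['kwargs'])
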
Example 11
def main(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # cudnn.benchmark = True

    # Redirect print to both console and log file
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.logs_dir, 'log.txt'))

    # Create data loaders
    if args.height is None or args.width is None:
        args.height, args.width = (144, 56) if args.arch == 'inception' else \
                                  (240, 240)
    dataset, num_classes, train_loader, val_loader = \
        get_data(args.dataset, args.split, args.data_dir, args.height,
                 args.width, args.batch_size, args.workers)

    # Create model

    img_branch = models.create(args.arch,
                               cut_layer=args.cut_layer,
                               num_classes=num_classes)

    args.resume = "/mnt/lustre/renjiawei/DAIN_py/logs/Resnet50-single_view-split1/model_best.pth.tar"

    # Load from checkpoint
    start_epoch = best_top1 = 0
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        # img_high_level.load_state_dict(checkpoint['state_dict_img'])
        # diff_high_level.load_state_dict(checkpoint['state_dict_diff'])
        img_branch.load_state_dict(checkpoint['state_dict_img'])
        start_epoch = checkpoint['epoch']
        best_top1 = checkpoint['best_top1']
        print("=> Start epoch {}  best top1 {:.1%}".format(
            start_epoch, best_top1))

    img_branch = nn.DataParallel(img_branch).cuda()
    # img_branch = nn.DataParallel(img_branch)
    img_branch.train(False)

    x = torch.randn(64, 1, 224, 224, requires_grad=True)

    torch_out = torch.onnx._export(
        img_branch,  # model being run
        x,  # model input (or a tuple for multiple inputs)
        "super_resolution.onnx",
        # where to save the model (can be a file or file-like object)
        export_params=True
    )  # store the trained parameter weights inside the model file
Example 12
 def __init__(
     self,
     split_fracs: Dict[str, float],
     working_dir: (str) = None,
     seed: (int) = None,
     split_per_dir=False,
 ):
     if not np.isclose(np.sum([p for _, p in split_fracs.items()]), 1.):
         raise ValueError("Split probabilities have to sum up to 1.")
     self.split_fracs = split_fracs
     self.working_dir = working_dir
     self.seed = seed
     self.split_per_dir = split_per_dir
     self.splits = defaultdict(list)
     self._logger = Logger("CSVSPLIT")
Example 13
def run(_run, _config, _log):

    # check args sanity
    _config = args_sanity_check(_config, _log)

    args = SN(**_config)
    args.device = "cuda" if args.use_cuda else "cpu"

    # setup loggers
    logger = Logger(_log)

    _log.info("Experiment Parameters:")
    experiment_params = pprint.pformat(_config, indent=4, width=1)
    _log.info("\n\n" + experiment_params + "\n")

    # configure tensorboard logger
    # unique_token = "{}__{}".format(args.name, datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))

    try:
        map_name = _config["env_args"]["map_name"]
    except KeyError:
        map_name = _config["env_args"]["key"]
    unique_token = f"{_config['name']}_seed{_config['seed']}_{map_name}_{datetime.datetime.now()}"

    args.unique_token = unique_token
    if args.use_tensorboard:
        tb_logs_direc = os.path.join(dirname(dirname(abspath(__file__))),
                                     "results", "tb_logs")
        tb_exp_direc = os.path.join(tb_logs_direc, "{}").format(unique_token)
        logger.setup_tb(tb_exp_direc)

    # sacred is on by default
    logger.setup_sacred(_run)

    # Run and train
    run_sequential(args=args, logger=logger)

    # Clean up after finishing
    print("Exiting Main")

    print("Stopping all threads")
    for t in threading.enumerate():
        if t.name != "MainThread":
            print("Thread {} is alive! Is daemon: {}".format(t.name, t.daemon))
            t.join(timeout=1)
            print("Thread joined")

    print("Exiting script")
Example 14
 def __init__(
     self,
     file_names: Iterable[str],
     working_dir=None,
     transform=None,
     logger_name="TRAIN",
     dataset_name=None,
 ):
     if isinstance(file_names, GeneratorType):
         self.file_names = list(file_names)
     else:
         self.file_names = file_names
     self.working_dir = working_dir
     self.transform = transform
     self._logger = Logger(logger_name)
     self.dataset_name = dataset_name
Example 15
def create_experiment(config):
    """Creates an experiment based on config."""

    device = torch.device(config.device)
    logging.info("using {}".format(config.device))

    experiment = Experiment(config.name, config.save_dir)
    experiment.register_config(config)

    logger = None
    if config.use_tflogger:
        logger = Logger(config.tflog_dir)
        experiment.register_logger(logger)

    torch.manual_seed(config.rseed)

    model = NRU(device,
                config.input_size,
                config.output_size,
                num_layers=config.num_layers,
                layer_size=config.layer_size,
                output_activation="linear",
                layer_norm=config.layer_norm,
                use_relu=config.use_relu,
                memory_size=config.memory_size,
                k=config.k).to(device)
    experiment.register_model(model)

    data_iterator = get_data_iterator(config)
    experiment.register_data_iterator(data_iterator)

    optimizer = get_optimizer(model.parameters(), config)
    model.register_optimizer(optimizer)

    tr = MyContainer()
    tr.updates_done = 0
    tr.epochs_done = 0
    tr.ce = {}
    tr.ce["train"] = []
    tr.accuracy = {}
    tr.accuracy["valid"] = []
    tr.accuracy["test"] = []
    tr.grad_norm = []

    experiment.register_train_statistics(tr)

    return experiment, model, data_iterator, tr, logger, device
Example 16
def main(args):
    sys.stdout = Logger(args.log_dir)

    train_loader, test_loader = get_data(args)
    model = BaseModel(args)
    evaluator = Evaluator(model=model, data_loader=test_loader)

    best_acc = evaluator.evaluate()

    accuracies = [best_acc]
    losses = []

    for e in range(1, args.epochs + 1):
        epoch_loss = 0
        print("Epoch", e)
        for data in tqdm(train_loader):
            model.set_input(data)
            model.optimize_parameters()
            epoch_loss += model.get_loss()

        print("Epoch finished with loss", epoch_loss)
        losses.append(epoch_loss)

        if e % args.eval_step == 0:
            acc = evaluator.evaluate()
            accuracies.append(acc)
            best_acc = max(acc, best_acc)
            print("[Epoch {}] Accuracy:{:.2f}, Best Accuracy:{:.2f}".format(
                e, acc, best_acc))

        if e % args.save_step == 0:
            model.save_model(e)

        model.update_lr()

        plt.figure()
        plt.plot(range(len(losses)), losses)
        plt.xlabel('Epochs')
        plt.ylabel('Training Loss')
        plt.savefig(os.path.join(args.exp_dir, 'losses.png'))

        plt.figure()
        plt.plot(range(len(accuracies)), accuracies)
        plt.xlabel('Epochs')
        plt.ylabel('Test Accuracy')
        plt.savefig(os.path.join(args.exp_dir, 'accuracies.png'))
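Example 17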
 def __init__(
     self,
     split_fracs: Dict[str, float],
     working_dir: (str) = None,
     seed: (int) = None,
     split_per_dir=False,
 ):
     if not np.isclose(np.sum([p for _, p in split_fracs.items()]), 1.):
         # .items() returns the dict's (key, value) pairs
         raise ValueError("Split probabilities have to sum up to 1.")
     self.split_fracs = split_fracs
     self.working_dir = working_dir
     self.seed = seed
     self.split_per_dir = split_per_dir
     self.splits = defaultdict(
         list)  # defaultdict(list): pass list to .default_factory
     self._logger = Logger("CSVSPLIT")
Example 18
def standard_run(_config, _log, game_name):

    # check args sanity
    _config = args_sanity_check(_config, _log)

    args = SN(**_config)

    # use_cuda=True by default in default.yaml
    args.device = "cuda" if args.use_cuda else "cpu"
    #args.device = f"cuda:{args.device_num}" if args.use_cuda else "cpu"

    # setup loggers
    logger = Logger(_log)

    _log.info("Experiment Parameters:")
    experiment_params = pprint.pformat(_config, indent=4, width=1)
    _log.info("\n\n" + experiment_params + "\n")

    # configure tensorboard logger
    unique_token = f"{args.name}__{datetime.datetime.now():%Y-%m-%d_%H-%M-%S}"  # e.g. QMIX_2021-08-04_11-09-04
    args.unique_token = unique_token
    if args.use_tensorboard:
        tb_logs_direc = os.path.join(dirname(dirname(abspath(__file__))),
                                     "results", f"tb_logs/{game_name}")
        tb_exp_direc = os.path.join(tb_logs_direc, f"{unique_token}")
        logger.setup_tb(tb_exp_direc)

    # Run and train
    run_sequential(args=args, logger=logger)

    # Clean up after finishing
    print("Exiting Main")

    print("Stopping all threads")
    for t in threading.enumerate():
        if t.name != "MainThread":
            print(f"Thread {t.name} is alive! Is daemon: {t.daemon}")
            t.join(timeout=1)
            print("Thread joined")

    print("Exiting script")

    # Making sure framework really exits
    #os._exit(os.EX_OK) #The os.EX_* values are UNIX only.
    os._exit(0)  #For Windows
Example 19
def my_main(_run, _config, _log):
    global mongo_client

    import datetime

    # arglist = parse_args()
    # unique_token = "{}__{}".format(arglist.name, datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
    # run the framework
    # run(_run, _config, _log, mongo_client, unique_token)

    logger = Logger(_log)

    # configure tensorboard logger
    unique_token = "{}__{}".format(
        _config["label"],
        datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
    use_tensorboard = False
    if use_tensorboard:
        tb_logs_direc = os.path.join(dirname(dirname(abspath(__file__))),
                                     "results", "tb_logs")
        tb_exp_direc = os.path.join(tb_logs_direc, "{}").format(unique_token)
        logger.setup_tb(tb_exp_direc)
    logger.setup_sacred(_run)

    _log.info("Experiment Parameters:")
    import pprint
    experiment_params = pprint.pformat(_config, indent=4, width=1)
    _log.info("\n\n" + experiment_params + "\n")

    # START THE TRAINING PROCESS
    runner = Runner(logger)
    runner.load(_config)
    runner.reset()
    # args = vars(arglist)
    runner.run(_config)

    # runner.run(args)

    # train(arglist, logger, _config)
    # arglist = convert(_config)
    # train(arglist)

    # force exit
    os._exit(0)
Example 20
def main():
    args = argument_parsing()

    os.makedirs(args.save_dir, exist_ok=True)
    sys.stdout = Logger(
        os.path.join(args.save_dir, f"log_train({datetime.now()}).txt"))
    if args.cuda:
        if torch.cuda.is_available():
            cudnn.benchmark = True
        else:
            print("No GPU is available!")
            args.cuda = False
    print("Running with {}s...".format("cpu" if args.cuda is False else "gpu"))

    print("user config".center(30, "="))
    for key, value in args.__dict__.items():
        print(f"{key}: {value}")
    print("end".center(30, "="))
    train(args)
Example 21
def run(_run, _config, _log, pymongo_client):

    # check args sanity
    _config = args_sanity_check(_config, _log)

    args = SN(**_config)
    args.device = "cuda" if args.use_cuda else "cpu"

    # setup loggers
    logger = Logger(_log)

    _log.info("Experiment Parameters:")
    experiment_params = pprint.pformat(_config, indent=4, width=1)
    _log.info("\n\n" + experiment_params + "\n")

    # configure tensorboard logger
    unique_token = "{}__{}".format(
        args.name,
        datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
    args.unique_token = unique_token
    if args.use_tensorboard:
        tb_logs_direc = os.path.join(dirname(dirname(abspath(__file__))),
                                     "results", "tb_logs")
        tb_exp_direc = os.path.join(tb_logs_direc, "{}").format(unique_token)
        logger.setup_tb(tb_exp_direc)

    # sacred is on by default
    logger.setup_sacred(_run)

    # Run and train
    if args.cross_play and args.evaluate:
        run_sequential_cross(args=args, logger=logger)
    else:
        run_sequential(args=args, logger=logger)

    # Clean up after finishing
    print("Exiting Main")

    if pymongo_client is not None:
        print("Attempting to close mongodb client")
        pymongo_client.close()
    print("Mongodb client closed")
Example 22
    def create_logger(self):
        """
        Create the logger including the file log and summary log
        :return: logger and summary writer
        """
        if self.args.training:
            logger = Logger(self.args.log,
                            '%s-%s' % (self.args.method, self.args.postfix),
                            rm_exist=self.args.start_epoch == 0)
            logger.update_dict(vars(self.args))

            if self.args.mxboard:
                from mxboard import SummaryWriter
                sw = SummaryWriter(logdir=self.args.log)
            else:
                sw = None
        else:
            logger, sw = None, None

        return logger, sw
Example 23
def main(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    cudnn.benchmark = True
    sys.stdout = Logger(osp.join(args.logs_dir, 'log.txt'))

    trainset = Feeder(args.feat_path, args.knn_graph_path, args.label_path,
                      args.seed, args.k_at_hop, args.active_connection)
    trainloader = DataLoader(trainset,
                             batch_size=args.batch_size,
                             num_workers=args.workers,
                             shuffle=True,
                             pin_memory=True)

    net = model.gcn().cuda()
    opt = torch.optim.SGD(net.parameters(),
                          args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    criterion = nn.CrossEntropyLoss().cuda()

    save_checkpoint({
        'state_dict': net.state_dict(),
        'epoch': 0,
    },
                    False,
                    fpath=osp.join(args.logs_dir, 'epoch_{}.ckpt'.format(0)))
    for epoch in range(args.epochs):
        adjust_lr(opt, epoch)

        train(trainloader, net, criterion, opt, epoch)
        save_checkpoint({
            'state_dict': net.state_dict(),
            'epoch': epoch + 1,
        },
                        False,
                        fpath=osp.join(args.logs_dir,
                                       'epoch_{}.ckpt'.format(epoch + 1)))
Example 24
def main(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # cudnn.benchmark = True

    # Redirect print to both console and log file
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.logs_dir, 'log.txt'))

    # Create data loaders
    if args.height is None or args.width is None:
        args.height, args.width = (144, 56) if args.arch == 'inception' else \
                                  (240, 240)
    dataset, num_classes, train_loader, val_loader, test_loader = \
        get_data(args.dataset, args.split, args.data_dir, args.height,
                 args.width, args.batch_size, args.workers, args.combine_trainval)

    # Create model

    img_branch = models.create(args.arch,
                               cut_layer=args.cut_layer,
                               num_classes=num_classes,
                               num_features=args.features)
    diff_branch = models.create(args.arch,
                                cut_layer=args.cut_layer,
                                num_classes=num_classes,
                                num_features=args.features)

    # Load from checkpoint
    start_epoch = best_top1 = 0
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        img_branch.load_state_dict(checkpoint['state_dict_img'])
        diff_branch.load_state_dict(checkpoint['state_dict_diff'])
        start_epoch = checkpoint['epoch']
        best_top1 = checkpoint['best_top1']
        print("=> Start epoch {}  best top1 {:.1%}".format(
            start_epoch, best_top1))

    img_branch = nn.DataParallel(img_branch).cuda()
    diff_branch = nn.DataParallel(diff_branch).cuda()
    # img_branch = nn.DataParallel(img_branch)
    # diff_branch = nn.DataParallel(diff_branch)

    # Criterion
    criterion = nn.CrossEntropyLoss().cuda()
    # criterion = nn.CrossEntropyLoss()

    # Evaluator
    evaluator = Evaluator(img_branch, diff_branch, criterion)
    if args.evaluate:
        # print("Validation:")
        # top1, _ = evaluator.evaluate(val_loader)
        # print("Validation acc: {:.1%}".format(top1))
        print("Test:")
        top1, (gt, pred) = evaluator.evaluate(test_loader)
        print("Test acc: {:.1%}".format(top1))
        from confusion_matrix import plot_confusion_matrix
        plot_confusion_matrix(gt, pred, dataset.classes, args.logs_dir)
        return

    img_param_groups = [
        {
            'params': img_branch.module.low_level_modules.parameters(),
            'lr_mult': 0.1
        },
        {
            'params': img_branch.module.high_level_modules.parameters(),
            'lr_mult': 0.1
        },
        {
            'params': img_branch.module.classifier.parameters(),
            'lr_mult': 1
        },
    ]

    diff_param_groups = [
        {
            'params': diff_branch.module.low_level_modules.parameters(),
            'lr_mult': 0.1
        },
        {
            'params': diff_branch.module.high_level_modules.parameters(),
            'lr_mult': 0.1
        },
        {
            'params': diff_branch.module.classifier.parameters(),
            'lr_mult': 1
        },
    ]

    img_optimizer = torch.optim.SGD(img_param_groups,
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=True)
    diff_optimizer = torch.optim.SGD(diff_param_groups,
                                     lr=args.lr,
                                     momentum=args.momentum,
                                     weight_decay=args.weight_decay,
                                     nesterov=True)

    # Trainer
    trainer = Trainer(img_branch, diff_branch, criterion)

    # Schedule learning rate
    def adjust_lr(epoch):
        step_size = args.step_size
        lr = args.lr * (0.1**(epoch // step_size))
        for g in img_optimizer.param_groups:
            g['lr'] = lr * g.get('lr_mult', 1)
        for g in diff_optimizer.param_groups:
            g['lr'] = lr * g.get('lr_mult', 1)

    # Start training
    for epoch in range(start_epoch, args.epochs):
        adjust_lr(epoch)
        trainer.train(epoch, train_loader, img_optimizer, diff_optimizer)
        if epoch < args.start_save:
            continue
        top1, _ = evaluator.evaluate(val_loader)

        is_best = top1 > best_top1
        best_top1 = max(top1, best_top1)
        save_checkpoint(
            {
                'state_dict_img': img_branch.module.state_dict(),
                'state_dict_diff': diff_branch.module.state_dict(),
                'epoch': epoch + 1,
                'best_top1': best_top1,
            },
            is_best,
            fpath=osp.join(args.logs_dir, 'checkpoint.pth.tar'))

        print('\n * Finished epoch {:3d}  top1: {:5.1%}  best: {:5.1%}{}\n'.
              format(epoch, top1, best_top1, ' *' if is_best else ''))

    # Final test
    print('Test with best model:')
    checkpoint = load_checkpoint(osp.join(args.logs_dir, 'model_best.pth.tar'))
    img_branch.module.load_state_dict(checkpoint['state_dict_img'])
    diff_branch.module.load_state_dict(checkpoint['state_dict_diff'])
    top1, (gt, pred) = evaluator.evaluate(test_loader)
    from confusion_matrix import plot_confusion_matrix
    plot_confusion_matrix(gt, pred, dataset.classes, args.logs_dir)
    print('\n * Test Accuracy: {:5.1%}\n'.format(top1))
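
The adjust_lr closure above applies a step decay, lr = args.lr * 0.1 ** (epoch // step_size), and then scales each parameter group by its lr_mult (0.1 for the backbone groups, 1 for the classifiers). With illustrative values, not taken from the snippet, the schedule looks like this:

base_lr, step_size = 0.01, 30  # illustrative values only

for epoch in (0, 29, 30, 59, 60):
    lr = base_lr * (0.1 ** (epoch // step_size))
    print(f"epoch {epoch}: lr={lr:.6f}")
# epoch 0: lr=0.010000
# epoch 29: lr=0.010000
# epoch 30: lr=0.001000
# epoch 59: lr=0.001000
# epoch 60: lr=0.000100
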
Example 25
def run_simulation(**kwargs):
    kp = KwargsParser(kwargs, DEFAULTS)
    folder = Path(kp.folder).expanduser()
    folder.mkdir(exist_ok=True, parents=True)

    file_str = f'L_{kp.L}_g_{kp.g}_chi_{kp.chi}_dt_{kp.dt}_quench_{kp.quench}'
    if kp.task_id:
        file_str += f'_{kp.task_id}'
    logger = Logger(folder.joinpath(file_str + '.log'), True)
    opt_logger = Logger(folder.joinpath(file_str + '.opt.log'), True)
    outfile = folder.joinpath(file_str + '.pkl')
    kp.log(logger)

    opt_opts = dict(display_fun=get_display_fun(opt_logger),
                    line_search_fn='strong_wolfe',
                    max_iter=kp.max_iter,
                    tolerance_grad=kp.tolerance_grad)
    cont_opts = dict(contraction_method='brute')

    model = TFIM(kp.g,
                 bc='obc',
                 lx=kp.L,
                 ly=kp.L,
                 dtype_hamiltonian=np.float64)
    evolver = TimeEvolution(kp.g,
                            kp.dt,
                            'obc',
                            real_time=True,
                            lx=kp.L,
                            ly=kp.L,
                            pepo_dtype=np.complex128)

    logger.log(f'Starting with groundstate of g={kp.g} TFIM')

    # Prepare groundstate

    gs = None
    gs_energy = None

    if kp.gs_file:
        logger.log('GS file specified, loading GS from file')
        try:
            with open(kp.gs_file, 'rb') as f:
                res = pickle.load(f)
            gs_tensors = res['gs_tensors']
            gs = Peps(gs_tensors, 'obc')
            gs_energy = res['gs_energy']

            assert np.allclose(kp.g, res['kwargs']['g'])
            assert gs.lx == kp.L
            assert gs.ly == kp.L
        except Exception as e:
            logger.log('Failed to load GS from file. Error: ' + str(e))

    if (gs is None) or (gs_energy is None):
        logger.log('No GS file specified, optimising gs...')
        gs, gs_energy = model.groundstate(kp.chi, (kp.L, kp.L), 'ps', 0.05,
                                          cont_opts, opt_opts)

        logger.log('Saving GS to ' +
                   str(folder.joinpath(file_str + '.gs.pkl')))
        results = dict(kwargs=kp.kwargs(), gs=gs, gs_energy=gs_energy, g=kp.g)
        with open(folder.joinpath(file_str + '.gs.pkl'), 'wb') as f:
            pickle.dump(results, f)

    # Prepare quench

    if kp.quench == 'X':  # <Sx(r,t) Sx(center,0)>
        quench_operator = sx
        measure_operator = sx
    elif kp.quench == 'Y':  # <Sy(r,t) Sy(center,0)>
        quench_operator = sy
        measure_operator = sy
    elif kp.quench == 'Z':  # <Sz(r,t) Sz(center,0)>
        quench_operator = sz
        measure_operator = sz
    elif kp.quench == '+':  # <S+(r,t) S-(center,0)>
        quench_operator = sm
        measure_operator = sp
    else:
        raise ValueError(f'Illegal quench code {kp.quench}')

    logger.log(f'Quench: Applying quench operator to center site')
    quenched = SingleSiteOperator(quench_operator, kp.L // 2,
                                  kp.L // 2).apply_to_peps(gs)

    # Time evolution

    x_snapshot_data = onp.zeros([kp.n_steps + 1, kp.L, kp.L])
    y_snapshot_data = onp.zeros([kp.n_steps + 1, kp.L, kp.L])
    z_snapshot_data = onp.zeros([kp.n_steps + 1, kp.L, kp.L])
    correlator_data = onp.zeros([kp.n_steps + 1, kp.L, kp.L],
                                dtype=onp.complex128)
    t_data = onp.zeros([kp.n_steps + 1])

    state = quenched
    opt_opts['dtype'] = np.complex128
    opt_opts['max_grad_evals_ls'] = 100
    for n in range(kp.n_steps):
        logger.log('Computing Observables')

        t = n * kp.dt
        x_snapshot_data[n, :, :] = x_snapshot(state, cont_opts)
        y_snapshot_data[n, :, :] = y_snapshot(state, cont_opts)
        z_snapshot_data[n, :, :] = z_snapshot(state, cont_opts)
        correlator_data[n, :, :] = correlator_timeslice(
            gs, state, measure_operator, gs_energy, t, **cont_opts)
        t_data[n] = t

        logger.log(f'Evolving to t={(n + 1) * kp.dt}')
        state = evolver.evolve(state,
                               contraction_options=cont_opts,
                               optimisation_options=opt_opts,
                               random_dev=None,
                               initial=kp.initial)

        # save results (will be overwritten), (in case process dies before it finishes)
        results = dict(kwargs=kp.kwargs(),
                       quench=kp.quench,
                       x_snapshot=x_snapshot_data,
                       y_snapshot=y_snapshot_data,
                       z_snapshot=z_snapshot_data,
                       correlator=correlator_data,
                       t=t_data,
                       state_tensors=state.get_tensors())
        with open(outfile, 'wb') as f:
            pickle.dump(results, f)

        if kp.save_all_peps:
            results = dict(kwargs=kp.kwargs(),
                           t=t,
                           state_tensors=state.get_tensors())
            with open(folder.joinpath(file_str + f'state_t_{t}.pkl'),
                      'wb') as f:
                pickle.dump(results, f)

    logger.log('Computing Observables')
    t = kp.n_steps * kp.dt
    x_snapshot_data[kp.n_steps, :, :] = x_snapshot(state, cont_opts)
    y_snapshot_data[kp.n_steps, :, :] = y_snapshot(state, cont_opts)
    z_snapshot_data[kp.n_steps, :, :] = z_snapshot(state, cont_opts)
    correlator_data[kp.n_steps, :, :] = correlator_timeslice(
        gs, state, measure_operator, gs_energy, t, **cont_opts)
    t_data[kp.n_steps] = t

    # save results
    logger.log(f'saving results to {outfile}')
    results = dict(kwargs=kp.kwargs(),
                   quench=kp.quench,
                   x_snapshot=x_snapshot_data,
                   y_snapshot=y_snapshot_data,
                   z_snapshot=z_snapshot_data,
                   correlator=correlator_data,
                   t=t_data,
                   state_tensors=state.get_tensors())
    with open(outfile, 'wb') as f:
        pickle.dump(results, f)

    if kp.save_all_peps:
        results = dict(kwargs=kp.kwargs(),
                       t=t,
                       state_tensors=state.get_tensors())
        with open(folder.joinpath(file_str + f'state_t_{t}.pkl'), 'wb') as f:
            pickle.dump(results, f)
Example 26
    dest="jit_load",
    action="store_true",
    help="Load model via torch jit (otherwise via torch load).",
)

parser.add_argument(
    "--input_file",
    type=str,
    default=None,
    help=
    "Input file could either be a directory with multiple audio files or just one single audio file"
)

ARGS = parser.parse_args()

log = Logger("PREDICT", ARGS.debug, ARGS.log_dir)
"""
Main function to compute prediction by using a trained model together with the given input
"""
if __name__ == "__main__":

    if ARGS.checkpoint_path is not None:
        log.info("Restoring checkpoint from {} instead of using a model file.".
                 format(ARGS.checkpoint_path))
        checkpoint = torch.load(ARGS.checkpoint_path)
        model = UNet(1, 1, bilinear=False)
        model.load_state_dict(checkpoint["modelState"])
        log.warning(
            "Using default preprocessing options. Provide Model file if they are changed"
        )
        dataOpts = DefaultSpecDatasetOps
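Example 27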
def main(args):
    if not os.path.exists(args.logs_dir):
        os.mkdir(args.logs_dir)
    if not os.path.exists(args.tensorboard_dir):
        os.mkdir(args.tensorboard_dir)
    tensorboardWrite = SummaryWriter(log_dir = args.tensorboard_dir)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.benchmark = True
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # log file
    if args.evaluate == 1:
        sys.stdout = Logger(osp.join(args.logs_dir, 'log_test.txt'))
    else:
        sys.stdout = Logger(osp.join(args.logs_dir, 'log_train.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    print("Initializing dataset {}".format(args.dataset))
    # from reid.data import get_data ,
    dataset, num_classes, train_loader, query_loader, gallery_loader = \
        get_data(args, args.dataset, args.split, args.data_dir,
                 args.batch_size, args.seq_len, args.seq_srd,
                 args.workers)
    print('[len] train: {}, query: {}, gallery: {}'.format(*list(map(len, [train_loader, query_loader, gallery_loader]))))

    # create CNN model
    # cnn_model = models.create(args.a1, args.flow1, args.flow2, num_features=args.features, dropout=args.dropout)
    cnn_model_flow = [models.create(args.a1, args.flow1, num_features=args.features, dropout=args.dropout)]
    if any(args.flow2):
        cnn_model_flow.append(models.create(args.a1, args.flow2, num_features=args.features, dropout=args.dropout))
    # cnn_model_flow1 = cnn_model_flow1.cuda()
    # cnn_model_flow2 = cnn_model_flow2.cuda()


    # create ATT model
    input_num = cnn_model_flow[0].feat.in_features  # 2048
    output_num = args.features  # 128
    att_model = models.create(args.a2, input_num, output_num)
    # att_model.cuda()

    # # ------peixian: two attmodel------
    # att_model_flow1 = models.create(args.a2, input_num, output_num)
    # att_model_flow2 = models.create(args.a2, input_num, output_num)
    # # --------------------------------

    # create classifier model
    class_num = 2
    classifier_model = models.create(args.a3,  output_num, class_num)
    # classifier_model.cuda()

    # CUDA acceleration model

    # cnn_model = torch.nn.DataParallel(cnn_model).to(device)
    # # ------peixian:tow attmodel------
    # for att_model in [att_model_flow1, att_model_flow2]:
    #     att_model = att_model.to(device)
    # # --------------------------------
    att_model = att_model.cuda()
    classifier_model = classifier_model.cuda()

    # cnn_model = torch.nn.DataParallel(cnn_model).cuda()
    # cnn_model_flow1 = torch.nn.DataParallel(cnn_model_flow1,device_ids=[0,1,2])
    # cnn_model_flow2 = torch.nn.DataParallel(cnn_model_flow2,device_ids=[0,1,2])
    
    # 
    cnn_model_flow[0].cuda()
    cnn_model_flow[0] = torch.nn.DataParallel(cnn_model_flow[0],device_ids=[0])
    if len(cnn_model_flow) > 1:
        cnn_model_flow[1].cuda()
        cnn_model_flow[1] = torch.nn.DataParallel(cnn_model_flow[1],device_ids=[0])



    # att_model = torch.nn.DataParallel(att_model,device_ids=[1,2,3])
    # classifier_model = torch.nn.DataParallel(classifier_model,device_ids=[1,2,3])


    criterion_oim = OIMLoss(args.features, num_classes,
                            scalar=args.oim_scalar, momentum=args.oim_momentum)
    criterion_veri = PairLoss(args.sampling_rate)
    criterion_oim.cuda()
    criterion_veri.cuda()

    # criterion_oim.cuda()
    # criterion_veri.cuda()

    # Optimizer
    optimizer1 = []
    # cnn_model_flow = [cnn_model_flow1, cnn_model_flow2]
    for cnn_model in range(len(cnn_model_flow)):
        base_param_ids = set(map(id, cnn_model_flow[cnn_model].module.base.parameters()))
        new_params = [p for p in cnn_model_flow[cnn_model].module.parameters() if
                    id(p) not in base_param_ids]

        param_groups1 = [
            {'params': cnn_model_flow[cnn_model].module.base.parameters(), 'lr_mult': 1},
            {'params': new_params, 'lr_mult': 1}]

        optimizer1.append(torch.optim.SGD(param_groups1, lr=args.lr1,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=True))
    
    param_groups2 = [
        {'params': att_model.parameters(), 'lr_mult': 1},
        {'params': classifier_model.parameters(), 'lr_mult': 1}]                        
    optimizer2 = torch.optim.SGD(param_groups2, lr=args.lr2,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)
    # optimizer1 = torch.optim.Adam(param_groups1, lr=args.lr1, weight_decay=args.weight_decay)
    #
    # optimizer2 = torch.optim.Adam(param_groups2, lr=args.lr2, weight_decay=args.weight_decay)

    # Schedule Learning rate
    def adjust_lr1(epoch):
        lr = args.lr1 * (0.1 ** (epoch/args.lr1step))
        print(lr)
        for o in optimizer1:
            for g in o.param_groups:
                g['lr'] = lr * g.get('lr_mult', 1)

    def adjust_lr2(epoch):
        lr = args.lr2 * (0.01 ** (epoch//args.lr2step))
        print(lr)
        for g in optimizer2.param_groups:
            g['lr'] = lr * g.get('lr_mult', 1)
        # # peixian:  two attmodel:
        # for o in optimizer2:
        #     for g in o.param_groups:
        #         g['lr'] = lr * g.get('lr_mult', 1)
        # #

    def adjust_lr3(epoch):
        lr = args.lr3 * (0.000001 ** (epoch //args.lr3step))
        print(lr)
        return lr

    # Trainer
    trainer = SEQTrainer(cnn_model_flow, att_model, classifier_model, criterion_veri, criterion_oim, args.lr3, args.flow1rate)


    # Evaluator
    evaluator = ATTEvaluator(cnn_model_flow, att_model, classifier_model, args.flow1rate)

    best_top1 = 0
    if args.evaluate == 1 or args.pretrain == 1:  # evaluate
        for cnn_model in range(len(cnn_model_flow)):
            checkpoint = load_checkpoint(osp.join(args.logs_dir, 'cnnmodel_best_flow' + str(cnn_model) + '.pth.tar'))
            cnn_model_flow[cnn_model].module.load_state_dict(checkpoint['state_dict'])

        checkpoint = load_checkpoint(osp.join(args.logs_dir, 'attmodel_best.pth.tar'))
        att_model.load_state_dict(checkpoint['state_dict'])

        checkpoint = load_checkpoint(osp.join(args.logs_dir, 'clsmodel_best.pth.tar'))
        classifier_model.load_state_dict(checkpoint['state_dict'])

        top1 = evaluator.evaluate(query_loader, gallery_loader, dataset.queryinfo, dataset.galleryinfo)
        # top1 = evaluator.evaluate(query_loader, gallery_loader,dataset.num_tracklet)

    if args.evaluate == 0:
        for epoch in range(args.start_epoch, args.epochs):
            adjust_lr1(epoch)
            adjust_lr2(epoch)
            rate = adjust_lr3(epoch)
            trainer.train(epoch, train_loader, optimizer1, optimizer2, rate,tensorboardWrite)

            if (epoch+1) % 1 == 0 or (epoch+1) == args.epochs:

                top1 = evaluator.evaluate(query_loader, gallery_loader, dataset.queryinfo, dataset.galleryinfo)

                is_best = top1 > best_top1
                if is_best:
                    best_top1 = top1
                for cnn_model in range(len(cnn_model_flow)):
                    save_cnn_checkpoint({
                        'state_dict': cnn_model_flow[cnn_model].module.state_dict(),
                        'epoch': epoch + 1,
                        'best_top1': best_top1,
                    }, is_best, index=cnn_model, fpath=osp.join(args.logs_dir, 'cnn_checkpoint_flow'+str(cnn_model)+'.pth.tar'))

                save_att_checkpoint({
                    'state_dict': att_model.state_dict(),
                    'epoch': epoch + 1,
                    'best_top1': best_top1,
                }, is_best, fpath=osp.join(args.logs_dir, 'att_checkpoint.pth.tar'))

                save_cls_checkpoint({
                    'state_dict': classifier_model.state_dict(),
                    'epoch': epoch + 1,
                    'best_top1': best_top1,
                }, is_best, fpath=osp.join(args.logs_dir, 'cls_checkpoint.pth.tar'))
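Example 28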

if torch.cuda.is_available():
    if args.cuda:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    if not args.cuda:
        print("WARNING: It looks like you have a CUDA device, but aren't " +
              "using CUDA.\nRun with --cuda for optimal training speed.")
        torch.set_default_tensor_type('torch.FloatTensor')
else:
    torch.set_default_tensor_type('torch.FloatTensor')

if not os.path.exists(args.save_folder):
    os.mkdir(args.save_folder)

sys.stdout = Logger(os.path.join(args.save_folder, 'log.txt'))

def train():
    if args.dataset == 'COCO':
        '''if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because " +
                  "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        cfg = coco
        dataset = COCODetection(root=args.dataset_root,
                                transform=SSDAugmentation(cfg['min_dim'],
                                                          MEANS))'''
    elif args.dataset == 'VOC':
        '''if args.dataset_root == COCO_ROOT:
Example 29
def main():
    global best_prec
    global opt

    if opt['id'] != '':
        model_id = opt['id']
    else:
        model_id = time.strftime("%m_%d_%H-%M-%S")
    sys.stdout = Logger(osp.join(opt['log_dir'], 'log.' + model_id + '.txt'))

    # initialize
    checkpoint_dir = osp.join(opt['checkpoint_dir'], model_id)
    mkdir_if_missing(checkpoint_dir)

    # check gpu
    assert opt['gpus'] is not None

    # set random seed
    cudnn.benchmark = False
    cudnn.deterministic = True
    random.seed(opt['seed'])
    np.random.seed(opt['seed'])
    torch.manual_seed(opt['seed'])
    torch.cuda.manual_seed_all(opt['seed'])

    # load imdb
    train_refdb = get_db('refvg_train_' + opt['model_method'])
    vocab = train_refdb.load_dictionary()
    opt['vocab_size'] = len(vocab)
    val_refdb = get_db('refvg_val_' + opt['model_method'])

    # model, criterion, optimizer
    model = SGReason(opt)
    model = torch.nn.DataParallel(model).cuda()
    criterion = SoftmaxLoss().cuda()

    optimizer = torch.optim.Adam(list(model.parameters()) +
                                 list(criterion.parameters()),
                                 lr=opt['learning_rate'],
                                 betas=(opt['optim_alpha'], opt['optim_beta']),
                                 eps=opt['optim_epsilon'])

    scheduler = ReduceLROnPlateau(optimizer,
                                  factor=0.1,
                                  patience=3,
                                  mode='max')

    if opt['evaluate']:
        if osp.isfile(opt['model']):
            model, criterion = load_checkpoint(model, criterion, opt['model'])
            test_refdb = get_db('refvg_test_' + opt['model_method'])
            test_dataset = RefDataset(test_refdb, vocab, opt)
            test_loader = torch.utils.data.DataLoader(
                test_dataset,
                batch_size=opt['batch_size'],
                shuffle=False,
                num_workers=opt['workers'],
                pin_memory=True)
            test_loss, test_prec = validate(test_loader, model, criterion)
            print(test_loss, test_prec)
        else:
            print("=> no checkpoint found at '{}'".format(opt['model']))
        return

    # start training
    epoch_cur = 0
    train_dataset = RefDataset(train_refdb, vocab, opt)
    val_dataset = RefDataset(val_refdb, vocab, opt)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=opt['batch_size'],
                                               shuffle=True,
                                               num_workers=opt['workers'],
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=opt['batch_size'],
                                             shuffle=False,
                                             num_workers=opt['workers'],
                                             pin_memory=True)

    for epoch in range(epoch_cur, opt['max_epochs']):
        train(train_loader, model, criterion, optimizer, epoch)
        val_loss, prec = validate(val_loader, model, criterion, epoch)
        scheduler.step(prec)
        for i, param_group in enumerate(optimizer.param_groups):
            print(float(param_group['lr']))

        is_best = prec >= best_prec
        best_prec = max(best_prec, prec)
        save_checkpoint(
            {
                'model_state_dict': model.state_dict(),
                'crit_state_dict': criterion.state_dict(),
                'optimizer': optimizer.state_dict()
            }, is_best, checkpoint_dir, str(epoch))
Example 30
def main(args):
    ## fix random_seed
    fixRandomSeed(1)

    ## cuda setting
    cudnn.benchmark = True
    cudnn.enabled = True
    device = torch.device('cuda:' + str(args.gpuid))
    torch.cuda.set_device(device)

    ## Logger setting
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.logs_dir, 'log.txt'))
    print('logs_dir=', args.logs_dir)
    print('args : ', args)

    ## get dataset & dataloader:
    dataset, source_num_classes, source_train_loader, \
    target_train_loader, query_loader, gallery_loader = get_data(args.data_dir, args.source,args.target,
                                                                 args.source_train_path, args.target_train_path,
                                                                 args.source_extension,args.target_extension,
                                                                 args.height, args.width,
                                                                 args.batch_size, args.re, args.workers)

    h, w = map(int, [args.height, args.width])
    input_size_source = (h, w)
    input_size_target = (h, w)

    # cudnn.enabled = True

    # Create Network
    # model = Res_Deeplab(num_classes=args.num_classes)
    model = Res_Deeplab(num_classes=source_num_classes)
    if args.restore_from[:4] == 'http':
        saved_state_dict = model_zoo.load_url(args.restore_from)
    else:
        saved_state_dict = torch.load(args.restore_from)
    new_params = model.state_dict().copy()

    ## adapt new_params' layers / classes to saved_state_dict
    for i in saved_state_dict:
        i_parts = i.split('.')
        if not args.num_classes == 19 or not i_parts[1] == 'layer5':
            new_params['.'.join(i_parts[1:])] = saved_state_dict[i]

    if args.restore_from[:4] == './mo':
        model.load_state_dict(new_params)
    else:
        model.load_state_dict(saved_state_dict)

    ## switch to train mode and move the model's params to the GPU
    model.train()
    model.cuda(args.gpu)

    # cudnn.benchmark = True

    # Init D
    model_D = FCDiscriminator(num_classes=args.num_classes)
    # =============================================================================
    #    #for retrain
    #    saved_state_dict_D = torch.load(RESTORE_FROM_D)
    #    model_D.load_state_dict(saved_state_dict_D)
    # =============================================================================

    model_D.train()
    model_D.cuda(args.gpu)

    # Create the snapshot directory: loss.txt and the model snapshots below are
    # written into args.snapshot_dir (assumes `os` is imported, as in the full script).
    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    if args.source == 'GTA5':
        trainloader = data.DataLoader(GTA5DataSet(
            args.data_dir,
            args.data_list,
            max_iters=args.num_steps * args.iter_size * args.batch_size,
            crop_size=input_size_source,
            scale=True,
            mirror=True,
            mean=IMG_MEAN),
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      num_workers=args.num_workers,
                                      pin_memory=True)
    else:
        trainloader = data.DataLoader(SYNTHIADataSet(
            args.data_dir,
            args.data_list,
            max_iters=args.num_steps * args.iter_size * args.batch_size,
            crop_size=input_size_source,
            scale=True,
            mirror=True,
            mean=IMG_MEAN),
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      num_workers=args.num_workers,
                                      pin_memory=True)

    trainloader_iter = enumerate(trainloader)

    targetloader = data.DataLoader(cityscapesDataSet(
        args.data_dir_target,
        args.data_list_target,
        max_iters=args.num_steps * args.iter_size * args.batch_size,
        crop_size=input_size_target,
        scale=True,
        mirror=True,
        mean=IMG_MEAN,
        set=args.set),
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   pin_memory=True)

    targetloader_iter = enumerate(targetloader)

    optimizer = optim.SGD(model.optim_parameters(args),
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    optimizer.zero_grad()

    optimizer_D = optim.Adam(model_D.parameters(),
                             lr=args.learning_rate_D,
                             betas=(0.9, 0.99))
    optimizer_D.zero_grad()
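    # Two optimizers: SGD over the segmentation network's optim_parameters()
    # groups and Adam for the discriminator; gradients are cleared before the
    # training loop starts.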

    bce_loss = torch.nn.BCEWithLogitsLoss()
    weighted_bce_loss = WeightedBCEWithLogitsLoss()

    interp_source = nn.Upsample(size=(input_size_source[1],
                                      input_size_source[0]),
                                mode='bilinear',
                                align_corners=True)
    interp_target = nn.Upsample(size=(input_size_target[1],
                                      input_size_target[0]),
                                mode='bilinear',
                                align_corners=True)
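    # These layers upsample the two prediction maps back to the crop resolution
    # before the losses are computed. nn.Upsample interprets size as (H, W); with
    # input_size_source == (h, w) the [1], [0] indexing passes (w, h), which only
    # matches the intended resolution when height == width.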

    # Labels for Adversarial Training
    source_label = 0
    target_label = 1
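    # Domain labels for the discriminator: 0 marks source-domain predictions,
    # 1 marks target-domain predictions.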

    for i_iter in range(args.num_steps):

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)

        optimizer_D.zero_grad()
        adjust_learning_rate_D(optimizer_D, i_iter)

        damping = (1 - i_iter / NUM_STEPS)
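        # damping linearly decays from 1 to 0 over training and scales the
        # adversarial and weight-discrepancy terms below. NUM_STEPS, PREHEAT_STEPS,
        # Epsilon, Lambda_local, Lambda_adv and Lambda_weight are module-level
        # constants defined outside this snippet.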

        # ======================================================================================
        # train G
        # ======================================================================================

        # Remove Grads in D
        for param in model_D.parameters():
            param.requires_grad = False
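        # With the discriminator frozen, the adversarial loss below only
        # produces gradients for the segmentation network.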

        # Train with Source
        _, batch = next(trainloader_iter)
        images_s, labels_s, _, _, _ = batch
        images_s = Variable(images_s).cuda(args.gpu)
        pred_source1, pred_source2 = model(images_s)
        pred_source1 = interp_source(pred_source1)
        pred_source2 = interp_source(pred_source2)

        # Segmentation Loss
        loss_seg = (loss_calc(pred_source1, labels_s, args.gpu) +
                    loss_calc(pred_source2, labels_s, args.gpu))
        loss_seg.backward()

        # Train with Target
        _, batch = next(targetloader_iter)
        images_t, _, _, _ = batch
        images_t = Variable(images_t).cuda(args.gpu)

        pred_target1, pred_target2 = model(images_t)
        pred_target1 = interp_target(pred_target1)
        pred_target2 = interp_target(pred_target2)

        weight_map = weightmap(F.softmax(pred_target1, dim=1),
                               F.softmax(pred_target2, dim=1))
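        # weightmap (defined elsewhere) appears to derive a per-pixel weight from
        # the disagreement between the two classifier outputs; it modulates the
        # adversarial losses after the preheat phase.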

        D_out = interp_target(
            model_D(F.softmax(pred_target1 + pred_target2, dim=1)))

        # Adaptive Adversarial Loss
        if (i_iter > PREHEAT_STEPS):
            loss_adv = weighted_bce_loss(
                D_out,
                Variable(
                    torch.FloatTensor(
                        D_out.data.size()).fill_(source_label)).cuda(args.gpu),
                weight_map, Epsilon, Lambda_local)
        else:
            loss_adv = bce_loss(
                D_out,
                Variable(
                    torch.FloatTensor(
                        D_out.data.size()).fill_(source_label)).cuda(args.gpu))
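        # In both branches the target predictions are labelled with source_label,
        # so minimizing this loss pushes the segmentation network to fool the
        # discriminator; after PREHEAT_STEPS the per-pixel weight_map focuses the
        # loss on high-disagreement regions.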

        loss_adv = loss_adv * Lambda_adv * damping
        loss_adv.backward()

        # Weight Discrepancy Loss
        W5 = None
        W6 = None
        if args.model == 'ResNet':

            for (w5, w6) in zip(model.layer5.parameters(),
                                model.layer6.parameters()):
                if W5 is None and W6 is None:
                    W5 = w5.view(-1)
                    W6 = w6.view(-1)
                else:
                    W5 = torch.cat((W5, w5.view(-1)), 0)
                    W6 = torch.cat((W6, w6.view(-1)), 0)

        loss_weight = (torch.matmul(W5, W6) /
                       (torch.norm(W5) * torch.norm(W6)) + 1
                       )  # +1 is for a positive loss
        loss_weight = loss_weight * Lambda_weight * damping * 2
        loss_weight.backward()
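        # The weight-discrepancy term is the cosine similarity between the
        # flattened weights of the two classifier heads (layer5 and layer6),
        # shifted by +1 so the loss stays non-negative; minimizing it drives the
        # two classifiers apart. This branch assumes args.model == 'ResNet';
        # otherwise W5 and W6 remain None and torch.matmul would fail.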

        # ======================================================================================
        # train D
        # ======================================================================================

        # Bring back Grads in D
        for param in model_D.parameters():
            param.requires_grad = True

        # Train with Source
        pred_source1 = pred_source1.detach()
        pred_source2 = pred_source2.detach()
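        # Detach the source predictions so the discriminator loss does not
        # backpropagate into the segmentation network.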

        D_out_s = interp_source(
            model_D(F.softmax(pred_source1 + pred_source2, dim=1)))

        loss_D_s = bce_loss(
            D_out_s,
            Variable(
                torch.FloatTensor(
                    D_out_s.data.size()).fill_(source_label)).cuda(args.gpu))

        loss_D_s.backward()

        # Train with Target
        pred_target1 = pred_target1.detach()
        pred_target2 = pred_target2.detach()
        weight_map = weight_map.detach()

        D_out_t = interp_target(
            model_D(F.softmax(pred_target1 + pred_target2, dim=1)))

        # Adaptive Adversarial Loss
        if (i_iter > PREHEAT_STEPS):
            loss_D_t = weighted_bce_loss(
                D_out_t,
                Variable(
                    torch.FloatTensor(
                        D_out_t.data.size()).fill_(target_label)).cuda(
                            args.gpu), weight_map, Epsilon, Lambda_local)
        else:
            loss_D_t = bce_loss(
                D_out_t,
                Variable(
                    torch.FloatTensor(
                        D_out_t.data.size()).fill_(target_label)).cuda(
                            args.gpu))

        loss_D_t.backward()

        optimizer.step()
        optimizer_D.step()
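        # Apply all gradients accumulated by the backward() calls above:
        # segmentation + adversarial + weight-discrepancy for the generator,
        # and the two BCE terms for the discriminator.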

        print('exp = {}'.format(args.snapshot_dir))
        print(
            'iter = {0:6d}/{1:6d}, loss_seg = {2:.4f} loss_adv = {3:.4f}, loss_weight = {4:.4f}, loss_D_s = {5:.4f} loss_D_t = {6:.4f}'
            .format(i_iter, args.num_steps, loss_seg, loss_adv, loss_weight,
                    loss_D_s, loss_D_t))

        f_loss = open(osp.join(args.snapshot_dir, 'loss.txt'), 'a')
        f_loss.write('{0:.4f} {1:.4f} {2:.4f} {3:.4f} {4:.4f}\n'.format(
            loss_seg, loss_adv, loss_weight, loss_D_s, loss_D_t))
        f_loss.close()

        if i_iter >= args.num_steps_stop - 1:
            print('save model ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir,
                         'GTA5_' + str(args.num_steps) + '.pth'))
            torch.save(
                model_D.state_dict(),
                osp.join(args.snapshot_dir,
                         'GTA5_' + str(args.num_steps) + '_D.pth'))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '.pth'))
            torch.save(
                model_D.state_dict(),
                osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '_D.pth'))
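        # Note: the snapshot filenames above are hard-coded with the 'GTA5_'
        # prefix even when args.source == 'SYNTHIA'.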

    ## create dataloader
    dataset, source_num_classes, source_train_loader, target_train_loader, query_loader, gallery_loader = get_data(
        args.data_dir, args.source, args.target, args.source_train_path,
        args.target_train_path, args.source_extension, args.target_extension,
        args.height, args.width, args.batch_size, args.re, args.workers)
    h, w = map(int, args.input_size_source.split(','))
    input_size_source = (h, w)
    input_size_target = (h, w)