コード例 #1
0
ファイル: linear.py プロジェクト: bzqweiyi/notears
            if h_new > 0.25 * h:
                rho *= 10
            else:
                break
        w_est, h = w_new, h_new
        alpha += rho * h
        if h <= h_tol or rho >= rho_max:
            break
    W_est = _adj(w_est)
    W_est[np.abs(W_est) < w_threshold] = 0
    return W_est


if __name__ == '__main__':
    import utils
    # Fix the RNG seed so the synthetic benchmark below is reproducible.
    utils.set_random_seed(1)

    # n samples, d variables, s0 expected edges, Erdos-Renyi ('ER') graph,
    # Gaussian-noise linear SEM.
    n, d, s0, graph_type, sem_type = 100, 20, 20, 'ER', 'gauss'
    # Ground-truth structure and edge weights, saved for later comparison.
    B_true = utils.simulate_dag(d, s0, graph_type)
    W_true = utils.simulate_parameter(B_true)
    np.savetxt('W_true.csv', W_true, delimiter=',')

    # Sample data from the linear SEM and save it alongside the truth.
    X = utils.simulate_linear_sem(W_true, n, sem_type)
    np.savetxt('X.csv', X, delimiter=',')

    # Estimate the weighted adjacency matrix; the estimate must be a DAG.
    W_est = notears_linear(X, lambda1=0.1, loss_type='l2')
    assert utils.is_dag(W_est)
    np.savetxt('W_est.csv', W_est, delimiter=',')
    # Compare the estimated edge support against the true graph.
    acc = utils.count_accuracy(B_true, W_est != 0)
    print(acc)
コード例 #2
0
def main():
    """Entry point for Predictor&Corrector (SFTMD) training.

    Parses CLI options, sets up (optionally distributed) training,
    loggers, train/val dataloaders and the model, then runs the
    training loop with periodic validation and checkpointing.
    """
    #### setup options of three networks
    parser = argparse.ArgumentParser()
    # Fixed typo: "YMAL" -> "YAML".
    parser.add_argument("-opt", type=str, help="Path to option YAML file.")
    parser.add_argument("--launcher",
                        choices=["none", "pytorch"],
                        default="none",
                        help="job launcher")
    parser.add_argument("--local_rank", type=int, default=0)
    args = parser.parse_args()
    opt = option.parse(args.opt, is_train=True)

    # convert to NoneDict, which returns None for missing keys
    opt = option.dict_to_nonedict(opt)

    # choose small opt for SFTMD test, fill path of pre-trained model_F
    #### set random seed
    seed = opt["train"]["manual_seed"]
    if seed is None:
        seed = random.randint(1, 10000)
    util.set_random_seed(seed)

    # load PCA matrix of enough kernel
    print("load PCA matrix")
    pca_matrix = torch.load(opt["pca_matrix_path"],
                            map_location=lambda storage, loc: storage)
    print("PCA matrix shape: {}".format(pca_matrix.shape))

    #### distributed training settings
    if args.launcher == "none":  # disabled distributed training
        opt["dist"] = False
        rank = -1
        print("Disabled distributed training.")
    else:
        opt["dist"] = True
        init_dist()
        # Number of processes in the current process group.
        world_size = torch.distributed.get_world_size()
        # Rank of the current process within the group.
        rank = torch.distributed.get_rank()

    # Speed up fixed-size convolutions; flip to deterministic if exact
    # reproducibility is required.
    torch.backends.cudnn.benchmark = True
    # torch.backends.cudnn.deterministic = True

    ###### Predictor&Corrector train ######

    #### loading resume state if exists
    if opt["path"].get("resume_state", None):
        # distributed resuming: all load into default GPU
        device_id = torch.cuda.current_device()
        resume_state = torch.load(
            opt["path"]["resume_state"],
            map_location=lambda storage, loc: storage.cuda(device_id),
        )
        option.check_resume(opt, resume_state["iter"])  # check resume options
    else:
        resume_state = None

    #### mkdir and loggers
    if rank <= 0:  # normal training (rank -1) OR distributed training (rank 0-7)
        if resume_state is None:
            # Predictor path
            util.mkdir_and_rename(
                opt["path"]
                ["experiments_root"])  # rename experiment folder if exists
            util.mkdirs(
                (path for key, path in opt["path"].items()
                 if not key == "experiments_root"
                 and "pretrain_model" not in key and "resume" not in key))
            os.system("rm ./log")
            os.symlink(os.path.join(opt["path"]["experiments_root"], ".."),
                       "./log")

        # config loggers. Before it, the log will not work
        util.setup_logger(
            "base",
            opt["path"]["log"],
            "train_" + opt["name"],
            level=logging.INFO,
            screen=False,
            tofile=True,
        )
        util.setup_logger(
            "val",
            opt["path"]["log"],
            "val_" + opt["name"],
            level=logging.INFO,
            screen=False,
            tofile=True,
        )
        logger = logging.getLogger("base")
        logger.info(option.dict2str(opt))
        # tensorboard logger
        if opt["use_tb_logger"] and "debug" not in opt["name"]:
            # Parse major/minor numerically; slicing the version string
            # (`torch.__version__[0:3]`) misreads two-digit minors such
            # as "1.10" and fails outright on e.g. "10.0".
            version = tuple(
                int(x)
                for x in torch.__version__.split("+")[0].split(".")[:2])
            if version >= (1, 1):  # PyTorch 1.1
                from torch.utils.tensorboard import SummaryWriter
            else:
                logger.info(
                    "You are using PyTorch {}. Tensorboard will use [tensorboardX]"
                    .format(version))
                from tensorboardX import SummaryWriter
            tb_logger = SummaryWriter(
                log_dir="log/{}/tb_logger/".format(opt["name"]))
    else:
        util.setup_logger("base",
                          opt["path"]["log"],
                          "train",
                          level=logging.INFO,
                          screen=False)
        logger = logging.getLogger("base")

    #### create train and val dataloader
    dataset_ratio = 200  # enlarge the size of each epoch
    # Initialize so a missing "train"/"val" phase fails the asserts below
    # with a clear AssertionError instead of a NameError.
    train_loader = None
    val_loader = None
    for phase, dataset_opt in opt["datasets"].items():
        if phase == "train":
            train_set = create_dataset(dataset_opt)
            train_size = int(
                math.ceil(len(train_set) / dataset_opt["batch_size"]))
            total_iters = int(opt["train"]["niter"])
            total_epochs = int(math.ceil(total_iters / train_size))
            if opt["dist"]:
                train_sampler = DistIterSampler(train_set, world_size, rank,
                                                dataset_ratio)
                total_epochs = int(
                    math.ceil(total_iters / (train_size * dataset_ratio)))
            else:
                train_sampler = None
            train_loader = create_dataloader(train_set, dataset_opt, opt,
                                             train_sampler)
            if rank <= 0:
                logger.info(
                    "Number of train images: {:,d}, iters: {:,d}".format(
                        len(train_set), train_size))
                logger.info("Total epochs needed: {:d} for iters {:,d}".format(
                    total_epochs, total_iters))
        elif phase == "val":
            val_set = create_dataset(dataset_opt)
            val_loader = create_dataloader(val_set, dataset_opt, opt, None)
            if rank <= 0:
                logger.info("Number of val images in [{:s}]: {:d}".format(
                    dataset_opt["name"], len(val_set)))
        else:
            raise NotImplementedError(
                "Phase [{:s}] is not recognized.".format(phase))
    assert train_loader is not None
    assert val_loader is not None

    #### create model
    model = create_model(opt)  # load pretrained model of SFTMD

    #### resume training
    if resume_state:
        logger.info("Resuming training from epoch: {}, iter: {}.".format(
            resume_state["epoch"], resume_state["iter"]))

        start_epoch = resume_state["epoch"]
        current_step = resume_state["iter"]
        model.resume_training(resume_state)  # handle optimizers and schedulers
    else:
        current_step = 0
        start_epoch = 0

    # Degradation preprocessing: produces LR images and kernel maps on GPU.
    prepro = util.SRMDPreprocessing(scale=opt["scale"],
                                    pca_matrix=pca_matrix,
                                    cuda=True,
                                    **opt["degradation"])
    #### training
    logger.info("Start training from epoch: {:d}, iter: {:d}".format(
        start_epoch, current_step))
    for epoch in range(start_epoch, total_epochs + 1):
        if opt["dist"]:
            train_sampler.set_epoch(epoch)
        for _, train_data in enumerate(train_loader):
            current_step += 1

            if current_step > total_iters:
                break

            LR_img, ker_map = prepro(train_data["GT"])
            # Quantize to 8-bit range to mimic real stored LR images.
            LR_img = (LR_img * 255).round() / 255

            model.feed_data(LR_img, train_data["GT"], ker_map)
            model.optimize_parameters(current_step)
            model.update_learning_rate(current_step,
                                       warmup_iter=opt["train"]["warmup_iter"])
            visuals = model.get_current_visuals()

            if current_step % opt["logger"]["print_freq"] == 0:
                logs = model.get_current_log()
                message = "<epoch:{:3d}, iter:{:8,d}, lr:{:.3e}> ".format(
                    epoch, current_step, model.get_current_learning_rate())
                for k, v in logs.items():
                    message += "{:s}: {:.4e} ".format(k, v)
                    # tensorboard logger
                    if opt["use_tb_logger"] and "debug" not in opt["name"]:
                        if rank <= 0:
                            tb_logger.add_scalar(k, v, current_step)
                # Use `rank <= 0` (not `== 0`) so non-distributed runs
                # (rank -1) also log, consistent with every other guard.
                if rank <= 0:
                    logger.info(message)

            # validation, to produce ker_map_list(fake)
            if current_step % opt["train"]["val_freq"] == 0 and rank <= 0:
                avg_psnr = 0.0
                idx = 0
                for _, val_data in enumerate(val_loader):

                    # LR_img, ker_map = prepro(val_data['GT'])
                    LR_img = val_data["LQ"]
                    lr_img = util.tensor2img(
                        LR_img)  # save LR image for reference

                    # valid Predictor
                    model.feed_data(LR_img, val_data["GT"])
                    model.test()
                    visuals = model.get_current_visuals()

                    # Save images for reference
                    img_name = os.path.splitext(
                        os.path.basename(val_data["LQ_path"][0]))[0]
                    img_dir = os.path.join(opt["path"]["val_images"], img_name)
                    # img_dir = os.path.join(opt['path']['val_images'], str(current_step), '_', str(step))
                    util.mkdir(img_dir)
                    save_lr_path = os.path.join(img_dir,
                                                "{:s}_LR.png".format(img_name))
                    util.save_img(lr_img, save_lr_path)

                    sr_img = util.tensor2img(visuals["SR"])  # uint8
                    gt_img = util.tensor2img(visuals["GT"])  # uint8

                    save_img_path = os.path.join(
                        img_dir,
                        "{:s}_{:d}.png".format(img_name, current_step))
                    util.save_img(sr_img, save_img_path)

                    # calculate PSNR on border-cropped images
                    crop_size = opt["scale"]
                    gt_img = gt_img / 255.0
                    sr_img = sr_img / 255.0
                    cropped_sr_img = sr_img[crop_size:-crop_size,
                                            crop_size:-crop_size, :]
                    cropped_gt_img = gt_img[crop_size:-crop_size,
                                            crop_size:-crop_size, :]

                    avg_psnr += util.calculate_psnr(cropped_sr_img * 255,
                                                    cropped_gt_img * 255)
                    idx += 1

                avg_psnr = avg_psnr / idx

                # log
                logger.info("# Validation # PSNR: {:.6f}".format(avg_psnr))
                logger_val = logging.getLogger("val")  # validation logger
                logger_val.info(
                    "<epoch:{:3d}, iter:{:8,d}, psnr: {:.6f}".format(
                        epoch, current_step, avg_psnr))
                # tensorboard logger
                if opt["use_tb_logger"] and "debug" not in opt["name"]:
                    tb_logger.add_scalar("psnr", avg_psnr, current_step)

            #### save models and training states
            if current_step % opt["logger"]["save_checkpoint_freq"] == 0:
                if rank <= 0:
                    logger.info("Saving models and training states.")
                    model.save(current_step)
                    model.save_training_state(epoch, current_step)

    if rank <= 0:
        logger.info("Saving the final model.")
        model.save("latest")
        logger.info("End of Predictor and Corrector training.")
    # Only close the tensorboard writer when it was actually created
    # (rank <= 0 with tb logging enabled); the unconditional close raised
    # NameError on worker ranks and on runs without tb logging.
    if rank <= 0 and opt["use_tb_logger"] and "debug" not in opt["name"]:
        tb_logger.close()
コード例 #3
0
def worker_init_fn(worker_id, num_threads=1):
    """DataLoader worker initializer: seed each worker's RNGs from its id
    and cap the worker's CPU thread count.

    Parameters
    ----------
    worker_id : int
        Index of the DataLoader worker; used directly as the seed so each
        worker draws a distinct, reproducible random stream.
    num_threads : int, optional
        CPU thread budget per worker (default 1, avoiding oversubscription
        when many workers run in parallel).
    """
    utils.set_random_seed(worker_id)
    utils.reset_cpu_threads(num_threads)
コード例 #4
0
ファイル: train.py プロジェクト: zwvews/dgl-lifesci
def main(rank, args):
    """Train and validate a DGMG molecule generator in one (sub)process.

    Parameters
    ----------
    rank : int
        Subprocess id
    args : dict
        Configuration
    """
    if rank == 0:
        t1 = time.time()

    set_random_seed(args['seed'])
    # Remove the line below will result in problems for multiprocess
    torch.set_num_threads(1)

    # Setup dataset and data loader.
    # Each process works on its own data subset (subset_id=rank).
    dataset = MoleculeDataset(args['dataset'],
                              args['order'], ['train', 'val'],
                              subset_id=rank,
                              n_subsets=args['num_processes'])

    # Note that currently the batch size for the loaders should only be 1.
    train_loader = DataLoader(dataset.train_set,
                              batch_size=args['batch_size'],
                              shuffle=True,
                              collate_fn=dataset.collate)
    val_loader = DataLoader(dataset.val_set,
                            batch_size=args['batch_size'],
                            shuffle=True,
                            collate_fn=dataset.collate)

    # Only rank 0 owns the tensorboard writer and progress printers.
    if rank == 0:
        try:
            from tensorboardX import SummaryWriter
            writer = SummaryWriter(args['log_dir'])
        except ImportError:
            print(
                'If you want to use tensorboard, install tensorboardX with pip.'
            )
            writer = None
        train_printer = Printer(args['nepochs'], len(dataset.train_set),
                                args['batch_size'], writer)
        val_printer = Printer(args['nepochs'], len(dataset.val_set),
                              args['batch_size'])
    else:
        # NOTE(review): `writer`/`train_printer` stay undefined on
        # rank != 0; all their uses below are guarded by `rank == 0`.
        val_printer = None

    # Initialize model
    model = DGMG(atom_types=dataset.atom_types,
                 bond_types=dataset.bond_types,
                 node_hidden_size=args['node_hidden_size'],
                 num_prop_rounds=args['num_propagation_rounds'],
                 dropout=args['dropout'])

    # Single process: plain optimizer; multiprocess: a wrapper that also
    # coordinates updates across processes.
    if args['num_processes'] == 1:
        from utils import Optimizer
        optimizer = Optimizer(args['lr'],
                              Adam(model.parameters(), lr=args['lr']))
    else:
        from utils import MultiProcessOptimizer
        optimizer = MultiProcessOptimizer(
            args['num_processes'], args['lr'],
            Adam(model.parameters(), lr=args['lr']))

    if rank == 0:
        t2 = time.time()
    best_val_prob = 0

    # Training
    for epoch in range(args['nepochs']):
        model.train()
        if rank == 0:
            print('Training')

        for i, data in enumerate(train_loader):
            # Log-likelihood of the generation action sequence.
            log_prob = model(actions=data, compute_log_prob=True)
            prob = log_prob.detach().exp()

            # Maximize likelihood <=> minimize negative log prob.
            loss_averaged = -log_prob
            prob_averaged = prob
            optimizer.backward_and_step(loss_averaged)
            if rank == 0:
                train_printer.update(epoch + 1, loss_averaged.item(),
                                     prob_averaged.item())

        synchronize(args['num_processes'])

        # Validation: average the per-process validation log-likelihood.
        val_log_prob = evaluate(epoch, model, val_loader, val_printer)
        if args['num_processes'] > 1:
            dist.all_reduce(val_log_prob, op=dist.ReduceOp.SUM)
        val_log_prob /= args['num_processes']
        # Strictly speaking, the computation of probability here is different from what is
        # performed on the training set as we first take an average of log likelihood and then
        # take the exponentiation. By Jensen's inequality, the resulting value is then a
        # lower bound of the real probabilities.
        val_prob = (-val_log_prob).exp().item()
        val_log_prob = val_log_prob.item()
        if val_prob >= best_val_prob:
            if rank == 0:
                torch.save({'model_state_dict': model.state_dict()},
                           args['checkpoint_dir'])
                print(
                    'Old val prob {:.10f} | new val prob {:.10f} | model saved'
                    .format(best_val_prob, val_prob))
            best_val_prob = val_prob
        elif epoch >= args['warmup_epochs']:
            # No improvement after the warmup period: decay learning rate.
            optimizer.decay_lr()

        if rank == 0:
            print('Validation')
            if writer is not None:
                writer.add_scalar('validation_log_prob', val_log_prob, epoch)
                writer.add_scalar('validation_prob', val_prob, epoch)
                writer.add_scalar('lr', optimizer.lr, epoch)
            print('Validation log prob {:.4f} | prob {:.10f}'.format(
                val_log_prob, val_prob))

        synchronize(args['num_processes'])

    # Timing summary (rank 0 only).
    if rank == 0:
        t3 = time.time()
        print('It took {} to setup.'.format(datetime.timedelta(seconds=t2 -
                                                               t1)))
        print('It took {} to finish training.'.format(
            datetime.timedelta(seconds=t3 - t2)))
        print(
            '--------------------------------------------------------------------------'
        )
        print('On average, an epoch takes {}.'.format(
            datetime.timedelta(seconds=(t3 - t2) / args['nepochs'])))
コード例 #5
0
def main():
    """Run the active-learning loop: query labels for each budget round,
    retrain on the (possibly growing) labeled pool, then evaluate
    closed-set accuracy for that round."""
    config = get_config()

    # A fixed seed keeps runs comparable unless randomness is requested.
    if not config.use_random_seed:
        set_random_seed(1)

    # Cumulative budgets, e.g. [100, 200, 300]: the labeled pool gains
    # 100 new samples each round until all 300 budgets are used.
    budget_list = get_budget_list_from_config(config)

    # Every directory/save path used below.
    paths_dict = prepare_active_learning_dir_from_config(config, budget_list)

    dataset_info = prepare_dataset_from_config(
        config,
        paths_dict['data_download_path'],
        paths_dict['data_save_path'],
    )

    time_stamp = time.strftime("%Y-%m-%d %H:%M")

    # Persist the train-set details once for later analysis.
    if not os.path.exists(paths_dict['trainset_info_path']):
        torch.save(dataset_info.trainset_info,
                   paths_dict['trainset_info_path'])

    # Training configuration: backbone architecture, lr, batch size, ...
    trainer_config = get_trainer_config(config.data,
                                        config.training_method,
                                        config.train_mode)

    discovered_samples = dataset_info.discovered_samples
    discovered_classes = dataset_info.discovered_classes

    # Trainer drives both training and querying.
    trainer = Trainer(training_method=config.training_method,
                      trainer_config=trainer_config,
                      dataset_info=dataset_info)

    use_sequential = config.active_query_scheme == 'sequential'

    for round_idx, cumulative_budget in enumerate(budget_list):
        # budget_list entries are cumulative targets; sequential mode only
        # queries the increment over the previous round.
        if use_sequential and round_idx > 0:
            budget = cumulative_budget - budget_list[round_idx - 1]
        else:
            budget = cumulative_budget

        new_discovered_samples, new_discovered_classes = trainer.query(
            discovered_samples,
            discovered_classes,
            budget=budget,
            query_method=config.query_method,
            query_result_path=paths_dict['active_query_results'][cumulative_budget],
            verbose=config.verbose,
        )

        if use_sequential:
            print("Using sequential mode, we updated the discovered samples")
            discovered_samples = new_discovered_samples
            discovered_classes = new_discovered_classes
        else:
            print("Using independent mode, we do not update the initial labeled pool.")

        trainer.train(
            new_discovered_samples,
            new_discovered_classes,
            ckpt_path=paths_dict['active_ckpt_results'][cumulative_budget],
            verbose=config.verbose,
        )

        closed_set_test_acc = trainer.eval_closed_set(
            new_discovered_classes,
            result_path=paths_dict['active_test_results'][cumulative_budget],
            verbose=config.verbose,
        )
0
ファイル: train.py プロジェクト: AyanKumarBhunia/STDF-PyTorch
def main():
    """Entry point for STDF (MFVQE) training.

    Builds the (optionally distributed) datasets/dataloaders, the model,
    loss/optimizer/scheduler/criterion, optionally pre-evaluates the
    un-enhanced inputs, then runs the training loop with periodic
    validation and checkpointing. Only rank 0 writes logs.
    """
    # ==========
    # parameters
    # ==========

    opts_dict = receive_arg()
    rank = opts_dict['train']['rank']
    unit = opts_dict['train']['criterion']['unit']
    num_iter = int(opts_dict['train']['num_iter'])
    interval_print = int(opts_dict['train']['interval_print'])
    interval_val = int(opts_dict['train']['interval_val'])

    # ==========
    # init distributed training
    # ==========

    if opts_dict['train']['is_dist']:
        utils.init_dist(local_rank=rank, backend='nccl')

    # TO-DO: load resume states if exists
    pass

    # ==========
    # create logger
    # ==========

    if rank == 0:
        log_dir = op.join("exp", opts_dict['train']['exp_name'])
        utils.mkdir(log_dir)
        log_fp = open(opts_dict['train']['log_path'], 'w')

        # log all parameters
        msg = (f"{'<' * 10} Hello {'>' * 10}\n"
               f"Timestamp: [{utils.get_timestr()}]\n"
               f"\n{'<' * 10} Options {'>' * 10}\n"
               f"{utils.dict2str(opts_dict)}")
        print(msg)
        log_fp.write(msg + '\n')
        log_fp.flush()

    # ==========
    # TO-DO: init tensorboard
    # ==========

    pass

    # ==========
    # fix random seed
    # ==========

    seed = opts_dict['train']['random_seed']
    # Offset the seed by rank so each process draws a distinct random stream.
    utils.set_random_seed(seed + rank)

    # ==========
    # Ensure reproducibility or Speed up
    # ==========

    #torch.backends.cudnn.benchmark = False  # if reproduce
    #torch.backends.cudnn.deterministic = True  # if reproduce
    torch.backends.cudnn.benchmark = True  # speed up

    # ==========
    # create train and val data prefetchers
    # ==========

    # create datasets
    train_ds_type = opts_dict['dataset']['train']['type']
    val_ds_type = opts_dict['dataset']['val']['type']
    radius = opts_dict['network']['radius']
    assert train_ds_type in dataset.__all__, \
        "Not implemented!"
    assert val_ds_type in dataset.__all__, \
        "Not implemented!"
    train_ds_cls = getattr(dataset, train_ds_type)
    val_ds_cls = getattr(dataset, val_ds_type)
    train_ds = train_ds_cls(opts_dict=opts_dict['dataset']['train'],
                            radius=radius)
    val_ds = val_ds_cls(opts_dict=opts_dict['dataset']['val'], radius=radius)

    # create datasamplers
    train_sampler = utils.DistSampler(
        dataset=train_ds,
        num_replicas=opts_dict['train']['num_gpu'],
        rank=rank,
        ratio=opts_dict['dataset']['train']['enlarge_ratio'])
    val_sampler = None  # no need to sample val data

    # create dataloaders
    train_loader = utils.create_dataloader(
        dataset=train_ds,
        opts_dict=opts_dict,
        sampler=train_sampler,
        phase='train',
        seed=opts_dict['train']['random_seed'])
    val_loader = utils.create_dataloader(dataset=val_ds,
                                         opts_dict=opts_dict,
                                         sampler=val_sampler,
                                         phase='val')
    assert train_loader is not None

    batch_size = opts_dict['dataset']['train']['batch_size_per_gpu'] * \
        opts_dict['train']['num_gpu']  # divided by all GPUs
    num_iter_per_epoch = math.ceil(len(train_ds) * \
        opts_dict['dataset']['train']['enlarge_ratio'] / batch_size)
    num_epoch = math.ceil(num_iter / num_iter_per_epoch)
    val_num = len(val_ds)

    # create dataloader prefetchers
    tra_prefetcher = utils.CPUPrefetcher(train_loader)
    val_prefetcher = utils.CPUPrefetcher(val_loader)

    # ==========
    # create model
    # ==========

    model = MFVQE(opts_dict=opts_dict['network'])

    model = model.to(rank)
    # model.load_state_dict(torch.load('~/STDF-PyTorch/Code/exp/QP27/MFQEv2_R3_enlarge300x/ckp_220000.pt')['state_dict'])

    if opts_dict['train']['is_dist']:
        model = DDP(model, device_ids=[rank])
    """
    # load pre-trained generator
    ckp_path = opts_dict['network']['stdf']['load_path']
    checkpoint = torch.load(ckp_path)
    state_dict = checkpoint['state_dict']
    if ('module.' in list(state_dict.keys())[0]) and (not opts_dict['train']['is_dist']):  # multi-gpu pre-trained -> single-gpu training
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k[7:]  # remove module
            new_state_dict[name] = v
        model.load_state_dict(new_state_dict)
        print(f'loaded from {ckp_path}')
    elif ('module.' not in list(state_dict.keys())[0]) and (opts_dict['train']['is_dist']):  # single-gpu pre-trained -> multi-gpu training
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = 'module.' + k  # add module
            new_state_dict[name] = v
        model.load_state_dict(new_state_dict)
        print(f'loaded from {ckp_path}')
    else:  # the same way of training
        model.load_state_dict(state_dict)
        print(f'loaded from {ckp_path}')
    """

    # ==========
    # define loss func & optimizer & scheduler & scheduler & criterion
    # ==========

    # define loss func
    assert opts_dict['train']['loss'].pop('type') == 'CharbonnierLoss', \
        "Not implemented."
    loss_func = utils.CharbonnierLoss(**opts_dict['train']['loss'])

    # define optimizer
    assert opts_dict['train']['optim'].pop('type') == 'Adam', \
        "Not implemented."
    optimizer = optim.Adam(model.parameters(), **opts_dict['train']['optim'])

    # define scheduler
    if opts_dict['train']['scheduler']['is_on']:
        assert opts_dict['train']['scheduler'].pop('type') == \
            'CosineAnnealingRestartLR', "Not implemented."
        # 'is_on' is removed before **-expanding the kwargs, then restored
        # because the flag is consulted again inside the training loop.
        del opts_dict['train']['scheduler']['is_on']
        scheduler = utils.CosineAnnealingRestartLR(
            optimizer, **opts_dict['train']['scheduler'])
        opts_dict['train']['scheduler']['is_on'] = True

    # define criterion
    assert opts_dict['train']['criterion'].pop('type') == \
        'PSNR', "Not implemented."
    criterion = utils.PSNR()

    #

    start_iter = 0  # should be restored
    start_epoch = start_iter // num_iter_per_epoch

    # display and log
    if rank == 0:
        msg = (f"\n{'<' * 10} Dataloader {'>' * 10}\n"
               f"total iters: [{num_iter}]\n"
               f"total epochs: [{num_epoch}]\n"
               f"iter per epoch: [{num_iter_per_epoch}]\n"
               f"val sequence: [{val_num}]\n"
               f"start from iter: [{start_iter}]\n"
               f"start from epoch: [{start_epoch}]")
        print(msg)
        log_fp.write(msg + '\n')
        log_fp.flush()

    # ==========
    # evaluate original performance, e.g., PSNR before enhancement
    # ==========

    vid_num = val_ds.get_vid_num()
    if opts_dict['train']['pre-val'] and rank == 0:
        msg = f"\n{'<' * 10} Pre-evaluation {'>' * 10}"
        print(msg)
        log_fp.write(msg + '\n')

        per_aver_dict = {}
        for i in range(vid_num):
            per_aver_dict[i] = utils.Counter()
        pbar = tqdm(total=val_num, ncols=opts_dict['train']['pbar_len'])

        # fetch the first batch
        val_prefetcher.reset()
        val_data = val_prefetcher.next()

        while val_data is not None:
            # get data
            gt_data = val_data['gt'].to(rank)  # (B [RGB] H W)
            lq_data = val_data['lq'].to(rank)  # (B T [RGB] H W)
            index_vid = val_data['index_vid'].item()
            name_vid = val_data['name_vid'][0]  # bs must be 1!
            b, _, _, _, _ = lq_data.shape

            # eval: PSNR of the center (un-enhanced) frame vs ground truth
            batch_perf = np.mean([
                criterion(lq_data[i, radius, ...], gt_data[i])
                for i in range(b)
            ])  # bs must be 1!

            # log
            per_aver_dict[index_vid].accum(volume=batch_perf)

            # display
            pbar.set_description("{:s}: [{:.3f}] {:s}".format(
                name_vid, batch_perf, unit))
            pbar.update()

            # fetch next batch
            val_data = val_prefetcher.next()

        pbar.close()

        # log
        ave_performance = np.mean([
            per_aver_dict[index_vid].get_ave() for index_vid in range(vid_num)
        ])
        msg = "> ori performance: [{:.3f}] {:s}".format(ave_performance, unit)
        print(msg)
        log_fp.write(msg + '\n')
        log_fp.flush()

    if opts_dict['train']['is_dist']:
        torch.distributed.barrier()  # all processes wait for ending

    if rank == 0:
        msg = f"\n{'<' * 10} Training {'>' * 10}"
        print(msg)
        log_fp.write(msg + '\n')

        # create timer
        total_timer = utils.Timer()  # total tra + val time of each epoch

    # ==========
    # start training + validation (test)
    # ==========

    model.train()
    num_iter_accum = start_iter
    for current_epoch in range(start_epoch, num_epoch + 1):
        # shuffle distributed subsamplers before each epoch
        if opts_dict['train']['is_dist']:
            train_sampler.set_epoch(current_epoch)

        # fetch the first batch
        tra_prefetcher.reset()
        train_data = tra_prefetcher.next()

        # train this epoch
        while train_data is not None:

            # over sign
            num_iter_accum += 1
            # (removed a leftover debug `print(num_iter_accum)` that spammed
            # stdout on every iteration from every process)
            if num_iter_accum > num_iter:
                break

            # get data
            gt_data = train_data['gt'].to(rank)  # (B [RGB] H W)
            lq_data = train_data['lq'].to(rank)  # (B T [RGB] H W)
            b, _, c, _, _ = lq_data.shape
            input_data = torch.cat([lq_data[:, :, i, ...] for i in range(c)],
                                   dim=1)
            # B [R1 ... R7 G1 ... G7 B1 ... B7] H W
            enhanced_data = model(input_data)

            # get loss
            optimizer.zero_grad()  # zero grad
            loss = torch.mean(
                torch.stack([
                    loss_func(enhanced_data[i], gt_data[i]) for i in range(b)
                ]))  # cal loss
            loss.backward()  # cal grad
            optimizer.step()  # update parameters

            # update learning rate
            if opts_dict['train']['scheduler']['is_on']:
                scheduler.step()  # should after optimizer.step()

            if (num_iter_accum % interval_print == 0) and (rank == 0):
                # display & log
                lr = optimizer.param_groups[0]['lr']
                loss_item = loss.item()
                msg = (f"iter: [{num_iter_accum}]/{num_iter}, "
                       f"epoch: [{current_epoch}]/{num_epoch - 1}, "
                       "lr: [{:.3f}]x1e-4, loss: [{:.4f}]".format(
                           lr * 1e4, loss_item))
                print(msg)
                log_fp.write(msg + '\n')

            if ((num_iter_accum % interval_val == 0) or \
                (num_iter_accum == num_iter)) and (rank == 0):
                # save model
                checkpoint_save_path = (
                    f"{opts_dict['train']['checkpoint_save_path_pre']}"
                    f"{num_iter_accum}"
                    ".pt")
                state = {
                    'num_iter_accum': num_iter_accum,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }
                if opts_dict['train']['scheduler']['is_on']:
                    state['scheduler'] = scheduler.state_dict()
                torch.save(state, checkpoint_save_path)

                # validation
                with torch.no_grad():
                    per_aver_dict = {}
                    for index_vid in range(vid_num):
                        per_aver_dict[index_vid] = utils.Counter()
                    pbar = tqdm(total=val_num,
                                ncols=opts_dict['train']['pbar_len'])

                    # train -> eval
                    model.eval()

                    # fetch the first batch
                    val_prefetcher.reset()
                    val_data = val_prefetcher.next()

                    while val_data is not None:
                        # get data
                        gt_data = val_data['gt'].to(rank)  # (B [RGB] H W)
                        lq_data = val_data['lq'].to(rank)  # (B T [RGB] H W)
                        index_vid = val_data['index_vid'].item()
                        name_vid = val_data['name_vid'][0]  # bs must be 1!
                        b, _, c, _, _ = lq_data.shape
                        input_data = torch.cat(
                            [lq_data[:, :, i, ...] for i in range(c)],
                            dim=1)  # B [R1 ... R7 G1 ... G7 B1 ... B7] H W
                        enhanced_data = model(input_data)  # (B [RGB] H W)

                        # eval
                        batch_perf = np.mean([
                            criterion(enhanced_data[i], gt_data[i])
                            for i in range(b)
                        ])  # bs must be 1!

                        # display
                        pbar.set_description("{:s}: [{:.3f}] {:s}".format(
                            name_vid, batch_perf, unit))
                        pbar.update()

                        # log
                        per_aver_dict[index_vid].accum(volume=batch_perf)

                        # fetch next batch
                        val_data = val_prefetcher.next()

                    # end of val
                    pbar.close()

                    # eval -> train
                    model.train()

                # log
                ave_per = np.mean([
                    per_aver_dict[index_vid].get_ave()
                    for index_vid in range(vid_num)
                ])
                msg = ("> model saved at {:s}\n"
                       "> ave val per: [{:.3f}] {:s}").format(
                           checkpoint_save_path, ave_per, unit)
                print(msg)
                log_fp.write(msg + '\n')
                log_fp.flush()

            if opts_dict['train']['is_dist']:
                torch.distributed.barrier()  # all processes wait for ending

            # fetch next batch
            train_data = tra_prefetcher.next()

        # end of this epoch (training dataloader exhausted)

    # end of all epochs

    # ==========
    # final log & close logger
    # ==========

    if rank == 0:
        total_time = total_timer.get_interval() / 3600
        msg = "TOTAL TIME: [{:.1f}] h".format(total_time)
        print(msg)
        log_fp.write(msg + '\n')

        msg = (f"\n{'<' * 10} Goodbye {'>' * 10}\n"
               f"Timestamp: [{utils.get_timestr()}]")
        print(msg)
        log_fp.write(msg + '\n')

        log_fp.close()
コード例 #7
0
import os
import sys

sys.path.insert(0, os.getcwd())
import numpy as np
import argparse
import torch
from torch import nn
import matplotlib.pyplot as plt
import time
from utils import check_dir, set_random_seed, accuracy, mIoU, get_logger
from models.second_segmentation import Segmentator
from data.transforms import get_transforms_binary_segmentation
from models.pretraining_backbone import ResNet18Backbone
from data.segmentation import DataReaderBinarySegmentation

set_random_seed(0)
global_step = 0


def parse_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument('data_folder',
                        type=str,
                        help="folder containing the data")
    parser.add_argument('weights_init', type=str, default="ImageNet")
    parser.add_argument('--output-root', type=str, default='results')
    parser.add_argument('--lr', type=float, default=0.01, help='learning rate')
    parser.add_argument('--bs', type=int, default=32, help='batch_size')
    parser.add_argument('--size', type=int, default=256, help='image size')
    parser.add_argument('--snapshot-freq',
                        type=int,
Code example #8
0
    parser.add_argument('--batch-size', type=int, default=128)
    parser.add_argument('--num-epoch', type=int, default=100)
    parser.add_argument('--lr', type=float, default=3e-4)

    # Model
    parser.add_argument('--model-type', type=str, default='basic')
    parser.add_argument('--likelihood-type', type=str, default='bernoulli')
    parser.add_argument('--hidden-channels', type=int, default=256)
    parser.add_argument('--latent-dim', type=int, default=10)
    parser.add_argument('--num-latents', type=int, default=10)
    parser.add_argument('--beta', type=float, default=1.0)
    parser.add_argument('--temperature', type=float, default=0.6)

    args = parser.parse_args()

    set_random_seed(args.seed)

    experiment_root = pathlib.Path('experiments') / args.experiment_name
    args.experiment_root = str(experiment_root)
    if not experiment_root.exists():
        experiment_root.mkdir()

    with open(experiment_root / 'config.json', 'w') as f:
        json.dump(vars(args), f, indent=4, sort_keys=True)

    experiment_log_path = experiment_root / 'logs'
    args.experiment_log_path = str(experiment_log_path)
    if not experiment_log_path.exists():
        experiment_log_path.mkdir()

    experiment_model_path = experiment_root / 'models'
Code example #9
0
File: run.py — Project: Qijing-Tech/CMSD_baseline
    train_vocab = datadir.train_dataset.vocab
    train_sets = datadir.train_dataset.raw_sets

    dev_vocab = datadir.dev_dataset.vocab
    dev_sets = datadir.dev_dataset.raw_sets

    # # for train
    # train_flat_word_list, train_word_idxes, train_cluster_idxes = get_word_idxes_and_cluster_idxes(train_sets, train_vocab, word2id)
    # train_word_embeddings = embedding[np.array(train_word_idxes)]

    run_times = DataConfig['run_times']
    seed_list = DataConfig['seed_list']
    ari_list, nmi_list, fmi_list = [], [], []
    for i in range(run_times):
        # TODO : model
        set_random_seed(seed=seed_list[i])
        dev_flat_word_list, dev_word_idxes, dev_cluster_idxes = get_word_idxes_and_cluster_idxes(dev_sets, dev_vocab,word2id)
        dev_word_embeddings = embedding[np.array(dev_word_idxes)]
        
        if method_type == 'kmeans':
            # set cluster number by prior
            k_cluster = len(dev_cluster_idxes.keys())
            model = Kmeans(n_cluster=k_cluster, seed=seed_list[i])
            pred_labels = model.predict(dev_word_embeddings)
        elif method_type == 'gmms':
            k_component = len(dev_cluster_idxes.keys())
            model = GMMs(n_component=k_component, seed=seed_list[i])
            pred_labels = model.predict(dev_word_embeddings)
        elif method_type == 'ac':
            k_cluster = len(dev_cluster_idxes.keys())
            model = AC(n_cluster=k_cluster)
Code example #10
0
        elif inc_option == 'partial+noisy':
            config['partial_unk_rate'] = float(
                para_option.split('-')[1].split('+')[0])
            config['noisy_diff_rate'] = float(
                para_option.split('-')[1].split('+')[1])
            print('partial_unk_rate', config['partial_unk_rate'])
            print('noisy_diff_rate', config['noisy_diff_rate'])
            config['noisy_lambda'] = 1.0
            config['para_option'] = para_option + '-1.0'
        elif inc_option == 'auxiliary':
            config['auxiliary_option'] = para_option.split('-')[1]
            config['k-gram'] = 5
            config['k-gram-freq-gate'] = 2
            config['inc_lambda'] = 1.0
        elif inc_option == 'knowledge':
            config['k-gram'] = int(para_option.split('-')[1])
            config['k-gram-freq-gate'] = 2
            config['inc_lambda'] = 1.0
        elif inc_option == 'constraints':
            config['constraint_option'] = para_option.split('-')[1]
        elif inc_option == 'partial+constraints':
            config['constraint_option'] = para_option.split('-')[1].split(
                '+')[0]
            config['partial_unk_rate'] = float(
                para_option.split('-')[1].split('+')[1])

    print('config', config)
    set_random_seed(config['seed'])
    print('incidental option', config['inc_option'])
    run_test_experiments(config)
Code example #11
0
File: cross_domain_qa.py — Project: HornHehhf/PABI
    write_data(small_large_qamr, small_large_qamr_file, 'small_large_qamr')
    write_data(test_qamr, test_qamr_file, 'test_qamr')
    write_data(large_qasrl, large_qasrl_file, 'large_qasrl')
    write_data(small_large_qasrl, small_large_qasrl_file, 'small_large_qasrl')
    write_data(test_qasrl, test_qasrl_file, 'test_qasrl')
    write_data(large_qare, large_qare_file, 'large_qare')
    write_data(small_large_qare, small_large_qare_file, 'small_large_qare')
    write_data(test_qare, test_qare_file, 'test_qare')
    write_data(large_newsqa, large_newsqa_file, 'large_newsqa')
    write_data(small_large_newsqa, small_large_newsqa_file,
               'small_large_newsqa')
    write_data(test_newsqa, test_newsqa_file, 'test_newsqa')
    write_data(large_triviaqa, large_triviaqa_file, 'large_triviaqa')
    write_data(small_large_triviaqa, small_large_triviaqa_file,
               'small_large_triviaqa')
    write_data(test_triviaqa, test_triviaqa_file, 'test_triviaqa')


if __name__ == '__main__':
    set_random_seed(666)
    # dev_file = 'QA-data/TriviaQA/TriviaQA_squad_dev.json'
    # unique_dev_file = 'QA-data/TriviaQA/TriviaQA_squad_dev.unique.json'
    # get_unique_answer_data(dev_file, unique_dev_file)
    # input_file_list = ['QA-data/TriviaQA/TriviaQA_squad_train.json', 'QA-data/TriviaQA/TriviaQA_squad_dev.unique.json']
    # output_file = 'QA-data/TriviaQA/triviaqa.all.json'
    # option = 'triviaqa'
    # combine_data(input_file_list, output_file, option)
    generate_qa_data()
    # input_file = 'QA-data/xdomain-QA/small_large_triviaqa.json'
    # get_stats(input_file)
Code example #12
0
File: DDPGAgent.py — Project: Vaillus/RL_toolkit
 def set_seed(self, seed):
     if seed:
         self.seed = seed
         set_random_seed(self.seed)
         self.function_approximator.set_seed(seed)
Code example #13
0
File: DDPGAgent.py — Project: Vaillus/RL_toolkit
 def init_seed(self, seed):
     if seed:
         set_random_seed(self.seed)
         return seed
Code example #14
0
            else:
                anchor_img[s] = x[0]

        yield [
            groundings, 1 - groundings, seen_mask,
            np.array(anchor_aud),
            np.array(anchor_img),
            np.array(anchor_labels)
        ], np.zeros(CURR_BATCH_SIZE)
        j += BATCH_SIZE // 2


if __name__ == '__main__':

    set_gpu()
    random_seed = set_random_seed(int(sys.argv[1]) if len(sys.argv) > 1 else 0)

    BATCH_SIZE = 512
    NUM_EPOCHS = 3
    MARGIN = 0.8

    path = '/home/venkatk/Experiments/New-Experiment/'  #Data/Features/'
    imgnet_path = '/home/venkatk/Experiments/Audio-Visual-Deep-Multimodal-Networks-master/'

    # Load Data -------------------------------------------------------------------------
    imgnet_audio_train, imgnet_audio_val, imgnet_img_train, imgnet_img_val = get_data(
        imgnet_path + 'Data/', [
            'audio_features_train', 'audio_features_val',
            'image_features_train', 'image_features_val'
        ])
    MAX_LEN = imgnet_audio_train[list(imgnet_audio_train)[0]].shape[1]