Code Example #1
File: main.py Project: chiminghui/examples
def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
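The main_worker function launched above is defined elsewhere in the project. As a hedged sketch only (not the project's implementation), a worker compatible with this launcher would accept the local GPU index that mp.spawn prepends, derive its global rank, and initialize the process group; args.rank and args.dist_backend are assumptions borrowed from typical DDP launchers:

import torch
import torch.distributed as dist

def main_worker(gpu, ngpus_per_node, args):
    # gpu is the local process index injected by mp.spawn
    if args.distributed:
        # assumption: global rank = node rank * GPUs per node + local GPU index
        rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=rank)
    torch.cuda.set_device(gpu)
    # ... build the model, wrap it in DistributedDataParallel, and train ...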
Code Example #2
                                          normalize,
                                          logger,
                                          it,
                                          num_vis,
                                          tag='val_images')
                del loss_dict, gloss, gout
            else:
                break

        save_epoch = opts.save_epoch

        if epoch % save_epoch == 0 and epoch > save_epoch - 1 and logging:
            print('Saving checkpoint')
            utils.save_model(
                os.path.join(opts.log_dir, 'model' + str(epoch) + '.pt'),
                epoch, netG, netD, opts)
            utils.save_optim(
                os.path.join(opts.log_dir, 'optim' + str(epoch) + '.pt'),
                epoch, optG_temporal, optG_graphic, optD)


if __name__ == '__main__':
    parser = config.init_parser()
    opts, args = parser.parse_args(sys.argv)
    if opts.num_gpu > 1:
        os.environ['MASTER_ADDR'] = 'localhost'
        os.environ['MASTER_PORT'] = '8888'
        mp.spawn(train_gamegan, nprocs=opts.num_gpu, args=(opts, ))
    else:
        train_gamegan(opts.gpu, opts)
Code Example #3
File: cli.py Project: zidanewenqsh/stylegan2-pytorch
def train_from_folder(
    data = './data',
    results_dir = './results',
    models_dir = './models',
    name = 'default',
    new = False,
    load_from = -1,
    image_size = 128,
    network_capacity = 16,
    fmap_max = 512,
    transparent = False,
    batch_size = 5,
    gradient_accumulate_every = 6,
    num_train_steps = 150000,
    learning_rate = 2e-4,
    lr_mlp = 0.1,
    ttur_mult = 1.5,
    rel_disc_loss = False,
    num_workers =  None,
    save_every = 1000,
    evaluate_every = 1000,
    generate = False,
    num_generate = 1,
    generate_interpolation = False,
    interpolation_num_steps = 100,
    save_frames = False,
    num_image_tiles = 8,
    trunc_psi = 0.75,
    mixed_prob = 0.9,
    fp16 = False,
    no_pl_reg = False,
    cl_reg = False,
    fq_layers = [],
    fq_dict_size = 256,
    attn_layers = [],
    no_const = False,
    aug_prob = 0.,
    aug_types = ['translation', 'cutout'],
    top_k_training = False,
    generator_top_k_gamma = 0.99,
    generator_top_k_frac = 0.5,
    dataset_aug_prob = 0.,
    multi_gpus = False,
    calculate_fid_every = None,
    calculate_fid_num_images = 12800,
    clear_fid_cache = False,
    seed = 42,
    log = False
):
    model_args = dict(
        name = name,
        results_dir = results_dir,
        models_dir = models_dir,
        batch_size = batch_size,
        gradient_accumulate_every = gradient_accumulate_every,
        image_size = image_size,
        network_capacity = network_capacity,
        fmap_max = fmap_max,
        transparent = transparent,
        lr = learning_rate,
        lr_mlp = lr_mlp,
        ttur_mult = ttur_mult,
        rel_disc_loss = rel_disc_loss,
        num_workers = num_workers,
        save_every = save_every,
        evaluate_every = evaluate_every,
        num_image_tiles = num_image_tiles,
        trunc_psi = trunc_psi,
        fp16 = fp16,
        no_pl_reg = no_pl_reg,
        cl_reg = cl_reg,
        fq_layers = fq_layers,
        fq_dict_size = fq_dict_size,
        attn_layers = attn_layers,
        no_const = no_const,
        aug_prob = aug_prob,
        aug_types = cast_list(aug_types),
        top_k_training = top_k_training,
        generator_top_k_gamma = generator_top_k_gamma,
        generator_top_k_frac = generator_top_k_frac,
        dataset_aug_prob = dataset_aug_prob,
        calculate_fid_every = calculate_fid_every,
        calculate_fid_num_images = calculate_fid_num_images,
        clear_fid_cache = clear_fid_cache,
        mixed_prob = mixed_prob,
        log = log
    )

    if generate:
        model = Trainer(**model_args)
        model.load(load_from)
        samples_name = timestamped_filename()
        for num in tqdm(range(num_generate)):
            model.evaluate(f'{samples_name}-{num}', num_image_tiles)
        print(f'sample images generated at {results_dir}/{name}/{samples_name}')
        return

    if generate_interpolation:
        model = Trainer(**model_args)
        model.load(load_from)
        samples_name = timestamped_filename()
        model.generate_interpolation(samples_name, num_image_tiles, num_steps = interpolation_num_steps, save_frames = save_frames)
        print(f'interpolation generated at {results_dir}/{name}/{samples_name}')
        return

    world_size = torch.cuda.device_count()

    if world_size == 1 or not multi_gpus:
        run_training(0, 1, model_args, data, load_from, new, num_train_steps, name, seed)
        return

    mp.spawn(run_training,
        args=(world_size, model_args, data, load_from, new, num_train_steps, name, seed),
        nprocs=world_size,
        join=True)
Code Example #4
File: train_moons.py Project: tanghyd/gravflows
if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('--num_gpus', type=int, default=4)
    parser.add_argument('--batch_size', type=int, default=512)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--interval', type=int, default=1)
    parser.add_argument('--video_title', type=str, default='flow')
    parser.add_argument('--cmap', type=str, default='viridis')
    parser.add_argument('--fps', type=float, default=50)
    parser.add_argument(
        '--cascade',
        default=False,
        action="store_true",
        help="Whether to show intermediate layers in visualiation.")
    args = parser.parse_args()

    assert isinstance(args.num_gpus, int)
    assert args.num_gpus > 0 and args.num_gpus <= torch.cuda.device_count()

    if args.num_gpus == 1:
        train(**args.__dict__)
    else:
        # torch multiprocessing
        mp.spawn(
            train_distributed,  # train function specifically for DDP
            args=(args.batch_size, args.num_gpus, args.epochs, args.interval,
                  args.video_title, args.cmap, args.fps, args.cascade),
            nprocs=args.num_gpus,
            join=True)
Code Example #5
 def global_update(self, state, lr, E=1):
     """Execute one round of serial global update"""
     self._send(state)
     mp.spawn(self._client_update, (lr, E), nprocs=self.client_count)
     self._recv()
     return self._fed_avg(), sum(self.losses) / self.client_count
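Since self._client_update is spawned as a bound method, torch.multiprocessing calls it with the client index as the first argument after self. The implementation is not shown in this snippet; the signature below is only an inferred sketch:

 def _client_update(self, client_rank, lr, E):
     """Sketch: run E local epochs on client `client_rank` with learning rate lr."""
     # the actual local training and result reporting are assumed, not shown here
     ...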
Code Example #6
File: train.py Project: xh-liu/Open-Edit
            
        visuals = OrderedDict([('synthesized_image', trainer.get_latest_generated()),
                                   ('real_image', data_i['image'])])
        visualizer.display_current_results(visuals, epoch, iter_counter.total_steps_so_far)

        if rank == 0:
            print('saving the latest model (epoch %d, total_steps %d)' %
                  (epoch, iter_counter.total_steps_so_far))
            trainer.save('latest')
            iter_counter.record_current_iter()

        trainer.update_learning_rate(epoch)
        iter_counter.record_epoch_end()

        if (epoch % opt.save_epoch_freq == 0 or epoch == iter_counter.total_epochs) and (rank == 0):
            print('saving the model at the end of epoch %d, iters %d' %
                  (epoch, iter_counter.total_steps_so_far))
            trainer.save(epoch)
    
    print('Training was successfully finished.')
    
if __name__ == '__main__':
    global TrainOptions
    TrainOptions = TrainOptions()
    opt = TrainOptions.parse(save=True)
    opt.world_size = opt.num_gpu
    opt.mpdist = True

    mp.set_start_method('spawn', force=True)
    mp.spawn(main_worker, nprocs=opt.world_size, args=(opt.world_size, opt))
Code Example #7
    model = UGATITPlus(args)
    model.train()
    model.cuda()
    # model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])
    model.G_A = torch.nn.parallel.DistributedDataParallel(model.G_A,
                                                          device_ids=[rank])
    model.G_B = torch.nn.parallel.DistributedDataParallel(model.G_B,
                                                          device_ids=[rank])
    model.D_A = torch.nn.parallel.DistributedDataParallel(model.D_A,
                                                          device_ids=[rank])
    model.D_B = torch.nn.parallel.DistributedDataParallel(model.D_B,
                                                          device_ids=[rank])

    dataset = UnpairDataset(args)
    sample = torch.utils.data.distributed.DistributedSampler(dataset)
    train_loader = DataLoader(dataset,
                              args.batchsize,
                              num_workers=args.worker,
                              sampler=sample)
    trainer = Train(model, train_loader, sample, args)
    trainer.train()


if __name__ == '__main__':
    args = cfgs
    port_id = 10000 + np.random.randint(0, 5000)
    args.dist_url = 'tcp://127.0.0.1:' + str(port_id)
    args.gpus_num = torch.cuda.device_count()
    mp.spawn(main_worker, nprocs=args.gpus_num, args=(args, ))
Code Example #8
 def start_predicting(self, trainer):
     mp.spawn(self.new_process, **self.mp_spawn_kwargs)
Code Example #9
File: train.py Project: bage79/transformer-evolution
    parser.add_argument('--project',
                        default='albert-train',
                        type=str,
                        required=False,
                        help='project name for wandb')
    args = parser.parse_args()
    args.save = os.path.abspath(
        os.path.join(os.getcwd(), "save", f'{get_model_filename(args)}.pth'))
    for arg in vars(args):
        print(arg, getattr(args, arg))

    if torch.cuda.is_available():
        args.n_gpu = torch.cuda.device_count() if args.gpu is None else 1
    else:
        args.n_gpu = 0
    set_seed(args)

    if not os.path.exists(os.path.dirname(args.save)):
        os.makedirs(os.path.dirname(args.save))

    if 1 < args.n_gpu:
        # noinspection PyTypeChecker
        mp.spawn(train_model,
                 args=(args.n_gpu, args),
                 nprocs=args.n_gpu,
                 join=True)
    else:
        train_model(rank=0 if args.gpu is None else args.gpu,
                    world_size=args.n_gpu,
                    args=args)
Code Example #10
    training_dbs = [datasets[dataset](config["db"], split=train_split, sys_config=system_config) for _ in range(workers)]
    validation_db = datasets[dataset](config["db"], split=val_split, sys_config=system_config)

    if rank == 0:
        print("system config...")
        pprint.pprint(system_config.full)

        print("db config...")
        pprint.pprint(training_dbs[0].configs)

        print("len of db: {}".format(len(training_dbs[0].db_inds)))
        print("distributed: {}".format(args.distributed))

    train(training_dbs, validation_db, system_config, model, args)

if __name__ == "__main__":
    args = parse_args()

    distributed = args.distributed
    world_size  = args.world_size

    if distributed and world_size < 0:
        raise ValueError("world size must be greater than 0 in distributed training")

    ngpus_per_node  = torch.cuda.device_count()
    if distributed:
        args.world_size = ngpus_per_node * args.world_size
        mp.spawn(main, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        main(None, ngpus_per_node, args)
Code Example #11
File: test_quantize_fx.py Project: lresende/pytorch
    def _test_model_impl(
            self, mode, name, model, eager_quantizable_model,
            check_with_eager=True,
            diff_of_quant=None,
            diff_from_eager=None):
        if diff_of_quant is None or diff_from_eager is None:
            diff_of_quant = {}
            diff_from_eager = {}

        if mode not in diff_of_quant or mode not in diff_from_eager:
            diff_of_quant[mode] = {}
            diff_from_eager[mode] = {}

        input_tensor = torch.rand(1, 3, 224, 224)
        input_tensor_inception = torch.rand(1, 3, 299, 299)
        output_value = torch.randint(0, 1, (1,))

        # print('quantizing:', name, ' mode:', mode)
        if name == 'inception_v3':
            input_value = input_tensor_inception
        else:
            input_value = input_tensor

        qconfig = default_qconfig if mode == 'static' else default_qat_qconfig
        qconfig_dict = {'': qconfig}
        graph_module = symbolic_trace(model)
        # print('graph module:', graph_module.src)
        script = torch.jit.script(graph_module)

        # make sure graph module and script module are both runnable
        original_out = graph_module(input_value)
        is_not_tuple_out = not isinstance(original_out, tuple)
        script_out = script(input_value)
        self.assertEqual(
            (original_out - script_out).abs().max(), 0,
            'Result of original graph module and script module does not match')

        # set to train just before quantization
        if mode != 'static':
            model.train()

        graph_module = fuse_fx(graph_module)
        prepared = prepare_fx(graph_module, qconfig_dict)

        if mode == 'ddp':
            mp.spawn(run_ddp,
                     args=(world_size, prepared),
                     nprocs=world_size,
                     join=True)
        elif mode == 'qat':
            assert prepared.training, 'prepared must be in training mode for qat'
            optimizer = torch.optim.SGD(prepared.parameters(), lr=0.0001)
            criterion = nn.CrossEntropyLoss()
            train_one_epoch(prepared, criterion, optimizer, [(input_value, output_value)], torch.device('cpu'), 1)
        else:
            for i in range(10):
                prepared(input_value)

        # print('after observation root:', prepared.root)

        qgraph = convert_fx(prepared)
        # print('after quantization root:', qgraph.root)
        # print('after quantization code:', qgraph.src)
        qgraph.eval()
        qgraph_script = torch.jit.script(qgraph)
        # print('quantized and scripted:', qgraph_script.graph)

        qgraph_out = qgraph(input_value)
        qgraph_script = qgraph_script(input_value)

        if is_not_tuple_out:
            diff_of_quant[mode][name] = (original_out - qgraph_out).abs().max()
            assert torch.allclose(qgraph_out, qgraph_script), 'graph, scripted graph'
        else:
            print('tuple output')

        if eager_quantizable_model is not None:
            # comparing to eager mode quantization
            qeager = eager_quantizable_model
            ref_out = qeager(input_value)
            qeager.qconfig = qconfig
            if mode == 'static':
                qeager.fuse_model()
                prepare(qeager, inplace=True)
            else:
                qeager.train()
                qeager.fuse_model()
                prepare_qat(qeager, inplace=True)

            # calibration
            if mode == 'ddp':
                mp.spawn(run_ddp,
                         args=(world_size, qeager),
                         nprocs=world_size,
                         join=True)
            elif mode == 'qat':
                assert qeager.training, 'qeager should be in training mode for qat'
                optimizer = torch.optim.SGD(qeager.parameters(), lr=0.0001)
                train_one_epoch(qeager, criterion, optimizer, [(input_value, output_value)], torch.device('cpu'), 1)
            else:
                for i in range(10):
                    qeager(input_value)

            # print('ref after observation:', qeager)

            convert(qeager, inplace=True)
            qeager.eval()

            # print('ref after quantization:', qeager)
            qeager_out = qeager(input_value)
            qeager_script = torch.jit.script(qeager)
            qscript_out = qeager_script(input_value)
            if is_not_tuple_out:
                diff_from_eager[mode][name] = (qeager_out - qgraph_out).abs().max()
                if check_with_eager:
                    self.assertEqual(diff_from_eager[mode][name], 0,
                                     'Result of graph mode quantization and ' +
                                     'eager mode quantization on model: ' + name +
                                     ' should match. Mode: ' + mode +
                                     ' diff:' + str(diff_from_eager[mode][name]))
Code Example #12
File: dqn_apex.py Project: yueweizhizhu/machin
            smoothed_total_reward = (smoothed_total_reward * 0.9 +
                                     total_reward * 0.1)
            logger.info("Process {} Episode {} total reward={:.2f}".format(
                rank, episode, smoothed_total_reward))

            if smoothed_total_reward > solved_reward:
                reward_fulfilled += 1
                if reward_fulfilled >= solved_repeat:
                    logger.info("Environment solved!")

                    # will cause torch RPC to complain
                    # since other processes may have not finished yet.
                    # just for demonstration.
                    exit(0)
            else:
                reward_fulfilled = 0

    elif rank in (2, 3):
        # wait for enough samples
        while dqn_apex.replay_buffer.all_size() < 500:
            sleep(0.1)
        while True:
            dqn_apex.update()


if __name__ == "__main__":
    # spawn 4 sub processes
    # Process 0 and 1 will be workers(samplers)
    # Process 2 and 3 will be learners
    spawn(main, nprocs=4)
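The spawned main function appears only partially above; based on the comments, its rank dispatch is roughly the skeleton below (the elided bodies belong to the project and are not reproduced here):

def main(rank):
    if rank in (0, 1):
        # sampler processes: run episodes, track smoothed rewards, fill the replay buffer
        ...
    elif rank in (2, 3):
        # learner processes: wait for enough replay samples, then call dqn_apex.update() forever
        ...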
Code Example #13
File: cli.py Project: lucidrains/lightweight-gan
def train_from_folder(
    data='./data',
    results_dir='./results',
    models_dir='./models',
    name='default',
    new=False,
    load_from=-1,
    image_size=256,
    optimizer='adam',
    fmap_max=512,
    transparent=False,
    greyscale=False,
    batch_size=10,
    gradient_accumulate_every=4,
    num_train_steps=150000,
    learning_rate=2e-4,
    save_every=1000,
    evaluate_every=1000,
    generate=False,
    generate_types=['default', 'ema'],
    generate_interpolation=False,
    aug_test=False,
    aug_prob=None,
    aug_types=['cutout', 'translation'],
    dataset_aug_prob=0.,
    attn_res_layers=[32],
    freq_chan_attn=False,
    disc_output_size=1,
    dual_contrast_loss=False,
    antialias=False,
    interpolation_num_steps=100,
    save_frames=False,
    num_image_tiles=None,
    num_workers=None,
    multi_gpus=False,
    calculate_fid_every=None,
    calculate_fid_num_images=12800,
    clear_fid_cache=False,
    seed=42,
    amp=False,
    show_progress=False,
):
    num_image_tiles = default(num_image_tiles, 4 if image_size > 512 else 8)

    model_args = dict(name=name,
                      results_dir=results_dir,
                      models_dir=models_dir,
                      batch_size=batch_size,
                      gradient_accumulate_every=gradient_accumulate_every,
                      attn_res_layers=cast_list(attn_res_layers),
                      freq_chan_attn=freq_chan_attn,
                      disc_output_size=disc_output_size,
                      dual_contrast_loss=dual_contrast_loss,
                      antialias=antialias,
                      image_size=image_size,
                      num_image_tiles=num_image_tiles,
                      optimizer=optimizer,
                      num_workers=num_workers,
                      fmap_max=fmap_max,
                      transparent=transparent,
                      greyscale=greyscale,
                      lr=learning_rate,
                      save_every=save_every,
                      evaluate_every=evaluate_every,
                      aug_prob=aug_prob,
                      aug_types=cast_list(aug_types),
                      dataset_aug_prob=dataset_aug_prob,
                      calculate_fid_every=calculate_fid_every,
                      calculate_fid_num_images=calculate_fid_num_images,
                      clear_fid_cache=clear_fid_cache,
                      amp=amp)

    if generate:
        model = Trainer(**model_args)
        model.load(load_from)
        samples_name = timestamped_filename()
        checkpoint = model.checkpoint_num
        dir_result = model.generate(samples_name, num_image_tiles, checkpoint,
                                    generate_types)
        print(f'sample images generated at {dir_result}')
        return

    if generate_interpolation:
        model = Trainer(**model_args)
        model.load(load_from)
        samples_name = timestamped_filename()
        model.generate_interpolation(samples_name,
                                     num_image_tiles,
                                     num_steps=interpolation_num_steps,
                                     save_frames=save_frames)
        print(
            f'interpolation generated at {results_dir}/{name}/{samples_name}')
        return

    if show_progress:
        model = Trainer(**model_args)
        model.show_progress(num_images=num_image_tiles, types=generate_types)
        return

    if aug_test:
        DiffAugmentTest(data=data,
                        image_size=image_size,
                        batch_size=batch_size,
                        types=aug_types,
                        nrow=num_image_tiles)
        return

    world_size = torch.cuda.device_count()

    if world_size == 1 or not multi_gpus:
        run_training(0, 1, model_args, data, load_from, new, num_train_steps,
                     name, seed)
        return

    mp.spawn(run_training,
             args=(world_size, model_args, data, load_from, new,
                   num_train_steps, name, seed),
             nprocs=world_size,
             join=True)
Code Example #14
File: train.py Project: lucaslie/torchprune
    def _train_procedure(self,
                         net,
                         n_idx,
                         retraining,
                         keep_ratio,
                         s_idx=0,
                         r_idx=0):

        # the parameters
        steps_per_epoch = len(self.train_loader)
        params = self.retrain_params if retraining else self.train_params

        # check the file names we need
        file_name_net = self._get_net_name(net, n_idx, retraining, keep_ratio,
                                           s_idx, r_idx, False)
        file_name_check = self._get_net_name(net, n_idx, retraining,
                                             keep_ratio, s_idx, r_idx, True)
        file_name_rewind = self._get_net_name(
            net,
            n_idx,
            retraining,
            keep_ratio,
            s_idx,
            r_idx,
            False,
            False,
            True,
        )
        file_name_best = self._get_net_name(
            net,
            n_idx,
            retraining,
            keep_ratio,
            s_idx,
            r_idx,
            get_checkpoint=False,
            get_best=True,
        )

        # get test metrics assembled
        metrics_test = get_test_metrics(params)

        # set up the train logger
        # doing this before returning with pre-trained net is important so that
        # we don't have old data stored in the train logger.
        if self._train_logger is not None:
            self._train_logger.initialize(
                net_class_name=type(net).__name__,
                is_retraining=retraining,
                num_epochs=params["numEpochs"],
                steps_per_epoch=steps_per_epoch,
                early_stop_epoch=params["earlyStopEpoch"],
                metrics_test=metrics_test,
                n_idx=n_idx,
                r_idx=r_idx,
                s_idx=s_idx,
            )

        # check if network is already pretrained and done. then we can return
        found_trained_net, _ = load_checkpoint(
            file_name_net,
            net,
            train_logger=self._train_logger,
            loc=str(next(net.parameters()).device),
        )

        if found_trained_net:
            print("Loading pre-trained network...")
            return

        # retrieve net handle
        if hasattr(net, "compressed_net"):
            net_handle = net.compressed_net
        else:
            net_handle = net

        # enable grad computations
        torch.set_grad_enabled(True)

        # empty gpu cache to make sure everything is ready for retraining
        torch.cuda.empty_cache()

        # register sparsity pattern before retraining
        if retraining:
            net_handle.register_sparsity_pattern()

        args = (
            self.num_gpus,
            self.train_loader.num_workers,
            net_handle,
            retraining,
            self.train_loader.dataset,
            self.valid_loader.dataset,
            self.train_loader.collate_fn,
            params,
            self._train_logger,
            file_name_check,
            file_name_rewind,
            file_name_best,
        )

        # setup torch.distributed and spawn processes
        if not retraining or net.retrainable:
            if self.num_gpus > 1:
                os.environ["MASTER_ADDR"] = "127.0.0.1"
                os.environ["MASTER_PORT"] = "12355"
                mp.spawn(train_with_worker, nprocs=self.num_gpus, args=args)
            else:
                train_with_worker(0, *args)

        # disable grad computations
        torch.set_grad_enabled(False)

        # load result into this net here
        load_checkpoint(
            file_name_check,
            net_handle,
            train_logger=self._train_logger,
            loc=str(next(net_handle.parameters()).device),
        )

        # then overwrite with early stopping checkpoint (net only, no logger!)
        found_best, epoch_best = load_checkpoint(
            file_name_best,
            net_handle,
            loc=str(next(net_handle.parameters()).device),
        )
        if found_best:
            print(f"Loaded early stopping checkpoint from epoch: {epoch_best}")

        # store full net as well
        save_checkpoint(file_name_net, net, params["numEpochs"],
                        self._train_logger)

        # delete checkpoint to save storage
        delete_checkpoint(file_name_check)
        delete_checkpoint(file_name_best)
Code Example #15
File: hyparam_tune_jobs.py Project: katieb1/NEvolve
    cluster.add_slurm_cmd(cmd='cpus-per-task',
                          value='1',
                          comment='nb cpus / task')
    cluster.add_slurm_cmd(cmd='account',
                          value='def-training-wa_gpu',
                          comment='account')
    cluster.add_slurm_cmd(cmd='reservation',
                          value='hackathon-wr_gpu',
                          comment='reservation')
    cluster.add_slurm_cmd(cmd='mem-per-cpu',
                          value='10g',
                          comment='memory per CPU')

    # Notify job status
    cluster.notify_job_status(email='*****@*****.**',
                              on_done=True,
                              on_fail=True)

    # Set job options
    cluster.per_experiment_nb_gpus = 1
    cluster.per_experiment_nb_nodes = 1
    cluster.job_time = '3:00:00'

    # Run models on cluster
    cluster.optimize_parallel_cluster_cpu(mp.spawn(train,
                                                   nprocs=args.gpus,
                                                   args=(args, )),
                                          nb_trials=20,
                                          job_name='first_tt_batch',
                                          job_display_name='CNN_ddp_tune')
Code Example #16
 def start_training(self, trainer):
     mp.spawn(self.new_process, **self.mp_spawn_kwargs)
     # reset optimizers, since main process is never used for training and thus does not have a valid optim state
     trainer.optimizers = []
Code Example #17
def run_demo(demo_fn, world_size):
    mp.spawn(demo_fn, args=(world_size, ), nprocs=world_size, join=True)
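run_demo follows the pattern of the official DDP tutorials: the launched function receives the process rank first and the forwarded world size second. A minimal demo_fn compatible with it might look like the sketch below, assuming MASTER_ADDR and MASTER_PORT are exported elsewhere:

import torch.distributed as dist

def demo_basic(rank, world_size):
    # each spawned process joins the same process group
    dist.init_process_group("gloo", rank=rank, world_size=world_size)
    # ... build a model, wrap it in DistributedDataParallel, run a training step ...
    dist.destroy_process_group()

# run_demo(demo_basic, world_size=2)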
Code Example #18
            # loss
            batch_loss = cross_entropy_loss(y_pred, y)
            epoch_loss = loss_container(batch_loss.item())

            # metrics: top-1,5 error
            epoch_acc = raw_metric(y_pred, y)

    # end of validation
        epoch_loss = torch.tensor(epoch_loss).cuda()
        epoch_acc = torch.tensor(epoch_acc).cuda()
        dist.all_reduce(epoch_loss, op=dist.ReduceOp.SUM)
        dist.all_reduce(epoch_acc, op=dist.ReduceOp.SUM)
        epoch_loss = epoch_loss.item()/ngpus_per_node
        epoch_acc = epoch_acc.cpu().numpy()/ngpus_per_node
    logs['val_{}'.format(loss_container.name)] = epoch_loss
    logs['val_{}'.format(raw_metric.name)] = epoch_acc
    end_time = time.time()

    batch_info = 'Val Loss {:.4f}, Val Acc ({:.2f})'.format(epoch_loss, epoch_acc[0])

    # write log for this epoch
    if local_rank == 0:
        logging.info('Valid: {}, Time {:3.2f}'.format(batch_info, end_time - start_time))
        logging.info('')


if __name__ == '__main__':
    ngpus_per_node = torch.cuda.device_count()
    mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, ''))
Code Example #19
File: train_gwpe.py Project: tanghyd/gravflows
        
    cleanup_nccl()

if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    # training settings
    parser.add_argument('--num_gpus', type=int, default=1)
    parser.add_argument('--lr', type=float, default=5e-4)
    parser.add_argument('--batch_size', type=int, default=4096)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--interval', type=int, default=2)
    parser.add_argument('--save', type=int, default=10)
    parser.add_argument('--num_workers', type=int, default=4, help="Number of workers for dataloader.")
    parser.add_argument('--num_basis', type=int, default=100, help="Number of SVD reduced basis elements to use")
    parser.add_argument('--verbose', default=False, action="store_true")
    # parser.add_argument('--profile', default=False, action="store_true")


    args = parser.parse_args()

    assert isinstance(args.num_gpus, int), "num_gpus argument must be an integer."
    assert args.num_gpus > 0 and args.num_gpus <= torch.cuda.device_count(), f"{args.num_gpus} not a valid number of GPU devices."

    # data distributed parallel
    mp.spawn(
        train,
        args=tuple(args.__dict__.values()),  # assumes parser loaded in correct order
        nprocs=args.num_gpus,
        join=True
    )
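Passing args=tuple(args.__dict__.values()) ties the spawned train function's parameter order to the order in which arguments were added to the parser, as the inline comment warns. A less order-sensitive alternative (a sketch, not the project's code) forwards the namespace itself so train(rank, args) can read fields by name:

    # sketch: train(rank, args) then reads args.lr, args.batch_size, ... by attribute
    mp.spawn(
        train,
        args=(args,),
        nprocs=args.num_gpus,
        join=True,
    )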
Code Example #20
        print(('Validation ||' + (' %s: %.3f |' * len(losses)) + ')') %
              tuple(loss_labels),
              flush=True)


def compute_validation_map(yolact_net, dataset):
    with torch.no_grad():
        yolact_net.eval()
        logger = logging.getLogger("yolact.eval")
        logger.info("Computing validation mAP (this may take a while)...")
        eval_script.evaluate(yolact_net,
                             dataset,
                             train_mode=True,
                             train_cfg=cfg)
        yolact_net.train()


def setup_eval():
    eval_script.parse_args([
        '--no_bar', '--fast_eval', '--max_images=' + str(args.validation_size)
    ])


if __name__ == '__main__':
    if args.num_gpus is None:
        args.num_gpus = torch.cuda.device_count()
    if args.num_gpus > 1:
        mp.spawn(train, nprocs=args.num_gpus, args=(args, ), daemon=False)
    else:
        train(0, args=args)
Code Example #21
def main():
    ngpus_per_node = torch.cuda.device_count()
    worldsize = ngpus_per_node
    mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, ))
Code Example #22
File: cifar-train.py Project: nerdslab/myow
def main():
    world_size = torch.cuda.device_count()
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = FLAGS.port
    mp.spawn(train, nprocs=world_size, args=(world_size, FLAGS), join=True)
Code Example #23
    def fit(self,
            model: LightningModule,
            train_dataloader: Optional[DataLoader] = None,
            val_dataloaders: Optional[Union[DataLoader,
                                            List[DataLoader]]] = None):
        r"""
        Runs the full optimization routine.

        Args:
            model: Model to fit.

            train_dataloader: A Pytorch
                DataLoader with training samples. If the model has
                a predefined train_dataloader method this will be skipped.

            val_dataloaders: Either a single
                Pytorch Dataloader or a list of them, specifying validation samples.
                If the model has a predefined val_dataloaders method this will be skipped

        Example::

            # Option 1,
            # Define the train_dataloader() and val_dataloader() fxs
            # in the lightningModule
            # RECOMMENDED FOR MOST RESEARCH AND APPLICATIONS TO MAINTAIN READABILITY
            trainer = Trainer()
            model = LightningModule()
            trainer.fit(model)

            # Option 2
            # in production cases we might want to pass different datasets to the same model
            # Recommended for PRODUCTION SYSTEMS
            train, val = DataLoader(...), DataLoader(...)
            trainer = Trainer()
            model = LightningModule()
            trainer.fit(model, train_dataloader=train, val_dataloaders=val)

            # Option 1 & 2 can be mixed, for example the training set can be
            # defined as part of the model, and validation can then be fed to .fit()

        """
        # bind logger and other properties
        model.logger = self.logger
        self.copy_trainer_model_properties(model)

        # clean hparams
        if hasattr(model, 'hparams'):
            parsing.clean_namespace(model.hparams)

        # set up the passed in dataloaders (if needed)
        self.__attach_dataloaders(model, train_dataloader, val_dataloaders)

        # check that model is configured correctly
        self.check_model_configuration(model)

        # callbacks
        self.on_fit_start()
        if self.is_function_implemented('on_fit_start'):
            model.on_fit_start()

        # on multi-gpu jobs we only want to manipulate (download, etc) on node_rank=0, local_rank=0
        # or in the case where each node needs to do its own manipulation in which case just local_rank=0
        if self.can_prepare_data():
            model.prepare_data()
            self._is_data_prepared = True

        # Run auto batch size scaling
        if self.auto_scale_batch_size:
            if isinstance(self.auto_scale_batch_size, bool):
                self.auto_scale_batch_size = 'power'
            self.scale_batch_size(model, mode=self.auto_scale_batch_size)
            model.logger = self.logger  # reset logger binding

        # Run learning rate finder:
        if self.auto_lr_find:
            self._run_lr_finder_internally(model)
            model.logger = self.logger  # reset logger binding

        # route to appropriate start method
        # when using multi-node or DDP within a node start each module in a separate process
        if self.use_ddp2:
            if self.is_slurm_managing_tasks:
                task = int(os.environ['SLURM_LOCALID'])

            # torchelastic or general non_slurm ddp2
            elif 'WORLD_SIZE' in os.environ and ('GROUP_RANK' in os.environ
                                                 or 'NODE_RANK' in os.environ):
                task = int(os.environ['LOCAL_RANK'])

            self.ddp_train(task, model)
        elif self.use_ddp:
            if self.is_slurm_managing_tasks:
                task = int(os.environ['SLURM_LOCALID'])
                self.ddp_train(task, model)

            # torchelastic or general non_slurm ddp
            elif 'WORLD_SIZE' in os.environ and ('GROUP_RANK' in os.environ
                                                 or 'NODE_RANK' in os.environ):
                task = int(os.environ['LOCAL_RANK'])
                self.ddp_train(task, model)

            elif self.distributed_backend == 'cpu_ddp':
                self.__set_random_port()
                self.model = model
                mp.spawn(self.ddp_train,
                         nprocs=self.num_processes,
                         args=(model, ))

            elif self.distributed_backend == 'ddp_spawn':
                model.share_memory()

                # spin up peers
                mp.spawn(self.ddp_train,
                         nprocs=self.num_processes,
                         args=(model, ))

            elif self.distributed_backend == 'ddp':
                self.spawn_ddp_children(model)

        # 1 gpu or dp option triggers training using DP module
        # easier to avoid NCCL issues
        elif self.use_dp:
            self.dp_train(model)

        elif self.use_horovod:
            self.horovod_train(model)

        elif self.single_gpu:
            self.single_gpu_train(model)

        elif self.use_tpu:  # pragma: no-cover
            rank_zero_info(f'training on {self.tpu_cores} TPU cores')

            #  COLAB_GPU is an env var available by default in Colab environments.
            start_method = 'fork' if self.on_colab_kaggle else 'spawn'

            # track for predict
            self.model = model

            # train
            if self.tpu_id is not None:
                self.tpu_train(self.tpu_id, model)
            else:
                xmp.spawn(self.tpu_train,
                          args=(model, ),
                          nprocs=self.tpu_cores,
                          start_method=start_method)

            # load weights if not interrupted
            self.load_spawn_weights(model)
            self.model = model

        # ON CPU
        else:
            # run through amp wrapper
            if self.use_amp:
                raise MisconfigurationException(
                    'amp + cpu is not supported.  Please use a GPU option')

            # CHOOSE OPTIMIZER
            # allow for lr schedulers as well
            self.optimizers, self.lr_schedulers, self.optimizer_frequencies = self.init_optimizers(
                model)

            self.run_pretrain_routine(model)

        # callbacks
        self.on_fit_end()

        # model hooks
        if self.is_function_implemented('on_fit_end'):
            model.on_fit_end()

        # return 1 when finished
        # used for testing or when we need to know that training succeeded
        return 1
Code Example #24
File: A3c.py Project: nocommentcode/MachineLearning
    workers = []
    for i in range(num_workers):
        color = colours[i % len(colours)]
        worker_id = "worker_{}".format(i + 1)
        worker_network = ActorCriticModel(image_dim, color_dim, n_classes, C,
                                          lr, cpu_device, weight_repository)
        worker = Worker(worker_id, env_name, worker_network, n_steps, gamma,
                        color, step_queue)
        workers.append(worker)

    print("Running Workers".format(num_workers))
    threads = []
    for worker in workers:
        t = mp.spawn(run,
                     args=(worker, ),
                     nprocs=1,
                     join=False,
                     daemon=False,
                     start_method='spawn')
        threads.append(t)

    steps = []
    losses = []
    batch_size = 1028
    while True:
        worker_steps = step_queue.get()

        if len(steps) > batch_size:
            states = []
            actions = []
            rewards = []
            for step in steps:
Code Example #25
File: main_ssl.py Project: yuty2009/prml-python
        if hasattr(args, 'writer') and args.writer:
            args.writer.add_scalar("Loss/train", train_loss, epoch)
            args.writer.add_scalar("Accu/test", test_accu1, epoch)
            args.writer.add_scalar("Misc/learning_rate", lr, epoch)


if __name__ == '__main__':

    args = parser.parse_args()

    output_prefix = f"ssl_{args.ssl}_{args.arch}"
    output_prefix += "/session_" + datetime.datetime.now().strftime(
        "%Y%m%d%H%M%S")
    if not hasattr(args, 'output_dir'):
        args.output_dir = args.data_dir
    args.output_dir = os.path.join(args.output_dir, output_prefix)
    os.makedirs(args.output_dir)
    print("=> results will be saved to {}".format(args.output_dir))

    args = dist.init_distributed_mode(args)
    if args.mp_dist:
        if args.world_size > args.ngpus:
            print(f"Training with {args.world_size // args.ngpus} nodes, "
                  f"waiting until all nodes join before starting training")
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main, args=(args, ), nprocs=args.ngpus, join=True)
    else:
        main(args.gpu, args)
Code Example #26
File: train_helpers.py Project: zhaoqichang/ivpgan
def run_training(training_fn, nprocs, *args):
    mp.spawn(fn=training_fn,
             args=(nprocs, *args),
             nprocs=nprocs,
             join=True)
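A hypothetical call site for this wrapper, illustrating that mp.spawn injects the process rank ahead of the (nprocs, *args) tuple built above; train_fold and its extra arguments are invented for this example:

def train_fold(rank, nprocs, fold, flags):
    # rank comes from mp.spawn, nprocs and the rest from run_training's args tuple
    print(f"rank {rank}/{nprocs} training fold {fold} with flags {flags}")

# run_training(train_fold, torch.cuda.device_count(), 0, {"lr": 1e-3})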
Code Example #27

if __name__ == '__main__':

    opt = get_opt()

    opt.device = torch.device('cpu' if opt.no_cuda else 'cuda')
    if not opt.no_cuda:
        cudnn.benchmark = True
    if opt.accimage:
        torchvision.set_image_backend('accimage')

    opt.ngpus_per_node = torch.cuda.device_count()
    if opt.distributed:
        opt.world_size = opt.ngpus_per_node * opt.world_size
        mp.spawn(main_worker, nprocs=opt.ngpus_per_node, args=(opt,))
    else:
        main_worker(-1, opt)

'''

# for rate pretrain and fine tune

python main_rate.py \
--label_path /scr-ssd/enguyen/slowed_clips_0.5x/frames_fps16/meta/video_metadata.json \
--video_id_path /scr-ssd/enguyen/432_meta/clip_ids_split_merged.json \
--frame_dir /scr-ssd/enguyen/slowed_clips_0.5x/frames_fps16/ \
--image_size 224 \
--result_path /vision2/u/enguyen/results/rate_pred/run5_stoch_window24 \
--dataset cpr_rate \
--n_classes 2 \
Code Example #28
def run_distributed(fn, config, args):
    try:
        mp.spawn(fn, args=(config, args), nprocs=args.n_gpus, join=True)
    except:
        cleanup()
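cleanup() is defined elsewhere in that script; in comparable distributed launchers it typically just tears down the default process group, roughly as sketched below (an assumption, not the project's implementation). Note also that the bare except silently swallows the original error, so re-raising after cleanup is usually preferable:

import torch.distributed as dist

def cleanup():
    # assumption: mirror the common DDP teardown
    if dist.is_initialized():
        dist.destroy_process_group()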
Code Example #29
File: main.py Project: zouheq/tutorials
        dist.init_process_group(
                backend="gloo", rank=rank, world_size=2)

        # Initialize RPC.
        trainer_name = "trainer{}".format(rank)
        rpc.init_rpc(
                trainer_name,
                rank=rank,
                world_size=world_size,
                rpc_backend_options=rpc_backend_options)

        # Trainer just waits for RPCs from master.
    else:
        rpc.init_rpc(
                "ps",
                rank=rank,
                world_size=world_size,
                rpc_backend_options=rpc_backend_options)
        # parameter server does nothing
        pass

    # block until all rpcs finish
    rpc.shutdown()


if __name__=="__main__":
    # 2 trainers, 1 parameter server, 1 master.
    world_size = 4
    mp.spawn(run_worker, args=(world_size, ), nprocs=world_size, join=True)
# END run_worker
Code Example #30
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--cpu",
                        action="store_true",
                        help="If set, we only use CPU.")
    parser.add_argument("--single_gpu",
                        action="store_true",
                        help="If set, we only use single GPU.")
    parser.add_argument("--fp16",
                        action="store_true",
                        help="If set, we will use fp16.")

    parser.add_argument(
        "--fp16_opt_level",
        type=str,
        default="O1",
        help=
        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
        "See details at https://nvidia.github.io/apex/amp.html",
    )

    # environment arguments
    parser.add_argument('-s',
                        '--seed',
                        default=1,
                        type=int,
                        metavar='N',
                        help='manual random seed')
    parser.add_argument('-n',
                        '--num_nodes',
                        default=1,
                        type=int,
                        metavar='N',
                        help='number of nodes')
    parser.add_argument('-g',
                        '--gpus_per_node',
                        default=1,
                        type=int,
                        help='number of gpus per node')
    parser.add_argument('-nr',
                        '--node_rank',
                        default=0,
                        type=int,
                        help='ranking within the nodes')

    # experiments specific arguments
    parser.add_argument('--debug_mode',
                        action='store_true',
                        dest='debug_mode',
                        help='whether this is debug mode or normal')

    parser.add_argument(
        "--model_class_name",
        type=str,
        help="Set the model class of the experiment.",
    )

    parser.add_argument(
        "--experiment_name",
        type=str,
        help=
        "Set the name of the experiment. [model_name]/[data]/[task]/[other]",
    )

    parser.add_argument("--save_prediction",
                        action='store_true',
                        dest='save_prediction',
                        help='Do we want to save prediction')

    parser.add_argument('--epochs',
                        default=2,
                        type=int,
                        metavar='N',
                        help='number of total epochs to run')
    parser.add_argument(
        "--per_gpu_train_batch_size",
        default=16,
        type=int,
        help="Batch size per GPU/CPU for training.",
    )
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass.",
    )
    parser.add_argument(
        "--per_gpu_eval_batch_size",
        default=64,
        type=int,
        help="Batch size per GPU/CPU for evaluation.",
    )

    parser.add_argument("--max_length",
                        default=160,
                        type=int,
                        help="Max length of the sequences.")

    parser.add_argument("--warmup_steps",
                        default=-1,
                        type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument("--max_grad_norm",
                        default=1.0,
                        type=float,
                        help="Max gradient norm.")
    parser.add_argument("--learning_rate",
                        default=1e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay",
                        default=0.0,
                        type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon",
                        default=1e-8,
                        type=float,
                        help="Epsilon for Adam optimizer.")

    parser.add_argument(
        "--eval_frequency",
        default=1000,
        type=int,
        help="set the evaluation frequency, evaluate every X global step.",
    )

    parser.add_argument("--train_data",
                        type=str,
                        help="The training data used in the experiments.")

    parser.add_argument(
        "--train_weights",
        type=str,
        help="The training data weights used in the experiments.")

    parser.add_argument("--eval_data",
                        type=str,
                        help="The training data used in the experiments.")

    args = parser.parse_args()

    if args.cpu:
        args.world_size = 1
        train(-1, args)
    elif args.single_gpu:
        args.world_size = 1
        train(0, args)
    else:  # distributed multiGPU training
        #########################################################
        args.world_size = args.gpus_per_node * args.num_nodes  #
        # os.environ['MASTER_ADDR'] = '152.2.142.184'  # This is the IP address for nlp5
        # maybe we will automatically retrieve the IP later.
        os.environ['MASTER_PORT'] = '88888'  #
        mp.spawn(train, nprocs=args.gpus_per_node,
                 args=(args, ))  # spawn this many processes on this node
Code Example #31
    parser.add_argument("--eval_model_name",
                        default=None,
                        type=str,
                        help="The filename of the model to be loaded from the directory specified in --model_dir")
    parser.add_argument('--mp', '-mp', action='store_true', help="Multiprocessing option")

    args = parser.parse_args()
    procs = []
    use_mp = args.mp
    for split in args.split_schemes:
        flags = Flags()
        args_dict = args.__dict__
        for arg in args_dict:
            setattr(flags, arg, args_dict[arg])
        setattr(flags, "cv", True if flags.fold_num > 2 else False)
        setattr(flags, "views", [(cv, pv) for cv, pv in zip(args.comp_view, args.prot_view)])
        flags['split'] = split
        flags['predict_cold'] = split == 'cold_drug_target'
        flags['cold_drug'] = split == 'cold_drug'
        flags['cold_target'] = split == 'cold_target'
        flags['cold_drug_cluster'] = split == 'cold_drug_cluster'
        flags['split_warm'] = split == 'warm'
        if use_mp:
            p = mp.spawn(fn=main, args=(flags,), join=False)
            procs.append(p)
            # p.start()
        else:
            main(0, flags)
    for proc in procs:
        proc.join()