Example No. 1
train_sampler = SubsetRandomSampler(train_index)
val_sampler = SubsetRandomSampler(val_index)

# Download the test data
testset = torchvision.datasets.CIFAR10(root='./data',
                                       train=False,
                                       download=True,
                                       transform=transform_val_test)

# Data loaders; for the train and validation data we use the samplers defined above to subset the data
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=10,
    sampler=train_sampler,
    num_workers=4,
    generator=torch.Generator().manual_seed(58))
valloader = torch.utils.data.DataLoader(valset,
                                        batch_size=10,
                                        sampler=val_sampler,
                                        num_workers=4)
testloader = torch.utils.data.DataLoader(testset, batch_size=10, num_workers=4)

# Set cuda as device if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)


# Class of the VGG19 neural network; it has to inherit from nn.Module
class VGG19_CIFAR10(nn.Module):
    def __init__(self):
        # Call super constructor of the class
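
The index lists fed to the samplers above are not shown in this snippet; a minimal sketch of one way to build them reproducibly (assuming `trainset` is the full CIFAR-10 training set and that the 90/10 ratio is only illustrative):

num_train = len(trainset)
# Draw a fixed permutation from a privately seeded generator, then split it.
indices = torch.randperm(num_train, generator=torch.Generator().manual_seed(58)).tolist()
split = int(0.9 * num_train)
train_index, val_index = indices[:split], indices[split:]

# Recent PyTorch versions also let the sampler itself take a generator, which
# makes the shuffling order reproducible from run to run:
train_sampler = SubsetRandomSampler(train_index, generator=torch.Generator().manual_seed(58))
val_sampler = SubsetRandomSampler(val_index, generator=torch.Generator().manual_seed(58))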
Example No. 2
def train_and_report_stats(
    config: ConfigSchema,
    rank: Rank = RANK_ZERO,
) -> Generator[Tuple[int, Optional[Stats], Stats, Optional[Stats]], None, None]:
    """Each epoch/pass, for each partition pair, loads in embeddings and edgelist
    from disk, runs HOGWILD training on them, and writes partitions back to disk.
    """

    if config.verbose > 0:
        import pprint
        pprint.PrettyPrinter().pprint(config.to_dict())

    log("Loading entity counts...")
    if maybe_old_entity_path(config.entity_path):
        log("WARNING: It may be that your entity path contains files using the "
            "old format. See D14241362 for how to update them.")
    entity_counts: Dict[str, List[int]] = {}
    for entity, econf in config.entities.items():
        entity_counts[entity] = []
        for part in range(econf.num_partitions):
            with open(os.path.join(
                config.entity_path, "entity_count_%s_%d.txt" % (entity, part)
            ), "rt") as tf:
                entity_counts[entity].append(int(tf.read().strip()))

    # Figure out how many lhs and rhs partitions we need
    nparts_lhs, lhs_partitioned_types = get_partitioned_types(config, Side.LHS)
    nparts_rhs, rhs_partitioned_types = get_partitioned_types(config, Side.RHS)
    vlog("nparts %d %d types %s %s" %
         (nparts_lhs, nparts_rhs, lhs_partitioned_types, rhs_partitioned_types))
    total_buckets = nparts_lhs * nparts_rhs

    sync: AbstractSynchronizer
    bucket_scheduler: AbstractBucketScheduler
    parameter_sharer: Optional[ParameterSharer]
    partition_client: Optional[PartitionClient]
    if config.num_machines > 1:
        if not 0 <= rank < config.num_machines:
            raise RuntimeError("Invalid rank for trainer")
        if not td.is_available():
            raise RuntimeError("The installed PyTorch version doesn't provide "
                               "distributed training capabilities.")
        ranks = ProcessRanks.from_num_invocations(
            config.num_machines, config.num_partition_servers)

        if rank == RANK_ZERO:
            log("Setup lock server...")
            start_server(
                LockServer(
                    num_clients=len(ranks.trainers),
                    nparts_lhs=nparts_lhs,
                    nparts_rhs=nparts_rhs,
                    lock_lhs=len(lhs_partitioned_types) > 0,
                    lock_rhs=len(rhs_partitioned_types) > 0,
                    init_tree=config.distributed_tree_init_order,
                ),
                server_rank=ranks.lock_server,
                world_size=ranks.world_size,
                init_method=config.distributed_init_method,
                groups=[ranks.trainers],
            )

        bucket_scheduler = DistributedBucketScheduler(
            server_rank=ranks.lock_server,
            client_rank=ranks.trainers[rank],
        )

        log("Setup param server...")
        start_server(
            ParameterServer(num_clients=len(ranks.trainers)),
            server_rank=ranks.parameter_servers[rank],
            init_method=config.distributed_init_method,
            world_size=ranks.world_size,
            groups=[ranks.trainers],
        )

        parameter_sharer = ParameterSharer(
            client_rank=ranks.parameter_clients[rank],
            all_server_ranks=ranks.parameter_servers,
            init_method=config.distributed_init_method,
            world_size=ranks.world_size,
            groups=[ranks.trainers],
        )

        if config.num_partition_servers == -1:
            start_server(
                ParameterServer(num_clients=len(ranks.trainers)),
                server_rank=ranks.partition_servers[rank],
                world_size=ranks.world_size,
                init_method=config.distributed_init_method,
                groups=[ranks.trainers],
            )

        if len(ranks.partition_servers) > 0:
            partition_client = PartitionClient(ranks.partition_servers)
        else:
            partition_client = None

        groups = init_process_group(
            rank=ranks.trainers[rank],
            world_size=ranks.world_size,
            init_method=config.distributed_init_method,
            groups=[ranks.trainers],
        )
        trainer_group, = groups
        sync = DistributedSynchronizer(trainer_group)
        dlog = log

    else:
        sync = DummySynchronizer()
        bucket_scheduler = SingleMachineBucketScheduler(
            nparts_lhs, nparts_rhs, config.bucket_order)
        parameter_sharer = None
        partition_client = None
        dlog = lambda msg: None

    # fork early for HOGWILD threads
    log("Creating workers...")
    num_workers = get_num_workers(config.workers)
    pool = create_pool(num_workers)

    def make_optimizer(params: Iterable[torch.nn.Parameter], is_emb: bool) -> Optimizer:
        params = list(params)
        if len(params) == 0:
            optimizer = DummyOptimizer()
        elif is_emb:
            optimizer = RowAdagrad(params, lr=config.lr)
        else:
            if config.relation_lr is not None:
                lr = config.relation_lr
            else:
                lr = config.lr
            optimizer = Adagrad(params, lr=lr)
        optimizer.share_memory()
        return optimizer

    # background_io is only supported in single-machine mode
    background_io = config.background_io and config.num_machines == 1

    checkpoint_manager = CheckpointManager(
        config.checkpoint_path,
        background=background_io,
        rank=rank,
        num_machines=config.num_machines,
        partition_client=partition_client,
    )
    checkpoint_manager.register_metadata_provider(ConfigMetadataProvider(config))
    checkpoint_manager.write_config(config)

    iteration_manager = IterationManager(
        config.num_epochs, config.edge_paths, config.num_edge_chunks,
        iteration_idx=checkpoint_manager.checkpoint_version)
    checkpoint_manager.register_metadata_provider(iteration_manager)

    if config.init_path is not None:
        loadpath_manager = CheckpointManager(config.init_path)
    else:
        loadpath_manager = None

    def load_embeddings(
        entity: EntityName,
        part: Partition,
        strict: bool = False,
        force_dirty: bool = False,
    ) -> Tuple[torch.nn.Parameter, Optional[OptimizerStateDict]]:
        if strict:
            embs, optim_state = checkpoint_manager.read(entity, part,
                                                        force_dirty=force_dirty)
        else:
            # Strict is only false during the first iteration, because in that
            # case the checkpoint may not contain any data (unless a previous
            # run was resumed) so we fall back on initial values.
            embs, optim_state = checkpoint_manager.maybe_read(entity, part,
                                                              force_dirty=force_dirty)
            if embs is None and loadpath_manager is not None:
                embs, optim_state = loadpath_manager.maybe_read(entity, part)
            if embs is None:
                embs, optim_state = init_embs(entity, entity_counts[entity][part],
                                              config.dimension, config.init_scale)
        assert embs.is_shared()
        return torch.nn.Parameter(embs), optim_state

    log("Initializing global model...")

    model = make_model(config)
    trainer = Trainer(
        global_optimizer=make_optimizer(model.parameters(), False),
        loss_fn=config.loss_fn,
        margin=config.margin,
        relations=config.relations,
    )
    evaluator = TrainingRankingEvaluator(
        override_num_batch_negs=config.eval_num_batch_negs,
        override_num_uniform_negs=config.eval_num_uniform_negs,
    )
    eval_batch_size = round_up_to_nearest_multiple(config.batch_size, config.eval_num_batch_negs)

    state_dict, optim_state = checkpoint_manager.maybe_read_model()

    if state_dict is None and loadpath_manager is not None:
        state_dict, optim_state = loadpath_manager.maybe_read_model()
    if state_dict is not None:
        model.load_state_dict(state_dict, strict=False)
    if optim_state is not None:
        trainer.global_optimizer.load_state_dict(optim_state)

    vlog("Loading unpartitioned entities...")
    for entity, econfig in config.entities.items():
        if econfig.num_partitions == 1:
            embs, optim_state = load_embeddings(entity, Partition(0))
            model.set_embeddings(entity, embs, Side.LHS)
            model.set_embeddings(entity, embs, Side.RHS)
            optimizer = make_optimizer([embs], True)
            if optim_state is not None:
                optimizer.load_state_dict(optim_state)
            trainer.entity_optimizers[(entity, Partition(0))] = optimizer

    # start communicating shared parameters with the parameter server
    if parameter_sharer is not None:
        parameter_sharer.share_model_params(model)

    strict = False

    def swap_partitioned_embeddings(
        old_b: Optional[Bucket],
        new_b: Optional[Bucket],
    ):
        # 0. given the old and new buckets, construct data structures to keep
        #    track of old and new embedding (entity, part) tuples

        io_bytes = 0
        log("Swapping partitioned embeddings %s %s" % (old_b, new_b))

        types = ([(e, Side.LHS) for e in lhs_partitioned_types]
                 + [(e, Side.RHS) for e in rhs_partitioned_types])
        old_parts = {(e, old_b.get_partition(side)): side
                     for e, side in types} if old_b is not None else {}
        new_parts = {(e, new_b.get_partition(side)): side
                     for e, side in types} if new_b is not None else {}

        to_checkpoint = set(old_parts) - set(new_parts)
        preserved = set(old_parts) & set(new_parts)

        # 1. checkpoint embeddings that will not be used in the next pair
        #
        if old_b is not None:  # there are previous embeddings to checkpoint
            log("Writing partitioned embeddings")
            for entity, part in to_checkpoint:
                side = old_parts[(entity, part)]
                vlog("Checkpointing (%s %d %s)" %
                     (entity, part, side.pick("lhs", "rhs")))
                embs = model.get_embeddings(entity, side)
                optim_key = (entity, part)
                optim_state = OptimizerStateDict(trainer.entity_optimizers[optim_key].state_dict())
                io_bytes += embs.nelement() * 4  # ignore optim state
                checkpoint_manager.write(entity, part, embs.detach(), optim_state)
                if optim_key in trainer.entity_optimizers:
                    del trainer.entity_optimizers[optim_key]
                # these variables are holding large objects; let them be freed
                del embs
                del optim_state

            bucket_scheduler.release_bucket(old_b)

        # 2. copy old embeddings that will be used in the next pair
        #    into a temporary dictionary
        #
        tmp_emb = {x: model.get_embeddings(x[0], old_parts[x]) for x in preserved}

        for entity, _ in types:
            model.clear_embeddings(entity, Side.LHS)
            model.clear_embeddings(entity, Side.RHS)

        if new_b is None:  # there are no new embeddings to load
            return io_bytes

        # 3. load new embeddings into the model/optimizer, either from disk
        #    or the temporary dictionary
        #
        log("Loading entities")
        for entity, side in types:
            part = new_b.get_partition(side)
            part_key = (entity, part)
            if part_key in tmp_emb:
                vlog("Loading (%s, %d) from preserved" % (entity, part))
                embs, optim_state = tmp_emb[part_key], None
            else:
                vlog("Loading (%s, %d)" % (entity, part))

                force_dirty = bucket_scheduler.check_and_set_dirty(entity, part)
                embs, optim_state = load_embeddings(
                    entity, part, strict=strict, force_dirty=force_dirty)
                io_bytes += embs.nelement() * 4  # ignore optim state

            model.set_embeddings(entity, embs, side)
            tmp_emb[part_key] = embs

            optim_key = (entity, part)
            if optim_key not in trainer.entity_optimizers:
                vlog("Resetting optimizer %s" % (optim_key,))
                optimizer = make_optimizer([embs], True)
                if optim_state is not None:
                    vlog("Setting optim state")
                    optimizer.load_state_dict(optim_state)

                trainer.entity_optimizers[optim_key] = optimizer

        return io_bytes

    # Start of the main training loop.
    for epoch_idx, edge_path_idx, edge_chunk_idx \
            in iteration_manager.remaining_iterations():
        log("Starting epoch %d / %d edge path %d / %d edge chunk %d / %d" %
            (epoch_idx + 1, iteration_manager.num_epochs,
             edge_path_idx + 1, iteration_manager.num_edge_paths,
             edge_chunk_idx + 1, iteration_manager.num_edge_chunks))
        edge_reader = EdgeReader(iteration_manager.edge_path)
        log("edge_path= %s" % iteration_manager.edge_path)

        sync.barrier()
        dlog("Lock client new epoch...")
        bucket_scheduler.new_pass(is_first=iteration_manager.iteration_idx == 0)
        sync.barrier()

        remaining = total_buckets
        cur_b = None
        while remaining > 0:
            old_b = cur_b
            io_time = 0.
            io_bytes = 0
            cur_b, remaining = bucket_scheduler.acquire_bucket()
            print('still in queue: %d' % remaining, file=sys.stderr)
            if cur_b is None:
                if old_b is not None:
                    # if you couldn't get a new pair, release the lock
                    # to prevent a deadlock!
                    tic = time.time()
                    io_bytes += swap_partitioned_embeddings(old_b, None)
                    io_time += time.time() - tic
                time.sleep(1)  # don't hammer td
                continue

            def log_status(msg, always=False):
                f = log if always else vlog
                f("%s: %s" % (cur_b, msg))

            tic = time.time()

            io_bytes += swap_partitioned_embeddings(old_b, cur_b)

            current_index = \
                (iteration_manager.iteration_idx + 1) * total_buckets - remaining

            next_b = bucket_scheduler.peek()
            if next_b is not None and background_io:
                # Ensure the previous bucket finished writing to disk.
                checkpoint_manager.wait_for_marker(current_index - 1)

                log_status("Prefetching")
                for entity in lhs_partitioned_types:
                    checkpoint_manager.prefetch(entity, next_b.lhs)
                for entity in rhs_partitioned_types:
                    checkpoint_manager.prefetch(entity, next_b.rhs)

                checkpoint_manager.record_marker(current_index)

            log_status("Loading edges")
            lhs, rhs, rel = edge_reader.read(
                cur_b.lhs, cur_b.rhs, edge_chunk_idx, config.num_edge_chunks)
            num_edges = rel.size(0)
            # this might be off in the case of tensorlist
            io_bytes += (lhs.nelement() + rhs.nelement() + rel.nelement()) * 4

            log_status("Shuffling edges")
            # Fix a seed to get the same permutation every time; have it
            # depend on all and only what affects the set of edges.
            g = torch.Generator()
            g.manual_seed(hash((edge_path_idx, edge_chunk_idx, cur_b.lhs, cur_b.rhs)))

            num_eval_edges = int(num_edges * config.eval_fraction)
            if num_eval_edges > 0:
                edge_perm = torch.randperm(num_edges, generator=g)
                eval_edge_perm = edge_perm[-num_eval_edges:]
                num_edges -= num_eval_edges
                edge_perm = edge_perm[torch.randperm(num_edges)]
            else:
                edge_perm = torch.randperm(num_edges)

            # HOGWILD evaluation before training
            eval_stats_before: Optional[Stats] = None
            if num_eval_edges > 0:
                log_status("Waiting for workers to perform evaluation")
                all_eval_stats_before = pool.map(call, [
                    partial(
                        process_in_batches,
                        batch_size=eval_batch_size,
                        model=model,
                        batch_processor=evaluator,
                        lhs=lhs, rhs=rhs, rel=rel,
                        indices=eval_edge_perm[s],
                    )
                    for s in split_almost_equally(eval_edge_perm.size(0),
                                                  num_parts=num_workers)
                ])
                eval_stats_before = Stats.sum(all_eval_stats_before).average()
                log("stats before %s: %s" % (cur_b, eval_stats_before))

            io_time += time.time() - tic
            tic = time.time()
            # HOGWILD training
            log_status("Waiting for workers to perform training")
            # FIXME should we only delay if iteration_idx == 0?
            all_stats = pool.map(call, [
                partial(
                    process_in_batches,
                    batch_size=config.batch_size,
                    model=model,
                    batch_processor=trainer,
                    lhs=lhs, rhs=rhs, rel=rel,
                    indices=edge_perm[s],
                    delay=config.hogwild_delay if epoch_idx == 0 and rank > 0 else 0,
                )
                for rank, s in enumerate(split_almost_equally(edge_perm.size(0),
                                                              num_parts=num_workers))
            ])
            stats = Stats.sum(all_stats).average()
            compute_time = time.time() - tic

            log_status(
                "bucket %d / %d : Processed %d edges in %.2f s "
                "( %.2g M/sec ); io: %.2f s ( %.2f MB/sec )" %
                (total_buckets - remaining, total_buckets,
                 lhs.size(0), compute_time, lhs.size(0) / compute_time / 1e6,
                 io_time, io_bytes / io_time / 1e6),
                always=True)
            log_status("%s" % stats, always=True)

            # HOGWILD eval after training
            eval_stats_after: Optional[Stats] = None
            if num_eval_edges > 0:
                log_status("Waiting for workers to perform evaluation")
                all_eval_stats_after = pool.map(call, [
                    partial(
                        process_in_batches,
                        batch_size=eval_batch_size,
                        model=model,
                        batch_processor=evaluator,
                        lhs=lhs, rhs=rhs, rel=rel,
                        indices=eval_edge_perm[s],
                    )
                    for s in split_almost_equally(eval_edge_perm.size(0),
                                                  num_parts=num_workers)
                ])
                eval_stats_after = Stats.sum(all_eval_stats_after).average()
                log("stats after %s: %s" % (cur_b, eval_stats_after))

            # Add train/eval metrics to queue
            yield current_index, eval_stats_before, stats, eval_stats_after

        swap_partitioned_embeddings(cur_b, None)

        # Distributed Processing: all machines can leave the barrier now.
        sync.barrier()

        # Write metadata: for multiple machines, write from rank-0
        log("Finished epoch %d path %d pass %d; checkpointing global state."
            % (epoch_idx + 1, edge_path_idx + 1, edge_chunk_idx + 1))
        log("My rank: %d" % rank)
        if rank == 0:
            for entity, econfig in config.entities.items():
                if econfig.num_partitions == 1:
                    embs = model.get_embeddings(entity, Side.LHS)
                    optimizer = trainer.entity_optimizers[(entity, Partition(0))]

                    checkpoint_manager.write(
                        entity, Partition(0),
                        embs.detach(), OptimizerStateDict(optimizer.state_dict()))

            sanitized_state_dict: ModuleStateDict = {}
            for k, v in ModuleStateDict(model.state_dict()).items():
                if k.startswith('lhs_embs') or k.startswith('rhs_embs'):
                    # skipping state that's an entity embedding
                    continue
                sanitized_state_dict[k] = v

            log("Writing metadata...")
            checkpoint_manager.write_model(
                sanitized_state_dict,
                OptimizerStateDict(trainer.global_optimizer.state_dict()),
            )

        log("Writing the checkpoint...")
        checkpoint_manager.write_new_version(config)

        dlog("Waiting for other workers to write their parts of the checkpoint: rank %d" % rank)
        sync.barrier()
        dlog("All parts of the checkpoint have been written")

        log("Switching to new checkpoint version...")
        checkpoint_manager.switch_to_new_version()

        dlog("Waiting for other workers to switch to the new checkpoint version: rank %d" % rank)
        sync.barrier()
        dlog("All workers have switched to the new checkpoint version")

        # After all the machines have finished committing
        # checkpoints, we remove the old checkpoints.
        checkpoint_manager.remove_old_version(config)

        # now we're sure that all partition files exist,
        # so be strict about loading them
        strict = True

    # quiescence
    pool.close()
    pool.join()

    sync.barrier()

    checkpoint_manager.close()
    if loadpath_manager is not None:
        loadpath_manager.close()

    # FIXME join distributed workers (not really necessary)

    log("Exiting")
Example No. 3
    train_dataset = jsonDataset(path=config['data']['train'].split(' ')[0],
                                classes=target_classes)

    valid_dataset = jsonDataset(path=config['data']['valid'].split(' ')[0],
                                classes=target_classes)
elif config['data']['name'] == 'landmark':
    train_data = Landmark_dataset(
        root='/data/kaggle/dacon_landmark_korea/public', is_train=True)
    num_classes = train_data.num_classes
    num_data = len(train_data)
    num_train = int(num_data * 0.7)
    num_valid = num_data - num_train
    train_dataset, valid_dataset = torch.utils.data.random_split(
        dataset=train_data,
        lengths=[num_train, num_valid],
        generator=torch.Generator().manual_seed(config['params']['seed']))
else:
    raise NotImplementedError('Unsupported Dataset: ' +
                              str(config['data']['name']))

assert train_dataset
assert valid_dataset
'''loss'''
# criterion = nn.CrossEntropyLoss(reduction='mean')
criterion = nn.KLDivLoss(reduction='batchmean')
'''print out'''
print("transform : " + str(transform_train))
print("num. train data : " + str(len(train_dataset)))
print("num. valid data : " + str(len(valid_dataset)))
print("num_classes : " + str(num_classes))
Example No. 4
def train(
    env: gym.Env,
    test_env: gym.Env,
    termination_fn: mbrl.types.TermFnType,
    cfg: omegaconf.DictConfig,
    silent: bool = False,
    work_dir: Optional[str] = None,
) -> np.float32:
    # ------------------- Initialization -------------------
    debug_mode = cfg.get("debug_mode", False)

    obs_shape = env.observation_space.shape
    act_shape = env.action_space.shape

    mbrl.planning.complete_agent_cfg(env, cfg.algorithm.agent)
    agent = hydra.utils.instantiate(cfg.algorithm.agent)

    work_dir = work_dir or os.getcwd()
    # enable_back_compatible to use pytorch_sac agent
    logger = mbrl.util.Logger(work_dir, enable_back_compatible=True)
    logger.register_group(
        mbrl.constants.RESULTS_LOG_NAME,
        MBPO_LOG_FORMAT,
        color="green",
        dump_frequency=1,
    )
    video_recorder = pytorch_sac.VideoRecorder(
        work_dir if cfg.save_video else None)

    rng = np.random.default_rng(seed=cfg.seed)
    torch_generator = torch.Generator(device=cfg.device)
    if cfg.seed is not None:
        torch_generator.manual_seed(cfg.seed)

    # -------------- Create initial overrides. dataset --------------
    dynamics_model = mbrl.util.common.create_one_dim_tr_model(
        cfg, obs_shape, act_shape)

    replay_buffer = mbrl.util.common.create_replay_buffer(cfg,
                                                          obs_shape,
                                                          act_shape,
                                                          rng=rng)
    random_explore = cfg.algorithm.random_initial_explore
    mbrl.util.common.rollout_agent_trajectories(
        env,
        cfg.algorithm.initial_exploration_steps,
        mbrl.planning.RandomAgent(env) if random_explore else agent,
        {} if random_explore else {
            "sample": True,
            "batched": False
        },
        replay_buffer=replay_buffer,
    )

    # ---------------------------------------------------------
    # --------------------- Training Loop ---------------------
    rollout_batch_size = (cfg.overrides.effective_model_rollouts_per_step *
                          cfg.algorithm.freq_train_model)
    trains_per_epoch = int(
        np.ceil(cfg.overrides.epoch_length / cfg.overrides.freq_train_model))
    updates_made = 0
    env_steps = 0
    model_env = mbrl.models.ModelEnv(env,
                                     dynamics_model,
                                     termination_fn,
                                     None,
                                     generator=torch_generator)
    model_trainer = mbrl.models.ModelTrainer(
        dynamics_model,
        optim_lr=cfg.overrides.model_lr,
        weight_decay=cfg.overrides.model_wd,
        logger=None if silent else logger,
    )
    best_eval_reward = -np.inf
    epoch = 0
    sac_buffer = None
    while env_steps < cfg.overrides.num_steps:
        rollout_length = int(
            mbrl.util.math.truncated_linear(*(cfg.overrides.rollout_schedule +
                                              [epoch + 1])))
        sac_buffer_capacity = rollout_length * rollout_batch_size * trains_per_epoch
        sac_buffer_capacity *= cfg.overrides.num_epochs_to_retain_sac_buffer
        sac_buffer = maybe_replace_sac_buffer(
            sac_buffer,
            sac_buffer_capacity,
            obs_shape,
            act_shape,
            torch.device(cfg.device),
        )
        obs, done = None, False
        for steps_epoch in range(cfg.overrides.epoch_length):
            if steps_epoch == 0 or done:
                obs, done = env.reset(), False
            # --- Doing env step and adding to model dataset ---
            next_obs, reward, done, _ = mbrl.util.common.step_env_and_add_to_buffer(
                env, obs, agent, {}, replay_buffer)

            # --------------- Model Training -----------------
            if (env_steps + 1) % cfg.overrides.freq_train_model == 0:
                mbrl.util.common.train_model_and_save_model_and_data(
                    dynamics_model,
                    model_trainer,
                    cfg.overrides,
                    replay_buffer,
                    work_dir=work_dir,
                )

                # --------- Rollout new model and store imagined trajectories --------
                # Batch all rollouts for the next freq_train_model steps together
                rollout_model_and_populate_sac_buffer(
                    model_env,
                    replay_buffer,
                    agent,
                    sac_buffer,
                    cfg.algorithm.sac_samples_action,
                    rollout_length,
                    rollout_batch_size,
                )

                if debug_mode:
                    print(f"Epoch: {epoch}. "
                          f"SAC buffer size: {len(sac_buffer)}. "
                          f"Rollout length: {rollout_length}. "
                          f"Steps: {env_steps}")

            # --------------- Agent Training -----------------
            for _ in range(cfg.overrides.num_sac_updates_per_step):
                if (env_steps +
                        1) % cfg.overrides.sac_updates_every_steps != 0 or len(
                            sac_buffer) < rollout_batch_size:
                    break  # only update every once in a while
                agent.update(sac_buffer, logger, updates_made)
                updates_made += 1
                if not silent and updates_made % cfg.log_frequency_agent == 0:
                    logger.dump(updates_made, save=True)

            # ------ Epoch ended (evaluate and save model) ------
            if (env_steps + 1) % cfg.overrides.epoch_length == 0:
                avg_reward = evaluate(test_env, agent,
                                      cfg.algorithm.num_eval_episodes,
                                      video_recorder)
                logger.log_data(
                    mbrl.constants.RESULTS_LOG_NAME,
                    {
                        "epoch": epoch,
                        "env_step": env_steps,
                        "episode_reward": avg_reward,
                        "rollout_length": rollout_length,
                    },
                )
                if avg_reward > best_eval_reward:
                    video_recorder.save(f"{epoch}.mp4")
                    best_eval_reward = avg_reward
                    torch.save(agent.critic.state_dict(),
                               os.path.join(work_dir, "critic.pth"))
                    torch.save(agent.actor.state_dict(),
                               os.path.join(work_dir, "actor.pth"))
                epoch += 1

            env_steps += 1
            obs = next_obs
    return np.float32(best_eval_reward)
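
A small sketch of the seeding pattern used above, where a NumPy Generator and a torch.Generator are both derived from the same experiment seed (the helper name is made up for illustration):

import numpy as np
import torch

def make_rngs(seed, device="cpu"):
    # Hypothetical helper: one experiment seed drives both RNG streams.
    np_rng = np.random.default_rng(seed=seed)
    torch_gen = torch.Generator(device=device)
    torch_gen.manual_seed(seed)
    return np_rng, torch_gen

np_rng, torch_gen = make_rngs(0)
noise = torch.randn(3, generator=torch_gen)  # reproducible for a given seed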
Example No. 5
import torch
import torch.nn as nn
from torch.utils.data import random_split
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from poutyne import Experiment

# Instantiate the MNIST dataset
train_valid_dataset = MNIST('./datasets', train=True, download=True, transform=ToTensor())
test_dataset = MNIST('./datasets', train=False, download=True, transform=ToTensor())
train_dataset, valid_dataset = random_split(
    train_valid_dataset, [50_000, 10_000], generator=torch.Generator().manual_seed(42)
)

# Select CUDA device if available
cuda_device = 0
device = torch.device('cuda:%d' % cuda_device if torch.cuda.is_available() else 'cpu')

# Define the network
network = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 100), nn.ReLU(), nn.Linear(100, 10))
epochs = 5

# Define the Experiment and train
experiment = Experiment(
    './simple_model',  # Where to log
    network,
    optimizer='sgd',
    loss_function='cross_entropy',
    device=device,
)
experiment.train_dataset(train_dataset, valid_dataset, epochs=epochs)
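
Because the split above is driven by a generator with a fixed seed, re-running it reproduces the same partition; a quick check reusing train_valid_dataset from the snippet above:

split_a = random_split(train_valid_dataset, [50_000, 10_000],
                       generator=torch.Generator().manual_seed(42))
split_b = random_split(train_valid_dataset, [50_000, 10_000],
                       generator=torch.Generator().manual_seed(42))
assert split_a[0].indices == split_b[0].indices  # identical train subsets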
Example No. 6
        'image_size': [args.image_size, args.image_size],
        'mean': mean,
        'std': std,
        'data_dir': args.data_path,
        'is_trans': True,
        'is_train': True
    }

    datasets = SegmentDataset(config)

    train_num = int(0.9 * len(datasets))
    val_num = len(datasets) - train_num
    split_num = random.randint(0, 100)
    train_datasets, val_datasets = random_split(
        datasets, [train_num, val_num],
        generator=torch.Generator().manual_seed(split_num))

    train_loader = DataLoader(train_datasets,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.workers_num,
                              pin_memory=True)
    val_loader = DataLoader(val_datasets,
                            batch_size=4,
                            shuffle=False,
                            num_workers=args.workers_num,
                            pin_memory=True)

    # train_loader = DataLoader(datasets, batch_size=args.batch_size, shuffle=True,
    #                           num_workers=args.workers_num, pin_memory=True)
Example No. 7
def test_random_sampler(setup_cluster):
    import torch

    data = mt.random.rand(1000, 32, dtype='f4')
    labels = mt.random.randint(0, 2, (1000, 10), dtype='f4')

    train_dataset = MarsDataset(data, labels)

    # test __init__()
    with pytest.raises(ValueError) as e:
        train_sampler = RandomSampler(train_dataset, replacement=1)
    exec_msg = e.value.args[0]
    assert exec_msg == "replacement should be a boolean value, but got replacement=1"

    with pytest.raises(ValueError) as e:
        train_sampler = RandomSampler(train_dataset, num_samples=900)
    exec_msg = e.value.args[0]
    assert exec_msg == "With replacement=False, num_samples should not " + \
        "be specified, since a random permute will be performed."

    with pytest.raises(ValueError) as e:
        train_sampler = RandomSampler(train_dataset,
                                      replacement=True,
                                      num_samples=-1)
    exec_msg = e.value.args[0]
    assert exec_msg == "num_samples should be a positive integer value, but got num_samples=-1"

    train_sampler = RandomSampler(train_dataset)

    # test __len__ and num_samples
    assert len(train_sampler) == 1000
    assert train_sampler.num_samples == 1000

    # test __iter__
    g_cpu = torch.Generator()
    g_cpu.manual_seed(2147483647)

    train_sampler = RandomSampler(train_dataset, generator=g_cpu)
    assert len(train_sampler) == 1000
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=32,
                                               sampler=train_sampler)
    for _, (batch_data, batch_labels) in enumerate(train_loader):
        assert len(batch_data[0]) == 32
        assert len(batch_labels[0]) == 10

    train_sampler = RandomSampler(train_dataset,
                                  replacement=True,
                                  num_samples=900)
    assert len(train_sampler) == 900
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=32,
                                               sampler=train_sampler)
    for _, (batch_data, batch_labels) in enumerate(train_loader):
        assert len(batch_data[0]) == 32
        assert len(batch_labels[0]) == 10

    # torch train
    model = torch.nn.Sequential(
        torch.nn.Linear(32, 64),
        torch.nn.ReLU(),
        torch.nn.Linear(64, 64),
        torch.nn.ReLU(),
        torch.nn.Linear(64, 10),
        torch.nn.Softmax(dim=1),
    )

    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
    criterion = torch.nn.BCELoss()
    for _ in range(2):
        # 2 epochs
        for _, (batch_data, batch_labels) in enumerate(train_loader):
            outputs = model(batch_data)
            loss = criterion(outputs.squeeze(), batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
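
The same seeded-generator idea can be checked with plain PyTorch; a tiny sketch showing that a RandomSampler driven by a freshly seeded generator repeats the same order (a TensorDataset stands in for MarsDataset):

import torch
from torch.utils.data import DataLoader, RandomSampler, TensorDataset

toy_dataset = TensorDataset(torch.arange(10).float())

def epoch_order(seed):
    g = torch.Generator()
    g.manual_seed(seed)
    loader = DataLoader(toy_dataset, batch_size=1,
                        sampler=RandomSampler(toy_dataset, generator=g))
    return [int(batch[0]) for batch in loader]

assert epoch_order(2147483647) == epoch_order(2147483647)  # same seed, same order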
Example No. 8
    def run(self, progressbar=False):
        self.optimizer = self._make_optimizer(self._params)
        self.scheduler = self._make_scheduler(self.optimizer)

        if progressbar:
            progressbar = tqdm.tqdm(total=self.cycles * self.epochs_per_cycle,
                                    mininterval=2.0)
        assert progressbar is False or isinstance(progressbar, tqdm.std.tqdm)

        def _enter_epoch(desc, temperature):
            "Run this at the beginning of each epoch"
            if progressbar:
                progressbar.set_description(desc, refresh=False)
            for g in self.optimizer.param_groups:
                g['temperature'] = temperature

        def _is_sampling_epoch(_epoch):
            "Are we storing a sample at the end of this epoch?"
            _epoch = _epoch % self.epochs_per_cycle
            sampling_epoch = _epoch - (self.descent_epochs +
                                       self.warmup_epochs)
            return (0 <= sampling_epoch) and (sampling_epoch % self.skip == 0)

        # Use an exact gradient for the initial step and loss
        loss, log_prior, potential = self._exact_model_potential_and_grad(
            self.dataloader)
        self.optimizer.sample_momentum()
        self.optimizer.initial_step(calc_metrics=True,
                                    save_state=self.reject_samples)
        step = 0
        self.store_metrics(i=step,
                           loss=loss.item(),
                           log_prior=log_prior.item(),
                           potential=potential.item(),
                           acc=0.,
                           lr=self.optimizer.param_groups[0]["lr"],
                           corresponds_to_sample=True,
                           delta_energy=0.,
                           total_energy=0.,
                           rejected=False)
        self._initial_potential = potential.item()
        self._total_energy = 0.

        assert self.dataloader.sampler.generator is None
        generator = self.dataloader.sampler.generator = torch.Generator()
        postfix = {}
        for cycle in range(self.cycles):
            generator.seed()
            cycle_random_state = generator.get_state()
            for epoch in range(self.epochs_per_cycle):
                if epoch < self.descent_epochs:
                    _enter_epoch(f"Cycle {cycle}, epoch {epoch}, Descent", 0.)
                elif epoch - self.descent_epochs < self.warmup_epochs:
                    _enter_epoch(f"Cycle {cycle}, epoch {epoch}, Warmup",
                                 self.temperature)
                else:
                    _enter_epoch(f"Cycle {cycle}, epoch {epoch}, Sampling",
                                 self.temperature)

                # Run one epoch of potentially-stochastic gradient descent
                # make sure the epochs' data points are always in the same order for this cycle.
                generator.set_state(cycle_random_state)

                for i, (x, y) in enumerate(self.dataloader):
                    step += 1
                    loss, log_prior, potential, acc = self._model_potential_and_grad(
                        x.to(self._params[0].device),
                        y.to(self._params[0].device))
                    store_metrics = (step % self.metrics_skip) == 0
                    self.optimizer.step(calc_metrics=store_metrics)

                    if store_metrics:
                        delta_energy = self.optimizer.delta_energy(
                            self._initial_potential, potential)
                        self.store_metrics(
                            i=step,
                            loss=loss.item(),
                            log_prior=log_prior.item(),
                            potential=potential.item(),
                            acc=acc.item(),
                            lr=self.optimizer.param_groups[0]["lr"],
                            corresponds_to_sample=False,
                            delta_energy=delta_energy,
                            total_energy=self._total_energy + delta_energy)
                        if progressbar:
                            postfix["train/loss"] = loss.item()
                            postfix["train/acc"] = acc.item()
                            postfix["Δₑ"] = delta_energy
                            progressbar.set_postfix(postfix, refresh=False)

                    # Omit the scheduler step in the last iteration, because we
                    # want to run it after `optimizer.final_step`
                    if i < len(self.dataloader) - 1:
                        self.scheduler.step()

                if _is_sampling_epoch(epoch):
                    step += 1
                    # Do the sample's `final_step` using an exact gradient
                    loss, log_prior, potential = self._exact_model_potential_and_grad(
                        self.dataloader)
                    self.optimizer.final_step(calc_metrics=True)
                    delta_energy = self.optimizer.delta_energy(
                        self._initial_potential, potential)
                    self._total_energy += delta_energy
                    self._initial_potential = potential.item()

                    rejected = False
                    if self.reject_samples:
                        rejected, _ = self.optimizer.maybe_reject(delta_energy)
                    self.store_metrics(
                        i=step,
                        loss=loss.item(),
                        log_prior=log_prior.item(),
                        potential=potential.item(),
                        # TODO: do not use stale `acc`, calculate for full training set
                        acc=acc.item(),
                        lr=self.optimizer.param_groups[0]["lr"],
                        corresponds_to_sample=True,
                        delta_energy=delta_energy,
                        total_energy=self._total_energy,
                        rejected=rejected)

                    # Evaluate test accuracy and save to disk the current sample
                    # (correctly rolled back to the previous if rejected)
                    state_dict = self.model.state_dict()
                    eval_results = self._evaluate_model(state_dict, step)
                    self._save_sample(state_dict, cycle, epoch, step)
                    if progressbar:
                        postfix.update(eval_results)
                        postfix["train/loss"] = loss.item()
                        postfix["Δₑ"] = delta_energy
                        progressbar.set_postfix(postfix, refresh=False)
                    self.scheduler.step()

                    # First step for the next epoch, using the same gradient
                    # but potentially a different learning rate
                    if isinstance(self.optimizer, mcmc.HMC):
                        self.optimizer.sample_momentum()
                    self.optimizer.initial_step(calc_metrics=False,
                                                save_state=self.reject_samples)

                else:  # Not an epoch that stores a sample at the end
                    # Evaluate test accuracy every epoch
                    eval_results = self._evaluate_model(
                        self.model.state_dict(), step)
                    if progressbar:
                        postfix.update(eval_results)
                        progressbar.set_postfix(postfix, refresh=False)
                    self.scheduler.step()

                # Update preconditioner, increment progressbar at the end of the epoch
                if self.precond_update is not None and (
                        epoch + 1) % self.precond_update == 0:
                    self.optimizer.update_preconditioner()

                # Important to put here because no new metrics are added
                # Write metrics to disk every 30 seconds
                self.metrics_saver.flush(every_s=30)

                if progressbar:
                    progressbar.update(1)
        # Close the progressbar at the end of the training procedure
        if progressbar:
            progressbar.close()
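
The cycle logic above replays the same data order by capturing and restoring the generator state; the core of that trick in isolation:

import torch

g = torch.Generator()
g.seed()                      # fresh, nondeterministic seed for this "cycle"
saved_state = g.get_state()   # remember where the random stream is

first = torch.randperm(8, generator=g)
g.set_state(saved_state)      # rewind the stream
second = torch.randperm(8, generator=g)
assert torch.equal(first, second)  # identical order within the cycle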
Example No. 9
    def setup(self, stage=None):
        #Train set 
        self.train_set = CIFAR10(self.params.PATH_DATASET, train=True, transform=self.transform_train)

        #Val and test set
        test_val_set = CIFAR10(self.params.PATH_DATASET, train=False, transform=self.transform_test)

        len_test_val_set = len(test_val_set)
        split = int(len_test_val_set/2)
        self.val_test, self.test_set = random_split(test_val_set, [split, split], generator=torch.Generator().manual_seed(42))
        assert len(self.val_test) == len(self.test_set)
Example No. 10
    def seed(self, seed: Optional[int] = None) -> None:
        seed = create_seed(seed, max_bytes=7)
        self._torch_random = torch.Generator(device=self.device)
        self._torch_random.manual_seed(seed)
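
A torch.Generator is tied to a device, and tensors drawn from it must live on that same device; a minimal sketch (the device choice is illustrative):

import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
gen = torch.Generator(device=device)
gen.manual_seed(123)
# The generator's device must match the device of the tensor being sampled.
sample = torch.randn(4, generator=gen, device=device)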
Example No. 11
    def forward(self,
                query: torch.Tensor,
                value: torch.Tensor,
                mask: torch.Tensor,
                seed: int,
                random=True):
        length = query.size(2)
        bucket_length = length // self.n_buckets

        query = query / torch.norm(query, dim=-1, keepdim=True)
        # [batch, head, length, d_k]
        flattened_query = query.flatten(0, 1)
        # [batch * head, length, d_k]

        hashes = self.lsh(flattened_query, random)
        # [batch * head, length, rounds]
        sorted_hashes, hash_indices = torch.sort(hashes, dim=1)
        # [batch * head, length, rounds]
        expanded_hash_indices = hash_indices[:, :, None, :].expand(
            -1, -1, self.d_k, -1)
        # [batch * head, length, d_k, rounds]

        expanded_query = flattened_query[...,
                                         None].expand(-1, -1, -1, self.rounds)
        # [batch * head, length, d_k, rounds]
        reordered_query = torch.gather(expanded_query,
                                       dim=1,
                                       index=expanded_hash_indices)
        # [batch * head, length, d_k, rounds]
        reordered_query = reordered_query.reshape(-1, self.n_buckets // 2,
                                                  bucket_length * 2, self.d_k,
                                                  self.rounds)
        # [batch * head, n_buckets // 2, bucket_length * 2, d_k, rounds]

        lookback_key = look_back(reordered_query)
        # [batch * head, n_buckets // 2, bucket_length * 4, d_k, rounds]

        scores = torch.einsum('...ijk,...ljk->...ilk', reordered_query,
                              lookback_key) / math.sqrt(self.d_k)
        # [batch * head, n_buckets // 2, bucket_length * 2, bucket_length * 4, rounds]

        mask = mask[:, None, :, None].expand(-1, self.head, -1,
                                             self.rounds).flatten(0, 1)
        # [batch * head, length, rounds]
        reordered_mask = torch.gather(mask, dim=1, index=hash_indices)
        # [batch * head, length, rounds]
        reordered_mask = reordered_mask.reshape(-1, self.n_buckets // 2,
                                                bucket_length * 2, self.rounds)
        # [batch * head, n_buckets // 2, bucket_length * 2, rounds]
        lookback_mask = look_back(reordered_mask)[..., None, :, :]
        # [batch * head, n_buckets // 2, 1, bucket_length * 4, rounds]
        scores.masked_fill_(mask=~lookback_mask, value=-1e9)

        sorted_hashes = sorted_hashes.reshape(-1, self.n_buckets // 2,
                                              bucket_length * 2, self.rounds)
        # [batch * head, n_buckets // 2, bucket_length * 2, rounds]
        lookback_hash = look_back(sorted_hashes)
        # [batch * head, n_buckets // 2, bucket_length * 4, rounds]
        hash_equiv_mask = (sorted_hashes[..., None, :] !=
                           lookback_hash[..., None, :, :])
        # [batch * head, n_buckets // 2, bucket_length * 2, bucket_length * 4, rounds]
        scores.masked_fill_(mask=hash_equiv_mask, value=-1e9)

        query_indices = hash_indices.reshape(-1, self.n_buckets // 2,
                                             bucket_length * 2, self.rounds)
        # [batch * head, n_buckets // 2, bucket_length * 2, rounds]
        key_indices = look_back(query_indices)
        # [batch * head, n_buckets // 2, bucket_length * 4, rounds]

        causal_mask = query_indices[..., None, :] < key_indices[...,
                                                                None, :, :]
        # [batch * head, n_buckets // 2, bucket_length * 2, bucket_length * 4, rounds]
        scores.masked_fill_(mask=causal_mask, value=-1e9)

        indice_equiv_mask = query_indices[...,
                                          None, :] == key_indices[...,
                                                                  None, :, :]
        # [batch * head, n_buckets // 2, bucket_length * 2, bucket_length * 4, rounds]
        scores.masked_fill_(mask=indice_equiv_mask, value=-1e5)

        original_indices = reverse_sort(hash_indices, dim=1)
        # [batch * head, length, rounds]
        score_indices = original_indices[..., None, :].expand(
            -1, -1, bucket_length * 4, -1)
        # [batch * head, length, bucket_length * 4, rounds]

        expanded_key_indices = key_indices[..., None, :, :].expand(
            -1, -1, bucket_length * 2, -1, -1)
        # [batch * head, n_buckets // 2, bucket_length * 2, bucket_length * 4, rounds]
        reordered_key_indices = torch.gather(expanded_key_indices.flatten(
            1, 2),
                                             dim=1,
                                             index=score_indices)
        # [batch * head, length, bucket_length * 4, rounds]
        flat_reordered_key = reordered_key_indices.flatten(-2,
                                                           -1).flatten(0, 1)
        # [batch * head * length, bucket_length * 4 * rounds]
        sorted_flat_key, flat_key_indices = torch.sort(
            flat_reordered_key.int(), dim=-1)
        # [batch * head * length, bucket_length * 4 * rounds]
        count_shift_keys = torch.ones_like(sorted_flat_key).float()
        # [batch * head * length, bucket_length * 4 * rounds]
        for i in range(1, self.rounds):
            equiv_flat_key = (
                sorted_flat_key[..., i:] == sorted_flat_key[..., :-i]).float()
            count_shift_keys[..., i:] += equiv_flat_key
            count_shift_keys[..., :-i] += equiv_flat_key
        count_key_indices = reverse_sort(flat_key_indices, dim=1)
        # [batch * head * length, bucket_length * 4 * rounds]
        count_key = torch.gather(count_shift_keys,
                                 dim=-1,
                                 index=count_key_indices)
        # [batch * head * length, bucket_length * 4 * rounds]
        reshaped_count_key = count_key.reshape(-1, length, bucket_length * 4,
                                               self.rounds)
        # [batch * head, length, bucket_length * 4, rounds]
        scores = scores.flatten(1, 2)
        # [batch * head, length, bucket_length * 4, rounds]
        scores = torch.gather(scores, dim=1, index=score_indices)
        # [batch * head, length, bucket_length * 4, rounds]
        scores = scores - reshaped_count_key.log().detach()

        scores = scores.flatten(-2, -1)
        # [batch * head, length, bucket_length * 4 * rounds]
        p_attn = F.softmax(scores, dim=-1)
        # [batch * head, length, bucket_length * 4 * rounds]

        if self.training:
            generator = torch.Generator(device=p_attn.get_device())
            generator.manual_seed(seed)
            dropout_mask = torch.bernoulli(p_attn,
                                           p=1 - self.dropout,
                                           generator=generator)
            p_attn = dropout_mask * p_attn / (1 - self.dropout)

        p_attn = p_attn.reshape(-1, length, bucket_length * 4, self.rounds)
        # [batch * head, length, bucket_length * 4, rounds]

        flattened_value = value.flatten(0, 1)[..., None].expand(
            -1, -1, -1, self.rounds)
        # [batch * head, length, d_k, rounds]
        reordered_value = torch.gather(flattened_value,
                                       dim=1,
                                       index=expanded_hash_indices)
        # [batch * head, length, d_k, rounds]
        reshaped_value = reordered_value.reshape(-1, self.n_buckets // 2,
                                                 bucket_length * 2, self.d_k,
                                                 self.rounds)
        # [batch * head, n_buckets // 2, bucket_length * 2, d_k, rounds]
        lookback_value = look_back(reshaped_value)
        # [batch * head, n_buckets // 2, bucket_length * 4, d_k, rounds]

        attn_indices = hash_indices[...,
                                    None, :].expand(-1, -1, bucket_length * 4,
                                                    -1)
        # [batch * head, length, bucket_length * 4, rounds]
        reordered_p_attn = torch.gather(p_attn, dim=1, index=attn_indices)
        # [batch * head, length, bucket_length * 4, rounds]
        new_p_attn = reordered_p_attn.reshape(-1, self.n_buckets // 2,
                                              bucket_length * 2,
                                              bucket_length * 4, self.rounds)
        # [batch * head, n_buckets // 2, bucket_length * 2, bucket_length * 4, rounds]

        attention = torch.einsum('...ijl,...jkl->...ikl', new_p_attn,
                                 lookback_value)
        # [batch * head, n_buckets // 2, bucket_length * 2, d_k, rounds]
        attention = attention.flatten(1, 2)
        # [batch * head, length, d_k, rounds]
        new_indices = original_indices[...,
                                       None, :].expand(-1, -1, self.d_k, -1)
        # [batch * head, length, d_k, rounds]
        attention = torch.gather(attention, dim=1,
                                 index=new_indices).sum(dim=-1)
        # [batch * head, length, d_k]
        attention = attention.reshape(-1, self.head, length, self.d_k)
        # [batch, head, length, d_k]

        return attention
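
The training branch above derives its dropout mask from a generator seeded with an explicit seed, so the same mask can be regenerated on demand; a standalone sketch of that idea using the documented torch.bernoulli signature (names are illustrative):

import torch

def seeded_dropout(x, p, seed):
    # Reproducible dropout: the mask depends only on seed, x.shape and p.
    gen = torch.Generator(device=x.device)
    gen.manual_seed(seed)
    keep = torch.bernoulli(torch.full_like(x, 1.0 - p), generator=gen)
    return keep * x / (1.0 - p)

x = torch.randn(2, 5)
assert torch.equal(seeded_dropout(x, 0.1, seed=7), seeded_dropout(x, 0.1, seed=7))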
Example No. 12
    def test_dist_optim(self):
        # local version
        module1 = MyModule()
        module2 = MyModule()
        params = [module1.get_w(), module2.get_w()]
        local_optim = optim.SGD(params, lr=0.05)

        old_w1 = module1.w.clone().detach()
        old_w2 = module2.w.clone().detach()

        g_cpu = torch.Generator()
        g_cpu.manual_seed(0)
        t1 = torch.rand((3, 3), requires_grad=True, generator=g_cpu)
        t2 = torch.rand((3, 3), requires_grad=True, generator=g_cpu)
        output1 = module1.forward(t2)
        output2 = module2.forward(output1)
        loss = torch.add(output2, t1).sum()

        loss.backward()
        local_optim.step()

        # distributed version
        owner1 = "worker%d" % ((self.rank + 1) % self.world_size)
        owner2 = "worker%d" % ((self.rank + 2) % self.world_size)

        remote_module1 = rpc.remote(owner1, MyModule)
        remote_module2 = rpc.remote(owner2, MyModule)
        remote_param1 = remote_method(MyModule.get_w, remote_module1)
        remote_param2 = remote_method(MyModule.get_w, remote_module2)

        old_w1_remote = remote_param1.to_here()

        # sanity check: local and remote initial weights should match
        self.assertEqual(old_w1, remote_param1.to_here())
        self.assertEqual(old_w2, remote_param2.to_here())

        dist_optim = DistributedOptimizer(optim.SGD,
                                          [remote_param1, remote_param2],
                                          lr=0.05)

        with dist_autograd.context() as context_id:
            g_cpu.manual_seed(0)
            t1 = torch.rand((3, 3), requires_grad=True, generator=g_cpu)
            t2 = torch.rand((3, 3), requires_grad=True, generator=g_cpu)
            output1 = rpc_async_method(MyModule.forward, remote_module1, t2)
            output2 = rpc_async_method(MyModule.forward, remote_module2,
                                       output1.wait())
            loss = torch.add(output2.wait(), t1)

            dist_autograd.backward(context_id, [loss.sum()])
            dist_optim.step(context_id)

            new_w1 = rpc_async_method(MyModule.get_w, remote_module1).wait()
            new_w2 = rpc_async_method(MyModule.get_w, remote_module2).wait()

            # ensure optimizer changed weights
            self.assertNotEqual(old_w1, new_w1)
            self.assertNotEqual(old_w2, new_w2)
            # ensure local equals remote
            self.assertEqual(new_w1, module1.get_w())
            self.assertEqual(new_w2, module2.get_w())
Example No. 13
def getRandomDataSets():
    # Reproducible 850/150 train/val split: the lengths are an argument of
    # random_split, and the split is fixed by the seeded generator.
    train, val = random_split(
        datasets.ImageFolder("data", data_transforms["train"]),
        [850, 150],
        generator=torch.Generator().manual_seed(42))
    image_datasets['train'] = train
    image_datasets['val'] = val
    image_datasets['example'] = datasets.ImageFolder('example', data_transforms['example'])
    return image_datasets
Example No. 14
def _setup_prng():
    """
    Generate shared random seeds to generate pseudo-random sharings of
    zero. For each device, we generate four random seeds:
        "prev"  - shared seed with the previous party
        "next"  - shared seed with the next party
        "local" - seed known only to the local party (separate from torch's default seed to prevent interference from torch.manual_seed)
        "global"- seed shared by all parties

    The "prev" and "next" random seeds are shared such that each process shares
    one seed with the previous rank process and one with the next rank.
    This allows for the generation of `n` random values, each known to
    exactly two of the `n` parties.

    For arithmetic sharing, one of these parties will add the number
    while the other subtracts it, allowing for the generation of a
    pseudo-random sharing of zero. (This can be done for binary
    sharing using bitwise-xor rather than addition / subtraction)
    """
    global generators

    # Initialize RNG Generators
    for key in generators.keys():
        generators[key][torch.device("cpu")] = torch.Generator(
            device=torch.device("cpu"))

    if torch.cuda.is_available():
        cuda_device_names = ["cuda"]
        for i in range(torch.cuda.device_count()):
            cuda_device_names.append(f"cuda:{i}")
        cuda_devices = [torch.device(name) for name in cuda_device_names]

        for device in cuda_devices:
            for key in generators.keys():
                generators[key][device] = torch.Generator(device=device)

    # Generate random seeds for Generators
    # NOTE: The seed can be any number, but we choose a random 64-bit
    # integer here so other parties cannot guess its value. We use os.urandom(8)
    # to generate seeds so that forked processes do not generate the same seed.

    # Generate next / prev seeds.
    seed = int.from_bytes(os.urandom(8), "big") - 2**63
    next_seed = torch.tensor(seed)
    prev_seed = torch.tensor([0], dtype=torch.long)  # populated by irecv

    # Send random seed to next party, receive random seed from prev party
    world_size = comm.get().get_world_size()
    rank = comm.get().get_rank()
    if world_size >= 2:  # Guard against segfaults when world_size == 1.
        next_rank = (rank + 1) % world_size
        prev_rank = (next_rank - 2) % world_size

        req0 = comm.get().isend(next_seed, next_rank)
        req1 = comm.get().irecv(prev_seed, src=prev_rank)

        req0.wait()
        req1.wait()
    else:
        prev_seed = next_seed

    prev_seed = prev_seed.item()
    next_seed = next_seed.item()

    # Create local seed - Each party has a separate local generator
    local_seed = int.from_bytes(os.urandom(8), "big") - 2**63

    # Create global generator - All parties share one global generator for sync'd rng
    global_seed = int.from_bytes(os.urandom(8), "big") - 2**63
    global_seed = torch.tensor(global_seed)
    global_seed = comm.get().broadcast(global_seed, 0).item()

    # Create one of each seed per party
    # Note: This is configured to coordinate seeds across CUDA devices
    # so that we can run one party per GPU. If we want to support
    # configurations where each party runs on multiple GPUs across
    # machines, we will need to modify this.
    for device in generators["prev"].keys():
        generators["prev"][device].manual_seed(prev_seed)
        generators["next"][device].manual_seed(next_seed)
        generators["local"][device].manual_seed(local_seed)
        generators["global"][device].manual_seed(global_seed)
Example No. 15
    def __init__(self, p_stop=0.01, max_length=1000):
        self.p_stop = p_stop
        self.max_length = max_length
        self.generator = torch.Generator()
Example No. 16
def train_with_val(net,
                   optimizer,
                   criterion,
                   num_epochs,
                   obj_loss_history: List[List],
                   attr_loss_history: List[List],
                   batch_size,
                   dataset,
                   curr_epoch=0,
                   use_tune=False,
                   model_dir: str = None) -> None:
    """
  Train the model with validation set.
  Parameters:
    [obj/attr]_loss_history: nested list of length 2. history[0] the training loss history and history[1] the validation loss history.
    curr_epoch: the epoch number the model already been trained for.
    model_dir: directory to save model states.
  """

    test_abs = int(len(dataset) * 0.8)
    train_subset, val_subset = random_split(
        dataset, [test_abs, len(dataset) - test_abs],
        generator=torch.Generator().manual_seed(42))
    train_dataloader = DataLoader(train_subset,
                                  batch_size=batch_size,
                                  shuffle=True)
    val_dataloader = DataLoader(val_subset,
                                batch_size=batch_size,
                                shuffle=True)

    for epoch in range(curr_epoch, curr_epoch + num_epochs):
        epoch_steps = 0
        obj_running_loss = 0.0
        attr_running_loss = 0.0
        net.train()
        # ==== Training ====
        for i, batch in tqdm.tqdm(enumerate(train_dataloader),
                                  total=len(train_dataloader),
                                  disable=use_tune,
                                  position=0,
                                  leave=True,
                                  postfix='Train: epoch %d/%d' %
                                  (epoch, curr_epoch + num_epochs)):
            optimizer.zero_grad()
            img, attr_id, obj_id = batch[:3]
            if len(img) == 1:
                # Batchnorm doesn't accept batch with size 1
                continue
            obj_pred, attr_pred = net(img.to(dev))
            obj_loss = criterion(obj_pred, obj_id.to(dev))
            attr_loss = criterion(attr_pred, attr_id.to(dev))
            loss = obj_loss + attr_loss
            loss.backward()
            optimizer.step()

            obj_running_loss += obj_loss.item()
            attr_running_loss += attr_loss.item()
            epoch_steps += 1
            if i % 100 == 99:
                print("[%d, %5d] obj_loss: %.3f, attr_loss: %.3f" %
                      (epoch + 1, i + 1, obj_running_loss / epoch_steps,
                       attr_running_loss / epoch_steps))
                obj_loss_history[0].append(obj_running_loss / epoch_steps)
                attr_loss_history[0].append(attr_running_loss / epoch_steps)

        # ==== Validation ====
        obj_val_loss = 0.0
        attr_val_loss = 0.0
        val_steps = 0

        net.eval()
        for i, batch in tqdm.tqdm(enumerate(val_dataloader),
                                  total=len(val_dataloader),
                                  disable=use_tune,
                                  position=0,
                                  leave=True):
            with torch.no_grad():
                img, attr_id, obj_id = batch[:3]
                obj_pred, attr_pred = net(img.to(dev))
                obj_loss = criterion(obj_pred, obj_id.to(dev))
                attr_loss = criterion(attr_pred, attr_id.to(dev))
                obj_val_loss += obj_loss.cpu().numpy()
                attr_val_loss += attr_loss.cpu().numpy()
                val_steps += 1

        obj_val_loss /= val_steps
        attr_val_loss /= val_steps
        print("[%d] obj_val_loss: %.3f, attr_val_loss: %.3f" %
              (epoch + 1, obj_val_loss, attr_val_loss))
        obj_loss_history[1].append(obj_val_loss)
        attr_loss_history[1].append(attr_val_loss)

        # ==== Save model, report to tune ====
        if use_tune:
            with tune.checkpoint_dir(epoch) as checkpoint_dir:
                path = os.path.join(checkpoint_dir, "checkpoint")
                torch.save(
                    {
                        'model_state_dict': net.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'obj_loss': obj_loss_history,
                        'attr_loss': attr_loss_history,
                    }, path)
            acc = calc_acc(net, val_dataloader, use_tune)
            tune.report(loss=(obj_val_loss + attr_val_loss), accuracy=acc)
            print("accuracy: ", acc)
        else:
            if model_dir:
                model_path = os.path.join(model_dir, f"model_{epoch}.pt")
                torch.save(
                    {
                        'model_state_dict': net.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'obj_loss': obj_loss_history,
                        'attr_loss': attr_loss_history,
                    }, model_path)
                old_model = os.path.join(model_dir, f"model_{epoch-1}.pt")
                if os.path.isfile(old_model):
                    os.remove(old_model)
        print("Finished training.")
Example No. 17
    def train_and_report_stats(
        self
    ) -> Generator[Tuple[int, Optional[Stats], Stats, Optional[Stats]], None,
                   None]:

        holder = self.holder
        config = self.config
        iteration_manager = self.iteration_manager

        total_buckets = holder.nparts_lhs * holder.nparts_rhs

        # yield stats from checkpoint, to reconstruct
        # saved part of the learning curve
        if self.rank == SINGLE_TRAINER:
            for stats_dict in self.checkpoint_manager.maybe_read_stats():
                index: int = stats_dict["index"]
                stats: Stats = Stats.from_dict(stats_dict["stats"])
                eval_stats_before: Optional[Stats] = None
                if "eval_stats_before" in stats_dict:
                    eval_stats_before = Stats.from_dict(
                        stats_dict["eval_stats_before"])
                eval_stats_after: Optional[Stats] = None
                if "eval_stats_after" in stats_dict:
                    eval_stats_after = Stats.from_dict(
                        stats_dict["eval_stats_after"])
                yield (index, eval_stats_before, stats, eval_stats_after)

        for epoch_idx, edge_path_idx, edge_chunk_idx in iteration_manager:
            logger.info(
                f"Starting epoch {epoch_idx + 1} / {iteration_manager.num_epochs}, "
                f"edge path {edge_path_idx + 1} / {iteration_manager.num_edge_paths}, "
                f"edge chunk {edge_chunk_idx + 1} / {iteration_manager.num_edge_chunks}"
            )
            edge_storage = EDGE_STORAGES.make_instance(
                iteration_manager.edge_path)
            logger.info(f"Edge path: {iteration_manager.edge_path}")

            self._barrier()
            dist_logger.info("Lock client new epoch...")
            self.bucket_scheduler.new_pass(
                is_first=iteration_manager.iteration_idx == 0)
            self._barrier()

            remaining = total_buckets
            cur_b: Optional[Bucket] = None
            cur_stats: Optional[BucketStats] = None
            while remaining > 0:
                old_b: Optional[Bucket] = cur_b
                old_stats: Optional[BucketStats] = cur_stats
                cur_b, remaining = self.bucket_scheduler.acquire_bucket()
                logger.info(f"still in queue: {remaining}")
                if cur_b is None:
                    cur_stats = None
                    if old_b is not None:
                        # if you couldn't get a new pair, release the lock
                        # to prevent a deadlock!
                        tic = time.perf_counter()
                        release_bytes = self._swap_partitioned_embeddings(
                            old_b, None, old_stats)
                        release_time = time.perf_counter() - tic
                        logger.info(
                            f"Swapping old embeddings to release lock. io: {release_time:.2f} s for {release_bytes:,} bytes "
                            f"( {release_bytes / release_time / 1e6:.2f} MB/sec )"
                        )
                    time.sleep(1)  # don't hammer td
                    continue

                tic = time.perf_counter()
                self.cur_b = cur_b
                bucket_logger = BucketLogger(logger, bucket=cur_b)
                self.bucket_logger = bucket_logger

                io_bytes = self._swap_partitioned_embeddings(
                    old_b, cur_b, old_stats)
                self.model.set_all_embeddings(holder, cur_b)

                current_index = (iteration_manager.iteration_idx +
                                 1) * total_buckets - remaining

                bucket_logger.debug("Loading edges")
                edges = edge_storage.load_chunk_of_edges(
                    cur_b.lhs,
                    cur_b.rhs,
                    edge_chunk_idx,
                    iteration_manager.num_edge_chunks,
                    shared=True,
                )
                num_edges = len(edges)

                # this might be off in the case of tensorlist or extra edge fields
                io_bytes += edges.lhs.tensor.numel(
                ) * edges.lhs.tensor.element_size()
                io_bytes += edges.rhs.tensor.numel(
                ) * edges.rhs.tensor.element_size()
                io_bytes += edges.rel.numel() * edges.rel.element_size()
                io_time = time.perf_counter() - tic
                tic = time.perf_counter()
                bucket_logger.debug("Shuffling edges")
                # Fix a seed to get the same permutation every time; have it
                # depend on all and only what affects the set of edges.

                # Note: for the sake of efficiency, we sample eval edge idxs
                # from the edge set *with replacement*, meaning that there may
                # be duplicates of the same edge in the eval set. When we swap
                # edges into the eval set, if there are duplicates then all
                # but one will be clobbered. These collisions are unlikely
                # if eval_fraction is small.
                #
                # Importantly, this eval sampling strategy is theoretically
                # sound:
                # * Training and eval sets are (exactly) disjoint
                # * Eval set may have (rare) duplicates, but they are
                #   uniformly sampled so it's still an unbiased estimator
                #   of the out-of-sample statistics
                num_eval_edges = int(num_edges * config.eval_fraction)
                num_train_edges = num_edges - num_eval_edges
                if num_eval_edges > 0:
                    g = torch.Generator()
                    g.manual_seed(
                        hash((edge_path_idx, edge_chunk_idx, cur_b.lhs,
                              cur_b.rhs)))
                    eval_edge_idxs = torch.randint(num_edges,
                                                   (num_eval_edges, ),
                                                   dtype=torch.long,
                                                   generator=g)
                else:
                    eval_edge_idxs = None

                # HOGWILD evaluation before training
                eval_stats_before = self._coordinate_eval(
                    edges, eval_edge_idxs)
                if eval_stats_before is not None:
                    bucket_logger.info(
                        f"Stats before training: {eval_stats_before}")
                eval_time = time.perf_counter() - tic
                tic = time.perf_counter()

                # HOGWILD training
                bucket_logger.debug("Waiting for workers to perform training")
                stats = self._coordinate_train(edges, eval_edge_idxs,
                                               epoch_idx)
                train_time = time.perf_counter() - tic
                tic = time.perf_counter()

                # HOGWILD evaluation after training
                eval_stats_after = self._coordinate_eval(edges, eval_edge_idxs)
                if eval_stats_after is not None:
                    bucket_logger.info(
                        f"Stats before training: {eval_stats_after}")

                eval_time += time.perf_counter() - tic

                bucket_logger.info(
                    f"bucket {total_buckets - remaining} / {total_buckets} : "
                    f"Trained {num_train_edges} edges in {train_time:.2f} s "
                    f"( {num_train_edges / train_time / 1e6:.2g} M/sec ); "
                    f"Eval 2*{num_eval_edges} edges in {eval_time:.2f} s "
                    f"( {2 * num_eval_edges / eval_time / 1e6:.2g} M/sec ); "
                    f"io: {io_time:.2f} s for {io_bytes:,} bytes ( {io_bytes / io_time / 1e6:.2f} MB/sec )"
                )
                bucket_logger.info(f"{stats}")

                self.model.clear_all_embeddings()

                yield current_index, eval_stats_before, stats, eval_stats_after

                cur_stats = BucketStats(
                    lhs_partition=cur_b.lhs,
                    rhs_partition=cur_b.rhs,
                    index=current_index,
                    train=stats,
                    eval_before=eval_stats_before,
                    eval_after=eval_stats_after,
                )

            # release the final bucket
            self._swap_partitioned_embeddings(cur_b, None, cur_stats)

            # Distributed Processing: all machines can leave the barrier now.
            self._barrier()

            self._maybe_write_checkpoint(epoch_idx, edge_path_idx,
                                         edge_chunk_idx)

            # now we're sure that all partition files exist,
            # so be strict about loading them
            self.strict = True
Example No. 18
    def _sort_shard_and_shuffle_dataset(self):
        # This method returns a list of dataset sample indices after
        # the dataset has been sorted, sharded and shuffled.
        # The sorting of the dataset happens based on the group_size and complexities
        # of each sample.
        # Sharding happens across the number of workers.
        # Shuffling is done either before sharding on the group indices (if group_size is provided)
        # or on the dataset sample indices if the group_size is not provided.

        def sort_in_groups(sample_complexities, group_size):
            """Sort the dataset samples indices inside each group of size group_size."""
            # If the group_size is None, the entire dataset is considered as a single group
            if group_size is None:
                group_size = len(sample_complexities)
            # Sort the dataset samples inside each group of the dataset based on sample complexity.
            for group_begin_index in range(0, len(sample_complexities),
                                           group_size):
                group_end_index = min(group_begin_index + group_size,
                                      len(sample_complexities))
                sorted_indices = group_begin_index + np.argsort(
                    sample_complexities[group_begin_index:group_end_index, 1])
                sample_complexities[
                    group_begin_index:
                    group_end_index, :] = sample_complexities[sorted_indices]
            return sample_complexities

        # Get the samples and their complexities from the complexity_fn
        if not self.sample_complexities:
            self.sample_complexities = np.empty((len(self.dataset), 2),
                                                dtype=np.int64)
            for sample_index in range(len(self.dataset)):
                self.sample_complexities[sample_index][0] = sample_index
                self.sample_complexities[sample_index][1] = self.complexity_fn(
                    self.dataset[sample_index])

        if self.random_number is None:
            max_complexity = max(self.sample_complexities,
                                 key=lambda t: t[1])[1]
            min_complexity = min(self.sample_complexities,
                                 key=lambda t: t[1])[1]
            self.random_number = int((max_complexity - min_complexity) *
                                     self.random_level + 1)

        sample_complexities = self.sample_complexities.copy()

        # Control the degree of load balancing by modifying the complexities of
        # all samples using the random_number.
        g = torch.Generator()
        g = g.manual_seed(self.seed + self.epoch)

        if self.random_number > 1:
            complexity_random_ints = torch.randint(
                self.random_number, (len(sample_complexities), ),
                generator=g).tolist()

            for index, random_int in enumerate(complexity_random_ints):
                sample_complexities[index][1] += random_int

        # Sort the data based on the computed complexities and group sizes.
        # Sort only once if random_number <= 1, otherwise sort every time
        if self.ordered_sample_complexities is None or self.random_number > 1:
            self.ordered_sample_complexities = sort_in_groups(
                sample_complexities, self.group_size)
        ordered_sample_complexities = self.ordered_sample_complexities

        # If group_size is not None, shuffle the index of each group instead
        # of shuffling the data indices.
        if self.shuffle and self.group_size is not None:
            num_groups = (len(self.sample_complexities) + self.group_size -
                          1) // self.group_size
            group_order = torch.randperm(num_groups, generator=g).tolist()
            end = 0
            sample_complexities_copy = ordered_sample_complexities.copy()
            for group_index in group_order:
                original_list_begin_index = self.group_size * group_index
                original_list_end_index = min(
                    original_list_begin_index + self.group_size,
                    len(sample_complexities))
                begin = end
                end = begin + (original_list_end_index -
                               original_list_begin_index)
                sample_complexities_copy[begin:end, :] = sample_complexities[
                    original_list_begin_index:original_list_end_index, :]
            ordered_sample_complexities = sample_complexities_copy

        # Shard the data across the different workers.
        index_chunks = list(
            _shard_wrapped_indices_across_workers(
                [
                    index_complexity_tuple[0]
                    for index_complexity_tuple in ordered_sample_complexities
                ],
                self.world_size,
                self.num_samples,
            ))

        # Shuffle the sharded data indices deterministically based on epoch and seed.
        chunk_indices = list(range(len(index_chunks)))
        if self.shuffle and self.group_size is None:
            chunk_indices = torch.randperm(len(index_chunks),
                                           generator=g).tolist()

        if not self.drop_last:
            # Add extra samples to make it evenly divisible
            padding_size = self.num_samples - len(chunk_indices)
            if padding_size <= len(chunk_indices):
                chunk_indices += chunk_indices[:padding_size]
            else:
                chunk_indices += (chunk_indices * math.ceil(
                    padding_size / len(chunk_indices)))[:padding_size]
        else:
            # Remove tail of data to make it evenly divisible.
            chunk_indices = chunk_indices[:self.num_samples]

        assert len(chunk_indices) == self.num_samples
        return index_chunks, chunk_indices
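
The comment block at the top of _sort_shard_and_shuffle_dataset describes sorting sample indices within fixed-size groups before sharding. A standalone toy illustration of just that sorting step (the array values and group size below are invented for illustration):

import numpy as np

# Column 0: sample index, column 1: complexity (e.g. sequence length).
sample_complexities = np.array(
    [[0, 9], [1, 3], [2, 7], [3, 1], [4, 8], [5, 2]], dtype=np.int64)
group_size = 3

for begin in range(0, len(sample_complexities), group_size):
    end = min(begin + group_size, len(sample_complexities))
    order = begin + np.argsort(sample_complexities[begin:end, 1])
    sample_complexities[begin:end, :] = sample_complexities[order]

print(sample_complexities)
# Each group of 3 rows is now ordered by complexity:
# [[1 3]
#  [2 7]
#  [0 9]
#  [3 1]
#  [5 2]
#  [4 8]]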
Example No. 19
def generate_images(
    ctx: click.Context,
    network_pkl: str,
    seeds: Optional[List[int]],
    truncation_psi: object,
    noise_mode: str,
    outdir: str,
    class_idx: Optional[int],
    projected_w: Optional[str]
):
    """Generate images using pretrained network pickle.

    Examples:

    \b
    # Generate curated MetFaces images without truncation (Fig.10 left)
    python generate.py --outdir=out --trunc=1 --seeds=85,265,297,849 \\
        --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl

    \b
    # Generate uncurated MetFaces images with truncation (Fig.12 upper left)
    python generate.py --outdir=out --trunc=0.7 --seeds=600-605 \\
        --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl

    \b
    # Generate class conditional CIFAR-10 images (Fig.17 left, Car)
    python generate.py --outdir=out --seeds=0-35 --class=1 \\
        --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/cifar10.pkl

    \b
    # Render an image from projected W
    python generate.py --outdir=out --projected_w=projected_w.npz \\
        --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl
    """

    print('Loading networks from "%s"...' % network_pkl)

    if network_pkl == 'latest':
        files = glob.glob("training-runs/*/*.pkl")
        files.sort(key=os.path.getmtime)
        network_pkl = files[-1]
    device = torch.device('cuda')
    with dnnlib.util.open_url(network_pkl) as f:
        G = legacy.load_network_pkl(f)['G_ema'].to(device) # type: ignore

    os.makedirs(outdir, exist_ok=True)

    # Synthesize the result of a W projection.
    if projected_w is not None:
        if seeds is not None:
            print ('warn: --seeds is ignored when using --projected-w')
        print(f'Generating images from projected W "{projected_w}"')
        ws = np.load(projected_w)['w']
        ws = torch.tensor(ws, device=device) # pylint: disable=not-callable
        assert ws.shape[1:] == (G.num_ws, G.w_dim)
        for idx, w in enumerate(ws):
            img = G.synthesis(w.unsqueeze(0), noise_mode=noise_mode)
            img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)
            PIL.Image.fromarray(img[0].cpu().numpy(), 'RGB').save(f'{outdir}/proj{idx:02d}.png')
        return

    if seeds is None:
        ctx.fail('--seeds option is required when not using --projected-w')

    # Labels.
    label = torch.zeros([1, G.c_dim], device=device)
    if G.c_dim != 0:
        if class_idx is None:
            ctx.fail('Must specify class label with --class when using a conditional network')
        label[:, class_idx] = 1
    else:
        if class_idx is not None:
            print ('warn: --class=lbl ignored when running on an unconditional network')

    # Generate images.
    for seed_idx, seed in enumerate(seeds):
        print('Generating image for seed %d (%d/%d) ...' % (seed, seed_idx, len(seeds)))
        z = torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(device)
        rand_gen = None
        if noise_mode == 'generator':
            rand_gen = torch.Generator(device=device)
            rand_gen.manual_seed(seed)
        if truncation_psi == 'seed':
            tpsi = (np.random.RandomState(seed).randint(0, 100) / 100) + 0.5
        else:
            tpsi = truncation_psi
        img = G(z, label, truncation_psi=tpsi, noise_mode=noise_mode, rand_gen=rand_gen)
        img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)[0].cpu().numpy()
        if img.shape[-1] == 1:
            img = np.repeat(img, 3, -1)
        PIL.Image.fromarray(img, 'RGB').save(f'{outdir}/seed{seed:04d}.png')
Example No. 20
def _setup_przs(device=None):
    """
    Generate shared random seeds to generate pseudo-random sharings of
    zero. The random seeds are shared such that each process shares
    one seed with the previous rank process and one with the next rank.
    This allows for the generation of `n` random values, each known to
    exactly two of the `n` parties.

    For arithmetic sharing, one of these parties will add the number
    while the other subtracts it, allowing for the generation of a
    pseudo-random sharing of zero. (This can be done for binary
    sharing using bitwise-xor rather than addition / subtraction)
    """
    # Initialize RNG Generators
    comm.get().g0 = torch.Generator()
    comm.get().g1 = torch.Generator()

    device = "cuda" if device is None else device
    device = torch.device(device)
    assert device.type == "cuda", "Must be a GPU device"

    if torch.cuda.is_available():
        comm.get().g0_cuda = torch.Generator(device=device)
        comm.get().g1_cuda = torch.Generator(device=device)

    # Generate random seeds for Generators
    # NOTE: The seed can be any number, but we choose a random 64-bit
    # integer here so other parties cannot guess its value.

    # We sometimes get here from a forked process, which causes all parties
    # to have the same RNG state. Reset the seed to make sure RNG streams
    # are different in all the parties. We use numpy's random here since
    # setting its seed to None will produce different seeds even from
    # forked processes.
    import numpy

    numpy.random.seed(seed=None)
    next_seed = torch.tensor(numpy.random.randint(-(2**63), 2**63 - 1, (1, )))
    prev_seed = torch.LongTensor([0])  # placeholder

    # Send random seed to next party, receive random seed from prev party
    world_size = comm.get().get_world_size()
    rank = comm.get().get_rank()
    if world_size >= 2:  # Otherwise sending seeds will segfault.
        next_rank = (rank + 1) % world_size
        prev_rank = (next_rank - 2) % world_size

        req0 = comm.get().isend(tensor=next_seed, dst=next_rank)
        req1 = comm.get().irecv(tensor=prev_seed, src=prev_rank)

        req0.wait()
        req1.wait()
    else:
        prev_seed = next_seed

    # Seed Generators
    comm.get().g0.manual_seed(next_seed.item())
    comm.get().g1.manual_seed(prev_seed.item())

    # Create global generator
    global_seed = torch.tensor(numpy.random.randint(-(2**63), 2**63 - 1,
                                                    (1, )))
    global_seed = comm.get().broadcast(global_seed, 0)
    comm.get().global_generator = torch.Generator()
    comm.get().global_generator.manual_seed(global_seed.item())
Example No. 21
    def __init__(self, data_source):
        self.data_source = data_source
        self.gen = torch.Generator().manual_seed(0)
Example No. 22
p_drop = 0.5
learning_rate = 5e-4
classifiers = [LatticeClassifier, ConvClassifier]  #, HybridClassifier]
classifier_names = ["lattice", "conv"]  #,"hybrid"]
for (Classifier, name) in zip(classifiers, classifier_names):
    train_accuracy = torch.zeros(n_epochs, n_trials)
    test_accuracy = torch.zeros(n_epochs, n_trials)
    train_loss = torch.zeros(n_epochs, n_trials)
    #print("testing model '{:s}'".format(name))
    for trial in range(n_trials):
        trial_start = time.time()
        data = [[X[index, :, :, :], Y[index]] for index in range(X.shape[0])]
        training_data, testing_data = random_split(
            data, [len(data) - len(data) // 10,
                   len(data) // 10],
            generator=torch.Generator().manual_seed(42 + trial))
        trainloader = DataLoader(training_data,
                                 batch_size=128,
                                 shuffle=True,
                                 pin_memory=True)
        testloader = DataLoader(testing_data,
                                batch_size=128,
                                shuffle=False,
                                pin_memory=True)
        print("{:s} trial {:d}".format(name, trial + 1))
        model = Classifier(feature_dim,
                           n_features,
                           n_classes,
                           alpha=alpha,
                           p_drop=p_drop)
        model = model.to(device)
Example No. 23
def main_worker(args):
    global start_epoch, best_recall5
    init_dist(args.launcher, args)
    synchronize()

    if args.seed is not None:
        random.seed(args.seed)
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed(args.seed)

    if args.deterministic:
        cudnn.deterministic = True
        cudnn.benchmark = False

    print("Use GPU: {} for training, rank no.{} of world_size {}".format(
        args.gpu, args.rank, args.world_size))

    if (args.rank == 0):
        sys.stdout = Logger(osp.join(args.logs_dir, 'log.txt'))
        print("==========\nArgs:{}\n==========".format(args))

    # Create data loaders
    iters = args.iters if (args.iters > 0) else None
    dataset, train_loader, val_loader, test_loader, sampler, train_extract_loader = get_data(
        args, iters)

    # Create model
    model = get_model(args)

    # Load from checkpoint
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        copy_state_dict(checkpoint['state_dict'], model)
        start_epoch = checkpoint['epoch'] + 1
        best_recall5 = checkpoint['best_recall5']
        if (args.rank == 0):
            print("=> Start epoch {}  best recall5 {:.1%}".format(
                start_epoch, best_recall5))

    # Evaluator
    evaluator = Evaluator(model)
    if (args.rank == 0):
        print("Test the initial model:")
    recalls = evaluator.evaluate(
        val_loader,
        sorted(list(set(dataset.q_val) | set(dataset.db_val))),
        dataset.q_val,
        dataset.db_val,
        dataset.val_pos,
        vlad=args.vlad,
        gpu=args.gpu,
        sync_gather=args.sync_gather)

    # Optimizer
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       model.parameters()),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=args.step_size,
                                                   gamma=0.5)

    # Trainer
    trainer = Trainer(model, margin=args.margin**0.5, gpu=args.gpu)
    if ((args.cache_size < args.tuple_size)
            or (args.cache_size > len(dataset.q_train))):
        args.cache_size = len(dataset.q_train)

    # Start training
    for epoch in range(start_epoch, args.epochs):
        sampler.set_epoch(args.seed + epoch)
        args.cache_size = args.cache_size * (2**(epoch // args.step_size))

        g = torch.Generator()
        g.manual_seed(args.seed + epoch)
        subset_indices = torch.randperm(len(
            dataset.q_train), generator=g).long().split(args.cache_size)

        for subid, subset in enumerate(subset_indices):
            update_sampler(sampler,
                           model,
                           train_extract_loader,
                           dataset.q_train,
                           dataset.db_train,
                           subset.tolist(),
                           vlad=args.vlad,
                           gpu=args.gpu,
                           sync_gather=args.sync_gather)
            synchronize()
            trainer.train(epoch,
                          subid,
                          train_loader,
                          optimizer,
                          train_iters=len(train_loader),
                          print_freq=args.print_freq,
                          vlad=args.vlad,
                          loss_type=args.loss_type)
            synchronize()

        if ((epoch + 1) % args.eval_step == 0 or (epoch == args.epochs - 1)):
            recalls = evaluator.evaluate(
                val_loader,
                sorted(list(set(dataset.q_val) | set(dataset.db_val))),
                dataset.q_val,
                dataset.db_val,
                dataset.val_pos,
                vlad=args.vlad,
                gpu=args.gpu,
                sync_gather=args.sync_gather)

            is_best = recalls[1] > best_recall5
            best_recall5 = max(recalls[1], best_recall5)

            if (args.rank == 0):
                save_checkpoint(
                    {
                        'state_dict': model.state_dict(),
                        'epoch': epoch,
                        'best_recall5': best_recall5,
                    },
                    is_best,
                    fpath=osp.join(args.logs_dir,
                                   'checkpoint' + str(epoch) + '.pth'))
                print(
                    '\n * Finished epoch {:3d} recall@1: {:5.1%}  recall@5: {:5.1%}  recall@10: {:5.1%}  best@5: {:5.1%}{}\n'
                    .format(epoch, recalls[0], recalls[1], recalls[2],
                            best_recall5, ' *' if is_best else ''))

        lr_scheduler.step()
        synchronize()

    # final inference
    if (args.rank == 0):
        print("Performing PCA reduction on the best model:")
    model.load_state_dict(
        load_checkpoint(osp.join(args.logs_dir,
                                 'model_best.pth'))['state_dict'])
    pca_parameters_path = osp.join(args.logs_dir, 'pca_params_model_best.h5')
    pca = PCA(args.features, (not args.nowhiten), pca_parameters_path)
    dict_f = extract_features(
        model,
        train_extract_loader,
        sorted(list(set(dataset.q_train) | set(dataset.db_train))),
        vlad=args.vlad,
        gpu=args.gpu,
        sync_gather=args.sync_gather)
    features = list(dict_f.values())
    if (len(features) > 10000):
        features = random.sample(features, 10000)
    features = torch.stack(features)
    if (args.rank == 0):
        pca.train(features)
    synchronize()
    del features
    if (args.rank == 0):
        print("Testing on Pitts30k-test:")
    evaluator.evaluate(test_loader,
                       sorted(list(set(dataset.q_test)
                                   | set(dataset.db_test))),
                       dataset.q_test,
                       dataset.db_test,
                       dataset.test_pos,
                       vlad=args.vlad,
                       pca=pca,
                       gpu=args.gpu,
                       sync_gather=args.sync_gather)
    synchronize()
    return
Example No. 24
def train_and_report_stats(
    config: ConfigSchema,
    model: Optional[MultiRelationEmbedder] = None,
    trainer: Optional[AbstractBatchProcessor] = None,
    evaluator: Optional[AbstractBatchProcessor] = None,
    rank: Rank = RANK_ZERO,
    subprocess_init: Optional[Callable[[], None]] = None,
) -> Generator[Tuple[int, Optional[Stats], Stats, Optional[Stats]], None, None]:
    """Each epoch/pass, for each partition pair, loads in embeddings and edgelist
    from disk, runs HOGWILD training on them, and writes partitions back to disk.
    """
    tag_logs_with_process_name(f"Trainer-{rank}")

    if config.verbose > 0:
        import pprint
        pprint.PrettyPrinter().pprint(config.to_dict())

    logger.info("Loading entity counts...")
    entity_counts: Dict[str, List[int]] = {}
    for entity, econf in config.entities.items():
        entity_counts[entity] = []
        for part in range(econf.num_partitions):
            with open(os.path.join(
                config.entity_path, "entity_count_%s_%d.txt" % (entity, part)
            ), "rt") as tf:
                entity_counts[entity].append(int(tf.read().strip()))

    # Figure out how many lhs and rhs partitions we need
    nparts_lhs, lhs_partitioned_types = get_partitioned_types(config, Side.LHS)
    nparts_rhs, rhs_partitioned_types = get_partitioned_types(config, Side.RHS)
    logger.debug(
        f"nparts {nparts_lhs} {nparts_rhs} "
        f"types {lhs_partitioned_types} {rhs_partitioned_types}")
    total_buckets = nparts_lhs * nparts_rhs

    sync: AbstractSynchronizer
    bucket_scheduler: AbstractBucketScheduler
    parameter_sharer: Optional[ParameterSharer]
    partition_client: Optional[PartitionClient]
    if config.num_machines > 1:
        if not 0 <= rank < config.num_machines:
            raise RuntimeError("Invalid rank for trainer")
        if not td.is_available():
            raise RuntimeError("The installed PyTorch version doesn't provide "
                               "distributed training capabilities.")
        ranks = ProcessRanks.from_num_invocations(
            config.num_machines, config.num_partition_servers)

        if rank == RANK_ZERO:
            logger.info("Setup lock server...")
            start_server(
                LockServer(
                    num_clients=len(ranks.trainers),
                    nparts_lhs=nparts_lhs,
                    nparts_rhs=nparts_rhs,
                    lock_lhs=len(lhs_partitioned_types) > 0,
                    lock_rhs=len(rhs_partitioned_types) > 0,
                    init_tree=config.distributed_tree_init_order,
                ),
                process_name="LockServer",
                init_method=config.distributed_init_method,
                world_size=ranks.world_size,
                server_rank=ranks.lock_server,
                groups=[ranks.trainers],
                subprocess_init=subprocess_init,
            )

        bucket_scheduler = DistributedBucketScheduler(
            server_rank=ranks.lock_server,
            client_rank=ranks.trainers[rank],
        )

        logger.info("Setup param server...")
        start_server(
            ParameterServer(num_clients=len(ranks.trainers)),
            process_name=f"ParamS-{rank}",
            init_method=config.distributed_init_method,
            world_size=ranks.world_size,
            server_rank=ranks.parameter_servers[rank],
            groups=[ranks.trainers],
            subprocess_init=subprocess_init,
        )

        parameter_sharer = ParameterSharer(
            process_name=f"ParamC-{rank}",
            client_rank=ranks.parameter_clients[rank],
            all_server_ranks=ranks.parameter_servers,
            init_method=config.distributed_init_method,
            world_size=ranks.world_size,
            groups=[ranks.trainers],
            subprocess_init=subprocess_init,
        )

        if config.num_partition_servers == -1:
            start_server(
                ParameterServer(num_clients=len(ranks.trainers)),
                process_name=f"PartS-{rank}",
                init_method=config.distributed_init_method,
                world_size=ranks.world_size,
                server_rank=ranks.partition_servers[rank],
                groups=[ranks.trainers],
                subprocess_init=subprocess_init,
            )

        if len(ranks.partition_servers) > 0:
            partition_client = PartitionClient(ranks.partition_servers)
        else:
            partition_client = None

        groups = init_process_group(
            rank=ranks.trainers[rank],
            world_size=ranks.world_size,
            init_method=config.distributed_init_method,
            groups=[ranks.trainers],
        )
        trainer_group, = groups
        sync = DistributedSynchronizer(trainer_group)

    else:
        sync = DummySynchronizer()
        bucket_scheduler = SingleMachineBucketScheduler(
            nparts_lhs, nparts_rhs, config.bucket_order)
        parameter_sharer = None
        partition_client = None
        hide_distributed_logging()

    # fork early for HOGWILD threads
    logger.info("Creating workers...")
    num_workers = get_num_workers(config.workers)
    pool = create_pool(
        num_workers,
        subprocess_name=f"TWorker-{rank}",
        subprocess_init=subprocess_init,
    )

    def make_optimizer(params: Iterable[torch.nn.Parameter], is_emb: bool) -> Optimizer:
        params = list(params)
        if len(params) == 0:
            optimizer = DummyOptimizer()
        elif is_emb:
            optimizer = RowAdagrad(params, lr=config.lr)
        else:
            if config.relation_lr is not None:
                lr = config.relation_lr
            else:
                lr = config.lr
            optimizer = Adagrad(params, lr=lr)
        optimizer.share_memory()
        return optimizer

    # background_io is only supported in single-machine mode
    background_io = config.background_io and config.num_machines == 1

    checkpoint_manager = CheckpointManager(
        config.checkpoint_path,
        background=background_io,
        rank=rank,
        num_machines=config.num_machines,
        partition_client=partition_client,
        subprocess_name=f"BackgRW-{rank}",
        subprocess_init=subprocess_init,
    )
    checkpoint_manager.register_metadata_provider(ConfigMetadataProvider(config))
    checkpoint_manager.write_config(config)

    iteration_manager = IterationManager(
        config.num_epochs, config.edge_paths, config.num_edge_chunks,
        iteration_idx=checkpoint_manager.checkpoint_version)
    checkpoint_manager.register_metadata_provider(iteration_manager)

    if config.init_path is not None:
        loadpath_manager = CheckpointManager(config.init_path)
    else:
        loadpath_manager = None

    def load_embeddings(
        entity: EntityName,
        part: Partition,
        strict: bool = False,
        force_dirty: bool = False,
    ) -> Tuple[torch.nn.Parameter, Optional[OptimizerStateDict]]:
        if strict:
            embs, optim_state = checkpoint_manager.read(entity, part,
                                                        force_dirty=force_dirty)
        else:
            # Strict is only false during the first iteration, because in that
            # case the checkpoint may not contain any data (unless a previous
            # run was resumed) so we fall back on initial values.
            embs, optim_state = checkpoint_manager.maybe_read(entity, part,
                                                              force_dirty=force_dirty)
            if embs is None and loadpath_manager is not None:
                embs, optim_state = loadpath_manager.maybe_read(entity, part)
            if embs is None:
                embs, optim_state = init_embs(entity, entity_counts[entity][part],
                                              config.dimension, config.init_scale)
        assert embs.is_shared()
        return torch.nn.Parameter(embs), optim_state

    logger.info("Initializing global model...")

    if model is None:
        model = make_model(config)
    model.share_memory()
    if trainer is None:
        trainer = Trainer(
            global_optimizer=make_optimizer(model.parameters(), False),
            loss_fn=config.loss_fn,
            margin=config.margin,
            relations=config.relations,
        )
    if evaluator is None:
        evaluator = TrainingRankingEvaluator(
            override_num_batch_negs=config.eval_num_batch_negs,
            override_num_uniform_negs=config.eval_num_uniform_negs,
        )
    eval_batch_size = round_up_to_nearest_multiple(config.batch_size, config.eval_num_batch_negs)

    state_dict, optim_state = checkpoint_manager.maybe_read_model()

    if state_dict is None and loadpath_manager is not None:
        state_dict, optim_state = loadpath_manager.maybe_read_model()
    if state_dict is not None:
        model.load_state_dict(state_dict, strict=False)
    if optim_state is not None:
        trainer.global_optimizer.load_state_dict(optim_state)

    logger.debug("Loading unpartitioned entities...")
    for entity, econfig in config.entities.items():
        if econfig.num_partitions == 1:
            embs, optim_state = load_embeddings(entity, Partition(0))
            model.set_embeddings(entity, embs, Side.LHS)
            model.set_embeddings(entity, embs, Side.RHS)
            optimizer = make_optimizer([embs], True)
            if optim_state is not None:
                optimizer.load_state_dict(optim_state)
            trainer.entity_optimizers[(entity, Partition(0))] = optimizer

    # start communicating shared parameters with the parameter server
    if parameter_sharer is not None:
        parameter_sharer.share_model_params(model)

    strict = False

    def swap_partitioned_embeddings(
        old_b: Optional[Bucket],
        new_b: Optional[Bucket],
    ):
        # 0. given the old and new buckets, construct data structures to keep
        #    track of old and new embedding (entity, part) tuples

        io_bytes = 0
        logger.info(f"Swapping partitioned embeddings {old_b} {new_b}")

        types = ([(e, Side.LHS) for e in lhs_partitioned_types]
                 + [(e, Side.RHS) for e in rhs_partitioned_types])
        old_parts = {(e, old_b.get_partition(side)): side
                     for e, side in types if old_b is not None}
        new_parts = {(e, new_b.get_partition(side)): side
                     for e, side in types if new_b is not None}

        to_checkpoint = set(old_parts) - set(new_parts)
        preserved = set(old_parts) & set(new_parts)

        # 1. checkpoint embeddings that will not be used in the next pair
        #
        if old_b is not None:  # there are previous embeddings to checkpoint
            logger.info("Writing partitioned embeddings")
            for entity, part in to_checkpoint:
                side = old_parts[(entity, part)]
                side_name = side.pick("lhs", "rhs")
                logger.debug(f"Checkpointing ({entity} {part} {side_name})")
                embs = model.get_embeddings(entity, side)
                optim_key = (entity, part)
                optim_state = OptimizerStateDict(trainer.entity_optimizers[optim_key].state_dict())
                io_bytes += embs.numel() * embs.element_size()  # ignore optim state
                checkpoint_manager.write(entity, part, embs.detach(), optim_state)
                if optim_key in trainer.entity_optimizers:
                    del trainer.entity_optimizers[optim_key]
                # these variables are holding large objects; let them be freed
                del embs
                del optim_state

            bucket_scheduler.release_bucket(old_b)

        # 2. copy old embeddings that will be used in the next pair
        #    into a temporary dictionary
        #
        tmp_emb = {x: model.get_embeddings(x[0], old_parts[x]) for x in preserved}

        for entity, _ in types:
            model.clear_embeddings(entity, Side.LHS)
            model.clear_embeddings(entity, Side.RHS)

        if new_b is None:  # there are no new embeddings to load
            return io_bytes

        bucket_logger = BucketLogger(logger, bucket=new_b)

        # 3. load new embeddings into the model/optimizer, either from disk
        #    or the temporary dictionary
        #
        bucket_logger.info("Loading entities")
        for entity, side in types:
            part = new_b.get_partition(side)
            part_key = (entity, part)
            if part_key in tmp_emb:
                bucket_logger.debug(f"Loading ({entity}, {part}) from preserved")
                embs, optim_state = tmp_emb[part_key], None
            else:
                bucket_logger.debug(f"Loading ({entity}, {part})")

                force_dirty = bucket_scheduler.check_and_set_dirty(entity, part)
                embs, optim_state = load_embeddings(
                    entity, part, strict=strict, force_dirty=force_dirty)
                io_bytes += embs.numel() * embs.element_size()  # ignore optim state

            model.set_embeddings(entity, embs, side)
            tmp_emb[part_key] = embs

            optim_key = (entity, part)
            if optim_key not in trainer.entity_optimizers:
                bucket_logger.debug(f"Resetting optimizer {optim_key}")
                optimizer = make_optimizer([embs], True)
                if optim_state is not None:
                    bucket_logger.debug("Setting optim state")
                    optimizer.load_state_dict(optim_state)

                trainer.entity_optimizers[optim_key] = optimizer

        return io_bytes

    # Start of the main training loop.
    for epoch_idx, edge_path_idx, edge_chunk_idx in iteration_manager:
        logger.info(
            f"Starting epoch {epoch_idx + 1} / {iteration_manager.num_epochs}, "
            f"edge path {edge_path_idx + 1} / {iteration_manager.num_edge_paths}, "
            f"edge chunk {edge_chunk_idx + 1} / {iteration_manager.num_edge_chunks}")
        edge_reader = EdgeReader(iteration_manager.edge_path)
        logger.info(f"Edge path: {iteration_manager.edge_path}")

        sync.barrier()
        dist_logger.info("Lock client new epoch...")
        bucket_scheduler.new_pass(is_first=iteration_manager.iteration_idx == 0)
        sync.barrier()

        remaining = total_buckets
        cur_b = None
        while remaining > 0:
            old_b = cur_b
            io_time = 0.
            io_bytes = 0
            cur_b, remaining = bucket_scheduler.acquire_bucket()
            logger.info(f"still in queue: {remaining}")
            if cur_b is None:
                if old_b is not None:
                    # if you couldn't get a new pair, release the lock
                    # to prevent a deadlock!
                    tic = time.time()
                    io_bytes += swap_partitioned_embeddings(old_b, None)
                    io_time += time.time() - tic
                time.sleep(1)  # don't hammer td
                continue

            bucket_logger = BucketLogger(logger, bucket=cur_b)

            tic = time.time()

            io_bytes += swap_partitioned_embeddings(old_b, cur_b)

            current_index = \
                (iteration_manager.iteration_idx + 1) * total_buckets - remaining

            next_b = bucket_scheduler.peek()
            if next_b is not None and background_io:
                # Ensure the previous bucket finished writing to disk.
                checkpoint_manager.wait_for_marker(current_index - 1)

                bucket_logger.debug("Prefetching")
                for entity in lhs_partitioned_types:
                    checkpoint_manager.prefetch(entity, next_b.lhs)
                for entity in rhs_partitioned_types:
                    checkpoint_manager.prefetch(entity, next_b.rhs)

                checkpoint_manager.record_marker(current_index)

            bucket_logger.debug("Loading edges")
            edges = edge_reader.read(
                cur_b.lhs, cur_b.rhs, edge_chunk_idx, config.num_edge_chunks)
            num_edges = len(edges)
            # this might be off in the case of tensorlist or extra edge fields
            io_bytes += edges.lhs.tensor.numel() * edges.lhs.tensor.element_size()
            io_bytes += edges.rhs.tensor.numel() * edges.rhs.tensor.element_size()
            io_bytes += edges.rel.numel() * edges.rel.element_size()

            bucket_logger.debug("Shuffling edges")
            # Fix a seed to get the same permutation every time; have it
            # depend on all and only what affects the set of edges.
            g = torch.Generator()
            g.manual_seed(hash((edge_path_idx, edge_chunk_idx, cur_b.lhs, cur_b.rhs)))

            num_eval_edges = int(num_edges * config.eval_fraction)
            if num_eval_edges > 0:
                edge_perm = torch.randperm(num_edges, generator=g)
                eval_edge_perm = edge_perm[-num_eval_edges:]
                num_edges -= num_eval_edges
                edge_perm = edge_perm[torch.randperm(num_edges)]
            else:
                edge_perm = torch.randperm(num_edges)

            # HOGWILD evaluation before training
            eval_stats_before: Optional[Stats] = None
            if num_eval_edges > 0:
                bucket_logger.debug("Waiting for workers to perform evaluation")
                future_all_eval_stats_before = pool.map_async(call, [
                    partial(
                        process_in_batches,
                        batch_size=eval_batch_size,
                        model=model,
                        batch_processor=evaluator,
                        edges=edges,
                        indices=eval_edge_perm[s],
                    )
                    for s in split_almost_equally(eval_edge_perm.size(0),
                                                  num_parts=num_workers)
                ])
                all_eval_stats_before = \
                    get_async_result(future_all_eval_stats_before, pool)
                eval_stats_before = Stats.sum(all_eval_stats_before).average()
                bucket_logger.info(f"Stats before training: {eval_stats_before}")

            io_time += time.time() - tic
            tic = time.time()
            # HOGWILD training
            bucket_logger.debug("Waiting for workers to perform training")
            # FIXME should we only delay if iteration_idx == 0?
            future_all_stats = pool.map_async(call, [
                partial(
                    process_in_batches,
                    batch_size=config.batch_size,
                    model=model,
                    batch_processor=trainer,
                    edges=edges,
                    indices=edge_perm[s],
                    delay=config.hogwild_delay if epoch_idx == 0 and rank > 0 else 0,
                )
                for rank, s in enumerate(split_almost_equally(edge_perm.size(0),
                                                              num_parts=num_workers))
            ])
            all_stats = get_async_result(future_all_stats, pool)
            stats = Stats.sum(all_stats).average()
            compute_time = time.time() - tic

            bucket_logger.info(
                f"bucket {total_buckets - remaining} / {total_buckets} : "
                f"Processed {num_edges} edges in {compute_time:.2f} s "
                f"( {num_edges / compute_time / 1e6:.2g} M/sec ); "
                f"io: {io_time:.2f} s ( {io_bytes / io_time / 1e6:.2f} MB/sec )")
            bucket_logger.info(f"{stats}")

            # HOGWILD eval after training
            eval_stats_after: Optional[Stats] = None
            if num_eval_edges > 0:
                bucket_logger.debug("Waiting for workers to perform evaluation")
                future_all_eval_stats_after = pool.map_async(call, [
                    partial(
                        process_in_batches,
                        batch_size=eval_batch_size,
                        model=model,
                        batch_processor=evaluator,
                        edges=edges,
                        indices=eval_edge_perm[s],
                    )
                    for s in split_almost_equally(eval_edge_perm.size(0),
                                                  num_parts=num_workers)
                ])
                all_eval_stats_after = \
                    get_async_result(future_all_eval_stats_after, pool)
                eval_stats_after = Stats.sum(all_eval_stats_after).average()
                bucket_logger.info(f"Stats after training: {eval_stats_after}")

            # Add train/eval metrics to queue
            yield current_index, eval_stats_before, stats, eval_stats_after

        swap_partitioned_embeddings(cur_b, None)

        # Distributed Processing: all machines can leave the barrier now.
        sync.barrier()

        # Preserving a checkpoint requires two steps:
        # - create a snapshot (w/ symlinks) after it's first written;
        # - don't delete it once the following one is written.
        # These two happen in two successive iterations of the main loop: the
        # one just before and the one just after the epoch boundary.
        preserve_old_checkpoint = should_preserve_old_checkpoint(
            iteration_manager, config.checkpoint_preservation_interval)
        preserve_new_checkpoint = should_preserve_old_checkpoint(
            iteration_manager + 1, config.checkpoint_preservation_interval)

        # Write metadata: for multiple machines, write from rank-0
        logger.info(
            f"Finished epoch {epoch_idx + 1} / {iteration_manager.num_epochs}, "
            f"edge path {edge_path_idx + 1} / {iteration_manager.num_edge_paths}, "
            f"edge chunk {edge_chunk_idx + 1} / {iteration_manager.num_edge_chunks}")
        if rank == 0:
            for entity, econfig in config.entities.items():
                if econfig.num_partitions == 1:
                    embs = model.get_embeddings(entity, Side.LHS)
                    optimizer = trainer.entity_optimizers[(entity, Partition(0))]

                    checkpoint_manager.write(
                        entity, Partition(0),
                        embs.detach(), OptimizerStateDict(optimizer.state_dict()))

            sanitized_state_dict: ModuleStateDict = {}
            for k, v in ModuleStateDict(model.state_dict()).items():
                if k.startswith('lhs_embs') or k.startswith('rhs_embs'):
                    # skipping state that's an entity embedding
                    continue
                sanitized_state_dict[k] = v

            logger.info("Writing the metadata")
            checkpoint_manager.write_model(
                sanitized_state_dict,
                OptimizerStateDict(trainer.global_optimizer.state_dict()),
            )

        logger.info("Writing the checkpoint")
        checkpoint_manager.write_new_version(config)

        dist_logger.info("Waiting for other workers to write their parts of the checkpoint")
        sync.barrier()
        dist_logger.info("All parts of the checkpoint have been written")

        logger.info("Switching to the new checkpoint version")
        checkpoint_manager.switch_to_new_version()

        dist_logger.info("Waiting for other workers to switch to the new checkpoint version")
        sync.barrier()
        dist_logger.info("All workers have switched to the new checkpoint version")

        # After all the machines have finished committing the checkpoint,
        # we either remove the old checkpoint or preserve it.
        if preserve_new_checkpoint:
            # Add 1 so that the index is a multiple of the interval; it reads more nicely.
            checkpoint_manager.preserve_current_version(config, epoch_idx + 1)
        if not preserve_old_checkpoint:
            checkpoint_manager.remove_old_version(config)

        # now we're sure that all partition files exist,
        # so be strict about loading them
        strict = True

    # quiescence
    pool.close()
    pool.join()

    sync.barrier()

    checkpoint_manager.close()
    if loadpath_manager is not None:
        loadpath_manager.close()

    # FIXME join distributed workers (not really necessary)

    logger.info("Exiting")
Ejemplo n.º 25
    def __init__(
        self,
        root: Union[str, Path],
        *,
        download: bool = True,
        transform: Optional[ImageTform] = None,
        label_map: Optional[Dict[str, int]] = None,
        colors: Optional[List[int]] = None,
        num_colors: int = 10,
        scale: float = 0.2,
        correlation: Optional[float] = None,
        binarize: bool = False,
        greyscale: bool = False,
        background: bool = False,
        black: bool = True,
        split: Optional[Union[ColoredMNISTSplit, str]] = None,
        seed: Optional[int] = 42,
    ) -> None:
        self.split = (str_to_enum(str_=split, enum=ColoredMNISTSplit)
                      if isinstance(split, str) else split)
        self.label_map = label_map
        self.scale = scale
        self.num_colors = num_colors
        self.colors = colors
        self.background = background
        self.binarize = binarize
        self.black = black
        self.greyscale = greyscale
        self.seed = seed
        # Note: a correlation coefficient of 1 corresponds to perfect correlation between
        # colour and digit class, while 0 corresponds to no correlation (colour is assigned
        # at chance level); only values in [0, 1] are accepted below.
        if correlation is None:
            correlation = 1.0 if split is ColoredMNISTSplit.train else 0.5
        if not 0 <= correlation <= 1:
            raise ValueError(
                "Strength of correlation between colour and targets must be between 0 and 1."
            )
        self.correlation = correlation

        if self.split is None:
            x_ls, y_ls = [], []
            for _split in ColoredMNISTSplit:
                base_dataset = MNIST(root=str(root),
                                     download=download,
                                     train=_split is ColoredMNISTSplit.train)
                x_ls.append(base_dataset.data)
                y_ls.append(base_dataset.targets)
            x = torch.cat(x_ls, dim=0)
            y = torch.cat(y_ls, dim=0)
        else:
            base_dataset = MNIST(root=str(root),
                                 download=download,
                                 train=self.split is ColoredMNISTSplit.train)
            x = base_dataset.data
            y = base_dataset.targets

        if self.label_map is not None:
            x, y = _filter_data_by_labels(data=x,
                                          targets=y,
                                          label_map=self.label_map)
        s = y % self.num_colors
        s_unique, s_unique_inv = s.unique(return_inverse=True)

        generator = (torch.default_generator if self.seed is None else
                     torch.Generator().manual_seed(self.seed))
        inv_card_s = 1 / len(s_unique)
        if self.correlation < 1:
            flip_prop = self.correlation * (1.0 - inv_card_s) + inv_card_s
            # Change the values of randomly-selected labels to values other than their original ones
            num_to_flip = round((1 - flip_prop) * len(s))
            to_flip = torch.randperm(len(s), generator=generator)[:num_to_flip]
            s_unique_inv[to_flip] += torch.randint(low=1,
                                                   high=len(s_unique),
                                                   size=(num_to_flip, ),
                                                   generator=generator)
            # s labels live inside the Z/(num_colors * Z) ring
            s_unique_inv[to_flip] %= len(s_unique)
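            # Adding an offset in [1, len(s_unique) - 1] and wrapping around guarantees that
            # every flipped sample ends up with a colour different from its original one.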
            s = s_unique[s_unique_inv]

        # Convert the greyscale images of shape (H, W) into 'colour' images of shape (C, H, W)
        colorizer = MNISTColorizer(
            scale=self.scale,
            background=self.background,
            black=self.black,
            binarize=self.binarize,
            greyscale=self.greyscale,
            color_indices=self.colors,
            seed=self.seed,
        )
        x_colorized = colorizer(images=x, labels=s)
        # Convert to HWC format for compatibility with transforms
        x_colorized = x_colorized.movedim(1, -1).numpy().astype(np.uint8)

        super().__init__(x=x_colorized,
                         y=y,
                         s=s,
                         transform=transform,
                         image_dir=root)
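
The label-flipping logic above turns the correlation coefficient into a keep probability of correlation * (1 - 1/num_colors) + 1/num_colors, so a coefficient of 0 leaves the colour at chance level and 1 makes it fully predictive of the digit. A small, hedged sketch of that mapping, assuming the same flip rule; the helper name is illustrative.

import torch

def apply_spurious_correlation(labels: torch.Tensor, num_colors: int,
                               correlation: float, seed: int = 42) -> torch.Tensor:
    """Return colour indices that match labels % num_colors with probability roughly
    correlation + (1 - correlation) / num_colors (i.e. chance level when correlation is 0)."""
    g = torch.Generator().manual_seed(seed)
    colors = labels % num_colors
    keep_prob = correlation * (1.0 - 1.0 / num_colors) + 1.0 / num_colors
    num_to_flip = round((1.0 - keep_prob) * len(colors))
    to_flip = torch.randperm(len(colors), generator=g)[:num_to_flip]
    # A non-zero offset modulo num_colors guarantees flipped samples change colour.
    offsets = torch.randint(1, num_colors, (num_to_flip,), generator=g)
    colors[to_flip] = (colors[to_flip] + offsets) % num_colors
    return colors

# With correlation=1.0 every digit keeps its colour; with 0.0 the match rate is ~1/num_colors.
y = torch.randint(0, 10, (10_000,))
s = apply_spurious_correlation(y, num_colors=10, correlation=0.0)
print((s == y % 10).float().mean())  # ~0.1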
Ejemplo n.º 26
    def forward(cls, ctx, input_x, drop_rate=0.5, target_fraction=1.0, train=False, inplace=False, unit_test_mode=False):
        rand_gen = torch.Generator()
        if unit_test_mode:
            rand_gen.manual_seed(353)
        if drop_rate < 0 or drop_rate > 1:
            raise ValueError("dropout probability has to be between 0 and 1, "
                             "but got {}".format(drop_rate))
        if inplace:
            raise NotImplementedError("In place computations haven't been tested yet!")
        ctx.p = drop_rate
        ctx.train = train
        ctx.inplace = inplace
        ctx.input = input_x
        
        if ctx.inplace:
            ctx.mark_dirty(input_x)
            output = input_x
        else:
            output = input_x.clone()

        if ctx.p > 0 and ctx.train:
            ctx.noise = cls._make_noise(input_x)
            if ctx.p == 1:
                ctx.noise.fill_(0)
            else:
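                # Inverted dropout: keep each unit with probability 1 - p and scale the
                # survivors by 1 / (1 - p) so the expected activation is unchanged.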
                ctx.noise.bernoulli_(1 - ctx.p, generator=rand_gen).div_(1 - ctx.p)

            is_filter_map = False
            if input_x.dim() > 3:
                is_filter_map = True

            if target_fraction < 1.0:
                if is_filter_map:
                    input_shape = input_x.size()
                    batch_size = input_shape[0]
                    num_filters = input_shape[1]

                    input_flattened_abs = torch.norm(input_x.view([batch_size, num_filters, -1]), 2, dim=2)
                    feature_shape = input_flattened_abs.size()[1]

                    n_features_to_drop = int(feature_shape * target_fraction)
                    sorted_indices_per_row = torch.argsort(input_flattened_abs, dim=1)
                    nth_ranked_feature_value_per_row = \
                        input_flattened_abs.gather(1, sorted_indices_per_row)[:, n_features_to_drop].view([-1, 1])
                    targeting_mask = input_flattened_abs.lt(nth_ranked_feature_value_per_row)[:, :, None, None]  # .view(input_shape)
                    # print('targeting_mask', targeting_mask)
                    ctx.noise = ctx.noise.where(
                        targeting_mask, torch.tensor([1.0]).type(input_x.dtype).to(input_x.device))
                else:
                    input_shape = input_x.size()
                    batch_size = input_shape[0]
                    input_flattened_abs = torch.abs(input_x.view([batch_size, -1]))
                    feature_shape = input_flattened_abs.size()[1]

                    n_features_to_drop = int(feature_shape * target_fraction)
                    sorted_indices_per_row = torch.argsort(input_flattened_abs, dim=1)
                    nth_ranked_feature_value_per_row = \
                        input_flattened_abs.gather(1, sorted_indices_per_row)[:, n_features_to_drop].view([-1, 1])
                    targeting_mask = input_flattened_abs.lt(nth_ranked_feature_value_per_row).view(input_shape)
                    # print('targeting_mask', targeting_mask)
                    ctx.noise = ctx.noise.where(
                        targeting_mask, torch.tensor([1.0]).type(input_x.dtype).to(input_x.device))

            output.mul_(ctx.noise)
        return output
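
When target_fraction < 1, the branch above restricts dropout to the units (or filters) with the smallest magnitude and leaves everything else untouched. A hedged, standalone sketch of that targeted-dropout idea on a 2-D activation tensor follows; the function name and the row-wise threshold are illustrative simplifications, not the class above.

from typing import Optional

import torch

def targeted_dropout(x: torch.Tensor, drop_rate: float, target_fraction: float,
                     generator: Optional[torch.Generator] = None) -> torch.Tensor:
    """Drop units with probability drop_rate, but only among the target_fraction of
    units with the smallest |activation| in each row; survivors are rescaled."""
    noise = torch.empty_like(x).bernoulli_(1 - drop_rate, generator=generator)
    noise = noise / (1 - drop_rate)
    n_eligible = int(x.size(1) * target_fraction)
    if n_eligible < x.size(1):
        # Units at or above the n_eligible-th smallest magnitude are never dropped.
        threshold = x.abs().sort(dim=1).values[:, n_eligible].unsqueeze(1)
        eligible = x.abs() < threshold
        noise = torch.where(eligible, noise, torch.ones_like(noise))
    return x * noise

g = torch.Generator().manual_seed(0)
out = targeted_dropout(torch.randn(4, 16), drop_rate=0.5, target_fraction=0.25, generator=g)
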
    def __intialise_dataset(self):
        ############ Determine dataset ############
        if self.parameters.dataset == SelectableDatasets.BPI2012:
            self.dataset = XESDataset(
                device=self.device,
                file_path=EnviromentParameters.BPI2020Dataset.file_path,
                preprocessed_folder_path=EnviromentParameters.BPI2020Dataset.preprocessed_foldr_path,
                preprocessed_df_type=EnviromentParameters.BPI2020Dataset.preprocessed_df_type,
                include_types=self.parameters.bpi2012.BPI2012_include_types,
            )
        elif self.parameters.dataset == SelectableDatasets.Diabetes:
            self.feature_names = EnviromentParameters.DiabetesDataset.feature_names
            self.dataset = MedicalDataset(
                device=self.device,
                file_path=EnviromentParameters.DiabetesDataset.file_path,
                feature_names=EnviromentParameters.DiabetesDataset.feature_names,
                target_col_name=EnviromentParameters.DiabetesDataset.target_name
            )
        elif self.parameters.dataset == SelectableDatasets.Helpdesk:
            self.dataset = XESDataset(
                device=self.device,
                file_path=EnviromentParameters.HelpDeskDataset.file_path,
                preprocessed_folder_path=EnviromentParameters.HelpDeskDataset.preprocessed_foldr_path,
                preprocessed_df_type=EnviromentParameters.HelpDeskDataset.preprocessed_df_type,
            )
        elif self.parameters.dataset == SelectableDatasets.BreastCancer:
            self.feature_names = EnviromentParameters.BreastCancerDataset.feature_names
            self.dataset = MedicalDataset(
                device=self.device,
                file_path=EnviromentParameters.BreastCancerDataset.file_path,
                feature_names=EnviromentParameters.BreastCancerDataset.feature_names,
                target_col_name=EnviromentParameters.BreastCancerDataset.target_name
            )
        else:
            raise NotSupportedError("The selected dataset is not supported")

        # Create datasets
        # Lengths for each set
        train_dataset_len = int(
            len(self.dataset) * self.parameters.train_test_split_portion[0]
        )
        test_dataset_len = int(
            len(self.dataset) * self.parameters.train_test_split_portion[-1]
        )
        validation_dataset_len = len(self.dataset) - (
            train_dataset_len + test_dataset_len
        )

        # Split the dataset
        (
            self.train_dataset,
            self.validation_dataset,
            self.test_dataset,
        ) = torch.utils.data.random_split(
            dataset=self.dataset,
            lengths=[train_dataset_len,
                     validation_dataset_len, test_dataset_len],
            generator=torch.Generator().manual_seed(
                self.parameters.dataset_split_seed
            ),
        )

        # Initialise dataloaders
        self.train_data_loader = DataLoader(
            self.train_dataset,
            batch_size=self.parameters.batch_size,
            shuffle=self.train_dataset.dataset.get_train_shuffle(),
            collate_fn=self.dataset.collate_fn,
            sampler=self.train_dataset.dataset.get_sampler_from_df(
                self.train_dataset[:], self.parameters.dataset_split_seed),
            # num_workers=4,
            # worker_init_fn=lambda _: np.random.seed(int(torch.initial_seed()) % (2**32-1)),
        )
        self.validation_data_loader = DataLoader(
            self.validation_dataset,
            batch_size=self.parameters.batch_size,
            shuffle=True,
            collate_fn=self.dataset.collate_fn,
        )
        self.test_data_loader = DataLoader(
            self.test_dataset,
            batch_size=self.parameters.batch_size,
            shuffle=True,
            collate_fn=self.dataset.collate_fn,
        )
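
The split above computes the train and test lengths from fractions, gives the remainder to validation so the lengths sum exactly to the dataset size, and seeds the generator so the split is reproducible across runs. Below is a minimal sketch of that pattern; the helper name is illustrative.

import torch
from torch.utils.data import TensorDataset, random_split

def reproducible_split(dataset, train_frac: float, test_frac: float, seed: int):
    train_len = int(len(dataset) * train_frac)
    test_len = int(len(dataset) * test_frac)
    val_len = len(dataset) - train_len - test_len   # remainder, so the lengths sum exactly
    return random_split(dataset, [train_len, val_len, test_len],
                        generator=torch.Generator().manual_seed(seed))

ds = TensorDataset(torch.arange(100))
train_ds, val_ds, test_ds = reproducible_split(ds, train_frac=0.8, test_frac=0.1, seed=58)
assert [len(s) for s in (train_ds, val_ds, test_ds)] == [80, 10, 10]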
Ejemplo n.º 28
train_coefficients = hyperparam_conf['train_coefficients']

# %% mnist config
dataset_config = conf['mnist_config']
max_rate = dataset_config['max_rate']
use_transform = dataset_config['use_transform']

# %% transform config
if use_transform:
    rand_transform = get_rand_transform(conf['transform'])
else:
    rand_transform = None

# load mnist training dataset
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=rand_transform)
mnist_trainset, mnist_devset = random_split(mnist_trainset, [50000, 10000], generator=torch.Generator().manual_seed(42))
# load mnist test dataset
mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=None)

# acc file name
acc_file_name = experiment_name + '_' + conf['acc_file_name']

# %% define model
class mysnn(torch.nn.Module):
    def __init__(self):
        super().__init__()

        self.length = length
        self.batch_size = batch_size

        self.train_coefficients = train_coefficients
    def shuffle(self, epoch):
        # deterministically shuffle based on epoch
        g = torch.Generator()
        g.manual_seed(epoch)
        bin_ids = list(torch.randperm(len(self.bins), generator=g))
        self.bins = [self.bins[i] for i in bin_ids]
    def __init__(
        self,
        data_dir: str,
        val_split: float = 0.2,
        test_split: float = 0.1,
        num_workers: int = 16,
        batch_size: int = 32,
        seed: int = 42,
        *args,
        **kwargs,
    ):
        """
        Kitti train, validation and test dataloaders.

        Note:
            You need to have downloaded the Kitti dataset first and provide the path to where it is saved.
            You can download the dataset here:
            http://www.cvlibs.net/datasets/kitti/eval_semseg.php?benchmark=semantics2015

        Specs:
            - 200 samples
            - Each image is (3 x 1242 x 376)

        In total there are 34 classes, but some of them are not useful, so by default only the
        19 classes specified by the `valid_labels` parameter are used.

        Example::

            from pl_bolts.datamodules import KittiDataModule

            dm = KittiDataModule(PATH)
            model = LitModel()

            Trainer().fit(model, dm)

        Args:
            data_dir: path to the data, e.g. '/path/to/folder/with/data_semantics/'
            val_split: size of the validation set (default 0.2)
            test_split: size of the test set (default 0.1)
            num_workers: how many workers to use for loading data
            batch_size: the batch size
            seed: random seed to be used for train/val/test splits
        """
        if not _TORCHVISION_AVAILABLE:
            raise ModuleNotFoundError(  # pragma: no-cover
                'You want to use `torchvision` which is not installed yet.')

        super().__init__(*args, **kwargs)
        self.data_dir = data_dir if data_dir is not None else os.getcwd()
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.seed = seed

        self.default_transforms = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.35675976, 0.37380189, 0.3764753],
                                 std=[0.32064945, 0.32098866, 0.32325324])
        ])

        # split into train, val, test
        kitti_dataset = KittiDataset(self.data_dir,
                                     transform=self.default_transforms)

        val_len = round(val_split * len(kitti_dataset))
        test_len = round(test_split * len(kitti_dataset))
        train_len = len(kitti_dataset) - val_len - test_len

        self.trainset, self.valset, self.testset = random_split(
            kitti_dataset,
            lengths=[train_len, val_len, test_len],
            generator=torch.Generator().manual_seed(self.seed))
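
    # Hypothetical continuation (not shown in the snippet above): a LightningDataModule
    # normally exposes the splits through dataloader hooks roughly like these; this assumes
    # `from torch.utils.data import DataLoader` is imported alongside `random_split`.
    def train_dataloader(self):
        return DataLoader(self.trainset, batch_size=self.batch_size,
                          shuffle=True, num_workers=self.num_workers)

    def val_dataloader(self):
        return DataLoader(self.valset, batch_size=self.batch_size,
                          shuffle=False, num_workers=self.num_workers)

    def test_dataloader(self):
        return DataLoader(self.testset, batch_size=self.batch_size,
                          shuffle=False, num_workers=self.num_workers)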