コード例 #1
0
    def testHogwildStability_RowAdagrad(self):
        NE = 10000
        model = nn.Embedding(NE, 100)
        optimizer = RowAdagrad(model.parameters())
        num_processes = mp.cpu_count() // 2 + 1
        self._stress_optimizer(model, optimizer, num_processes=num_processes)

        # This fails for Adagrad because it's not stable
        self.assertLess(model.weight.abs().max(), 1000)
コード例 #2
0
ファイル: train.py プロジェクト: yueyedeai/PyTorch-BigGraph
 def make_optimizer(params: Iterable[torch.nn.Parameter], is_emb: bool) -> Optimizer:
     params = list(params)
     if len(params) == 0:
         optimizer = DummyOptimizer()
     elif is_emb:
         optimizer = RowAdagrad(params, lr=config.lr)
     else:
         if config.relation_lr is not None:
             lr = config.relation_lr
         else:
             lr = config.lr
         optimizer = Adagrad(params, lr=lr)
     optimizer.share_memory()
     return optimizer
コード例 #3
0
    def do_one_job(  # noqa
        self,
        lhs_types: Set[str],
        rhs_types: Set[str],
        lhs_part: Partition,
        rhs_part: Partition,
        lhs_subpart: SubPartition,
        rhs_subpart: SubPartition,
        next_lhs_subpart: Optional[SubPartition],
        next_rhs_subpart: Optional[SubPartition],
        model: MultiRelationEmbedder,
        trainer: Trainer,
        all_embs: Dict[Tuple[EntityName, Partition], FloatTensorType],
        subpart_slices: Dict[Tuple[EntityName, Partition, SubPartition],
                             slice],
        subbuckets: Dict[Tuple[int, int], Tuple[LongTensorType, LongTensorType,
                                                LongTensorType]],
        batch_size: int,
        lr: float,
    ) -> Stats:
        tk = TimeKeeper()

        for embeddings in all_embs.values():
            assert embeddings.is_pinned()

        occurrences: Dict[Tuple[EntityName, Partition, SubPartition],
                          Set[Side]] = defaultdict(set)
        for entity_name in lhs_types:
            occurrences[entity_name, lhs_part, lhs_subpart].add(Side.LHS)
        for entity_name in rhs_types:
            occurrences[entity_name, rhs_part, rhs_subpart].add(Side.RHS)

        if lhs_part != rhs_part:  # Bipartite
            assert all(len(v) == 1 for v in occurrences.values())

        tk.start("copy_to_device")
        for entity_name, part, subpart in occurrences.keys():
            if (entity_name, part, subpart) in self.sub_holder:
                continue
            embeddings = all_embs[entity_name, part]
            optimizer = trainer.partitioned_optimizers[entity_name, part]
            subpart_slice = subpart_slices[entity_name, part, subpart]

            # TODO have two permanent storages on GPU and move stuff in and out
            # from them
            # logger.info(f"GPU #{self.gpu_idx} allocating {(subpart_slice.stop - subpart_slice.start) * embeddings.shape[1] * 4:,} bytes")
            gpu_embeddings = torch.empty(
                (subpart_slice.stop - subpart_slice.start,
                 embeddings.shape[1]),
                dtype=torch.float32,
                device=self.my_device,
            )
            gpu_embeddings.copy_(embeddings[subpart_slice], non_blocking=True)
            gpu_embeddings = torch.nn.Parameter(gpu_embeddings)
            gpu_optimizer = RowAdagrad([gpu_embeddings], lr=lr)
            (cpu_state, ) = optimizer.state.values()
            (gpu_state, ) = gpu_optimizer.state.values()
            # logger.info(f"GPU #{self.gpu_idx} allocating {(subpart_slice.stop - subpart_slice.start) * 4:,} bytes")
            gpu_state["sum"].copy_(cpu_state["sum"][subpart_slice],
                                   non_blocking=True)

            self.sub_holder[entity_name, part, subpart] = (
                gpu_embeddings,
                gpu_optimizer,
            )
        logger.debug(
            f"Time spent copying subparts to GPU: {tk.stop('copy_to_device'):.4f} s"
        )

        for (
            (entity_name, part, subpart),
            (gpu_embeddings, gpu_optimizer),
        ) in self.sub_holder.items():
            for side in occurrences[entity_name, part, subpart]:
                model.set_embeddings(entity_name, side, gpu_embeddings)
                trainer.partitioned_optimizers[entity_name, part,
                                               subpart] = gpu_optimizer

        tk.start("translate_edges")
        num_edges = subbuckets[lhs_subpart, rhs_subpart][0].shape[0]
        edge_perm = torch.randperm(num_edges)
        edges_lhs, edges_rhs, edges_rel = subbuckets[lhs_subpart, rhs_subpart]
        _C.shuffle(edges_lhs, edge_perm, os.cpu_count())
        _C.shuffle(edges_rhs, edge_perm, os.cpu_count())
        _C.shuffle(edges_rel, edge_perm, os.cpu_count())
        assert edges_lhs.is_pinned()
        assert edges_rhs.is_pinned()
        assert edges_rel.is_pinned()
        gpu_edges = EdgeList(
            EntityList.from_tensor(edges_lhs),
            EntityList.from_tensor(edges_rhs),
            edges_rel,
        ).to(self.my_device, non_blocking=True)
        logger.debug(f"GPU #{self.gpu_idx} got {num_edges} edges")
        logger.debug(
            f"Time spent copying edges to GPU: {tk.stop('translate_edges'):.4f} s"
        )

        tk.start("processing")
        stats = process_in_batches(batch_size=batch_size,
                                   model=model,
                                   batch_processor=trainer,
                                   edges=gpu_edges)
        logger.debug(f"Time spent processing: {tk.stop('processing'):.4f} s")

        next_occurrences: Dict[Tuple[EntityName, Partition, SubPartition],
                               Set[Side]] = defaultdict(set)
        if next_lhs_subpart is not None:
            for entity_name in lhs_types:
                next_occurrences[entity_name, lhs_part,
                                 next_lhs_subpart].add(Side.LHS)
        if next_rhs_subpart is not None:
            for entity_name in rhs_types:
                next_occurrences[entity_name, rhs_part,
                                 next_rhs_subpart].add(Side.RHS)

        tk.start("copy_from_device")
        for (entity_name, part,
             subpart), (gpu_embeddings,
                        gpu_optimizer) in list(self.sub_holder.items()):
            if (entity_name, part, subpart) in next_occurrences:
                continue
            embeddings = all_embs[entity_name, part]
            optimizer = trainer.partitioned_optimizers[entity_name, part]
            subpart_slice = subpart_slices[entity_name, part, subpart]

            embeddings[subpart_slice].data.copy_(gpu_embeddings.detach(),
                                                 non_blocking=True)
            del gpu_embeddings
            (cpu_state, ) = optimizer.state.values()
            (gpu_state, ) = gpu_optimizer.state.values()
            cpu_state["sum"][subpart_slice].copy_(gpu_state["sum"],
                                                  non_blocking=True)
            del gpu_state["sum"]
            del self.sub_holder[entity_name, part, subpart]
        logger.debug(
            f"Time spent copying subparts from GPU: {tk.stop('copy_from_device'):.4f} s"
        )

        logger.debug(
            f"do_one_job: Time unaccounted for: {tk.unaccounted():.4f} s")

        return stats