Example #1
def get_pipeline_module(*args: Any, **kwargs: Any) -> PipelineModule:
    """Create pipeline module with correct topology type."""
    # Patch out .to() so constructing the module does not move parameters to a device.
    with mock.patch.object(PipelineModule, 'to', mock.MagicMock()):
        m = PipelineModule(*args, **kwargs)
    # Rebuild the topology: pipeline-parallel size = num_stages, data-parallel size =
    # world_size / num_stages, no tensor (model) parallelism.
    m._topo = PipeModelDataParallelTopology(
        num_pp=m.num_stages,
        num_dp=dist.get_world_size(m.world_group) // m.num_stages,
        num_mp=1,
    )
    return m
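A minimal usage sketch for this helper (the layer list, dimensions, and stage count are illustrative assumptions, not part of the original snippet; it presumes the process group is already initialized, e.g. via deepspeed.init_distributed(), so the world-size query works):

# Hypothetical call site; layer sizes and num_stages are assumptions for illustration.
layers = [torch.nn.Linear(16, 16) for _ in range(8)]
pipe = get_pipeline_module(layers=layers, num_stages=4)
assert pipe.num_stages == 4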
Example #2
def train_pipe(args, part='parameters'):
    torch.manual_seed(args.seed)
    deepspeed.runtime.utils.set_random_seed(args.seed)

    #
    # Build the model
    #

    # VGG also works :-)
    #net = vgg19(num_classes=10)
    net = AlexNet(num_classes=10)
    net = PipelineModule(layers=join_layers(net),
                         loss_fn=torch.nn.CrossEntropyLoss(),
                         num_stages=args.pipeline_parallel_size,
                         partition_method=part,
                         activation_checkpoint_interval=0)

    trainset = cifar_trainset(args.local_rank)

    engine, _, _, _ = deepspeed.initialize(
        args=args,
        model=net,
        model_parameters=[p for p in net.parameters() if p.requires_grad],
        training_data=trainset)

    for step in range(args.steps):
        loss = engine.train_batch()
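Several of these examples (#2, #4, #6) build the pipeline from join_layers(net) without showing that helper. A minimal sketch of what it typically does for torchvision's AlexNet, reconstructed here as an assumption rather than copied from the original source:

def join_layers(vision_model):
    # Flatten the model's sequential blocks into one flat list so PipelineModule
    # can partition the layers into stages; the lambda reproduces the flatten
    # step that AlexNet.forward performs between avgpool and the classifier.
    return [
        *vision_model.features,
        vision_model.avgpool,
        lambda x: torch.flatten(x, 1),
        *vision_model.classifier,
    ]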
Example #3
    def _helper():
        base_model = copy.deepcopy(sequential_model)
        base_input = batch_input.clone().detach()
        base_output = base_model(base_input)
        base_params = sum(p.numel() for p in base_model.parameters())

        pipe_model = copy.deepcopy(sequential_model)
        pipe_model = PipelineModule(layers=pipe_model, num_stages=4)

        # Ensure all parameters are accounted for.
        my_params = sum(p.numel() for p in pipe_model.parameters())
        total_pipe_params = torch.LongTensor([my_params]).to('cuda')
        dist.all_reduce(total_pipe_params)
        total_pipe_params = total_pipe_params.item()
        assert total_pipe_params == base_params

        pipe_model, _, _, _ = deepspeed.initialize(
            args=simple_args,
            model=pipe_model,
            model_parameters=[p for p in pipe_model.parameters()])

        if pipe_model.is_first_stage or pipe_model.is_last_stage:
            pipe_input = base_input.clone().detach().to('cuda')
            # label 0 is meaningless
            dataset = [(pipe_input, 0)]
            loader = RepeatingLoader(dataset)
            data_iter = iter(loader)
        else:
            data_iter = None

        pipe_output = pipe_model.eval_batch(data_iter=data_iter)

        base_output = base_output.to('cpu')
        pipe_output = pipe_output.to('cpu')

        assert torch.allclose(base_output, pipe_output)
Example #4
    def __init__(self, context: DeepSpeedTrialContext) -> None:
        self.context = context
        self.args = AttrDict(self.context.get_hparams())
        model = AlexNet(10)
        model = PipelineModule(
            layers=join_layers(model),
            loss_fn=torch.nn.CrossEntropyLoss(),
            num_stages=self.args.pipe_parallel_size,
            partition_method=self.args.part,
            activation_checkpoint_interval=0,
        )

        ds_config = overwrite_deepspeed_config(
            self.args.deepspeed_config,
            self.args.get("overwrite_deepspeed_args", {}))
        model_engine, optimizer, _, _ = deepspeed.initialize(
            args=self.args,
            model=model,
            model_parameters=[
                p for p in model.parameters() if p.requires_grad
            ],
            config=ds_config,
        )
        self.model_engine = self.context.wrap_model_engine(model_engine)
Example #5

else:
    model = nn.Sequential(
        ORTModule(nn.Linear(d_in, d_hidden).to(device)),     # Stage 1
        nn.ReLU().to(device),                                # ORTModule(nn.ReLU().to(device)), Stage 1, TODO: ORTModule can wrap Relu once stateless model is supported.
        ORTModule(nn.Linear(d_hidden, d_hidden).to(device)), # Stage 1
        nn.ReLU().to(device),                                # ORTModule(nn.ReLU().to(device)), Stage 1, TODO: ORTModule can wrap Relu once stateless model is supported.
        ORTModule(nn.Linear(d_hidden, d_hidden).to(device)), # Stage 2
        nn.ReLU().to(device),                                # ORTModule(nn.ReLU().to(device)), Stage 2, TODO: ORTModule can wrap Relu once stateless model is supported.
        ORTModule(nn.Linear(d_hidden, d_out).to(device))     # Stage 2
    )

model = PipelineModule(layers=model,
            loss_fn=torch.nn.CrossEntropyLoss(),
            num_stages=args.pipeline_parallel_size,
            partition_method='uniform', #'parameters',
            activation_checkpoint_interval=0
            )

params = [p for p in model.parameters() if p.requires_grad]

# Input.
x = torch.rand((n, d_in))
if args.fp16:
    x = x.half()
# Output.
y = torch.randint(0, d_out, (n,))
ds = SampleData(x,y)

print("Initialize deepspeed")
model_engine, optimizer, _, _ = deepspeed.initialize(args=args,
                                                      model=model,
                                                      model_parameters=params,
                                                      training_data=ds)
Example #6
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

trainset = torchvision.datasets.CIFAR10(root='./data',
                                        train=True,
                                        download=True,
                                        transform=transform)

deepspeed.init_distributed()
net = AlexNet(num_classes=10)
net = PipelineModule(layers=join_layers(net),
                     loss_fn=torch.nn.CrossEntropyLoss(),
                     num_stages=2,
                     partition_method="parameters",
                     activation_checkpoint_interval=0)

args = add_argument()
engine, optimizer, trainloader, __ = deepspeed.initialize(
    args=args,
    model=net,
    model_parameters=[p for p in net.parameters() if p.requires_grad],
    training_data=trainset)

for step in range(steps):
    loss = engine.train_batch()
    print(loss)
# deepspeed --hostfile=./hostfile model_parallel/deepspeed/tutorial.py --deepspeed --deepspeed_config model_parallel/deepspeed/ds_config.json
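The launch command above expects a ds_config.json that is not shown in the example. A minimal sketch of the kind of configuration these pipeline runs rely on, written as the equivalent Python dict (the values are illustrative assumptions; DeepSpeed checks that train_batch_size == train_micro_batch_size_per_gpu * gradient_accumulation_steps * data-parallel world size):

# Illustrative config only; serialize it to ds_config.json for --deepspeed_config,
# or pass it directly with deepspeed.initialize(..., config=ds_config).
ds_config = {
    "train_batch_size": 16,
    "train_micro_batch_size_per_gpu": 4,
    "steps_per_print": 10,
    "optimizer": {
        "type": "Adam",
        "params": {"lr": 0.001},
    },
}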